xref: /titanic_53/usr/src/cmd/zoneadmd/vplat.c (revision 52782930452a219339a03491668e436bcef2efd9)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
21ffbafc53Scomay 
227c478bd9Sstevel@tonic-gate /*
23ea8dc4b6Seschrock  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * This module contains functions used to bring up and tear down the
317c478bd9Sstevel@tonic-gate  * Virtual Platform: [un]mounting file-systems, [un]plumbing network
327c478bd9Sstevel@tonic-gate  * interfaces, [un]configuring devices, establishing resource controls,
337c478bd9Sstevel@tonic-gate  * and creating/destroying the zone in the kernel.  These actions, on
347c478bd9Sstevel@tonic-gate  * the way up, ready the zone; on the way down, they halt the zone.
357c478bd9Sstevel@tonic-gate  * See the much longer block comment at the beginning of zoneadmd.c
367c478bd9Sstevel@tonic-gate  * for a bigger picture of how the whole program functions.
37108322fbScarlsonj  *
38108322fbScarlsonj  * This module also has primary responsibility for the layout of "scratch
39108322fbScarlsonj  * zones."  These are mounted, but inactive, zones that are used during
40108322fbScarlsonj  * operating system upgrade and potentially other administrative action.  The
41108322fbScarlsonj  * scratch zone environment is similar to the miniroot environment.  The zone's
42108322fbScarlsonj  * actual root is mounted read-write on /a, and the standard paths (/usr,
43108322fbScarlsonj  * /sbin, /lib) all lead to read-only copies of the running system's binaries.
44108322fbScarlsonj  * This allows the administrative tools to manipulate the zone using "-R /a"
45108322fbScarlsonj  * without relying on any binaries in the zone itself.
46108322fbScarlsonj  *
47108322fbScarlsonj  * If the scratch zone is on an alternate root (Live Upgrade [LU] boot
48108322fbScarlsonj  * environment), then we must resolve the lofs mounts used there to uncover
49108322fbScarlsonj  * writable (unshared) resources.  Shared resources, though, are always
50108322fbScarlsonj  * read-only.  In addition, if the "same" zone with a different root path is
51108322fbScarlsonj  * currently running, then "/b" inside the zone points to the running zone's
52108322fbScarlsonj  * root.  This allows LU to synchronize configuration files during the upgrade
53108322fbScarlsonj  * process.
54108322fbScarlsonj  *
55108322fbScarlsonj  * To construct this environment, this module creates a tmpfs mount on
56108322fbScarlsonj  * $ZONEPATH/lu.  Inside this scratch area, the miniroot-like environment as
57108322fbScarlsonj  * described above is constructed on the fly.  The zone is then created using
58108322fbScarlsonj  * $ZONEPATH/lu as the root.
59108322fbScarlsonj  *
60108322fbScarlsonj  * Note that scratch zones are inactive.  The zone's bits are not running and
61108322fbScarlsonj  * likely cannot be run correctly until upgrade is done.  Init is not running
62108322fbScarlsonj  * there, nor is SMF.  Because of this, the "mounted" state of a scratch zone
63108322fbScarlsonj  * is not a part of the usual halt/ready/boot state machine.
647c478bd9Sstevel@tonic-gate  */
657c478bd9Sstevel@tonic-gate 
667c478bd9Sstevel@tonic-gate #include <sys/param.h>
677c478bd9Sstevel@tonic-gate #include <sys/mount.h>
687c478bd9Sstevel@tonic-gate #include <sys/mntent.h>
697c478bd9Sstevel@tonic-gate #include <sys/socket.h>
707c478bd9Sstevel@tonic-gate #include <sys/utsname.h>
717c478bd9Sstevel@tonic-gate #include <sys/types.h>
727c478bd9Sstevel@tonic-gate #include <sys/stat.h>
737c478bd9Sstevel@tonic-gate #include <sys/sockio.h>
747c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
757c478bd9Sstevel@tonic-gate #include <sys/conf.h>
767c478bd9Sstevel@tonic-gate 
777c478bd9Sstevel@tonic-gate #include <inet/tcp.h>
787c478bd9Sstevel@tonic-gate #include <arpa/inet.h>
797c478bd9Sstevel@tonic-gate #include <netinet/in.h>
807c478bd9Sstevel@tonic-gate #include <net/route.h>
817c478bd9Sstevel@tonic-gate 
827c478bd9Sstevel@tonic-gate #include <stdio.h>
837c478bd9Sstevel@tonic-gate #include <errno.h>
847c478bd9Sstevel@tonic-gate #include <fcntl.h>
857c478bd9Sstevel@tonic-gate #include <unistd.h>
867c478bd9Sstevel@tonic-gate #include <rctl.h>
877c478bd9Sstevel@tonic-gate #include <stdlib.h>
887c478bd9Sstevel@tonic-gate #include <string.h>
897c478bd9Sstevel@tonic-gate #include <strings.h>
907c478bd9Sstevel@tonic-gate #include <wait.h>
917c478bd9Sstevel@tonic-gate #include <limits.h>
927c478bd9Sstevel@tonic-gate #include <libgen.h>
93fa9e4066Sahrens #include <libzfs.h>
94facf4a8dSllai1 #include <libdevinfo.h>
957c478bd9Sstevel@tonic-gate #include <zone.h>
967c478bd9Sstevel@tonic-gate #include <assert.h>
97555afedfScarlsonj #include <libcontract.h>
98555afedfScarlsonj #include <libcontract_priv.h>
99555afedfScarlsonj #include <uuid/uuid.h>
1007c478bd9Sstevel@tonic-gate 
1017c478bd9Sstevel@tonic-gate #include <sys/mntio.h>
1027c478bd9Sstevel@tonic-gate #include <sys/mnttab.h>
1037c478bd9Sstevel@tonic-gate #include <sys/fs/autofs.h>	/* for _autofssys() */
1047c478bd9Sstevel@tonic-gate #include <sys/fs/lofs_info.h>
105fa9e4066Sahrens #include <sys/fs/zfs.h>
1067c478bd9Sstevel@tonic-gate 
1077c478bd9Sstevel@tonic-gate #include <pool.h>
1087c478bd9Sstevel@tonic-gate #include <sys/pool.h>
1097c478bd9Sstevel@tonic-gate 
1107c478bd9Sstevel@tonic-gate #include <libzonecfg.h>
11139d3e169Sevanl #include <synch.h>
11222321485Svp157776 
1137c478bd9Sstevel@tonic-gate #include "zoneadmd.h"
11445916cd2Sjpk #include <tsol/label.h>
11545916cd2Sjpk #include <libtsnet.h>
11645916cd2Sjpk #include <sys/priv.h>
1177c478bd9Sstevel@tonic-gate 
/* Lengths, in bits, of IPv4 and IPv6 addresses. */
#define	V4_ADDR_LEN	32
#define	V6_ADDR_LEN	128

/* 0755 (rwxr-xr-x) is the default mode for directories created here. */
#define	DEFAULT_DIR_MODE \
	(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)

/*
 * Mount options used for inherit-package-dir resources; also applied when a
 * duplicate mount is converted into a lofs mount.
 */
#define	IPD_DEFAULT_OPTS \
	MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES

/* File listing the distributed (remote) file system types, one per line. */
#define	DFSTYPES	"/etc/dfs/fstypes"

/* NOTE(review): presumably a Trusted Extensions buffer size -- confirm. */
#define	MAXTNZLEN	2048
1307c478bd9Sstevel@tonic-gate 
/*
 * This is the set of directories and devices (relative to <zone_root>/dev)
 * which must be present in every zone.  Users can augment this list with
 * additional device rules in their zone configuration, but at present cannot
 * remove any of this set of standard devices.
 *
 * The list is NULL-terminated.  Entries ending in a wildcard name every node
 * below the given directory.
 */
static const char *standard_devs[] = {
	"arp",
	"conslog",
	"cpu/self/cpuid",
	"crypto",
	"cryptoadm",
	"dsk",
	"dtrace/*",
	"dtrace/provider/*",
	"fd",
	"kstat",
	"lo0",
	"lo1",
	"lo2",
	"lo3",
	"log",
	"logindmux",
	"null",
#ifdef __sparc
	/* Only present on SPARC builds. */
	"openprom",
#endif
	"poll",
	"pool",
	"ptmx",
	"pts/*",
	"random",
	"rdsk",
	"rmt",
	"sad/user",
	"swap",
	"sysevent",
	"tcp",
	"tcp6",
	"term",
	"ticlts",
	"ticots",
	"ticotsord",
	"tty",
	"udp",
	"udp6",
	"urandom",
	"zero",
	"zfs",
	NULL	/* end of list */
};
1827c478bd9Sstevel@tonic-gate 
/* One symlink description: "source" is the link name, "target" its contents. */
struct source_target {
	const char *source;
	const char *target;
};

/*
 * Set of symlinks (relative to <zone_root>/dev) which must be present in
 * every zone.  The table ends with an entry whose members are both NULL.
 */
static struct source_target standard_devlinks[] = {
	/* standard I/O descriptors */
	{ "stderr",	"./fd/2" },
	{ "stdin",	"./fd/0" },
	{ "stdout",	"./fd/1" },

	{ "dtremote",	"/dev/null" },

	/* every console-like name resolves to the zone console */
	{ "console",	"zconsole" },
	{ "syscon",	"zconsole" },
	{ "sysmsg",	"zconsole" },
	{ "systty",	"zconsole" },
	{ "msglog",	"zconsole" },

	{ NULL, NULL }
};
2047c478bd9Sstevel@tonic-gate 
205facf4a8dSllai1 static int vplat_mount_dev(zlog_t *);
2067c478bd9Sstevel@tonic-gate 
2077c478bd9Sstevel@tonic-gate /* for routing socket */
2087c478bd9Sstevel@tonic-gate static int rts_seqno = 0;
2097c478bd9Sstevel@tonic-gate 
210108322fbScarlsonj /* mangled zone name when mounting in an alternate root environment */
211108322fbScarlsonj static char kernzone[ZONENAME_MAX];
212108322fbScarlsonj 
213108322fbScarlsonj /* array of cached mount entries for resolve_lofs */
214108322fbScarlsonj static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
215108322fbScarlsonj 
21645916cd2Sjpk /* for Trusted Extensions */
21745916cd2Sjpk static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *);
21845916cd2Sjpk static int tsol_mounts(zlog_t *, char *, char *);
21945916cd2Sjpk static void tsol_unmounts(zlog_t *, char *);
22045916cd2Sjpk static m_label_t *zlabel = NULL;
22145916cd2Sjpk static m_label_t *zid_label = NULL;
22245916cd2Sjpk static priv_set_t *zprivs = NULL;
22345916cd2Sjpk 
2247c478bd9Sstevel@tonic-gate /* from libsocket, not in any header file */
2257c478bd9Sstevel@tonic-gate extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
2267c478bd9Sstevel@tonic-gate 
/*
 * An optimization for build_mnttable: the array grows by this many entries
 * at a time, so it is reallocated (and potentially copied) only once every
 * MNTTAB_HUNK matching mount entries.
 */
#define	MNTTAB_HUNK	32

/*
 * Private autofs system call; declared here because it is not exported
 * through a prototype in the headers this file includes.
 */
extern int _autofssys(int, void *);
2377c478bd9Sstevel@tonic-gate 
2387c478bd9Sstevel@tonic-gate static int
2397c478bd9Sstevel@tonic-gate autofs_cleanup(zoneid_t zoneid)
2407c478bd9Sstevel@tonic-gate {
2417c478bd9Sstevel@tonic-gate 	/*
2427c478bd9Sstevel@tonic-gate 	 * Ask autofs to unmount all trigger nodes in the given zone.
2437c478bd9Sstevel@tonic-gate 	 */
2447c478bd9Sstevel@tonic-gate 	return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid));
2457c478bd9Sstevel@tonic-gate }
2467c478bd9Sstevel@tonic-gate 
247108322fbScarlsonj static void
248108322fbScarlsonj free_mnttable(struct mnttab *mnt_array, uint_t nelem)
249108322fbScarlsonj {
250108322fbScarlsonj 	uint_t i;
251108322fbScarlsonj 
252108322fbScarlsonj 	if (mnt_array == NULL)
253108322fbScarlsonj 		return;
254108322fbScarlsonj 	for (i = 0; i < nelem; i++) {
255108322fbScarlsonj 		free(mnt_array[i].mnt_mountp);
256108322fbScarlsonj 		free(mnt_array[i].mnt_fstype);
257108322fbScarlsonj 		free(mnt_array[i].mnt_special);
258108322fbScarlsonj 		free(mnt_array[i].mnt_mntopts);
259108322fbScarlsonj 		assert(mnt_array[i].mnt_time == NULL);
260108322fbScarlsonj 	}
261108322fbScarlsonj 	free(mnt_array);
262108322fbScarlsonj }
263108322fbScarlsonj 
264108322fbScarlsonj /*
265108322fbScarlsonj  * Build the mount table for the zone rooted at "zroot", storing the resulting
266108322fbScarlsonj  * array of struct mnttabs in "mnt_arrayp" and the number of elements in the
267108322fbScarlsonj  * array in "nelemp".
268108322fbScarlsonj  */
269108322fbScarlsonj static int
270108322fbScarlsonj build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
271108322fbScarlsonj     struct mnttab **mnt_arrayp, uint_t *nelemp)
272108322fbScarlsonj {
273108322fbScarlsonj 	struct mnttab mnt;
274108322fbScarlsonj 	struct mnttab *mnts;
275108322fbScarlsonj 	struct mnttab *mnp;
276108322fbScarlsonj 	uint_t nmnt;
277108322fbScarlsonj 
278108322fbScarlsonj 	rewind(mnttab);
279108322fbScarlsonj 	resetmnttab(mnttab);
280108322fbScarlsonj 	nmnt = 0;
281108322fbScarlsonj 	mnts = NULL;
282108322fbScarlsonj 	while (getmntent(mnttab, &mnt) == 0) {
283108322fbScarlsonj 		struct mnttab *tmp_array;
284108322fbScarlsonj 
285108322fbScarlsonj 		if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0)
286108322fbScarlsonj 			continue;
287108322fbScarlsonj 		if (nmnt % MNTTAB_HUNK == 0) {
288108322fbScarlsonj 			tmp_array = realloc(mnts,
289108322fbScarlsonj 			    (nmnt + MNTTAB_HUNK) * sizeof (*mnts));
290108322fbScarlsonj 			if (tmp_array == NULL) {
291108322fbScarlsonj 				free_mnttable(mnts, nmnt);
292108322fbScarlsonj 				return (-1);
293108322fbScarlsonj 			}
294108322fbScarlsonj 			mnts = tmp_array;
295108322fbScarlsonj 		}
296108322fbScarlsonj 		mnp = &mnts[nmnt++];
297108322fbScarlsonj 
298108322fbScarlsonj 		/*
299108322fbScarlsonj 		 * Zero out any fields we're not using.
300108322fbScarlsonj 		 */
301108322fbScarlsonj 		(void) memset(mnp, 0, sizeof (*mnp));
302108322fbScarlsonj 
303108322fbScarlsonj 		if (mnt.mnt_special != NULL)
304108322fbScarlsonj 			mnp->mnt_special = strdup(mnt.mnt_special);
305108322fbScarlsonj 		if (mnt.mnt_mntopts != NULL)
306108322fbScarlsonj 			mnp->mnt_mntopts = strdup(mnt.mnt_mntopts);
307108322fbScarlsonj 		mnp->mnt_mountp = strdup(mnt.mnt_mountp);
308108322fbScarlsonj 		mnp->mnt_fstype = strdup(mnt.mnt_fstype);
309108322fbScarlsonj 		if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) ||
310108322fbScarlsonj 		    (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) ||
311108322fbScarlsonj 		    mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) {
312108322fbScarlsonj 			zerror(zlogp, B_TRUE, "memory allocation failed");
313108322fbScarlsonj 			free_mnttable(mnts, nmnt);
314108322fbScarlsonj 			return (-1);
315108322fbScarlsonj 		}
316108322fbScarlsonj 	}
317108322fbScarlsonj 	*mnt_arrayp = mnts;
318108322fbScarlsonj 	*nelemp = nmnt;
319108322fbScarlsonj 	return (0);
320108322fbScarlsonj }
321108322fbScarlsonj 
322108322fbScarlsonj /*
323108322fbScarlsonj  * This is an optimization.  The resolve_lofs function is used quite frequently
324108322fbScarlsonj  * to manipulate file paths, and on a machine with a large number of zones,
325108322fbScarlsonj  * there will be a huge number of mounted file systems.  Thus, we trigger a
326108322fbScarlsonj  * reread of the list of mount points
327108322fbScarlsonj  */
328108322fbScarlsonj static void
329108322fbScarlsonj lofs_discard_mnttab(void)
330108322fbScarlsonj {
331108322fbScarlsonj 	free_mnttable(resolve_lofs_mnts,
332108322fbScarlsonj 	    resolve_lofs_mnt_max - resolve_lofs_mnts);
333108322fbScarlsonj 	resolve_lofs_mnts = resolve_lofs_mnt_max = NULL;
334108322fbScarlsonj }
335108322fbScarlsonj 
336108322fbScarlsonj static int
337108322fbScarlsonj lofs_read_mnttab(zlog_t *zlogp)
338108322fbScarlsonj {
339108322fbScarlsonj 	FILE *mnttab;
340108322fbScarlsonj 	uint_t nmnts;
341108322fbScarlsonj 
342108322fbScarlsonj 	if ((mnttab = fopen(MNTTAB, "r")) == NULL)
343108322fbScarlsonj 		return (-1);
344108322fbScarlsonj 	if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts,
345108322fbScarlsonj 	    &nmnts) == -1) {
346108322fbScarlsonj 		(void) fclose(mnttab);
347108322fbScarlsonj 		return (-1);
348108322fbScarlsonj 	}
349108322fbScarlsonj 	(void) fclose(mnttab);
350108322fbScarlsonj 	resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts;
351108322fbScarlsonj 	return (0);
352108322fbScarlsonj }
353108322fbScarlsonj 
354108322fbScarlsonj /*
355108322fbScarlsonj  * This function loops over potential loopback mounts and symlinks in a given
356108322fbScarlsonj  * path and resolves them all down to an absolute path.
357108322fbScarlsonj  */
358108322fbScarlsonj static void
359108322fbScarlsonj resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen)
360108322fbScarlsonj {
361108322fbScarlsonj 	int len, arlen;
362108322fbScarlsonj 	const char *altroot;
363108322fbScarlsonj 	char tmppath[MAXPATHLEN];
364108322fbScarlsonj 	boolean_t outside_altroot;
365108322fbScarlsonj 
366108322fbScarlsonj 	if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1)
367108322fbScarlsonj 		return;
368108322fbScarlsonj 	tmppath[len] = '\0';
369108322fbScarlsonj 	(void) strlcpy(path, tmppath, sizeof (tmppath));
370108322fbScarlsonj 
371108322fbScarlsonj 	/* This happens once per zoneadmd operation. */
372108322fbScarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
373108322fbScarlsonj 		return;
374108322fbScarlsonj 
375108322fbScarlsonj 	altroot = zonecfg_get_root();
376108322fbScarlsonj 	arlen = strlen(altroot);
377108322fbScarlsonj 	outside_altroot = B_FALSE;
378108322fbScarlsonj 	for (;;) {
379108322fbScarlsonj 		struct mnttab *mnp;
380108322fbScarlsonj 
381108322fbScarlsonj 		for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
382108322fbScarlsonj 		    mnp++) {
383108322fbScarlsonj 			if (mnp->mnt_fstype == NULL ||
384108322fbScarlsonj 			    mnp->mnt_mountp == NULL ||
385108322fbScarlsonj 			    mnp->mnt_special == NULL ||
386108322fbScarlsonj 			    strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0)
387108322fbScarlsonj 				continue;
388108322fbScarlsonj 			len = strlen(mnp->mnt_mountp);
389108322fbScarlsonj 			if (strncmp(mnp->mnt_mountp, path, len) == 0 &&
390108322fbScarlsonj 			    (path[len] == '/' || path[len] == '\0'))
391108322fbScarlsonj 				break;
392108322fbScarlsonj 		}
393108322fbScarlsonj 		if (mnp >= resolve_lofs_mnt_max)
394108322fbScarlsonj 			break;
395108322fbScarlsonj 		if (outside_altroot) {
396108322fbScarlsonj 			char *cp;
397108322fbScarlsonj 			int olen = sizeof (MNTOPT_RO) - 1;
398108322fbScarlsonj 
399108322fbScarlsonj 			/*
400108322fbScarlsonj 			 * If we run into a read-only mount outside of the
401108322fbScarlsonj 			 * alternate root environment, then the user doesn't
402108322fbScarlsonj 			 * want this path to be made read-write.
403108322fbScarlsonj 			 */
404108322fbScarlsonj 			if (mnp->mnt_mntopts != NULL &&
405108322fbScarlsonj 			    (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) !=
406108322fbScarlsonj 			    NULL &&
407108322fbScarlsonj 			    (cp == mnp->mnt_mntopts || cp[-1] == ',') &&
408108322fbScarlsonj 			    (cp[olen] == '\0' || cp[olen] == ',')) {
409108322fbScarlsonj 				break;
410108322fbScarlsonj 			}
411108322fbScarlsonj 		} else if (arlen > 0 &&
412108322fbScarlsonj 		    (strncmp(mnp->mnt_special, altroot, arlen) != 0 ||
413108322fbScarlsonj 		    (mnp->mnt_special[arlen] != '\0' &&
414108322fbScarlsonj 		    mnp->mnt_special[arlen] != '/'))) {
415108322fbScarlsonj 			outside_altroot = B_TRUE;
416108322fbScarlsonj 		}
417108322fbScarlsonj 		/* use temporary buffer because new path might be longer */
418108322fbScarlsonj 		(void) snprintf(tmppath, sizeof (tmppath), "%s%s",
419108322fbScarlsonj 		    mnp->mnt_special, path + len);
420108322fbScarlsonj 		if ((len = resolvepath(tmppath, path, pathlen)) == -1)
421108322fbScarlsonj 			break;
422108322fbScarlsonj 		path[len] = '\0';
423108322fbScarlsonj 	}
424108322fbScarlsonj }
425108322fbScarlsonj 
426108322fbScarlsonj /*
427108322fbScarlsonj  * For a regular mount, check if a replacement lofs mount is needed because the
428108322fbScarlsonj  * referenced device is already mounted somewhere.
429108322fbScarlsonj  */
430108322fbScarlsonj static int
431108322fbScarlsonj check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr)
432108322fbScarlsonj {
433108322fbScarlsonj 	struct mnttab *mnp;
434108322fbScarlsonj 	zone_fsopt_t *optptr, *onext;
435108322fbScarlsonj 
436108322fbScarlsonj 	/* This happens once per zoneadmd operation. */
437108322fbScarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
438108322fbScarlsonj 		return (-1);
439108322fbScarlsonj 
440108322fbScarlsonj 	/*
441108322fbScarlsonj 	 * If this special node isn't already in use, then it's ours alone;
442108322fbScarlsonj 	 * no need to worry about conflicting mounts.
443108322fbScarlsonj 	 */
444108322fbScarlsonj 	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
445108322fbScarlsonj 	    mnp++) {
446108322fbScarlsonj 		if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0)
447108322fbScarlsonj 			break;
448108322fbScarlsonj 	}
449108322fbScarlsonj 	if (mnp >= resolve_lofs_mnt_max)
450108322fbScarlsonj 		return (0);
451108322fbScarlsonj 
452108322fbScarlsonj 	/*
453108322fbScarlsonj 	 * Convert this duplicate mount into a lofs mount.
454108322fbScarlsonj 	 */
455108322fbScarlsonj 	(void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp,
456108322fbScarlsonj 	    sizeof (fsptr->zone_fs_special));
457108322fbScarlsonj 	(void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS,
458108322fbScarlsonj 	    sizeof (fsptr->zone_fs_type));
459108322fbScarlsonj 	fsptr->zone_fs_raw[0] = '\0';
460108322fbScarlsonj 
461108322fbScarlsonj 	/*
462108322fbScarlsonj 	 * Discard all but one of the original options and set that to be the
463108322fbScarlsonj 	 * same set of options used for inherit package directory resources.
464108322fbScarlsonj 	 */
465108322fbScarlsonj 	optptr = fsptr->zone_fs_options;
466108322fbScarlsonj 	if (optptr == NULL) {
467108322fbScarlsonj 		optptr = malloc(sizeof (*optptr));
468108322fbScarlsonj 		if (optptr == NULL) {
469108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s",
470108322fbScarlsonj 			    fsptr->zone_fs_dir);
471108322fbScarlsonj 			return (-1);
472108322fbScarlsonj 		}
473108322fbScarlsonj 	} else {
474108322fbScarlsonj 		while ((onext = optptr->zone_fsopt_next) != NULL) {
475108322fbScarlsonj 			optptr->zone_fsopt_next = onext->zone_fsopt_next;
476108322fbScarlsonj 			free(onext);
477108322fbScarlsonj 		}
478108322fbScarlsonj 	}
479108322fbScarlsonj 	(void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS);
480108322fbScarlsonj 	optptr->zone_fsopt_next = NULL;
481108322fbScarlsonj 	fsptr->zone_fs_options = optptr;
482108322fbScarlsonj 	return (0);
483108322fbScarlsonj }
484108322fbScarlsonj 
4857c478bd9Sstevel@tonic-gate static int
4867c478bd9Sstevel@tonic-gate make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode)
4877c478bd9Sstevel@tonic-gate {
4887c478bd9Sstevel@tonic-gate 	char path[MAXPATHLEN];
4897c478bd9Sstevel@tonic-gate 	struct stat st;
4907c478bd9Sstevel@tonic-gate 
4917c478bd9Sstevel@tonic-gate 	if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) >
4927c478bd9Sstevel@tonic-gate 	    sizeof (path)) {
4937c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix,
4947c478bd9Sstevel@tonic-gate 		    subdir);
4957c478bd9Sstevel@tonic-gate 		return (-1);
4967c478bd9Sstevel@tonic-gate 	}
4977c478bd9Sstevel@tonic-gate 
4987c478bd9Sstevel@tonic-gate 	if (lstat(path, &st) == 0) {
4997c478bd9Sstevel@tonic-gate 		/*
5007c478bd9Sstevel@tonic-gate 		 * We don't check the file mode since presumably the zone
5017c478bd9Sstevel@tonic-gate 		 * administrator may have had good reason to change the mode,
5027c478bd9Sstevel@tonic-gate 		 * and we don't need to second guess him.
5037c478bd9Sstevel@tonic-gate 		 */
5047c478bd9Sstevel@tonic-gate 		if (!S_ISDIR(st.st_mode)) {
50545916cd2Sjpk 			if (is_system_labeled() &&
50645916cd2Sjpk 			    S_ISREG(st.st_mode)) {
50745916cd2Sjpk 				/*
50845916cd2Sjpk 				 * The need to mount readonly copies of
50945916cd2Sjpk 				 * global zone /etc/ files is unique to
51045916cd2Sjpk 				 * Trusted Extensions.
51145916cd2Sjpk 				 */
51245916cd2Sjpk 				if (strncmp(subdir, "/etc/",
51345916cd2Sjpk 				    strlen("/etc/")) != 0) {
51445916cd2Sjpk 					zerror(zlogp, B_FALSE,
51545916cd2Sjpk 					    "%s is not in /etc", path);
5167c478bd9Sstevel@tonic-gate 					return (-1);
5177c478bd9Sstevel@tonic-gate 				}
51845916cd2Sjpk 			} else {
51945916cd2Sjpk 				zerror(zlogp, B_FALSE,
52045916cd2Sjpk 				    "%s is not a directory", path);
52145916cd2Sjpk 				return (-1);
52245916cd2Sjpk 			}
52345916cd2Sjpk 		}
5247c478bd9Sstevel@tonic-gate 	} else if (mkdirp(path, mode) != 0) {
5257c478bd9Sstevel@tonic-gate 		if (errno == EROFS)
5267c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on "
5277c478bd9Sstevel@tonic-gate 			    "a read-only file system in this local zone.\nMake "
5287c478bd9Sstevel@tonic-gate 			    "sure %s exists in the global zone.", path, subdir);
5297c478bd9Sstevel@tonic-gate 		else
5307c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "mkdirp of %s failed", path);
5317c478bd9Sstevel@tonic-gate 		return (-1);
5327c478bd9Sstevel@tonic-gate 	}
5337c478bd9Sstevel@tonic-gate 	return (0);
5347c478bd9Sstevel@tonic-gate }
5357c478bd9Sstevel@tonic-gate 
/*
 * Free a NULL-terminated array of heap-allocated strings, along with the
 * array itself.  A NULL array is ignored.
 */
static void
free_remote_fstypes(char **types)
{
	char **cur;

	if (types == NULL)
		return;

	for (cur = types; *cur != NULL; cur++)
		free(*cur);
	free(types);
}
5477c478bd9Sstevel@tonic-gate 
5487c478bd9Sstevel@tonic-gate static char **
5497c478bd9Sstevel@tonic-gate get_remote_fstypes(zlog_t *zlogp)
5507c478bd9Sstevel@tonic-gate {
5517c478bd9Sstevel@tonic-gate 	char **types = NULL;
5527c478bd9Sstevel@tonic-gate 	FILE *fp;
5537c478bd9Sstevel@tonic-gate 	char buf[MAXPATHLEN];
5547c478bd9Sstevel@tonic-gate 	char fstype[MAXPATHLEN];
5557c478bd9Sstevel@tonic-gate 	uint_t lines = 0;
5567c478bd9Sstevel@tonic-gate 	uint_t i;
5577c478bd9Sstevel@tonic-gate 
5587c478bd9Sstevel@tonic-gate 	if ((fp = fopen(DFSTYPES, "r")) == NULL) {
5597c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES);
5607c478bd9Sstevel@tonic-gate 		return (NULL);
5617c478bd9Sstevel@tonic-gate 	}
5627c478bd9Sstevel@tonic-gate 	/*
5637c478bd9Sstevel@tonic-gate 	 * Count the number of lines
5647c478bd9Sstevel@tonic-gate 	 */
5657c478bd9Sstevel@tonic-gate 	while (fgets(buf, sizeof (buf), fp) != NULL)
5667c478bd9Sstevel@tonic-gate 		lines++;
5677c478bd9Sstevel@tonic-gate 	if (lines == 0)	/* didn't read anything; empty file */
5687c478bd9Sstevel@tonic-gate 		goto out;
5697c478bd9Sstevel@tonic-gate 	rewind(fp);
5707c478bd9Sstevel@tonic-gate 	/*
5717c478bd9Sstevel@tonic-gate 	 * Allocate enough space for a NULL-terminated array.
5727c478bd9Sstevel@tonic-gate 	 */
5737c478bd9Sstevel@tonic-gate 	types = calloc(lines + 1, sizeof (char *));
5747c478bd9Sstevel@tonic-gate 	if (types == NULL) {
5757c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
5767c478bd9Sstevel@tonic-gate 		goto out;
5777c478bd9Sstevel@tonic-gate 	}
5787c478bd9Sstevel@tonic-gate 	i = 0;
5797c478bd9Sstevel@tonic-gate 	while (fgets(buf, sizeof (buf), fp) != NULL) {
5807c478bd9Sstevel@tonic-gate 		/* LINTED - fstype is big enough to hold buf */
5817c478bd9Sstevel@tonic-gate 		if (sscanf(buf, "%s", fstype) == 0) {
5827c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES);
5837c478bd9Sstevel@tonic-gate 			free_remote_fstypes(types);
5847c478bd9Sstevel@tonic-gate 			types = NULL;
5857c478bd9Sstevel@tonic-gate 			goto out;
5867c478bd9Sstevel@tonic-gate 		}
5877c478bd9Sstevel@tonic-gate 		types[i] = strdup(fstype);
5887c478bd9Sstevel@tonic-gate 		if (types[i] == NULL) {
5897c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
5907c478bd9Sstevel@tonic-gate 			free_remote_fstypes(types);
5917c478bd9Sstevel@tonic-gate 			types = NULL;
5927c478bd9Sstevel@tonic-gate 			goto out;
5937c478bd9Sstevel@tonic-gate 		}
5947c478bd9Sstevel@tonic-gate 		i++;
5957c478bd9Sstevel@tonic-gate 	}
5967c478bd9Sstevel@tonic-gate out:
5977c478bd9Sstevel@tonic-gate 	(void) fclose(fp);
5987c478bd9Sstevel@tonic-gate 	return (types);
5997c478bd9Sstevel@tonic-gate }
6007c478bd9Sstevel@tonic-gate 
6017c478bd9Sstevel@tonic-gate static boolean_t
6027c478bd9Sstevel@tonic-gate is_remote_fstype(const char *fstype, char *const *remote_fstypes)
6037c478bd9Sstevel@tonic-gate {
6047c478bd9Sstevel@tonic-gate 	uint_t i;
6057c478bd9Sstevel@tonic-gate 
6067c478bd9Sstevel@tonic-gate 	if (remote_fstypes == NULL)
6077c478bd9Sstevel@tonic-gate 		return (B_FALSE);
6087c478bd9Sstevel@tonic-gate 	for (i = 0; remote_fstypes[i] != NULL; i++) {
6097c478bd9Sstevel@tonic-gate 		if (strcmp(remote_fstypes[i], fstype) == 0)
6107c478bd9Sstevel@tonic-gate 			return (B_TRUE);
6117c478bd9Sstevel@tonic-gate 	}
6127c478bd9Sstevel@tonic-gate 	return (B_FALSE);
6137c478bd9Sstevel@tonic-gate }
6147c478bd9Sstevel@tonic-gate 
615108322fbScarlsonj /*
616108322fbScarlsonj  * This converts a zone root path (normally of the form .../root) to a Live
617108322fbScarlsonj  * Upgrade scratch zone root (of the form .../lu).
618108322fbScarlsonj  */
6197c478bd9Sstevel@tonic-gate static void
620108322fbScarlsonj root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
6217c478bd9Sstevel@tonic-gate {
622108322fbScarlsonj 	if (!isresolved && zonecfg_in_alt_root())
623108322fbScarlsonj 		resolve_lofs(zlogp, zroot, zrootlen);
624108322fbScarlsonj 	(void) strcpy(strrchr(zroot, '/') + 1, "lu");
6257c478bd9Sstevel@tonic-gate }
6267c478bd9Sstevel@tonic-gate 
6277c478bd9Sstevel@tonic-gate /*
6287c478bd9Sstevel@tonic-gate  * The general strategy for unmounting filesystems is as follows:
6297c478bd9Sstevel@tonic-gate  *
6307c478bd9Sstevel@tonic-gate  * - Remote filesystems may be dead, and attempting to contact them as
6317c478bd9Sstevel@tonic-gate  * part of a regular unmount may hang forever; we want to always try to
6327c478bd9Sstevel@tonic-gate  * forcibly unmount such filesystems and only fall back to regular
6337c478bd9Sstevel@tonic-gate  * unmounts if the filesystem doesn't support forced unmounts.
6347c478bd9Sstevel@tonic-gate  *
6357c478bd9Sstevel@tonic-gate  * - We don't want to unnecessarily corrupt metadata on local
6367c478bd9Sstevel@tonic-gate  * filesystems (ie UFS), so we want to start off with graceful unmounts,
6377c478bd9Sstevel@tonic-gate  * and only escalate to doing forced unmounts if we get stuck.
6387c478bd9Sstevel@tonic-gate  *
6397c478bd9Sstevel@tonic-gate  * We start off walking backwards through the mount table.  This doesn't
6407c478bd9Sstevel@tonic-gate  * give us strict ordering but ensures that we try to unmount submounts
6417c478bd9Sstevel@tonic-gate  * first.  We thus limit the number of failed umount2(2) calls.
6427c478bd9Sstevel@tonic-gate  *
6437c478bd9Sstevel@tonic-gate  * The mechanism for determining if we're stuck is to count the number
6447c478bd9Sstevel@tonic-gate  * of failed unmounts each iteration through the mount table.  This
6457c478bd9Sstevel@tonic-gate  * gives us an upper bound on the number of filesystems which remain
6467c478bd9Sstevel@tonic-gate  * mounted (autofs trigger nodes are dealt with separately).  If at the
6477c478bd9Sstevel@tonic-gate  * end of one unmount+autofs_cleanup cycle we still have the same number
6487c478bd9Sstevel@tonic-gate  * of mounts that we started out with, we're stuck and try a forced
6497c478bd9Sstevel@tonic-gate  * unmount.  If that fails (filesystem doesn't support forced unmounts)
6507c478bd9Sstevel@tonic-gate  * then we bail and are unable to teardown the zone.  If it succeeds,
6517c478bd9Sstevel@tonic-gate  * we're no longer stuck so we continue with our policy of trying
6527c478bd9Sstevel@tonic-gate  * graceful mounts first.
6537c478bd9Sstevel@tonic-gate  *
6547c478bd9Sstevel@tonic-gate  * Zone must be down (ie, no processes or threads active).
6557c478bd9Sstevel@tonic-gate  */
6567c478bd9Sstevel@tonic-gate static int
657108322fbScarlsonj unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
6587c478bd9Sstevel@tonic-gate {
6597c478bd9Sstevel@tonic-gate 	int error = 0;
6607c478bd9Sstevel@tonic-gate 	FILE *mnttab;
6617c478bd9Sstevel@tonic-gate 	struct mnttab *mnts;
6627c478bd9Sstevel@tonic-gate 	uint_t nmnt;
6637c478bd9Sstevel@tonic-gate 	char zroot[MAXPATHLEN + 1];
6647c478bd9Sstevel@tonic-gate 	size_t zrootlen;
6657c478bd9Sstevel@tonic-gate 	uint_t oldcount = UINT_MAX;
6667c478bd9Sstevel@tonic-gate 	boolean_t stuck = B_FALSE;
6677c478bd9Sstevel@tonic-gate 	char **remote_fstypes = NULL;
6687c478bd9Sstevel@tonic-gate 
6697c478bd9Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
6707c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "unable to determine zone root");
6717c478bd9Sstevel@tonic-gate 		return (-1);
6727c478bd9Sstevel@tonic-gate 	}
673108322fbScarlsonj 	if (unmount_cmd)
674108322fbScarlsonj 		root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
6757c478bd9Sstevel@tonic-gate 
6767c478bd9Sstevel@tonic-gate 	(void) strcat(zroot, "/");
6777c478bd9Sstevel@tonic-gate 	zrootlen = strlen(zroot);
6787c478bd9Sstevel@tonic-gate 
67945916cd2Sjpk 	/*
68045916cd2Sjpk 	 * For Trusted Extensions unmount each higher level zone's mount
68145916cd2Sjpk 	 * of our zone's /export/home
68245916cd2Sjpk 	 */
68348451833Scarlsonj 	if (!unmount_cmd)
68445916cd2Sjpk 		tsol_unmounts(zlogp, zone_name);
68545916cd2Sjpk 
6867c478bd9Sstevel@tonic-gate 	if ((mnttab = fopen(MNTTAB, "r")) == NULL) {
6877c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB);
6887c478bd9Sstevel@tonic-gate 		return (-1);
6897c478bd9Sstevel@tonic-gate 	}
6907c478bd9Sstevel@tonic-gate 	/*
6917c478bd9Sstevel@tonic-gate 	 * Use our hacky mntfs ioctl so we see everything, even mounts with
6927c478bd9Sstevel@tonic-gate 	 * MS_NOMNTTAB.
6937c478bd9Sstevel@tonic-gate 	 */
6947c478bd9Sstevel@tonic-gate 	if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) {
6957c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB);
6967c478bd9Sstevel@tonic-gate 		error++;
6977c478bd9Sstevel@tonic-gate 		goto out;
6987c478bd9Sstevel@tonic-gate 	}
6997c478bd9Sstevel@tonic-gate 
7007c478bd9Sstevel@tonic-gate 	/*
7017c478bd9Sstevel@tonic-gate 	 * Build the list of remote fstypes so we know which ones we
7027c478bd9Sstevel@tonic-gate 	 * should forcibly unmount.
7037c478bd9Sstevel@tonic-gate 	 */
7047c478bd9Sstevel@tonic-gate 	remote_fstypes = get_remote_fstypes(zlogp);
7057c478bd9Sstevel@tonic-gate 	for (; /* ever */; ) {
7067c478bd9Sstevel@tonic-gate 		uint_t newcount = 0;
7077c478bd9Sstevel@tonic-gate 		boolean_t unmounted;
7087c478bd9Sstevel@tonic-gate 		struct mnttab *mnp;
7097c478bd9Sstevel@tonic-gate 		char *path;
7107c478bd9Sstevel@tonic-gate 		uint_t i;
7117c478bd9Sstevel@tonic-gate 
7127c478bd9Sstevel@tonic-gate 		mnts = NULL;
7137c478bd9Sstevel@tonic-gate 		nmnt = 0;
7147c478bd9Sstevel@tonic-gate 		/*
7157c478bd9Sstevel@tonic-gate 		 * MNTTAB gives us a way to walk through mounted
7167c478bd9Sstevel@tonic-gate 		 * filesystems; we need to be able to walk them in
7177c478bd9Sstevel@tonic-gate 		 * reverse order, so we build a list of all mounted
7187c478bd9Sstevel@tonic-gate 		 * filesystems.
7197c478bd9Sstevel@tonic-gate 		 */
7207c478bd9Sstevel@tonic-gate 		if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts,
7217c478bd9Sstevel@tonic-gate 		    &nmnt) != 0) {
7227c478bd9Sstevel@tonic-gate 			error++;
7237c478bd9Sstevel@tonic-gate 			goto out;
7247c478bd9Sstevel@tonic-gate 		}
7257c478bd9Sstevel@tonic-gate 		for (i = 0; i < nmnt; i++) {
7267c478bd9Sstevel@tonic-gate 			mnp = &mnts[nmnt - i - 1]; /* access in reverse order */
7277c478bd9Sstevel@tonic-gate 			path = mnp->mnt_mountp;
7287c478bd9Sstevel@tonic-gate 			unmounted = B_FALSE;
7297c478bd9Sstevel@tonic-gate 			/*
7307c478bd9Sstevel@tonic-gate 			 * Try forced unmount first for remote filesystems.
7317c478bd9Sstevel@tonic-gate 			 *
7327c478bd9Sstevel@tonic-gate 			 * Not all remote filesystems support forced unmounts,
7337c478bd9Sstevel@tonic-gate 			 * so if this fails (ENOTSUP) we'll continue on
7347c478bd9Sstevel@tonic-gate 			 * and try a regular unmount.
7357c478bd9Sstevel@tonic-gate 			 */
7367c478bd9Sstevel@tonic-gate 			if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) {
7377c478bd9Sstevel@tonic-gate 				if (umount2(path, MS_FORCE) == 0)
7387c478bd9Sstevel@tonic-gate 					unmounted = B_TRUE;
7397c478bd9Sstevel@tonic-gate 			}
7407c478bd9Sstevel@tonic-gate 			/*
7417c478bd9Sstevel@tonic-gate 			 * Try forced unmount if we're stuck.
7427c478bd9Sstevel@tonic-gate 			 */
7437c478bd9Sstevel@tonic-gate 			if (stuck) {
7447c478bd9Sstevel@tonic-gate 				if (umount2(path, MS_FORCE) == 0) {
7457c478bd9Sstevel@tonic-gate 					unmounted = B_TRUE;
7467c478bd9Sstevel@tonic-gate 					stuck = B_FALSE;
7477c478bd9Sstevel@tonic-gate 				} else {
7487c478bd9Sstevel@tonic-gate 					/*
7497c478bd9Sstevel@tonic-gate 					 * The first failure indicates a
7507c478bd9Sstevel@tonic-gate 					 * mount we won't be able to get
7517c478bd9Sstevel@tonic-gate 					 * rid of automatically, so we
7527c478bd9Sstevel@tonic-gate 					 * bail.
7537c478bd9Sstevel@tonic-gate 					 */
7547c478bd9Sstevel@tonic-gate 					error++;
7557c478bd9Sstevel@tonic-gate 					zerror(zlogp, B_FALSE,
7567c478bd9Sstevel@tonic-gate 					    "unable to unmount '%s'", path);
7577c478bd9Sstevel@tonic-gate 					free_mnttable(mnts, nmnt);
7587c478bd9Sstevel@tonic-gate 					goto out;
7597c478bd9Sstevel@tonic-gate 				}
7607c478bd9Sstevel@tonic-gate 			}
7617c478bd9Sstevel@tonic-gate 			/*
7627c478bd9Sstevel@tonic-gate 			 * Try regular unmounts for everything else.
7637c478bd9Sstevel@tonic-gate 			 */
7647c478bd9Sstevel@tonic-gate 			if (!unmounted && umount2(path, 0) != 0)
7657c478bd9Sstevel@tonic-gate 				newcount++;
7667c478bd9Sstevel@tonic-gate 		}
7677c478bd9Sstevel@tonic-gate 		free_mnttable(mnts, nmnt);
7687c478bd9Sstevel@tonic-gate 
7697c478bd9Sstevel@tonic-gate 		if (newcount == 0)
7707c478bd9Sstevel@tonic-gate 			break;
7717c478bd9Sstevel@tonic-gate 		if (newcount >= oldcount) {
7727c478bd9Sstevel@tonic-gate 			/*
7737c478bd9Sstevel@tonic-gate 			 * Last round didn't unmount anything; we're stuck and
7747c478bd9Sstevel@tonic-gate 			 * should start trying forced unmounts.
7757c478bd9Sstevel@tonic-gate 			 */
7767c478bd9Sstevel@tonic-gate 			stuck = B_TRUE;
7777c478bd9Sstevel@tonic-gate 		}
7787c478bd9Sstevel@tonic-gate 		oldcount = newcount;
7797c478bd9Sstevel@tonic-gate 
7807c478bd9Sstevel@tonic-gate 		/*
7817c478bd9Sstevel@tonic-gate 		 * Autofs doesn't let you unmount its trigger nodes from
7827c478bd9Sstevel@tonic-gate 		 * userland so we have to tell the kernel to cleanup for us.
7837c478bd9Sstevel@tonic-gate 		 */
7847c478bd9Sstevel@tonic-gate 		if (autofs_cleanup(zoneid) != 0) {
7857c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "unable to remove autofs nodes");
7867c478bd9Sstevel@tonic-gate 			error++;
7877c478bd9Sstevel@tonic-gate 			goto out;
7887c478bd9Sstevel@tonic-gate 		}
7897c478bd9Sstevel@tonic-gate 	}
7907c478bd9Sstevel@tonic-gate 
7917c478bd9Sstevel@tonic-gate out:
7927c478bd9Sstevel@tonic-gate 	free_remote_fstypes(remote_fstypes);
7937c478bd9Sstevel@tonic-gate 	(void) fclose(mnttab);
7947c478bd9Sstevel@tonic-gate 	return (error ? -1 : 0);
7957c478bd9Sstevel@tonic-gate }
7967c478bd9Sstevel@tonic-gate 
7977c478bd9Sstevel@tonic-gate static int
7987c478bd9Sstevel@tonic-gate fs_compare(const void *m1, const void *m2)
7997c478bd9Sstevel@tonic-gate {
8007c478bd9Sstevel@tonic-gate 	struct zone_fstab *i = (struct zone_fstab *)m1;
8017c478bd9Sstevel@tonic-gate 	struct zone_fstab *j = (struct zone_fstab *)m2;
8027c478bd9Sstevel@tonic-gate 
8037c478bd9Sstevel@tonic-gate 	return (strcmp(i->zone_fs_dir, j->zone_fs_dir));
8047c478bd9Sstevel@tonic-gate }
8057c478bd9Sstevel@tonic-gate 
8067c478bd9Sstevel@tonic-gate /*
8077c478bd9Sstevel@tonic-gate  * Fork and exec (and wait for) the mentioned binary with the provided
8087c478bd9Sstevel@tonic-gate  * arguments.  Returns (-1) if something went wrong with fork(2) or exec(2),
8097c478bd9Sstevel@tonic-gate  * returns the exit status otherwise.
8107c478bd9Sstevel@tonic-gate  *
8117c478bd9Sstevel@tonic-gate  * If we were unable to exec the provided pathname (for whatever
8127c478bd9Sstevel@tonic-gate  * reason), we return the special token ZEXIT_EXEC.  The current value
8137c478bd9Sstevel@tonic-gate  * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the
8147c478bd9Sstevel@tonic-gate  * consumers of this function; any future consumers must make sure this
8157c478bd9Sstevel@tonic-gate  * remains the case.
8167c478bd9Sstevel@tonic-gate  */
8177c478bd9Sstevel@tonic-gate static int
8187c478bd9Sstevel@tonic-gate forkexec(zlog_t *zlogp, const char *path, char *const argv[])
8197c478bd9Sstevel@tonic-gate {
8207c478bd9Sstevel@tonic-gate 	pid_t child_pid;
8217c478bd9Sstevel@tonic-gate 	int child_status = 0;
8227c478bd9Sstevel@tonic-gate 
8237c478bd9Sstevel@tonic-gate 	/*
8247c478bd9Sstevel@tonic-gate 	 * Do not let another thread localize a message while we are forking.
8257c478bd9Sstevel@tonic-gate 	 */
8267c478bd9Sstevel@tonic-gate 	(void) mutex_lock(&msglock);
8277c478bd9Sstevel@tonic-gate 	child_pid = fork();
8287c478bd9Sstevel@tonic-gate 	(void) mutex_unlock(&msglock);
8297c478bd9Sstevel@tonic-gate 	if (child_pid == -1) {
8307c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]);
8317c478bd9Sstevel@tonic-gate 		return (-1);
8327c478bd9Sstevel@tonic-gate 	} else if (child_pid == 0) {
8337c478bd9Sstevel@tonic-gate 		closefrom(0);
8341390a385Sgjelinek 		/* redirect stdin, stdout & stderr to /dev/null */
8351390a385Sgjelinek 		(void) open("/dev/null", O_RDONLY);	/* stdin */
8361390a385Sgjelinek 		(void) open("/dev/null", O_WRONLY);	/* stdout */
8371390a385Sgjelinek 		(void) open("/dev/null", O_WRONLY);	/* stderr */
8387c478bd9Sstevel@tonic-gate 		(void) execv(path, argv);
8397c478bd9Sstevel@tonic-gate 		/*
8407c478bd9Sstevel@tonic-gate 		 * Since we are in the child, there is no point calling zerror()
8417c478bd9Sstevel@tonic-gate 		 * since there is nobody waiting to consume it.  So exit with a
8427c478bd9Sstevel@tonic-gate 		 * special code that the parent will recognize and call zerror()
8437c478bd9Sstevel@tonic-gate 		 * accordingly.
8447c478bd9Sstevel@tonic-gate 		 */
8457c478bd9Sstevel@tonic-gate 
8467c478bd9Sstevel@tonic-gate 		_exit(ZEXIT_EXEC);
8477c478bd9Sstevel@tonic-gate 	} else {
8487c478bd9Sstevel@tonic-gate 		(void) waitpid(child_pid, &child_status, 0);
8497c478bd9Sstevel@tonic-gate 	}
8507c478bd9Sstevel@tonic-gate 
8517c478bd9Sstevel@tonic-gate 	if (WIFSIGNALED(child_status)) {
8527c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
8537c478bd9Sstevel@tonic-gate 		    "signal %d", path, WTERMSIG(child_status));
8547c478bd9Sstevel@tonic-gate 		return (-1);
8557c478bd9Sstevel@tonic-gate 	}
8567c478bd9Sstevel@tonic-gate 	assert(WIFEXITED(child_status));
8577c478bd9Sstevel@tonic-gate 	if (WEXITSTATUS(child_status) == ZEXIT_EXEC) {
8587c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "failed to exec %s", path);
8597c478bd9Sstevel@tonic-gate 		return (-1);
8607c478bd9Sstevel@tonic-gate 	}
8617c478bd9Sstevel@tonic-gate 	return (WEXITSTATUS(child_status));
8627c478bd9Sstevel@tonic-gate }
8637c478bd9Sstevel@tonic-gate 
8647c478bd9Sstevel@tonic-gate static int
8657c478bd9Sstevel@tonic-gate dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev)
8667c478bd9Sstevel@tonic-gate {
8677c478bd9Sstevel@tonic-gate 	char cmdbuf[MAXPATHLEN];
8687c478bd9Sstevel@tonic-gate 	char *argv[4];
8697c478bd9Sstevel@tonic-gate 	int status;
8707c478bd9Sstevel@tonic-gate 
8717c478bd9Sstevel@tonic-gate 	/*
8727c478bd9Sstevel@tonic-gate 	 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but
8737c478bd9Sstevel@tonic-gate 	 * that would cost us an extra fork/exec without buying us anything.
8747c478bd9Sstevel@tonic-gate 	 */
8757c478bd9Sstevel@tonic-gate 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype)
8767c478bd9Sstevel@tonic-gate 	    > sizeof (cmdbuf)) {
8777c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
8787c478bd9Sstevel@tonic-gate 		return (-1);
8797c478bd9Sstevel@tonic-gate 	}
8807c478bd9Sstevel@tonic-gate 
8817c478bd9Sstevel@tonic-gate 	argv[0] = "fsck";
8827c478bd9Sstevel@tonic-gate 	argv[1] = "-m";
8837c478bd9Sstevel@tonic-gate 	argv[2] = (char *)rawdev;
8847c478bd9Sstevel@tonic-gate 	argv[3] = NULL;
8857c478bd9Sstevel@tonic-gate 
8867c478bd9Sstevel@tonic-gate 	status = forkexec(zlogp, cmdbuf, argv);
8877c478bd9Sstevel@tonic-gate 	if (status == 0 || status == -1)
8887c478bd9Sstevel@tonic-gate 		return (status);
8897c478bd9Sstevel@tonic-gate 	zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; "
8907c478bd9Sstevel@tonic-gate 	    "run fsck manually", rawdev, status);
8917c478bd9Sstevel@tonic-gate 	return (-1);
8927c478bd9Sstevel@tonic-gate }
8937c478bd9Sstevel@tonic-gate 
8947c478bd9Sstevel@tonic-gate static int
8957c478bd9Sstevel@tonic-gate domount(zlog_t *zlogp, const char *fstype, const char *opts,
8967c478bd9Sstevel@tonic-gate     const char *special, const char *directory)
8977c478bd9Sstevel@tonic-gate {
8987c478bd9Sstevel@tonic-gate 	char cmdbuf[MAXPATHLEN];
8997c478bd9Sstevel@tonic-gate 	char *argv[6];
9007c478bd9Sstevel@tonic-gate 	int status;
9017c478bd9Sstevel@tonic-gate 
9027c478bd9Sstevel@tonic-gate 	/*
9037c478bd9Sstevel@tonic-gate 	 * We could alternatively have called /usr/sbin/mount -F <fstype>, but
9047c478bd9Sstevel@tonic-gate 	 * that would cost us an extra fork/exec without buying us anything.
9057c478bd9Sstevel@tonic-gate 	 */
9067c478bd9Sstevel@tonic-gate 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype)
9077c478bd9Sstevel@tonic-gate 	    > sizeof (cmdbuf)) {
9087c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
9097c478bd9Sstevel@tonic-gate 		return (-1);
9107c478bd9Sstevel@tonic-gate 	}
9117c478bd9Sstevel@tonic-gate 	argv[0] = "mount";
9127c478bd9Sstevel@tonic-gate 	if (opts[0] == '\0') {
9137c478bd9Sstevel@tonic-gate 		argv[1] = (char *)special;
9147c478bd9Sstevel@tonic-gate 		argv[2] = (char *)directory;
9157c478bd9Sstevel@tonic-gate 		argv[3] = NULL;
9167c478bd9Sstevel@tonic-gate 	} else {
9177c478bd9Sstevel@tonic-gate 		argv[1] = "-o";
9187c478bd9Sstevel@tonic-gate 		argv[2] = (char *)opts;
9197c478bd9Sstevel@tonic-gate 		argv[3] = (char *)special;
9207c478bd9Sstevel@tonic-gate 		argv[4] = (char *)directory;
9217c478bd9Sstevel@tonic-gate 		argv[5] = NULL;
9227c478bd9Sstevel@tonic-gate 	}
9237c478bd9Sstevel@tonic-gate 
9247c478bd9Sstevel@tonic-gate 	status = forkexec(zlogp, cmdbuf, argv);
9257c478bd9Sstevel@tonic-gate 	if (status == 0 || status == -1)
9267c478bd9Sstevel@tonic-gate 		return (status);
9277c478bd9Sstevel@tonic-gate 	if (opts[0] == '\0')
9287c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "\"%s %s %s\" "
9297c478bd9Sstevel@tonic-gate 		    "failed with exit code %d",
9307c478bd9Sstevel@tonic-gate 		    cmdbuf, special, directory, status);
9317c478bd9Sstevel@tonic-gate 	else
9327c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" "
9337c478bd9Sstevel@tonic-gate 		    "failed with exit code %d",
9347c478bd9Sstevel@tonic-gate 		    cmdbuf, opts, special, directory, status);
9357c478bd9Sstevel@tonic-gate 	return (-1);
9367c478bd9Sstevel@tonic-gate }
9377c478bd9Sstevel@tonic-gate 
9387c478bd9Sstevel@tonic-gate /*
9397c478bd9Sstevel@tonic-gate  * Make sure if a given path exists, it is not a sym-link, and is a directory.
9407c478bd9Sstevel@tonic-gate  */
9417c478bd9Sstevel@tonic-gate static int
9427c478bd9Sstevel@tonic-gate check_path(zlog_t *zlogp, const char *path)
9437c478bd9Sstevel@tonic-gate {
9447c478bd9Sstevel@tonic-gate 	struct stat statbuf;
9457c478bd9Sstevel@tonic-gate 	char respath[MAXPATHLEN];
9467c478bd9Sstevel@tonic-gate 	int res;
9477c478bd9Sstevel@tonic-gate 
9487c478bd9Sstevel@tonic-gate 	if (lstat(path, &statbuf) != 0) {
9497c478bd9Sstevel@tonic-gate 		if (errno == ENOENT)
9507c478bd9Sstevel@tonic-gate 			return (0);
9517c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "can't stat %s", path);
9527c478bd9Sstevel@tonic-gate 		return (-1);
9537c478bd9Sstevel@tonic-gate 	}
9547c478bd9Sstevel@tonic-gate 	if (S_ISLNK(statbuf.st_mode)) {
9557c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s is a symlink", path);
9567c478bd9Sstevel@tonic-gate 		return (-1);
9577c478bd9Sstevel@tonic-gate 	}
9587c478bd9Sstevel@tonic-gate 	if (!S_ISDIR(statbuf.st_mode)) {
95945916cd2Sjpk 		if (is_system_labeled() && S_ISREG(statbuf.st_mode)) {
96045916cd2Sjpk 			/*
96145916cd2Sjpk 			 * The need to mount readonly copies of
96245916cd2Sjpk 			 * global zone /etc/ files is unique to
96345916cd2Sjpk 			 * Trusted Extensions.
96445916cd2Sjpk 			 * The check for /etc/ via strstr() is to
96545916cd2Sjpk 			 * allow paths like $ZONEROOT/etc/passwd
96645916cd2Sjpk 			 */
96745916cd2Sjpk 			if (strstr(path, "/etc/") == NULL) {
96845916cd2Sjpk 				zerror(zlogp, B_FALSE,
96945916cd2Sjpk 				    "%s is not in /etc", path);
97045916cd2Sjpk 				return (-1);
97145916cd2Sjpk 			}
97245916cd2Sjpk 		} else {
9737c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s is not a directory", path);
9747c478bd9Sstevel@tonic-gate 			return (-1);
9757c478bd9Sstevel@tonic-gate 		}
97645916cd2Sjpk 	}
9777c478bd9Sstevel@tonic-gate 	if ((res = resolvepath(path, respath, sizeof (respath))) == -1) {
9787c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to resolve path %s", path);
9797c478bd9Sstevel@tonic-gate 		return (-1);
9807c478bd9Sstevel@tonic-gate 	}
9817c478bd9Sstevel@tonic-gate 	respath[res] = '\0';
9827c478bd9Sstevel@tonic-gate 	if (strcmp(path, respath) != 0) {
9837c478bd9Sstevel@tonic-gate 		/*
9847c478bd9Sstevel@tonic-gate 		 * We don't like ".."s and "."s throwing us off
9857c478bd9Sstevel@tonic-gate 		 */
9867c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s is not a canonical path", path);
9877c478bd9Sstevel@tonic-gate 		return (-1);
9887c478bd9Sstevel@tonic-gate 	}
9897c478bd9Sstevel@tonic-gate 	return (0);
9907c478bd9Sstevel@tonic-gate }
9917c478bd9Sstevel@tonic-gate 
9927c478bd9Sstevel@tonic-gate /*
9937c478bd9Sstevel@tonic-gate  * Check every component of rootpath/relpath.  If any component fails (ie,
9947c478bd9Sstevel@tonic-gate  * exists but isn't the canonical path to a directory), it is returned in
9957c478bd9Sstevel@tonic-gate  * badpath, which is assumed to be at least of size MAXPATHLEN.
9967c478bd9Sstevel@tonic-gate  *
9977c478bd9Sstevel@tonic-gate  * Relpath must begin with '/'.
9987c478bd9Sstevel@tonic-gate  */
9997c478bd9Sstevel@tonic-gate static boolean_t
10007c478bd9Sstevel@tonic-gate valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath)
10017c478bd9Sstevel@tonic-gate {
10027c478bd9Sstevel@tonic-gate 	char abspath[MAXPATHLEN], *slashp;
10037c478bd9Sstevel@tonic-gate 
10047c478bd9Sstevel@tonic-gate 	/*
10057c478bd9Sstevel@tonic-gate 	 * Make sure abspath has at least one '/' after its rootpath
10067c478bd9Sstevel@tonic-gate 	 * component, and ends with '/'.
10077c478bd9Sstevel@tonic-gate 	 */
10087c478bd9Sstevel@tonic-gate 	if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) >
10097c478bd9Sstevel@tonic-gate 	    sizeof (abspath)) {
10107c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "pathname %s%s is too long", rootpath,
10117c478bd9Sstevel@tonic-gate 		    relpath);
10127c478bd9Sstevel@tonic-gate 		return (B_FALSE);
10137c478bd9Sstevel@tonic-gate 	}
10147c478bd9Sstevel@tonic-gate 
10157c478bd9Sstevel@tonic-gate 	slashp = &abspath[strlen(rootpath)];
10167c478bd9Sstevel@tonic-gate 	assert(*slashp == '/');
10177c478bd9Sstevel@tonic-gate 	do {
10187c478bd9Sstevel@tonic-gate 		*slashp = '\0';
10197c478bd9Sstevel@tonic-gate 		if (check_path(zlogp, abspath) != 0)
10207c478bd9Sstevel@tonic-gate 			return (B_FALSE);
10217c478bd9Sstevel@tonic-gate 		*slashp = '/';
10227c478bd9Sstevel@tonic-gate 		slashp++;
10237c478bd9Sstevel@tonic-gate 	} while ((slashp = strchr(slashp, '/')) != NULL);
10247c478bd9Sstevel@tonic-gate 	return (B_TRUE);
10257c478bd9Sstevel@tonic-gate }
10267c478bd9Sstevel@tonic-gate 
10277c478bd9Sstevel@tonic-gate static int
10287c478bd9Sstevel@tonic-gate mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath)
10297c478bd9Sstevel@tonic-gate {
10307c478bd9Sstevel@tonic-gate 	char    path[MAXPATHLEN];
1031108322fbScarlsonj 	char	specpath[MAXPATHLEN];
10327c478bd9Sstevel@tonic-gate 	char    optstr[MAX_MNTOPT_STR];
10337c478bd9Sstevel@tonic-gate 	zone_fsopt_t *optptr;
10347c478bd9Sstevel@tonic-gate 
10357c478bd9Sstevel@tonic-gate 	if (!valid_mount_path(zlogp, rootpath, fsptr->zone_fs_dir)) {
10367c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
10377c478bd9Sstevel@tonic-gate 		    rootpath, fsptr->zone_fs_dir);
10387c478bd9Sstevel@tonic-gate 		return (-1);
10397c478bd9Sstevel@tonic-gate 	}
10407c478bd9Sstevel@tonic-gate 
10417c478bd9Sstevel@tonic-gate 	if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir,
10427c478bd9Sstevel@tonic-gate 	    DEFAULT_DIR_MODE) != 0)
10437c478bd9Sstevel@tonic-gate 		return (-1);
10447c478bd9Sstevel@tonic-gate 
10457c478bd9Sstevel@tonic-gate 	(void) snprintf(path, sizeof (path), "%s%s", rootpath,
10467c478bd9Sstevel@tonic-gate 	    fsptr->zone_fs_dir);
10477c478bd9Sstevel@tonic-gate 
10487c478bd9Sstevel@tonic-gate 	if (strlen(fsptr->zone_fs_special) == 0) {
10497c478bd9Sstevel@tonic-gate 		/*
10507c478bd9Sstevel@tonic-gate 		 * A zero-length special is how we distinguish IPDs from
1051108322fbScarlsonj 		 * general-purpose FSs.  Make sure it mounts from a place that
1052108322fbScarlsonj 		 * can be seen via the alternate zone's root.
10537c478bd9Sstevel@tonic-gate 		 */
1054108322fbScarlsonj 		if (snprintf(specpath, sizeof (specpath), "%s%s",
1055108322fbScarlsonj 		    zonecfg_get_root(), fsptr->zone_fs_dir) >=
1056108322fbScarlsonj 		    sizeof (specpath)) {
1057108322fbScarlsonj 			zerror(zlogp, B_FALSE, "cannot mount %s: path too "
1058108322fbScarlsonj 			    "long in alternate root", fsptr->zone_fs_dir);
1059108322fbScarlsonj 			return (-1);
1060108322fbScarlsonj 		}
1061108322fbScarlsonj 		if (zonecfg_in_alt_root())
1062108322fbScarlsonj 			resolve_lofs(zlogp, specpath, sizeof (specpath));
10637c478bd9Sstevel@tonic-gate 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS,
1064108322fbScarlsonj 		    specpath, path) != 0) {
10657c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "failed to loopback mount %s",
1066108322fbScarlsonj 			    specpath);
10677c478bd9Sstevel@tonic-gate 			return (-1);
10687c478bd9Sstevel@tonic-gate 		}
10697c478bd9Sstevel@tonic-gate 		return (0);
10707c478bd9Sstevel@tonic-gate 	}
10717c478bd9Sstevel@tonic-gate 
10727c478bd9Sstevel@tonic-gate 	/*
10737c478bd9Sstevel@tonic-gate 	 * In general the strategy here is to do just as much verification as
10747c478bd9Sstevel@tonic-gate 	 * necessary to avoid crashing or otherwise doing something bad; if the
10757c478bd9Sstevel@tonic-gate 	 * administrator initiated the operation via zoneadm(1m), he'll get
10767c478bd9Sstevel@tonic-gate 	 * auto-verification which will let him know what's wrong.  If he
10777c478bd9Sstevel@tonic-gate 	 * modifies the zone configuration of a running zone and doesn't attempt
10787c478bd9Sstevel@tonic-gate 	 * to verify that it's OK we won't crash but won't bother trying to be
10797c478bd9Sstevel@tonic-gate 	 * too helpful either.  zoneadm verify is only a couple keystrokes away.
10807c478bd9Sstevel@tonic-gate 	 */
10817c478bd9Sstevel@tonic-gate 	if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) {
10827c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "cannot mount %s on %s: "
10837c478bd9Sstevel@tonic-gate 		    "invalid file-system type %s", fsptr->zone_fs_special,
10847c478bd9Sstevel@tonic-gate 		    fsptr->zone_fs_dir, fsptr->zone_fs_type);
10857c478bd9Sstevel@tonic-gate 		return (-1);
10867c478bd9Sstevel@tonic-gate 	}
10877c478bd9Sstevel@tonic-gate 
10887c478bd9Sstevel@tonic-gate 	/*
1089108322fbScarlsonj 	 * If we're looking at an alternate root environment, then construct
1090108322fbScarlsonj 	 * read-only loopback mounts as necessary.  For all lofs mounts, make
1091108322fbScarlsonj 	 * sure that the 'special' entry points inside the alternate root.  (We
1092108322fbScarlsonj 	 * don't do this with other mounts, as devfs isn't in the alternate
1093108322fbScarlsonj 	 * root, and we need to assume the device environment is roughly the
1094108322fbScarlsonj 	 * same.)
1095108322fbScarlsonj 	 */
1096108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
1097108322fbScarlsonj 		struct stat64 st;
1098108322fbScarlsonj 
1099108322fbScarlsonj 		if (stat64(fsptr->zone_fs_special, &st) != -1 &&
1100108322fbScarlsonj 		    S_ISBLK(st.st_mode) &&
1101108322fbScarlsonj 		    check_lofs_needed(zlogp, fsptr) == -1)
1102108322fbScarlsonj 			return (-1);
1103108322fbScarlsonj 		if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) {
1104108322fbScarlsonj 			if (snprintf(specpath, sizeof (specpath), "%s%s",
1105108322fbScarlsonj 			    zonecfg_get_root(), fsptr->zone_fs_special) >=
1106108322fbScarlsonj 			    sizeof (specpath)) {
1107108322fbScarlsonj 				zerror(zlogp, B_FALSE, "cannot mount %s: path "
1108108322fbScarlsonj 				    "too long in alternate root",
1109108322fbScarlsonj 				    fsptr->zone_fs_special);
1110108322fbScarlsonj 				return (-1);
1111108322fbScarlsonj 			}
1112108322fbScarlsonj 			resolve_lofs(zlogp, specpath, sizeof (specpath));
1113108322fbScarlsonj 			(void) strlcpy(fsptr->zone_fs_special, specpath,
1114108322fbScarlsonj 			    sizeof (fsptr->zone_fs_special));
1115108322fbScarlsonj 		}
1116108322fbScarlsonj 	}
1117108322fbScarlsonj 
1118108322fbScarlsonj 	/*
11197c478bd9Sstevel@tonic-gate 	 * Run 'fsck -m' if there's a device to fsck.
11207c478bd9Sstevel@tonic-gate 	 */
11217c478bd9Sstevel@tonic-gate 	if (fsptr->zone_fs_raw[0] != '\0' &&
11227c478bd9Sstevel@tonic-gate 	    dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0)
11237c478bd9Sstevel@tonic-gate 		return (-1);
11247c478bd9Sstevel@tonic-gate 
11257c478bd9Sstevel@tonic-gate 	/*
11267c478bd9Sstevel@tonic-gate 	 * Build up mount option string.
11277c478bd9Sstevel@tonic-gate 	 */
11287c478bd9Sstevel@tonic-gate 	optstr[0] = '\0';
11297c478bd9Sstevel@tonic-gate 	if (fsptr->zone_fs_options != NULL) {
11307c478bd9Sstevel@tonic-gate 		(void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt,
11317c478bd9Sstevel@tonic-gate 		    sizeof (optstr));
11327c478bd9Sstevel@tonic-gate 		for (optptr = fsptr->zone_fs_options->zone_fsopt_next;
11337c478bd9Sstevel@tonic-gate 		    optptr != NULL; optptr = optptr->zone_fsopt_next) {
11347c478bd9Sstevel@tonic-gate 			(void) strlcat(optstr, ",", sizeof (optstr));
11357c478bd9Sstevel@tonic-gate 			(void) strlcat(optstr, optptr->zone_fsopt_opt,
11367c478bd9Sstevel@tonic-gate 			    sizeof (optstr));
11377c478bd9Sstevel@tonic-gate 		}
11387c478bd9Sstevel@tonic-gate 	}
11397c478bd9Sstevel@tonic-gate 	return (domount(zlogp, fsptr->zone_fs_type, optstr,
11407c478bd9Sstevel@tonic-gate 	    fsptr->zone_fs_special, path));
11417c478bd9Sstevel@tonic-gate }
11427c478bd9Sstevel@tonic-gate 
11437c478bd9Sstevel@tonic-gate static void
11447c478bd9Sstevel@tonic-gate free_fs_data(struct zone_fstab *fsarray, uint_t nelem)
11457c478bd9Sstevel@tonic-gate {
11467c478bd9Sstevel@tonic-gate 	uint_t i;
11477c478bd9Sstevel@tonic-gate 
11487c478bd9Sstevel@tonic-gate 	if (fsarray == NULL)
11497c478bd9Sstevel@tonic-gate 		return;
11507c478bd9Sstevel@tonic-gate 	for (i = 0; i < nelem; i++)
11517c478bd9Sstevel@tonic-gate 		zonecfg_free_fs_option_list(fsarray[i].zone_fs_options);
11527c478bd9Sstevel@tonic-gate 	free(fsarray);
11537c478bd9Sstevel@tonic-gate }
11547c478bd9Sstevel@tonic-gate 
/*
 * This function initiates the creation of a small Solaris environment for a
 * scratch zone.  The environment creation process is split up into two
 * functions (build_mounted_pre_var() and build_mounted_post_var()).  It
 * is done this way because:
 *	We need to have both /etc and /var in the root of the scratch zone.
 *	We loopback mount the zone's own /etc and /var into the root of the
 *	scratch zone.  Unlike /etc, /var can be a separate filesystem, so we
 *	need to delay the mount of /var until the zone's root gets populated.
 *	The mounting of localdirs[] (/etc and /var) has therefore been moved
 *	to build_mounted_post_var(), which gets called only after the
 *	zone-specific filesystems are mounted.
 */
1167108322fbScarlsonj  */
1168108322fbScarlsonj static boolean_t
1169f4368d3dSvp157776 build_mounted_pre_var(zlog_t *zlogp, char *rootpath,
1170f4368d3dSvp157776     size_t rootlen, const char *zonepath)
1171108322fbScarlsonj {
1172108322fbScarlsonj 	char tmp[MAXPATHLEN], fromdir[MAXPATHLEN];
1173108322fbScarlsonj 	char luroot[MAXPATHLEN];
1174108322fbScarlsonj 	const char **cpp;
1175108322fbScarlsonj 	static const char *mkdirs[] = {
11763f604e0fSdp 		"/system", "/system/contract", "/system/object", "/proc",
11773f604e0fSdp 		"/dev", "/tmp", "/a", NULL
1178108322fbScarlsonj 	};
1179108322fbScarlsonj 	char *altstr;
1180f4368d3dSvp157776 	FILE *fp;
1181108322fbScarlsonj 	uuid_t uuid;
1182108322fbScarlsonj 
1183108322fbScarlsonj 	resolve_lofs(zlogp, rootpath, rootlen);
1184108322fbScarlsonj 	(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
1185108322fbScarlsonj 	resolve_lofs(zlogp, luroot, sizeof (luroot));
1186108322fbScarlsonj 	(void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot);
1187108322fbScarlsonj 	(void) symlink("./usr/bin", tmp);
1188108322fbScarlsonj 
1189108322fbScarlsonj 	/*
1190108322fbScarlsonj 	 * These are mostly special mount points; not handled here.  (See
1191108322fbScarlsonj 	 * zone_mount_early.)
1192108322fbScarlsonj 	 */
1193108322fbScarlsonj 	for (cpp = mkdirs; *cpp != NULL; cpp++) {
1194108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1195108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0) {
1196108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1197108322fbScarlsonj 			return (B_FALSE);
1198108322fbScarlsonj 		}
1199108322fbScarlsonj 	}
1200f4368d3dSvp157776 	/*
1201f4368d3dSvp157776 	 * This is here to support lucopy.  If there's an instance of this same
1202f4368d3dSvp157776 	 * zone on the current running system, then we mount its root up as
1203f4368d3dSvp157776 	 * read-only inside the scratch zone.
1204f4368d3dSvp157776 	 */
1205f4368d3dSvp157776 	(void) zonecfg_get_uuid(zone_name, uuid);
1206f4368d3dSvp157776 	altstr = strdup(zonecfg_get_root());
1207f4368d3dSvp157776 	if (altstr == NULL) {
1208f4368d3dSvp157776 		zerror(zlogp, B_TRUE, "memory allocation failed");
1209f4368d3dSvp157776 		return (B_FALSE);
1210f4368d3dSvp157776 	}
1211f4368d3dSvp157776 	zonecfg_set_root("");
1212f4368d3dSvp157776 	(void) strlcpy(tmp, zone_name, sizeof (tmp));
1213f4368d3dSvp157776 	(void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp));
1214f4368d3dSvp157776 	if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK &&
1215f4368d3dSvp157776 	    strcmp(fromdir, rootpath) != 0) {
1216f4368d3dSvp157776 		(void) snprintf(tmp, sizeof (tmp), "%s/b", luroot);
1217f4368d3dSvp157776 		if (mkdir(tmp, 0755) != 0) {
1218f4368d3dSvp157776 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1219f4368d3dSvp157776 			return (B_FALSE);
1220f4368d3dSvp157776 		}
1221f4368d3dSvp157776 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, fromdir,
1222f4368d3dSvp157776 		    tmp) != 0) {
1223f4368d3dSvp157776 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1224f4368d3dSvp157776 			    fromdir);
1225f4368d3dSvp157776 			return (B_FALSE);
1226f4368d3dSvp157776 		}
1227f4368d3dSvp157776 	}
1228f4368d3dSvp157776 	zonecfg_set_root(altstr);
1229f4368d3dSvp157776 	free(altstr);
1230f4368d3dSvp157776 
1231f4368d3dSvp157776 	if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) {
1232f4368d3dSvp157776 		zerror(zlogp, B_TRUE, "cannot open zone mapfile");
1233f4368d3dSvp157776 		return (B_FALSE);
1234f4368d3dSvp157776 	}
1235f4368d3dSvp157776 	(void) ftruncate(fileno(fp), 0);
1236f4368d3dSvp157776 	if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) {
1237f4368d3dSvp157776 		zerror(zlogp, B_TRUE, "cannot add zone mapfile entry");
1238f4368d3dSvp157776 	}
1239f4368d3dSvp157776 	zonecfg_close_scratch(fp);
1240f4368d3dSvp157776 	(void) snprintf(tmp, sizeof (tmp), "%s/a", luroot);
1241f4368d3dSvp157776 	if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0)
1242f4368d3dSvp157776 		return (B_FALSE);
1243f4368d3dSvp157776 	(void) strlcpy(rootpath, tmp, rootlen);
1244f4368d3dSvp157776 	return (B_TRUE);
1245f4368d3dSvp157776 }
1246f4368d3dSvp157776 
1247f4368d3dSvp157776 
1248f4368d3dSvp157776 static boolean_t
1249f4368d3dSvp157776 build_mounted_post_var(zlog_t *zlogp, char *rootpath, const char *zonepath)
1250f4368d3dSvp157776 {
1251f4368d3dSvp157776 	char tmp[MAXPATHLEN], fromdir[MAXPATHLEN];
1252f4368d3dSvp157776 	char luroot[MAXPATHLEN];
1253f4368d3dSvp157776 	const char **cpp;
1254f4368d3dSvp157776 	static const char *localdirs[] = {
1255f4368d3dSvp157776 		"/etc", "/var", NULL
1256f4368d3dSvp157776 	};
1257f4368d3dSvp157776 	static const char *loopdirs[] = {
1258f4368d3dSvp157776 		"/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform",
1259f4368d3dSvp157776 		"/usr", NULL
1260f4368d3dSvp157776 	};
1261f4368d3dSvp157776 	static const char *tmpdirs[] = {
1262f4368d3dSvp157776 		"/tmp", "/var/run", NULL
1263f4368d3dSvp157776 	};
1264f4368d3dSvp157776 	struct stat st;
1265f4368d3dSvp157776 
1266f4368d3dSvp157776 	(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
1267108322fbScarlsonj 
1268108322fbScarlsonj 	/*
1269108322fbScarlsonj 	 * These are mounted read-write from the zone undergoing upgrade.  We
1270108322fbScarlsonj 	 * must be careful not to 'leak' things from the main system into the
1271108322fbScarlsonj 	 * zone, and this accomplishes that goal.
1272108322fbScarlsonj 	 */
1273108322fbScarlsonj 	for (cpp = localdirs; *cpp != NULL; cpp++) {
1274108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1275108322fbScarlsonj 		(void) snprintf(fromdir, sizeof (fromdir), "%s%s", rootpath,
1276108322fbScarlsonj 		    *cpp);
1277108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0) {
1278108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1279108322fbScarlsonj 			return (B_FALSE);
1280108322fbScarlsonj 		}
1281108322fbScarlsonj 		if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) != 0) {
1282108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1283108322fbScarlsonj 			    *cpp);
1284108322fbScarlsonj 			return (B_FALSE);
1285108322fbScarlsonj 		}
1286108322fbScarlsonj 	}
1287108322fbScarlsonj 
1288108322fbScarlsonj 	/*
1289108322fbScarlsonj 	 * These are things mounted read-only from the running system because
1290108322fbScarlsonj 	 * they contain binaries that must match system.
1291108322fbScarlsonj 	 */
1292108322fbScarlsonj 	for (cpp = loopdirs; *cpp != NULL; cpp++) {
1293108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1294108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0) {
1295108322fbScarlsonj 			if (errno != EEXIST) {
1296108322fbScarlsonj 				zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1297108322fbScarlsonj 				return (B_FALSE);
1298108322fbScarlsonj 			}
1299108322fbScarlsonj 			if (lstat(tmp, &st) != 0) {
1300108322fbScarlsonj 				zerror(zlogp, B_TRUE, "cannot stat %s", tmp);
1301108322fbScarlsonj 				return (B_FALSE);
1302108322fbScarlsonj 			}
1303108322fbScarlsonj 			/*
1304108322fbScarlsonj 			 * Ignore any non-directories encountered.  These are
1305108322fbScarlsonj 			 * things that have been converted into symlinks
1306108322fbScarlsonj 			 * (/etc/fs and /etc/lib) and no longer need a lofs
1307108322fbScarlsonj 			 * fixup.
1308108322fbScarlsonj 			 */
1309108322fbScarlsonj 			if (!S_ISDIR(st.st_mode))
1310108322fbScarlsonj 				continue;
1311108322fbScarlsonj 		}
1312108322fbScarlsonj 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp,
1313108322fbScarlsonj 		    tmp) != 0) {
1314108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1315108322fbScarlsonj 			    *cpp);
1316108322fbScarlsonj 			return (B_FALSE);
1317108322fbScarlsonj 		}
1318108322fbScarlsonj 	}
1319108322fbScarlsonj 
1320108322fbScarlsonj 	/*
1321108322fbScarlsonj 	 * These are things with tmpfs mounted inside.
1322108322fbScarlsonj 	 */
1323108322fbScarlsonj 	for (cpp = tmpdirs; *cpp != NULL; cpp++) {
1324108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1325108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0 && errno != EEXIST) {
1326108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1327108322fbScarlsonj 			return (B_FALSE);
1328108322fbScarlsonj 		}
1329108322fbScarlsonj 		if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) {
1330108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp);
1331108322fbScarlsonj 			return (B_FALSE);
1332108322fbScarlsonj 		}
1333108322fbScarlsonj 	}
1334108322fbScarlsonj 	return (B_TRUE);
1335108322fbScarlsonj }
1336108322fbScarlsonj 
13377c478bd9Sstevel@tonic-gate static int
1338108322fbScarlsonj mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd)
13397c478bd9Sstevel@tonic-gate {
13407c478bd9Sstevel@tonic-gate 	char	rootpath[MAXPATHLEN];
13417c478bd9Sstevel@tonic-gate 	char	zonepath[MAXPATHLEN];
13427c478bd9Sstevel@tonic-gate 	int	num_fs = 0, i;
13437c478bd9Sstevel@tonic-gate 	struct zone_fstab fstab, *fs_ptr = NULL, *tmp_ptr;
13447c478bd9Sstevel@tonic-gate 	struct zone_fstab *fsp;
13457c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle = NULL;
13467c478bd9Sstevel@tonic-gate 	zone_state_t zstate;
13477c478bd9Sstevel@tonic-gate 
13487c478bd9Sstevel@tonic-gate 	if (zone_get_state(zone_name, &zstate) != Z_OK ||
1349108322fbScarlsonj 	    (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
13507c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
1351108322fbScarlsonj 		    "zone must be in '%s' or '%s' state to mount file-systems",
1352108322fbScarlsonj 		    zone_state_str(ZONE_STATE_READY),
1353108322fbScarlsonj 		    zone_state_str(ZONE_STATE_MOUNTED));
13547c478bd9Sstevel@tonic-gate 		goto bad;
13557c478bd9Sstevel@tonic-gate 	}
13567c478bd9Sstevel@tonic-gate 
13577c478bd9Sstevel@tonic-gate 	if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
13587c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone path");
13597c478bd9Sstevel@tonic-gate 		goto bad;
13607c478bd9Sstevel@tonic-gate 	}
13617c478bd9Sstevel@tonic-gate 
13627c478bd9Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
13637c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone root");
13647c478bd9Sstevel@tonic-gate 		goto bad;
13657c478bd9Sstevel@tonic-gate 	}
13667c478bd9Sstevel@tonic-gate 
13677c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
1368ffbafc53Scomay 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
13697c478bd9Sstevel@tonic-gate 		goto bad;
13707c478bd9Sstevel@tonic-gate 	}
13717c478bd9Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
13727c478bd9Sstevel@tonic-gate 	    zonecfg_setfsent(handle) != Z_OK) {
13737c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
13747c478bd9Sstevel@tonic-gate 		goto bad;
13757c478bd9Sstevel@tonic-gate 	}
13767c478bd9Sstevel@tonic-gate 
13777c478bd9Sstevel@tonic-gate 	/*
13787c478bd9Sstevel@tonic-gate 	 * Iterate through the rest of the filesystems, first the IPDs, then
13797c478bd9Sstevel@tonic-gate 	 * the general FSs.  Sort them all, then mount them in sorted order.
13807c478bd9Sstevel@tonic-gate 	 * This is to make sure the higher level directories (e.g., /usr)
13817c478bd9Sstevel@tonic-gate 	 * get mounted before any beneath them (e.g., /usr/local).
13827c478bd9Sstevel@tonic-gate 	 */
13837c478bd9Sstevel@tonic-gate 	if (zonecfg_setipdent(handle) != Z_OK) {
13847c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
13857c478bd9Sstevel@tonic-gate 		goto bad;
13867c478bd9Sstevel@tonic-gate 	}
13877c478bd9Sstevel@tonic-gate 	while (zonecfg_getipdent(handle, &fstab) == Z_OK) {
13887c478bd9Sstevel@tonic-gate 		num_fs++;
13897c478bd9Sstevel@tonic-gate 		if ((tmp_ptr = realloc(fs_ptr,
13907c478bd9Sstevel@tonic-gate 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
13917c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
13927c478bd9Sstevel@tonic-gate 			num_fs--;
13937c478bd9Sstevel@tonic-gate 			(void) zonecfg_endipdent(handle);
13947c478bd9Sstevel@tonic-gate 			goto bad;
13957c478bd9Sstevel@tonic-gate 		}
13967c478bd9Sstevel@tonic-gate 		fs_ptr = tmp_ptr;
13977c478bd9Sstevel@tonic-gate 		fsp = &fs_ptr[num_fs - 1];
13987c478bd9Sstevel@tonic-gate 		/*
13997c478bd9Sstevel@tonic-gate 		 * IPDs logically only have a mount point; all other properties
14007c478bd9Sstevel@tonic-gate 		 * are implied.
14017c478bd9Sstevel@tonic-gate 		 */
14027c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_dir,
14037c478bd9Sstevel@tonic-gate 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
14047c478bd9Sstevel@tonic-gate 		fsp->zone_fs_special[0] = '\0';
14057c478bd9Sstevel@tonic-gate 		fsp->zone_fs_raw[0] = '\0';
14067c478bd9Sstevel@tonic-gate 		fsp->zone_fs_type[0] = '\0';
14077c478bd9Sstevel@tonic-gate 		fsp->zone_fs_options = NULL;
14087c478bd9Sstevel@tonic-gate 	}
14097c478bd9Sstevel@tonic-gate 	(void) zonecfg_endipdent(handle);
14107c478bd9Sstevel@tonic-gate 
14117c478bd9Sstevel@tonic-gate 	if (zonecfg_setfsent(handle) != Z_OK) {
14127c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
14137c478bd9Sstevel@tonic-gate 		goto bad;
14147c478bd9Sstevel@tonic-gate 	}
14157c478bd9Sstevel@tonic-gate 	while (zonecfg_getfsent(handle, &fstab) == Z_OK) {
1416fa9e4066Sahrens 		/*
1417fa9e4066Sahrens 		 * ZFS filesystems will not be accessible under an alternate
1418fa9e4066Sahrens 		 * root, since the pool will not be known.  Ignore them in this
1419fa9e4066Sahrens 		 * case.
1420fa9e4066Sahrens 		 */
1421fa9e4066Sahrens 		if (mount_cmd && strcmp(fstab.zone_fs_type, MNTTYPE_ZFS) == 0)
1422fa9e4066Sahrens 			continue;
1423fa9e4066Sahrens 
14247c478bd9Sstevel@tonic-gate 		num_fs++;
14257c478bd9Sstevel@tonic-gate 		if ((tmp_ptr = realloc(fs_ptr,
14267c478bd9Sstevel@tonic-gate 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
14277c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
14287c478bd9Sstevel@tonic-gate 			num_fs--;
14297c478bd9Sstevel@tonic-gate 			(void) zonecfg_endfsent(handle);
14307c478bd9Sstevel@tonic-gate 			goto bad;
14317c478bd9Sstevel@tonic-gate 		}
14327c478bd9Sstevel@tonic-gate 		fs_ptr = tmp_ptr;
14337c478bd9Sstevel@tonic-gate 		fsp = &fs_ptr[num_fs - 1];
14347c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_dir,
14357c478bd9Sstevel@tonic-gate 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
14367c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_special, fstab.zone_fs_special,
14377c478bd9Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_special));
14387c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw,
14397c478bd9Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_raw));
14407c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type,
14417c478bd9Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_type));
14427c478bd9Sstevel@tonic-gate 		fsp->zone_fs_options = fstab.zone_fs_options;
14437c478bd9Sstevel@tonic-gate 	}
14447c478bd9Sstevel@tonic-gate 	(void) zonecfg_endfsent(handle);
14457c478bd9Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
14467c478bd9Sstevel@tonic-gate 	handle = NULL;
14477c478bd9Sstevel@tonic-gate 
1448108322fbScarlsonj 	/*
1449facf4a8dSllai1 	 * When we're mounting a zone for administration, / is the
1450facf4a8dSllai1 	 * scratch zone and dev is mounted at /dev.  The to-be-upgraded
1451facf4a8dSllai1 	 * zone is mounted at /a, and we set up that environment so that
1452facf4a8dSllai1 	 * process can access both the running system's utilities
1453facf4a8dSllai1 	 * and the to-be-modified zone's files.  The only exception
1454facf4a8dSllai1 	 * is the zone's /dev which isn't mounted at all, which is
1455facf4a8dSllai1 	 * the same as global zone installation where /a/dev and
1456facf4a8dSllai1 	 * /a/devices are not mounted.
1457f4368d3dSvp157776 	 * Zone mounting is done in three phases.
1458f4368d3dSvp157776 	 *   1) Create and populate lu directory (build_mounted_pre_var()).
1459f4368d3dSvp157776 	 *   2) Mount the required filesystems as per the zone configuration.
1460f4368d3dSvp157776 	 *   3) Set up the rest of the scratch zone environment
1461f4368d3dSvp157776 	 *	(build_mounted_post_var()).
1462108322fbScarlsonj 	 */
1463108322fbScarlsonj 	if (mount_cmd &&
1464f4368d3dSvp157776 	    !build_mounted_pre_var(zlogp,
1465f4368d3dSvp157776 	    rootpath, sizeof (rootpath), zonepath))
1466108322fbScarlsonj 		goto bad;
1467108322fbScarlsonj 
14687c478bd9Sstevel@tonic-gate 	qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
14697c478bd9Sstevel@tonic-gate 	for (i = 0; i < num_fs; i++) {
14707c478bd9Sstevel@tonic-gate 		if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0)
14717c478bd9Sstevel@tonic-gate 			goto bad;
14727c478bd9Sstevel@tonic-gate 	}
1473f4368d3dSvp157776 	if (mount_cmd &&
1474f4368d3dSvp157776 	    !build_mounted_post_var(zlogp, rootpath, zonepath))
1475f4368d3dSvp157776 		goto bad;
147645916cd2Sjpk 
147745916cd2Sjpk 	/*
147845916cd2Sjpk 	 * For Trusted Extensions cross-mount each lower level /export/home
147945916cd2Sjpk 	 */
148048451833Scarlsonj 	if (!mount_cmd && tsol_mounts(zlogp, zone_name, rootpath) != 0)
148145916cd2Sjpk 		goto bad;
148245916cd2Sjpk 
14837c478bd9Sstevel@tonic-gate 	free_fs_data(fs_ptr, num_fs);
14847c478bd9Sstevel@tonic-gate 
14857c478bd9Sstevel@tonic-gate 	/*
14867c478bd9Sstevel@tonic-gate 	 * Everything looks fine.
14877c478bd9Sstevel@tonic-gate 	 */
14887c478bd9Sstevel@tonic-gate 	return (0);
14897c478bd9Sstevel@tonic-gate 
14907c478bd9Sstevel@tonic-gate bad:
14917c478bd9Sstevel@tonic-gate 	if (handle != NULL)
14927c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
14937c478bd9Sstevel@tonic-gate 	free_fs_data(fs_ptr, num_fs);
14947c478bd9Sstevel@tonic-gate 	return (-1);
14957c478bd9Sstevel@tonic-gate }
14967c478bd9Sstevel@tonic-gate 
/*
 * Convert a decimal prefix length string into a netmask.
 *
 * prefixstr	 NUL-terminated decimal prefix length (e.g. "24").
 * maxprefixlen	 largest acceptable prefix length, in bits.
 * maskstr	 output buffer.  Whole 0xFF bytes are stored for each full
 *		 octet of prefix and the remaining high-order bits are OR-ed
 *		 into the next byte; bytes past the prefix are not written,
 *		 so the buffer must already be zeroed for a clean mask.
 *
 * Returns 0 on success, or 1 if prefixstr is not a complete decimal number
 * or lies outside 0..maxprefixlen (maskstr is untouched in that case).
 *
 * caller makes sure neither parameter is NULL
 */
static int
addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr)
{
	char *endp;
	long prefixlen;

	/*
	 * strtol() rather than atoi(): empty strings, non-numeric text,
	 * trailing garbage and out-of-range values are all rejected instead
	 * of being silently read as a (possibly zero) prefix length.
	 */
	errno = 0;
	prefixlen = strtol(prefixstr, &endp, 10);
	if (endp == prefixstr || *endp != '\0' || errno != 0)
		return (1);
	if (prefixlen < 0 || prefixlen > maxprefixlen)
		return (1);

	/* full octets */
	for (; prefixlen >= 8; prefixlen -= 8)
		*maskstr++ = 0xFF;

	/* leftover 1..7 bits occupy the high-order end of the next octet */
	if (prefixlen > 0)
		*maskstr |= (uchar_t)(0xFF << (8 - (int)prefixlen));

	return (0);
}
15177c478bd9Sstevel@tonic-gate 
15187c478bd9Sstevel@tonic-gate /*
15197c478bd9Sstevel@tonic-gate  * Tear down all interfaces belonging to the given zone.  This should
15207c478bd9Sstevel@tonic-gate  * be called with the zone in a state other than "running", so that
15217c478bd9Sstevel@tonic-gate  * interfaces can't be assigned to the zone after this returns.
15227c478bd9Sstevel@tonic-gate  *
15237c478bd9Sstevel@tonic-gate  * If anything goes wrong, log an error message and return an error.
15247c478bd9Sstevel@tonic-gate  */
15257c478bd9Sstevel@tonic-gate static int
15267c478bd9Sstevel@tonic-gate unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id)
15277c478bd9Sstevel@tonic-gate {
15287c478bd9Sstevel@tonic-gate 	struct lifnum lifn;
15297c478bd9Sstevel@tonic-gate 	struct lifconf lifc;
15307c478bd9Sstevel@tonic-gate 	struct lifreq *lifrp, lifrl;
15317c478bd9Sstevel@tonic-gate 	int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES;
15327c478bd9Sstevel@tonic-gate 	int num_ifs, s, i, ret_code = 0;
15337c478bd9Sstevel@tonic-gate 	uint_t bufsize;
15347c478bd9Sstevel@tonic-gate 	char *buf = NULL;
15357c478bd9Sstevel@tonic-gate 
15367c478bd9Sstevel@tonic-gate 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
15377c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get socket");
15387c478bd9Sstevel@tonic-gate 		ret_code = -1;
15397c478bd9Sstevel@tonic-gate 		goto bad;
15407c478bd9Sstevel@tonic-gate 	}
15417c478bd9Sstevel@tonic-gate 	lifn.lifn_family = AF_UNSPEC;
15427c478bd9Sstevel@tonic-gate 	lifn.lifn_flags = (int)lifc_flags;
15437c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) {
15447c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
15457c478bd9Sstevel@tonic-gate 		    "could not determine number of interfaces");
15467c478bd9Sstevel@tonic-gate 		ret_code = -1;
15477c478bd9Sstevel@tonic-gate 		goto bad;
15487c478bd9Sstevel@tonic-gate 	}
15497c478bd9Sstevel@tonic-gate 	num_ifs = lifn.lifn_count;
15507c478bd9Sstevel@tonic-gate 	bufsize = num_ifs * sizeof (struct lifreq);
15517c478bd9Sstevel@tonic-gate 	if ((buf = malloc(bufsize)) == NULL) {
15527c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
15537c478bd9Sstevel@tonic-gate 		ret_code = -1;
15547c478bd9Sstevel@tonic-gate 		goto bad;
15557c478bd9Sstevel@tonic-gate 	}
15567c478bd9Sstevel@tonic-gate 	lifc.lifc_family = AF_UNSPEC;
15577c478bd9Sstevel@tonic-gate 	lifc.lifc_flags = (int)lifc_flags;
15587c478bd9Sstevel@tonic-gate 	lifc.lifc_len = bufsize;
15597c478bd9Sstevel@tonic-gate 	lifc.lifc_buf = buf;
15607c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) {
15617c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get configured interfaces");
15627c478bd9Sstevel@tonic-gate 		ret_code = -1;
15637c478bd9Sstevel@tonic-gate 		goto bad;
15647c478bd9Sstevel@tonic-gate 	}
15657c478bd9Sstevel@tonic-gate 	lifrp = lifc.lifc_req;
15667c478bd9Sstevel@tonic-gate 	for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) {
15677c478bd9Sstevel@tonic-gate 		(void) close(s);
15687c478bd9Sstevel@tonic-gate 		if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) <
15697c478bd9Sstevel@tonic-gate 		    0) {
15707c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get socket",
15717c478bd9Sstevel@tonic-gate 			    lifrl.lifr_name);
15727c478bd9Sstevel@tonic-gate 			ret_code = -1;
15737c478bd9Sstevel@tonic-gate 			continue;
15747c478bd9Sstevel@tonic-gate 		}
15757c478bd9Sstevel@tonic-gate 		(void) memset(&lifrl, 0, sizeof (lifrl));
15767c478bd9Sstevel@tonic-gate 		(void) strncpy(lifrl.lifr_name, lifrp->lifr_name,
15777c478bd9Sstevel@tonic-gate 		    sizeof (lifrl.lifr_name));
15787c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) {
15797c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
15807c478bd9Sstevel@tonic-gate 			    "%s: could not determine zone interface belongs to",
15817c478bd9Sstevel@tonic-gate 			    lifrl.lifr_name);
15827c478bd9Sstevel@tonic-gate 			ret_code = -1;
15837c478bd9Sstevel@tonic-gate 			continue;
15847c478bd9Sstevel@tonic-gate 		}
15857c478bd9Sstevel@tonic-gate 		if (lifrl.lifr_zoneid == zone_id) {
15867c478bd9Sstevel@tonic-gate 			if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) {
15877c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_TRUE,
15887c478bd9Sstevel@tonic-gate 				    "%s: could not remove interface",
15897c478bd9Sstevel@tonic-gate 				    lifrl.lifr_name);
15907c478bd9Sstevel@tonic-gate 				ret_code = -1;
15917c478bd9Sstevel@tonic-gate 				continue;
15927c478bd9Sstevel@tonic-gate 			}
15937c478bd9Sstevel@tonic-gate 		}
15947c478bd9Sstevel@tonic-gate 	}
15957c478bd9Sstevel@tonic-gate bad:
15967c478bd9Sstevel@tonic-gate 	if (s > 0)
15977c478bd9Sstevel@tonic-gate 		(void) close(s);
15987c478bd9Sstevel@tonic-gate 	if (buf)
15997c478bd9Sstevel@tonic-gate 		free(buf);
16007c478bd9Sstevel@tonic-gate 	return (ret_code);
16017c478bd9Sstevel@tonic-gate }
16027c478bd9Sstevel@tonic-gate 
16037c478bd9Sstevel@tonic-gate static union	sockunion {
16047c478bd9Sstevel@tonic-gate 	struct	sockaddr sa;
16057c478bd9Sstevel@tonic-gate 	struct	sockaddr_in sin;
16067c478bd9Sstevel@tonic-gate 	struct	sockaddr_dl sdl;
16077c478bd9Sstevel@tonic-gate 	struct	sockaddr_in6 sin6;
16087c478bd9Sstevel@tonic-gate } so_dst, so_ifp;
16097c478bd9Sstevel@tonic-gate 
16107c478bd9Sstevel@tonic-gate static struct {
16117c478bd9Sstevel@tonic-gate 	struct	rt_msghdr hdr;
16127c478bd9Sstevel@tonic-gate 	char	space[512];
16137c478bd9Sstevel@tonic-gate } rtmsg;
16147c478bd9Sstevel@tonic-gate 
16157c478bd9Sstevel@tonic-gate static int
16167c478bd9Sstevel@tonic-gate salen(struct sockaddr *sa)
16177c478bd9Sstevel@tonic-gate {
16187c478bd9Sstevel@tonic-gate 	switch (sa->sa_family) {
16197c478bd9Sstevel@tonic-gate 	case AF_INET:
16207c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr_in));
16217c478bd9Sstevel@tonic-gate 	case AF_LINK:
16227c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr_dl));
16237c478bd9Sstevel@tonic-gate 	case AF_INET6:
16247c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr_in6));
16257c478bd9Sstevel@tonic-gate 	default:
16267c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr));
16277c478bd9Sstevel@tonic-gate 	}
16287c478bd9Sstevel@tonic-gate }
16297c478bd9Sstevel@tonic-gate 
/*
 * Round a positive length up to the next multiple of sizeof (long); zero or
 * negative lengths yield sizeof (long).  Note that 'a' is evaluated more
 * than once.
 */
#define	ROUNDUP_LONG(a) \
	((a) > 0 ? \
	    (((a) + sizeof (long) - 1) & ~(sizeof (long) - 1)) : \
	    sizeof (long))
16327c478bd9Sstevel@tonic-gate 
16337c478bd9Sstevel@tonic-gate /*
16347c478bd9Sstevel@tonic-gate  * Look up which zone is using a given IP address.  The address in question
16357c478bd9Sstevel@tonic-gate  * is expected to have been stuffed into the structure to which lifr points
16367c478bd9Sstevel@tonic-gate  * via a previous SIOCGLIFADDR ioctl().
16377c478bd9Sstevel@tonic-gate  *
16387c478bd9Sstevel@tonic-gate  * This is done using black router socket magic.
16397c478bd9Sstevel@tonic-gate  *
16407c478bd9Sstevel@tonic-gate  * Return the name of the zone on success or NULL on failure.
16417c478bd9Sstevel@tonic-gate  *
16427c478bd9Sstevel@tonic-gate  * This is a lot of code for a simple task; a new ioctl request to take care
16437c478bd9Sstevel@tonic-gate  * of this might be a useful RFE.
16447c478bd9Sstevel@tonic-gate  */
16457c478bd9Sstevel@tonic-gate 
16467c478bd9Sstevel@tonic-gate static char *
16477c478bd9Sstevel@tonic-gate who_is_using(zlog_t *zlogp, struct lifreq *lifr)
16487c478bd9Sstevel@tonic-gate {
16497c478bd9Sstevel@tonic-gate 	static char answer[ZONENAME_MAX];
16507c478bd9Sstevel@tonic-gate 	pid_t pid;
16517c478bd9Sstevel@tonic-gate 	int s, rlen, l, i;
16527c478bd9Sstevel@tonic-gate 	char *cp = rtmsg.space;
16537c478bd9Sstevel@tonic-gate 	struct sockaddr_dl *ifp = NULL;
16547c478bd9Sstevel@tonic-gate 	struct sockaddr *sa;
16557c478bd9Sstevel@tonic-gate 	char save_if_name[LIFNAMSIZ];
16567c478bd9Sstevel@tonic-gate 
16577c478bd9Sstevel@tonic-gate 	answer[0] = '\0';
16587c478bd9Sstevel@tonic-gate 
16597c478bd9Sstevel@tonic-gate 	pid = getpid();
16607c478bd9Sstevel@tonic-gate 	if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) {
16617c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get routing socket");
16627c478bd9Sstevel@tonic-gate 		return (NULL);
16637c478bd9Sstevel@tonic-gate 	}
16647c478bd9Sstevel@tonic-gate 
16657c478bd9Sstevel@tonic-gate 	if (lifr->lifr_addr.ss_family == AF_INET) {
16667c478bd9Sstevel@tonic-gate 		struct sockaddr_in *sin4;
16677c478bd9Sstevel@tonic-gate 
16687c478bd9Sstevel@tonic-gate 		so_dst.sa.sa_family = AF_INET;
16697c478bd9Sstevel@tonic-gate 		sin4 = (struct sockaddr_in *)&lifr->lifr_addr;
16707c478bd9Sstevel@tonic-gate 		so_dst.sin.sin_addr = sin4->sin_addr;
16717c478bd9Sstevel@tonic-gate 	} else {
16727c478bd9Sstevel@tonic-gate 		struct sockaddr_in6 *sin6;
16737c478bd9Sstevel@tonic-gate 
16747c478bd9Sstevel@tonic-gate 		so_dst.sa.sa_family = AF_INET6;
16757c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr;
16767c478bd9Sstevel@tonic-gate 		so_dst.sin6.sin6_addr = sin6->sin6_addr;
16777c478bd9Sstevel@tonic-gate 	}
16787c478bd9Sstevel@tonic-gate 
16797c478bd9Sstevel@tonic-gate 	so_ifp.sa.sa_family = AF_LINK;
16807c478bd9Sstevel@tonic-gate 
16817c478bd9Sstevel@tonic-gate 	(void) memset(&rtmsg, 0, sizeof (rtmsg));
16827c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_type = RTM_GET;
16837c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST;
16847c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_version = RTM_VERSION;
16857c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_seq = ++rts_seqno;
16867c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST;
16877c478bd9Sstevel@tonic-gate 
16887c478bd9Sstevel@tonic-gate 	l = ROUNDUP_LONG(salen(&so_dst.sa));
16897c478bd9Sstevel@tonic-gate 	(void) memmove(cp, &(so_dst), l);
16907c478bd9Sstevel@tonic-gate 	cp += l;
16917c478bd9Sstevel@tonic-gate 	l = ROUNDUP_LONG(salen(&so_ifp.sa));
16927c478bd9Sstevel@tonic-gate 	(void) memmove(cp, &(so_ifp), l);
16937c478bd9Sstevel@tonic-gate 	cp += l;
16947c478bd9Sstevel@tonic-gate 
16957c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg;
16967c478bd9Sstevel@tonic-gate 
16977c478bd9Sstevel@tonic-gate 	if ((rlen = write(s, &rtmsg, l)) < 0) {
16987c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "writing to routing socket");
16997c478bd9Sstevel@tonic-gate 		return (NULL);
17007c478bd9Sstevel@tonic-gate 	} else if (rlen < (int)rtmsg.hdr.rtm_msglen) {
17017c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
17027c478bd9Sstevel@tonic-gate 		    "write to routing socket got only %d for len\n", rlen);
17037c478bd9Sstevel@tonic-gate 		return (NULL);
17047c478bd9Sstevel@tonic-gate 	}
17057c478bd9Sstevel@tonic-gate 	do {
17067c478bd9Sstevel@tonic-gate 		l = read(s, &rtmsg, sizeof (rtmsg));
17077c478bd9Sstevel@tonic-gate 	} while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno ||
17087c478bd9Sstevel@tonic-gate 	    rtmsg.hdr.rtm_pid != pid));
17097c478bd9Sstevel@tonic-gate 	if (l < 0) {
17107c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "reading from routing socket");
17117c478bd9Sstevel@tonic-gate 		return (NULL);
17127c478bd9Sstevel@tonic-gate 	}
17137c478bd9Sstevel@tonic-gate 
17147c478bd9Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_version != RTM_VERSION) {
17157c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
17167c478bd9Sstevel@tonic-gate 		    "routing message version %d not understood",
17177c478bd9Sstevel@tonic-gate 		    rtmsg.hdr.rtm_version);
17187c478bd9Sstevel@tonic-gate 		return (NULL);
17197c478bd9Sstevel@tonic-gate 	}
17207c478bd9Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_msglen != (ushort_t)l) {
17217c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "message length mismatch, "
17227c478bd9Sstevel@tonic-gate 		    "expected %d bytes, returned %d bytes",
17237c478bd9Sstevel@tonic-gate 		    rtmsg.hdr.rtm_msglen, l);
17247c478bd9Sstevel@tonic-gate 		return (NULL);
17257c478bd9Sstevel@tonic-gate 	}
17267c478bd9Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_errno != 0)  {
17277c478bd9Sstevel@tonic-gate 		errno = rtmsg.hdr.rtm_errno;
17287c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "RTM_GET routing socket message");
17297c478bd9Sstevel@tonic-gate 		return (NULL);
17307c478bd9Sstevel@tonic-gate 	}
17317c478bd9Sstevel@tonic-gate 	if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) {
17327c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "interface not found");
17337c478bd9Sstevel@tonic-gate 		return (NULL);
17347c478bd9Sstevel@tonic-gate 	}
17357c478bd9Sstevel@tonic-gate 	cp = ((char *)(&rtmsg.hdr + 1));
17367c478bd9Sstevel@tonic-gate 	for (i = 1; i != 0; i <<= 1) {
17377c478bd9Sstevel@tonic-gate 		/* LINTED E_BAD_PTR_CAST_ALIGN */
17387c478bd9Sstevel@tonic-gate 		sa = (struct sockaddr *)cp;
17397c478bd9Sstevel@tonic-gate 		if (i != RTA_IFP) {
17407c478bd9Sstevel@tonic-gate 			if ((i & rtmsg.hdr.rtm_addrs) != 0)
17417c478bd9Sstevel@tonic-gate 				cp += ROUNDUP_LONG(salen(sa));
17427c478bd9Sstevel@tonic-gate 			continue;
17437c478bd9Sstevel@tonic-gate 		}
17447c478bd9Sstevel@tonic-gate 		if (sa->sa_family == AF_LINK &&
17457c478bd9Sstevel@tonic-gate 		    ((struct sockaddr_dl *)sa)->sdl_nlen != 0)
17467c478bd9Sstevel@tonic-gate 			ifp = (struct sockaddr_dl *)sa;
17477c478bd9Sstevel@tonic-gate 		break;
17487c478bd9Sstevel@tonic-gate 	}
17497c478bd9Sstevel@tonic-gate 	if (ifp == NULL) {
17507c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "interface could not be determined");
17517c478bd9Sstevel@tonic-gate 		return (NULL);
17527c478bd9Sstevel@tonic-gate 	}
17537c478bd9Sstevel@tonic-gate 
17547c478bd9Sstevel@tonic-gate 	/*
17557c478bd9Sstevel@tonic-gate 	 * We need to set the I/F name to what we got above, then do the
17567c478bd9Sstevel@tonic-gate 	 * appropriate ioctl to get its zone name.  But lifr->lifr_name is
17577c478bd9Sstevel@tonic-gate 	 * used by the calling function to do a REMOVEIF, so if we leave the
17587c478bd9Sstevel@tonic-gate 	 * "good" zone's I/F name in place, *that* I/F will be removed instead
17597c478bd9Sstevel@tonic-gate 	 * of the bad one.  So we save the old (bad) I/F name before over-
17607c478bd9Sstevel@tonic-gate 	 * writing it and doing the ioctl, then restore it after the ioctl.
17617c478bd9Sstevel@tonic-gate 	 */
17627c478bd9Sstevel@tonic-gate 	(void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name));
17637c478bd9Sstevel@tonic-gate 	(void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen);
17647c478bd9Sstevel@tonic-gate 	lifr->lifr_name[ifp->sdl_nlen] = '\0';
17657c478bd9Sstevel@tonic-gate 	i = ioctl(s, SIOCGLIFZONE, lifr);
17667c478bd9Sstevel@tonic-gate 	(void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name));
17677c478bd9Sstevel@tonic-gate 	if (i < 0) {
17687c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
17697c478bd9Sstevel@tonic-gate 		    "%s: could not determine the zone interface belongs to",
17707c478bd9Sstevel@tonic-gate 		    lifr->lifr_name);
17717c478bd9Sstevel@tonic-gate 		return (NULL);
17727c478bd9Sstevel@tonic-gate 	}
17737c478bd9Sstevel@tonic-gate 	if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0)
17747c478bd9Sstevel@tonic-gate 		(void) snprintf(answer, sizeof (answer), "%d",
17757c478bd9Sstevel@tonic-gate 		    lifr->lifr_zoneid);
17767c478bd9Sstevel@tonic-gate 
17777c478bd9Sstevel@tonic-gate 	if (strlen(answer) > 0)
17787c478bd9Sstevel@tonic-gate 		return (answer);
17797c478bd9Sstevel@tonic-gate 	return (NULL);
17807c478bd9Sstevel@tonic-gate }
17817c478bd9Sstevel@tonic-gate 
17827c478bd9Sstevel@tonic-gate typedef struct mcast_rtmsg_s {
17837c478bd9Sstevel@tonic-gate 	struct rt_msghdr	m_rtm;
17847c478bd9Sstevel@tonic-gate 	union {
17857c478bd9Sstevel@tonic-gate 		struct {
17867c478bd9Sstevel@tonic-gate 			struct sockaddr_in	m_dst;
17877c478bd9Sstevel@tonic-gate 			struct sockaddr_in	m_gw;
17887c478bd9Sstevel@tonic-gate 			struct sockaddr_in	m_netmask;
17897c478bd9Sstevel@tonic-gate 		} m_v4;
17907c478bd9Sstevel@tonic-gate 		struct {
17917c478bd9Sstevel@tonic-gate 			struct sockaddr_in6	m_dst;
17927c478bd9Sstevel@tonic-gate 			struct sockaddr_in6	m_gw;
17937c478bd9Sstevel@tonic-gate 			struct sockaddr_in6	m_netmask;
17947c478bd9Sstevel@tonic-gate 		} m_v6;
17957c478bd9Sstevel@tonic-gate 	} m_u;
17967c478bd9Sstevel@tonic-gate } mcast_rtmsg_t;
17977c478bd9Sstevel@tonic-gate #define	m_dst4		m_u.m_v4.m_dst
17987c478bd9Sstevel@tonic-gate #define	m_dst6		m_u.m_v6.m_dst
17997c478bd9Sstevel@tonic-gate #define	m_gw4		m_u.m_v4.m_gw
18007c478bd9Sstevel@tonic-gate #define	m_gw6		m_u.m_v6.m_gw
18017c478bd9Sstevel@tonic-gate #define	m_netmask4	m_u.m_v4.m_netmask
18027c478bd9Sstevel@tonic-gate #define	m_netmask6	m_u.m_v6.m_netmask
18037c478bd9Sstevel@tonic-gate 
18047c478bd9Sstevel@tonic-gate /*
18057c478bd9Sstevel@tonic-gate  * Configures a single interface: a new virtual interface is added, based on
18067c478bd9Sstevel@tonic-gate  * the physical interface nwiftabptr->zone_nwif_physical, with the address
18077c478bd9Sstevel@tonic-gate  * specified in nwiftabptr->zone_nwif_address, for zone zone_id.  Note that
18087c478bd9Sstevel@tonic-gate  * the "address" can be an IPv6 address (with a /prefixlength required), an
18097c478bd9Sstevel@tonic-gate  * IPv4 address (with a /prefixlength optional), or a name; for the latter,
18107c478bd9Sstevel@tonic-gate  * an IPv4 name-to-address resolution will be attempted.
18117c478bd9Sstevel@tonic-gate  *
18127c478bd9Sstevel@tonic-gate  * A default interface route for multicast is created on the first IPv4 and
18137c478bd9Sstevel@tonic-gate  * IPv6 interfaces (that have the IFF_MULTICAST flag set), respectively.
18147c478bd9Sstevel@tonic-gate  * This should really be done in the init scripts if we ever allow zones to
18157c478bd9Sstevel@tonic-gate  * modify the routing tables.
18167c478bd9Sstevel@tonic-gate  *
18177c478bd9Sstevel@tonic-gate  * If anything goes wrong, we log an detailed error message, attempt to tear
18187c478bd9Sstevel@tonic-gate  * down whatever we set up and return an error.
18197c478bd9Sstevel@tonic-gate  */
18207c478bd9Sstevel@tonic-gate static int
18217c478bd9Sstevel@tonic-gate configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
18227c478bd9Sstevel@tonic-gate     struct zone_nwiftab *nwiftabptr, boolean_t *mcast_rt_v4_setp,
18237c478bd9Sstevel@tonic-gate     boolean_t *mcast_rt_v6_setp)
18247c478bd9Sstevel@tonic-gate {
18257c478bd9Sstevel@tonic-gate 	struct lifreq lifr;
18267c478bd9Sstevel@tonic-gate 	struct sockaddr_in netmask4;
18277c478bd9Sstevel@tonic-gate 	struct sockaddr_in6 netmask6;
18287c478bd9Sstevel@tonic-gate 	struct in_addr in4;
18297c478bd9Sstevel@tonic-gate 	struct in6_addr in6;
18307c478bd9Sstevel@tonic-gate 	sa_family_t af;
18317c478bd9Sstevel@tonic-gate 	char *slashp = strchr(nwiftabptr->zone_nwif_address, '/');
18327c478bd9Sstevel@tonic-gate 	mcast_rtmsg_t mcast_rtmsg;
18337c478bd9Sstevel@tonic-gate 	int s;
18347c478bd9Sstevel@tonic-gate 	int rs;
18357c478bd9Sstevel@tonic-gate 	int rlen;
18367c478bd9Sstevel@tonic-gate 	boolean_t got_netmask = B_FALSE;
18377c478bd9Sstevel@tonic-gate 	char addrstr4[INET_ADDRSTRLEN];
18387c478bd9Sstevel@tonic-gate 	int res;
18397c478bd9Sstevel@tonic-gate 
18407c478bd9Sstevel@tonic-gate 	res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr);
18417c478bd9Sstevel@tonic-gate 	if (res != Z_OK) {
18427c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res),
18437c478bd9Sstevel@tonic-gate 		    nwiftabptr->zone_nwif_address);
18447c478bd9Sstevel@tonic-gate 		return (-1);
18457c478bd9Sstevel@tonic-gate 	}
18467c478bd9Sstevel@tonic-gate 	af = lifr.lifr_addr.ss_family;
18477c478bd9Sstevel@tonic-gate 	if (af == AF_INET)
18487c478bd9Sstevel@tonic-gate 		in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr;
18497c478bd9Sstevel@tonic-gate 	else
18507c478bd9Sstevel@tonic-gate 		in6 = ((struct sockaddr_in6 *)(&lifr.lifr_addr))->sin6_addr;
18517c478bd9Sstevel@tonic-gate 
18527c478bd9Sstevel@tonic-gate 	if ((s = socket(af, SOCK_DGRAM, 0)) < 0) {
18537c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get socket");
18547c478bd9Sstevel@tonic-gate 		return (-1);
18557c478bd9Sstevel@tonic-gate 	}
18567c478bd9Sstevel@tonic-gate 
18577c478bd9Sstevel@tonic-gate 	(void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical,
18587c478bd9Sstevel@tonic-gate 	    sizeof (lifr.lifr_name));
18597c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) {
186022321485Svp157776 		/*
186122321485Svp157776 		 * Here, we know that the interface can't be brought up.
186222321485Svp157776 		 * A similar warning message was already printed out to
186322321485Svp157776 		 * the console by zoneadm(1M) so instead we log the
186422321485Svp157776 		 * message to syslog and continue.
186522321485Svp157776 		 */
186622321485Svp157776 		zerror(&logsys, B_TRUE, "WARNING: skipping interface "
186722321485Svp157776 		    "'%s' which may not be present/plumbed in the "
186822321485Svp157776 		    "global zone.", lifr.lifr_name);
18697c478bd9Sstevel@tonic-gate 		(void) close(s);
187022321485Svp157776 		return (Z_OK);
18717c478bd9Sstevel@tonic-gate 	}
18727c478bd9Sstevel@tonic-gate 
18737c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
18747c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
18757c478bd9Sstevel@tonic-gate 		    "%s: could not set IP address to %s",
18767c478bd9Sstevel@tonic-gate 		    lifr.lifr_name, nwiftabptr->zone_nwif_address);
18777c478bd9Sstevel@tonic-gate 		goto bad;
18787c478bd9Sstevel@tonic-gate 	}
18797c478bd9Sstevel@tonic-gate 
18807c478bd9Sstevel@tonic-gate 	/* Preserve literal IPv4 address for later potential printing. */
18817c478bd9Sstevel@tonic-gate 	if (af == AF_INET)
18827c478bd9Sstevel@tonic-gate 		(void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN);
18837c478bd9Sstevel@tonic-gate 
18847c478bd9Sstevel@tonic-gate 	lifr.lifr_zoneid = zone_id;
18857c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) {
18867c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s: could not place interface into zone",
18877c478bd9Sstevel@tonic-gate 		    lifr.lifr_name);
18887c478bd9Sstevel@tonic-gate 		goto bad;
18897c478bd9Sstevel@tonic-gate 	}
18907c478bd9Sstevel@tonic-gate 
18917c478bd9Sstevel@tonic-gate 	if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) {
18927c478bd9Sstevel@tonic-gate 		got_netmask = B_TRUE;	/* default setting will be correct */
18937c478bd9Sstevel@tonic-gate 	} else {
18947c478bd9Sstevel@tonic-gate 		if (af == AF_INET) {
18957c478bd9Sstevel@tonic-gate 			/*
18967c478bd9Sstevel@tonic-gate 			 * The IPv4 netmask can be determined either
18977c478bd9Sstevel@tonic-gate 			 * directly if a prefix length was supplied with
18987c478bd9Sstevel@tonic-gate 			 * the address or via the netmasks database.  Not
18997c478bd9Sstevel@tonic-gate 			 * being able to determine it is a common failure,
19007c478bd9Sstevel@tonic-gate 			 * but it often is not fatal to operation of the
19017c478bd9Sstevel@tonic-gate 			 * interface.  In that case, a warning will be
19027c478bd9Sstevel@tonic-gate 			 * printed after the rest of the interface's
19037c478bd9Sstevel@tonic-gate 			 * parameters have been configured.
19047c478bd9Sstevel@tonic-gate 			 */
19057c478bd9Sstevel@tonic-gate 			(void) memset(&netmask4, 0, sizeof (netmask4));
19067c478bd9Sstevel@tonic-gate 			if (slashp != NULL) {
19077c478bd9Sstevel@tonic-gate 				if (addr2netmask(slashp + 1, V4_ADDR_LEN,
19087c478bd9Sstevel@tonic-gate 				    (uchar_t *)&netmask4.sin_addr) != 0) {
19097c478bd9Sstevel@tonic-gate 					*slashp = '/';
19107c478bd9Sstevel@tonic-gate 					zerror(zlogp, B_FALSE,
19117c478bd9Sstevel@tonic-gate 					    "%s: invalid prefix length in %s",
19127c478bd9Sstevel@tonic-gate 					    lifr.lifr_name,
19137c478bd9Sstevel@tonic-gate 					    nwiftabptr->zone_nwif_address);
19147c478bd9Sstevel@tonic-gate 					goto bad;
19157c478bd9Sstevel@tonic-gate 				}
19167c478bd9Sstevel@tonic-gate 				got_netmask = B_TRUE;
19177c478bd9Sstevel@tonic-gate 			} else if (getnetmaskbyaddr(in4,
19187c478bd9Sstevel@tonic-gate 			    &netmask4.sin_addr) == 0) {
19197c478bd9Sstevel@tonic-gate 				got_netmask = B_TRUE;
19207c478bd9Sstevel@tonic-gate 			}
19217c478bd9Sstevel@tonic-gate 			if (got_netmask) {
19227c478bd9Sstevel@tonic-gate 				netmask4.sin_family = af;
19237c478bd9Sstevel@tonic-gate 				(void) memcpy(&lifr.lifr_addr, &netmask4,
19247c478bd9Sstevel@tonic-gate 				    sizeof (netmask4));
19257c478bd9Sstevel@tonic-gate 			}
19267c478bd9Sstevel@tonic-gate 		} else {
19277c478bd9Sstevel@tonic-gate 			(void) memset(&netmask6, 0, sizeof (netmask6));
19287c478bd9Sstevel@tonic-gate 			if (addr2netmask(slashp + 1, V6_ADDR_LEN,
19297c478bd9Sstevel@tonic-gate 			    (uchar_t *)&netmask6.sin6_addr) != 0) {
19307c478bd9Sstevel@tonic-gate 				*slashp = '/';
19317c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
19327c478bd9Sstevel@tonic-gate 				    "%s: invalid prefix length in %s",
19337c478bd9Sstevel@tonic-gate 				    lifr.lifr_name,
19347c478bd9Sstevel@tonic-gate 				    nwiftabptr->zone_nwif_address);
19357c478bd9Sstevel@tonic-gate 				goto bad;
19367c478bd9Sstevel@tonic-gate 			}
19377c478bd9Sstevel@tonic-gate 			got_netmask = B_TRUE;
19387c478bd9Sstevel@tonic-gate 			netmask6.sin6_family = af;
19397c478bd9Sstevel@tonic-gate 			(void) memcpy(&lifr.lifr_addr, &netmask6,
19407c478bd9Sstevel@tonic-gate 			    sizeof (netmask6));
19417c478bd9Sstevel@tonic-gate 		}
19427c478bd9Sstevel@tonic-gate 		if (got_netmask &&
19437c478bd9Sstevel@tonic-gate 		    ioctl(s, SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) {
19447c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not set netmask",
19457c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19467c478bd9Sstevel@tonic-gate 			goto bad;
19477c478bd9Sstevel@tonic-gate 		}
19487c478bd9Sstevel@tonic-gate 
19497c478bd9Sstevel@tonic-gate 		/*
19507c478bd9Sstevel@tonic-gate 		 * This doesn't set the broadcast address at all. Rather, it
19517c478bd9Sstevel@tonic-gate 		 * gets, then sets the interface's address, relying on the fact
19527c478bd9Sstevel@tonic-gate 		 * that resetting the address will reset the broadcast address.
19537c478bd9Sstevel@tonic-gate 		 */
19547c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) {
19557c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get address",
19567c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19577c478bd9Sstevel@tonic-gate 			goto bad;
19587c478bd9Sstevel@tonic-gate 		}
19597c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
19607c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19617c478bd9Sstevel@tonic-gate 			    "%s: could not reset broadcast address",
19627c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19637c478bd9Sstevel@tonic-gate 			goto bad;
19647c478bd9Sstevel@tonic-gate 		}
19657c478bd9Sstevel@tonic-gate 	}
19667c478bd9Sstevel@tonic-gate 
19677c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
19687c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s: could not get flags",
19697c478bd9Sstevel@tonic-gate 		    lifr.lifr_name);
19707c478bd9Sstevel@tonic-gate 		goto bad;
19717c478bd9Sstevel@tonic-gate 	}
19727c478bd9Sstevel@tonic-gate 	lifr.lifr_flags |= IFF_UP;
19737c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
19747c478bd9Sstevel@tonic-gate 		int save_errno = errno;
19757c478bd9Sstevel@tonic-gate 		char *zone_using;
19767c478bd9Sstevel@tonic-gate 
19777c478bd9Sstevel@tonic-gate 		/*
19787c478bd9Sstevel@tonic-gate 		 * If we failed with something other than EADDRNOTAVAIL,
19797c478bd9Sstevel@tonic-gate 		 * then skip to the end.  Otherwise, look up our address,
19807c478bd9Sstevel@tonic-gate 		 * then call a function to determine which zone is already
19817c478bd9Sstevel@tonic-gate 		 * using that address.
19827c478bd9Sstevel@tonic-gate 		 */
19837c478bd9Sstevel@tonic-gate 		if (errno != EADDRNOTAVAIL) {
19847c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19857c478bd9Sstevel@tonic-gate 			    "%s: could not bring interface up", lifr.lifr_name);
19867c478bd9Sstevel@tonic-gate 			goto bad;
19877c478bd9Sstevel@tonic-gate 		}
19887c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) {
19897c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get address",
19907c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19917c478bd9Sstevel@tonic-gate 			goto bad;
19927c478bd9Sstevel@tonic-gate 		}
19937c478bd9Sstevel@tonic-gate 		zone_using = who_is_using(zlogp, &lifr);
19947c478bd9Sstevel@tonic-gate 		errno = save_errno;
19957c478bd9Sstevel@tonic-gate 		if (zone_using == NULL)
19967c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19977c478bd9Sstevel@tonic-gate 			    "%s: could not bring interface up", lifr.lifr_name);
19987c478bd9Sstevel@tonic-gate 		else
19997c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not bring interface "
20007c478bd9Sstevel@tonic-gate 			    "up: address in use by zone '%s'", lifr.lifr_name,
20017c478bd9Sstevel@tonic-gate 			    zone_using);
20027c478bd9Sstevel@tonic-gate 		goto bad;
20037c478bd9Sstevel@tonic-gate 	}
20047c478bd9Sstevel@tonic-gate 	if ((lifr.lifr_flags & IFF_MULTICAST) && ((af == AF_INET &&
20057c478bd9Sstevel@tonic-gate 	    mcast_rt_v4_setp != NULL && *mcast_rt_v4_setp == B_FALSE) ||
20067c478bd9Sstevel@tonic-gate 	    (af == AF_INET6 &&
20077c478bd9Sstevel@tonic-gate 	    mcast_rt_v6_setp != NULL && *mcast_rt_v6_setp == B_FALSE))) {
20087c478bd9Sstevel@tonic-gate 		rs = socket(PF_ROUTE, SOCK_RAW, 0);
20097c478bd9Sstevel@tonic-gate 		if (rs < 0) {
20107c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not create "
20117c478bd9Sstevel@tonic-gate 			    "routing socket", lifr.lifr_name);
20127c478bd9Sstevel@tonic-gate 			goto bad;
20137c478bd9Sstevel@tonic-gate 		}
20147c478bd9Sstevel@tonic-gate 		(void) shutdown(rs, 0);
20157c478bd9Sstevel@tonic-gate 		(void) memset((void *)&mcast_rtmsg, 0, sizeof (mcast_rtmsg_t));
20167c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_msglen =  sizeof (struct rt_msghdr) +
20177c478bd9Sstevel@tonic-gate 		    3 * (af == AF_INET ? sizeof (struct sockaddr_in) :
20187c478bd9Sstevel@tonic-gate 		    sizeof (struct sockaddr_in6));
20197c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_version = RTM_VERSION;
20207c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_type = RTM_ADD;
20217c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_flags = RTF_UP;
20227c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_addrs =
20237c478bd9Sstevel@tonic-gate 		    RTA_DST | RTA_GATEWAY | RTA_NETMASK;
20247c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_seq = ++rts_seqno;
20257c478bd9Sstevel@tonic-gate 		if (af == AF_INET) {
20267c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst4.sin_family = AF_INET;
20277c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst4.sin_addr.s_addr =
20287c478bd9Sstevel@tonic-gate 			    htonl(INADDR_UNSPEC_GROUP);
20297c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw4.sin_family = AF_INET;
20307c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw4.sin_addr = in4;
20317c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask4.sin_family = AF_INET;
20327c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask4.sin_addr.s_addr =
20337c478bd9Sstevel@tonic-gate 			    htonl(IN_CLASSD_NET);
20347c478bd9Sstevel@tonic-gate 		} else {
20357c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst6.sin6_family = AF_INET6;
20367c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst6.sin6_addr.s6_addr[0] = 0xffU;
20377c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw6.sin6_family = AF_INET6;
20387c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw6.sin6_addr = in6;
20397c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask6.sin6_family = AF_INET6;
20407c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask6.sin6_addr.s6_addr[0] = 0xffU;
20417c478bd9Sstevel@tonic-gate 		}
20427c478bd9Sstevel@tonic-gate 		rlen = write(rs, (char *)&mcast_rtmsg,
20437c478bd9Sstevel@tonic-gate 		    mcast_rtmsg.m_rtm.rtm_msglen);
204422321485Svp157776 		/*
204522321485Svp157776 		 * The write to the multicast socket will fail if the
204622321485Svp157776 		 * interface belongs to a failed IPMP group. This is a
204722321485Svp157776 		 * non-fatal error and the zone will continue booting.
204822321485Svp157776 		 * While the zone is running, if any interface in the
204922321485Svp157776 		 * failed IPMP group recovers, the zone will fallback to
205022321485Svp157776 		 * using that interface.
205122321485Svp157776 		 */
20527c478bd9Sstevel@tonic-gate 		if (rlen < mcast_rtmsg.m_rtm.rtm_msglen) {
20537c478bd9Sstevel@tonic-gate 			if (rlen < 0) {
205422321485Svp157776 				zerror(zlogp, B_TRUE, "WARNING: interface "
205522321485Svp157776 				    "'%s' not available as default for "
205622321485Svp157776 				    "multicast.", lifr.lifr_name);
20577c478bd9Sstevel@tonic-gate 			} else {
205822321485Svp157776 				zerror(zlogp, B_FALSE, "WARNING: interface "
205922321485Svp157776 				    "'%s' not available as default for "
206022321485Svp157776 				    "multicast; routing socket returned "
206122321485Svp157776 				    "unexpected %d bytes.",
206222321485Svp157776 				    lifr.lifr_name, rlen);
20637c478bd9Sstevel@tonic-gate 			}
206422321485Svp157776 		} else {
206522321485Svp157776 
20667c478bd9Sstevel@tonic-gate 			if (af == AF_INET) {
20677c478bd9Sstevel@tonic-gate 				*mcast_rt_v4_setp = B_TRUE;
20687c478bd9Sstevel@tonic-gate 			} else {
20697c478bd9Sstevel@tonic-gate 				*mcast_rt_v6_setp = B_TRUE;
20707c478bd9Sstevel@tonic-gate 			}
207122321485Svp157776 		}
20727c478bd9Sstevel@tonic-gate 		(void) close(rs);
20737c478bd9Sstevel@tonic-gate 	}
20747c478bd9Sstevel@tonic-gate 
20757c478bd9Sstevel@tonic-gate 	if (!got_netmask) {
20767c478bd9Sstevel@tonic-gate 		/*
20777c478bd9Sstevel@tonic-gate 		 * A common, but often non-fatal problem, is that the system
20787c478bd9Sstevel@tonic-gate 		 * cannot find the netmask for an interface address. This is
20797c478bd9Sstevel@tonic-gate 		 * often caused by it being only in /etc/inet/netmasks, but
20807c478bd9Sstevel@tonic-gate 		 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not
20817c478bd9Sstevel@tonic-gate 		 * in that. This doesn't show up at boot because the netmask
20827c478bd9Sstevel@tonic-gate 		 * is obtained from /etc/inet/netmasks when no network
20837c478bd9Sstevel@tonic-gate 		 * interfaces are up, but isn't consulted when NIS/NIS+ is
20847c478bd9Sstevel@tonic-gate 		 * available. We warn the user here that something like this
20857c478bd9Sstevel@tonic-gate 		 * has happened and we're just running with a default and
20867c478bd9Sstevel@tonic-gate 		 * possible incorrect netmask.
20877c478bd9Sstevel@tonic-gate 		 */
20887c478bd9Sstevel@tonic-gate 		char buffer[INET6_ADDRSTRLEN];
20897c478bd9Sstevel@tonic-gate 		void  *addr;
20907c478bd9Sstevel@tonic-gate 
20917c478bd9Sstevel@tonic-gate 		if (af == AF_INET)
20927c478bd9Sstevel@tonic-gate 			addr = &((struct sockaddr_in *)
20937c478bd9Sstevel@tonic-gate 			    (&lifr.lifr_addr))->sin_addr;
20947c478bd9Sstevel@tonic-gate 		else
20957c478bd9Sstevel@tonic-gate 			addr = &((struct sockaddr_in6 *)
20967c478bd9Sstevel@tonic-gate 			    (&lifr.lifr_addr))->sin6_addr;
20977c478bd9Sstevel@tonic-gate 
20987c478bd9Sstevel@tonic-gate 		/* Find out what netmask interface is going to be using */
20997c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 ||
21007c478bd9Sstevel@tonic-gate 		    inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL)
21017c478bd9Sstevel@tonic-gate 			goto bad;
21027c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
21037c478bd9Sstevel@tonic-gate 		    "WARNING: %s: no matching subnet found in netmasks(4) for "
21047c478bd9Sstevel@tonic-gate 		    "%s; using default of %s.",
21057c478bd9Sstevel@tonic-gate 		    lifr.lifr_name, addrstr4, buffer);
21067c478bd9Sstevel@tonic-gate 	}
21077c478bd9Sstevel@tonic-gate 
21087c478bd9Sstevel@tonic-gate 	(void) close(s);
21097c478bd9Sstevel@tonic-gate 	return (Z_OK);
21107c478bd9Sstevel@tonic-gate bad:
21117c478bd9Sstevel@tonic-gate 	(void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr);
21127c478bd9Sstevel@tonic-gate 	(void) close(s);
21137c478bd9Sstevel@tonic-gate 	return (-1);
21147c478bd9Sstevel@tonic-gate }
21157c478bd9Sstevel@tonic-gate 
21167c478bd9Sstevel@tonic-gate /*
21177c478bd9Sstevel@tonic-gate  * Sets up network interfaces based on information from the zone configuration.
21187c478bd9Sstevel@tonic-gate  * An IPv4 loopback interface is set up "for free", modeling the global system.
21197c478bd9Sstevel@tonic-gate  * If any of the configuration interfaces were IPv6, then an IPv6 loopback
21207c478bd9Sstevel@tonic-gate  * address is set up as well.
21217c478bd9Sstevel@tonic-gate  *
21227c478bd9Sstevel@tonic-gate  * If anything goes wrong, we log a general error message, attempt to tear down
21237c478bd9Sstevel@tonic-gate  * whatever we set up, and return an error.
21247c478bd9Sstevel@tonic-gate  */
21257c478bd9Sstevel@tonic-gate static int
21267c478bd9Sstevel@tonic-gate configure_network_interfaces(zlog_t *zlogp)
21277c478bd9Sstevel@tonic-gate {
21287c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle;
21297c478bd9Sstevel@tonic-gate 	struct zone_nwiftab nwiftab, loopback_iftab;
21307c478bd9Sstevel@tonic-gate 	boolean_t saw_v6 = B_FALSE;
21317c478bd9Sstevel@tonic-gate 	boolean_t mcast_rt_v4_set = B_FALSE;
21327c478bd9Sstevel@tonic-gate 	boolean_t mcast_rt_v6_set = B_FALSE;
21337c478bd9Sstevel@tonic-gate 	zoneid_t zoneid;
21347c478bd9Sstevel@tonic-gate 
21357c478bd9Sstevel@tonic-gate 	if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) {
21367c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to get zoneid");
21377c478bd9Sstevel@tonic-gate 		return (-1);
21387c478bd9Sstevel@tonic-gate 	}
21397c478bd9Sstevel@tonic-gate 
21407c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
21417c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
21427c478bd9Sstevel@tonic-gate 		return (-1);
21437c478bd9Sstevel@tonic-gate 	}
21447c478bd9Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
21457c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
21467c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
21477c478bd9Sstevel@tonic-gate 		return (-1);
21487c478bd9Sstevel@tonic-gate 	}
21497c478bd9Sstevel@tonic-gate 	if (zonecfg_setnwifent(handle) == Z_OK) {
21507c478bd9Sstevel@tonic-gate 		for (;;) {
21517c478bd9Sstevel@tonic-gate 			struct in6_addr in6;
21527c478bd9Sstevel@tonic-gate 
21537c478bd9Sstevel@tonic-gate 			if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
21547c478bd9Sstevel@tonic-gate 				break;
21557c478bd9Sstevel@tonic-gate 			if (configure_one_interface(zlogp, zoneid,
21567c478bd9Sstevel@tonic-gate 			    &nwiftab, &mcast_rt_v4_set, &mcast_rt_v6_set) !=
21577c478bd9Sstevel@tonic-gate 			    Z_OK) {
21587c478bd9Sstevel@tonic-gate 				(void) zonecfg_endnwifent(handle);
21597c478bd9Sstevel@tonic-gate 				zonecfg_fini_handle(handle);
21607c478bd9Sstevel@tonic-gate 				return (-1);
21617c478bd9Sstevel@tonic-gate 			}
21627c478bd9Sstevel@tonic-gate 			if (inet_pton(AF_INET6, nwiftab.zone_nwif_address,
21637c478bd9Sstevel@tonic-gate 			    &in6) == 1)
21647c478bd9Sstevel@tonic-gate 				saw_v6 = B_TRUE;
21657c478bd9Sstevel@tonic-gate 		}
21667c478bd9Sstevel@tonic-gate 		(void) zonecfg_endnwifent(handle);
21677c478bd9Sstevel@tonic-gate 	}
21687c478bd9Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
21697c478bd9Sstevel@tonic-gate 	(void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0",
21707c478bd9Sstevel@tonic-gate 	    sizeof (loopback_iftab.zone_nwif_physical));
21717c478bd9Sstevel@tonic-gate 	(void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1",
21727c478bd9Sstevel@tonic-gate 	    sizeof (loopback_iftab.zone_nwif_address));
21737c478bd9Sstevel@tonic-gate 	if (configure_one_interface(zlogp, zoneid, &loopback_iftab, NULL, NULL)
21747c478bd9Sstevel@tonic-gate 	    != Z_OK) {
21757c478bd9Sstevel@tonic-gate 		return (-1);
21767c478bd9Sstevel@tonic-gate 	}
21777c478bd9Sstevel@tonic-gate 	if (saw_v6) {
21787c478bd9Sstevel@tonic-gate 		(void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128",
21797c478bd9Sstevel@tonic-gate 		    sizeof (loopback_iftab.zone_nwif_address));
21807c478bd9Sstevel@tonic-gate 		if (configure_one_interface(zlogp, zoneid,
21817c478bd9Sstevel@tonic-gate 		    &loopback_iftab, NULL, NULL) != Z_OK) {
21827c478bd9Sstevel@tonic-gate 			return (-1);
21837c478bd9Sstevel@tonic-gate 		}
21847c478bd9Sstevel@tonic-gate 	}
21857c478bd9Sstevel@tonic-gate 	return (0);
21867c478bd9Sstevel@tonic-gate }
21877c478bd9Sstevel@tonic-gate 
21887c478bd9Sstevel@tonic-gate static int
21897c478bd9Sstevel@tonic-gate tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid,
21907c478bd9Sstevel@tonic-gate     const struct sockaddr_storage *local, const struct sockaddr_storage *remote)
21917c478bd9Sstevel@tonic-gate {
21927c478bd9Sstevel@tonic-gate 	int fd;
21937c478bd9Sstevel@tonic-gate 	struct strioctl ioc;
21947c478bd9Sstevel@tonic-gate 	tcp_ioc_abort_conn_t conn;
21957c478bd9Sstevel@tonic-gate 	int error;
21967c478bd9Sstevel@tonic-gate 
21977c478bd9Sstevel@tonic-gate 	conn.ac_local = *local;
21987c478bd9Sstevel@tonic-gate 	conn.ac_remote = *remote;
21997c478bd9Sstevel@tonic-gate 	conn.ac_start = TCPS_SYN_SENT;
22007c478bd9Sstevel@tonic-gate 	conn.ac_end = TCPS_TIME_WAIT;
22017c478bd9Sstevel@tonic-gate 	conn.ac_zoneid = zoneid;
22027c478bd9Sstevel@tonic-gate 
22037c478bd9Sstevel@tonic-gate 	ioc.ic_cmd = TCP_IOC_ABORT_CONN;
22047c478bd9Sstevel@tonic-gate 	ioc.ic_timout = -1; /* infinite timeout */
22057c478bd9Sstevel@tonic-gate 	ioc.ic_len = sizeof (conn);
22067c478bd9Sstevel@tonic-gate 	ioc.ic_dp = (char *)&conn;
22077c478bd9Sstevel@tonic-gate 
22087c478bd9Sstevel@tonic-gate 	if ((fd = open("/dev/tcp", O_RDONLY)) < 0) {
22097c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp");
22107c478bd9Sstevel@tonic-gate 		return (-1);
22117c478bd9Sstevel@tonic-gate 	}
22127c478bd9Sstevel@tonic-gate 
22137c478bd9Sstevel@tonic-gate 	error = ioctl(fd, I_STR, &ioc);
22147c478bd9Sstevel@tonic-gate 	(void) close(fd);
22157c478bd9Sstevel@tonic-gate 	if (error == 0 || errno == ENOENT)	/* ENOENT is not an error */
22167c478bd9Sstevel@tonic-gate 		return (0);
22177c478bd9Sstevel@tonic-gate 	return (-1);
22187c478bd9Sstevel@tonic-gate }
22197c478bd9Sstevel@tonic-gate 
22207c478bd9Sstevel@tonic-gate static int
22217c478bd9Sstevel@tonic-gate tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid)
22227c478bd9Sstevel@tonic-gate {
22237c478bd9Sstevel@tonic-gate 	struct sockaddr_storage l, r;
22247c478bd9Sstevel@tonic-gate 	struct sockaddr_in *local, *remote;
22257c478bd9Sstevel@tonic-gate 	struct sockaddr_in6 *local6, *remote6;
22267c478bd9Sstevel@tonic-gate 	int error;
22277c478bd9Sstevel@tonic-gate 
22287c478bd9Sstevel@tonic-gate 	/*
22297c478bd9Sstevel@tonic-gate 	 * Abort IPv4 connections.
22307c478bd9Sstevel@tonic-gate 	 */
22317c478bd9Sstevel@tonic-gate 	bzero(&l, sizeof (*local));
22327c478bd9Sstevel@tonic-gate 	local = (struct sockaddr_in *)&l;
22337c478bd9Sstevel@tonic-gate 	local->sin_family = AF_INET;
22347c478bd9Sstevel@tonic-gate 	local->sin_addr.s_addr = INADDR_ANY;
22357c478bd9Sstevel@tonic-gate 	local->sin_port = 0;
22367c478bd9Sstevel@tonic-gate 
22377c478bd9Sstevel@tonic-gate 	bzero(&r, sizeof (*remote));
22387c478bd9Sstevel@tonic-gate 	remote = (struct sockaddr_in *)&r;
22397c478bd9Sstevel@tonic-gate 	remote->sin_family = AF_INET;
22407c478bd9Sstevel@tonic-gate 	remote->sin_addr.s_addr = INADDR_ANY;
22417c478bd9Sstevel@tonic-gate 	remote->sin_port = 0;
22427c478bd9Sstevel@tonic-gate 
22437c478bd9Sstevel@tonic-gate 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
22447c478bd9Sstevel@tonic-gate 		return (error);
22457c478bd9Sstevel@tonic-gate 
22467c478bd9Sstevel@tonic-gate 	/*
22477c478bd9Sstevel@tonic-gate 	 * Abort IPv6 connections.
22487c478bd9Sstevel@tonic-gate 	 */
22497c478bd9Sstevel@tonic-gate 	bzero(&l, sizeof (*local6));
22507c478bd9Sstevel@tonic-gate 	local6 = (struct sockaddr_in6 *)&l;
22517c478bd9Sstevel@tonic-gate 	local6->sin6_family = AF_INET6;
22527c478bd9Sstevel@tonic-gate 	local6->sin6_port = 0;
22537c478bd9Sstevel@tonic-gate 	local6->sin6_addr = in6addr_any;
22547c478bd9Sstevel@tonic-gate 
22557c478bd9Sstevel@tonic-gate 	bzero(&r, sizeof (*remote6));
22567c478bd9Sstevel@tonic-gate 	remote6 = (struct sockaddr_in6 *)&r;
22577c478bd9Sstevel@tonic-gate 	remote6->sin6_family = AF_INET6;
22587c478bd9Sstevel@tonic-gate 	remote6->sin6_port = 0;
22597c478bd9Sstevel@tonic-gate 	remote6->sin6_addr = in6addr_any;
22607c478bd9Sstevel@tonic-gate 
22617c478bd9Sstevel@tonic-gate 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
22627c478bd9Sstevel@tonic-gate 		return (error);
22637c478bd9Sstevel@tonic-gate 	return (0);
22647c478bd9Sstevel@tonic-gate }
22657c478bd9Sstevel@tonic-gate 
22667c478bd9Sstevel@tonic-gate static int
2267ffbafc53Scomay get_privset(zlog_t *zlogp, priv_set_t *privs, boolean_t mount_cmd)
2268ffbafc53Scomay {
2269ffbafc53Scomay 	int error = -1;
2270ffbafc53Scomay 	zone_dochandle_t handle;
2271ffbafc53Scomay 	char *privname = NULL;
2272ffbafc53Scomay 
2273ffbafc53Scomay 	if (mount_cmd) {
2274ffbafc53Scomay 		if (zonecfg_default_privset(privs) == Z_OK)
2275ffbafc53Scomay 			return (0);
2276ffbafc53Scomay 		zerror(zlogp, B_FALSE,
2277ffbafc53Scomay 		    "failed to determine the zone's default privilege set");
2278ffbafc53Scomay 		return (-1);
2279ffbafc53Scomay 	}
2280ffbafc53Scomay 
2281ffbafc53Scomay 	if ((handle = zonecfg_init_handle()) == NULL) {
2282ffbafc53Scomay 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2283ffbafc53Scomay 		return (-1);
2284ffbafc53Scomay 	}
2285ffbafc53Scomay 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2286ffbafc53Scomay 		zerror(zlogp, B_FALSE, "invalid configuration");
2287ffbafc53Scomay 		zonecfg_fini_handle(handle);
2288ffbafc53Scomay 		return (-1);
2289ffbafc53Scomay 	}
2290ffbafc53Scomay 
2291ffbafc53Scomay 	switch (zonecfg_get_privset(handle, privs, &privname)) {
2292ffbafc53Scomay 	case Z_OK:
2293ffbafc53Scomay 		error = 0;
2294ffbafc53Scomay 		break;
2295ffbafc53Scomay 	case Z_PRIV_PROHIBITED:
2296ffbafc53Scomay 		zerror(zlogp, B_FALSE, "privilege \"%s\" is not permitted "
2297ffbafc53Scomay 		    "within the zone's privilege set", privname);
2298ffbafc53Scomay 		break;
2299ffbafc53Scomay 	case Z_PRIV_REQUIRED:
2300ffbafc53Scomay 		zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing "
2301ffbafc53Scomay 		    "from the zone's privilege set", privname);
2302ffbafc53Scomay 		break;
2303ffbafc53Scomay 	case Z_PRIV_UNKNOWN:
2304ffbafc53Scomay 		zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified "
2305ffbafc53Scomay 		    "in the zone's privilege set", privname);
2306ffbafc53Scomay 		break;
2307ffbafc53Scomay 	default:
2308ffbafc53Scomay 		zerror(zlogp, B_FALSE, "failed to determine the zone's "
2309ffbafc53Scomay 		    "privilege set");
2310ffbafc53Scomay 		break;
2311ffbafc53Scomay 	}
2312ffbafc53Scomay 
2313ffbafc53Scomay 	free(privname);
2314ffbafc53Scomay 	zonecfg_fini_handle(handle);
2315ffbafc53Scomay 	return (error);
2316ffbafc53Scomay }
2317ffbafc53Scomay 
2318ffbafc53Scomay static int
23197c478bd9Sstevel@tonic-gate get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
23207c478bd9Sstevel@tonic-gate {
23217c478bd9Sstevel@tonic-gate 	nvlist_t *nvl = NULL;
23227c478bd9Sstevel@tonic-gate 	char *nvl_packed = NULL;
23237c478bd9Sstevel@tonic-gate 	size_t nvl_size = 0;
23247c478bd9Sstevel@tonic-gate 	nvlist_t **nvlv = NULL;
23257c478bd9Sstevel@tonic-gate 	int rctlcount = 0;
23267c478bd9Sstevel@tonic-gate 	int error = -1;
23277c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle;
23287c478bd9Sstevel@tonic-gate 	struct zone_rctltab rctltab;
23297c478bd9Sstevel@tonic-gate 	rctlblk_t *rctlblk = NULL;
23307c478bd9Sstevel@tonic-gate 
23317c478bd9Sstevel@tonic-gate 	*bufp = NULL;
23327c478bd9Sstevel@tonic-gate 	*bufsizep = 0;
23337c478bd9Sstevel@tonic-gate 
23347c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
23357c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
23367c478bd9Sstevel@tonic-gate 		return (-1);
23377c478bd9Sstevel@tonic-gate 	}
23387c478bd9Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
23397c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
23407c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
23417c478bd9Sstevel@tonic-gate 		return (-1);
23427c478bd9Sstevel@tonic-gate 	}
23437c478bd9Sstevel@tonic-gate 
23447c478bd9Sstevel@tonic-gate 	rctltab.zone_rctl_valptr = NULL;
23457c478bd9Sstevel@tonic-gate 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
23467c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
23477c478bd9Sstevel@tonic-gate 		goto out;
23487c478bd9Sstevel@tonic-gate 	}
23497c478bd9Sstevel@tonic-gate 
23507c478bd9Sstevel@tonic-gate 	if (zonecfg_setrctlent(handle) != Z_OK) {
23517c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
23527c478bd9Sstevel@tonic-gate 		goto out;
23537c478bd9Sstevel@tonic-gate 	}
23547c478bd9Sstevel@tonic-gate 
23557c478bd9Sstevel@tonic-gate 	if ((rctlblk = malloc(rctlblk_size())) == NULL) {
23567c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
23577c478bd9Sstevel@tonic-gate 		goto out;
23587c478bd9Sstevel@tonic-gate 	}
23597c478bd9Sstevel@tonic-gate 	while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
23607c478bd9Sstevel@tonic-gate 		struct zone_rctlvaltab *rctlval;
23617c478bd9Sstevel@tonic-gate 		uint_t i, count;
23627c478bd9Sstevel@tonic-gate 		const char *name = rctltab.zone_rctl_name;
23637c478bd9Sstevel@tonic-gate 
23647c478bd9Sstevel@tonic-gate 		/* zoneadm should have already warned about unknown rctls. */
23657c478bd9Sstevel@tonic-gate 		if (!zonecfg_is_rctl(name)) {
23667c478bd9Sstevel@tonic-gate 			zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
23677c478bd9Sstevel@tonic-gate 			rctltab.zone_rctl_valptr = NULL;
23687c478bd9Sstevel@tonic-gate 			continue;
23697c478bd9Sstevel@tonic-gate 		}
23707c478bd9Sstevel@tonic-gate 		count = 0;
23717c478bd9Sstevel@tonic-gate 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
23727c478bd9Sstevel@tonic-gate 		    rctlval = rctlval->zone_rctlval_next) {
23737c478bd9Sstevel@tonic-gate 			count++;
23747c478bd9Sstevel@tonic-gate 		}
23757c478bd9Sstevel@tonic-gate 		if (count == 0) {	/* ignore */
23767c478bd9Sstevel@tonic-gate 			continue;	/* Nothing to free */
23777c478bd9Sstevel@tonic-gate 		}
23787c478bd9Sstevel@tonic-gate 		if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)
23797c478bd9Sstevel@tonic-gate 			goto out;
23807c478bd9Sstevel@tonic-gate 		i = 0;
23817c478bd9Sstevel@tonic-gate 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
23827c478bd9Sstevel@tonic-gate 		    rctlval = rctlval->zone_rctlval_next, i++) {
23837c478bd9Sstevel@tonic-gate 			if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) {
23847c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_TRUE, "%s failed",
23857c478bd9Sstevel@tonic-gate 				    "nvlist_alloc");
23867c478bd9Sstevel@tonic-gate 				goto out;
23877c478bd9Sstevel@tonic-gate 			}
23887c478bd9Sstevel@tonic-gate 			if (zonecfg_construct_rctlblk(rctlval, rctlblk)
23897c478bd9Sstevel@tonic-gate 			    != Z_OK) {
23907c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "invalid rctl value: "
23917c478bd9Sstevel@tonic-gate 				    "(priv=%s,limit=%s,action=%s)",
23927c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_priv,
23937c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_limit,
23947c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_action);
23957c478bd9Sstevel@tonic-gate 				goto out;
23967c478bd9Sstevel@tonic-gate 			}
23977c478bd9Sstevel@tonic-gate 			if (!zonecfg_valid_rctl(name, rctlblk)) {
23987c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
23997c478bd9Sstevel@tonic-gate 				    "(priv=%s,limit=%s,action=%s) is not a "
24007c478bd9Sstevel@tonic-gate 				    "valid value for rctl '%s'",
24017c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_priv,
24027c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_limit,
24037c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_action,
24047c478bd9Sstevel@tonic-gate 				    name);
24057c478bd9Sstevel@tonic-gate 				goto out;
24067c478bd9Sstevel@tonic-gate 			}
24077c478bd9Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "privilege",
24087c478bd9Sstevel@tonic-gate 			    rctlblk_get_privilege(rctlblk)) != 0) {
24097c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
24107c478bd9Sstevel@tonic-gate 				    "nvlist_add_uint64");
24117c478bd9Sstevel@tonic-gate 				goto out;
24127c478bd9Sstevel@tonic-gate 			}
24137c478bd9Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "limit",
24147c478bd9Sstevel@tonic-gate 			    rctlblk_get_value(rctlblk)) != 0) {
24157c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
24167c478bd9Sstevel@tonic-gate 				    "nvlist_add_uint64");
24177c478bd9Sstevel@tonic-gate 				goto out;
24187c478bd9Sstevel@tonic-gate 			}
24197c478bd9Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "action",
24207c478bd9Sstevel@tonic-gate 			    (uint_t)rctlblk_get_local_action(rctlblk, NULL))
24217c478bd9Sstevel@tonic-gate 			    != 0) {
24227c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
24237c478bd9Sstevel@tonic-gate 				    "nvlist_add_uint64");
24247c478bd9Sstevel@tonic-gate 				goto out;
24257c478bd9Sstevel@tonic-gate 			}
24267c478bd9Sstevel@tonic-gate 		}
24277c478bd9Sstevel@tonic-gate 		zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
24287c478bd9Sstevel@tonic-gate 		rctltab.zone_rctl_valptr = NULL;
24297c478bd9Sstevel@tonic-gate 		if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
24307c478bd9Sstevel@tonic-gate 		    != 0) {
24317c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s failed",
24327c478bd9Sstevel@tonic-gate 			    "nvlist_add_nvlist_array");
24337c478bd9Sstevel@tonic-gate 			goto out;
24347c478bd9Sstevel@tonic-gate 		}
24357c478bd9Sstevel@tonic-gate 		for (i = 0; i < count; i++)
24367c478bd9Sstevel@tonic-gate 			nvlist_free(nvlv[i]);
24377c478bd9Sstevel@tonic-gate 		free(nvlv);
24387c478bd9Sstevel@tonic-gate 		nvlv = NULL;
24397c478bd9Sstevel@tonic-gate 		rctlcount++;
24407c478bd9Sstevel@tonic-gate 	}
24417c478bd9Sstevel@tonic-gate 	(void) zonecfg_endrctlent(handle);
24427c478bd9Sstevel@tonic-gate 
24437c478bd9Sstevel@tonic-gate 	if (rctlcount == 0) {
24447c478bd9Sstevel@tonic-gate 		error = 0;
24457c478bd9Sstevel@tonic-gate 		goto out;
24467c478bd9Sstevel@tonic-gate 	}
24477c478bd9Sstevel@tonic-gate 	if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
24487c478bd9Sstevel@tonic-gate 	    != 0) {
24497c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack");
24507c478bd9Sstevel@tonic-gate 		goto out;
24517c478bd9Sstevel@tonic-gate 	}
24527c478bd9Sstevel@tonic-gate 
24537c478bd9Sstevel@tonic-gate 	error = 0;
24547c478bd9Sstevel@tonic-gate 	*bufp = nvl_packed;
24557c478bd9Sstevel@tonic-gate 	*bufsizep = nvl_size;
24567c478bd9Sstevel@tonic-gate 
24577c478bd9Sstevel@tonic-gate out:
24587c478bd9Sstevel@tonic-gate 	free(rctlblk);
24597c478bd9Sstevel@tonic-gate 	zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
24607c478bd9Sstevel@tonic-gate 	if (error && nvl_packed != NULL)
24617c478bd9Sstevel@tonic-gate 		free(nvl_packed);
24627c478bd9Sstevel@tonic-gate 	if (nvl != NULL)
24637c478bd9Sstevel@tonic-gate 		nvlist_free(nvl);
24647c478bd9Sstevel@tonic-gate 	if (nvlv != NULL)
24657c478bd9Sstevel@tonic-gate 		free(nvlv);
24667c478bd9Sstevel@tonic-gate 	if (handle != NULL)
24677c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
24687c478bd9Sstevel@tonic-gate 	return (error);
24697c478bd9Sstevel@tonic-gate }
24707c478bd9Sstevel@tonic-gate 
24717c478bd9Sstevel@tonic-gate static int
24727c478bd9Sstevel@tonic-gate get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz)
24737c478bd9Sstevel@tonic-gate {
24747c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle;
24757c478bd9Sstevel@tonic-gate 	int error;
24767c478bd9Sstevel@tonic-gate 
24777c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
24787c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2479ffbafc53Scomay 		return (Z_NOMEM);
24807c478bd9Sstevel@tonic-gate 	}
2481ffbafc53Scomay 	error = zonecfg_get_snapshot_handle(zone_name, handle);
2482ffbafc53Scomay 	if (error != Z_OK) {
24837c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
24847c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
2485ffbafc53Scomay 		return (error);
24867c478bd9Sstevel@tonic-gate 	}
24877c478bd9Sstevel@tonic-gate 	error = zonecfg_get_pool(handle, poolbuf, bufsz);
24887c478bd9Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
24897c478bd9Sstevel@tonic-gate 	return (error);
24907c478bd9Sstevel@tonic-gate }
24917c478bd9Sstevel@tonic-gate 
24927c478bd9Sstevel@tonic-gate static int
2493fa9e4066Sahrens get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
2494fa9e4066Sahrens {
2495fa9e4066Sahrens 	zone_dochandle_t handle;
2496fa9e4066Sahrens 	struct zone_dstab dstab;
2497fa9e4066Sahrens 	size_t total, offset, len;
2498fa9e4066Sahrens 	int error = -1;
2499fa9e4066Sahrens 	char *str;
2500fa9e4066Sahrens 
2501fa9e4066Sahrens 	*bufp = NULL;
2502fa9e4066Sahrens 	*bufsizep = 0;
2503fa9e4066Sahrens 
2504fa9e4066Sahrens 	if ((handle = zonecfg_init_handle()) == NULL) {
2505fa9e4066Sahrens 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2506fa9e4066Sahrens 		return (-1);
2507fa9e4066Sahrens 	}
2508fa9e4066Sahrens 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2509fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "invalid configuration");
2510fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2511fa9e4066Sahrens 		return (-1);
2512fa9e4066Sahrens 	}
2513fa9e4066Sahrens 
2514fa9e4066Sahrens 	if (zonecfg_setdsent(handle) != Z_OK) {
2515fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
2516fa9e4066Sahrens 		goto out;
2517fa9e4066Sahrens 	}
2518fa9e4066Sahrens 
2519fa9e4066Sahrens 	total = 0;
2520fa9e4066Sahrens 	while (zonecfg_getdsent(handle, &dstab) == Z_OK)
2521fa9e4066Sahrens 		total += strlen(dstab.zone_dataset_name) + 1;
2522fa9e4066Sahrens 	(void) zonecfg_enddsent(handle);
2523fa9e4066Sahrens 
2524fa9e4066Sahrens 	if (total == 0) {
2525fa9e4066Sahrens 		error = 0;
2526fa9e4066Sahrens 		goto out;
2527fa9e4066Sahrens 	}
2528fa9e4066Sahrens 
2529fa9e4066Sahrens 	if ((str = malloc(total)) == NULL) {
2530fa9e4066Sahrens 		zerror(zlogp, B_TRUE, "memory allocation failed");
2531fa9e4066Sahrens 		goto out;
2532fa9e4066Sahrens 	}
2533fa9e4066Sahrens 
2534fa9e4066Sahrens 	if (zonecfg_setdsent(handle) != Z_OK) {
2535fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
2536fa9e4066Sahrens 		goto out;
2537fa9e4066Sahrens 	}
2538fa9e4066Sahrens 	offset = 0;
2539fa9e4066Sahrens 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
2540fa9e4066Sahrens 		len = strlen(dstab.zone_dataset_name);
2541fa9e4066Sahrens 		(void) strlcpy(str + offset, dstab.zone_dataset_name,
2542fa9e4066Sahrens 		    sizeof (dstab.zone_dataset_name) - offset);
2543fa9e4066Sahrens 		offset += len;
2544fa9e4066Sahrens 		if (offset != total - 1)
2545fa9e4066Sahrens 			str[offset++] = ',';
2546fa9e4066Sahrens 	}
2547fa9e4066Sahrens 	(void) zonecfg_enddsent(handle);
2548fa9e4066Sahrens 
2549fa9e4066Sahrens 	error = 0;
2550fa9e4066Sahrens 	*bufp = str;
2551fa9e4066Sahrens 	*bufsizep = total;
2552fa9e4066Sahrens 
2553fa9e4066Sahrens out:
2554fa9e4066Sahrens 	if (error != 0 && str != NULL)
2555fa9e4066Sahrens 		free(str);
2556fa9e4066Sahrens 	if (handle != NULL)
2557fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2558fa9e4066Sahrens 
2559fa9e4066Sahrens 	return (error);
2560fa9e4066Sahrens }
2561fa9e4066Sahrens 
2562fa9e4066Sahrens static int
2563fa9e4066Sahrens validate_datasets(zlog_t *zlogp)
2564fa9e4066Sahrens {
2565fa9e4066Sahrens 	zone_dochandle_t handle;
2566fa9e4066Sahrens 	struct zone_dstab dstab;
2567fa9e4066Sahrens 	zfs_handle_t *zhp;
256899653d4eSeschrock 	libzfs_handle_t *hdl;
2569fa9e4066Sahrens 
2570fa9e4066Sahrens 	if ((handle = zonecfg_init_handle()) == NULL) {
2571fa9e4066Sahrens 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2572fa9e4066Sahrens 		return (-1);
2573fa9e4066Sahrens 	}
2574fa9e4066Sahrens 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2575fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "invalid configuration");
2576fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2577fa9e4066Sahrens 		return (-1);
2578fa9e4066Sahrens 	}
2579fa9e4066Sahrens 
2580fa9e4066Sahrens 	if (zonecfg_setdsent(handle) != Z_OK) {
2581fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "invalid configuration");
2582fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2583fa9e4066Sahrens 		return (-1);
2584fa9e4066Sahrens 	}
2585fa9e4066Sahrens 
258699653d4eSeschrock 	if ((hdl = libzfs_init()) == NULL) {
258799653d4eSeschrock 		zerror(zlogp, B_FALSE, "opening ZFS library");
258899653d4eSeschrock 		zonecfg_fini_handle(handle);
258999653d4eSeschrock 		return (-1);
259099653d4eSeschrock 	}
2591fa9e4066Sahrens 
2592fa9e4066Sahrens 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
2593fa9e4066Sahrens 
259499653d4eSeschrock 		if ((zhp = zfs_open(hdl, dstab.zone_dataset_name,
2595fa9e4066Sahrens 		    ZFS_TYPE_FILESYSTEM)) == NULL) {
2596fa9e4066Sahrens 			zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'",
2597fa9e4066Sahrens 			    dstab.zone_dataset_name);
2598fa9e4066Sahrens 			zonecfg_fini_handle(handle);
259999653d4eSeschrock 			libzfs_fini(hdl);
2600fa9e4066Sahrens 			return (-1);
2601fa9e4066Sahrens 		}
2602fa9e4066Sahrens 
2603fa9e4066Sahrens 		/*
2604fa9e4066Sahrens 		 * Automatically set the 'zoned' property.  We check the value
2605fa9e4066Sahrens 		 * first because we'll get EPERM if it is already set.
2606fa9e4066Sahrens 		 */
2607fa9e4066Sahrens 		if (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
2608fa9e4066Sahrens 		    zfs_prop_set(zhp, ZFS_PROP_ZONED, "on") != 0) {
2609fa9e4066Sahrens 			zerror(zlogp, B_FALSE, "cannot set 'zoned' "
2610fa9e4066Sahrens 			    "property for ZFS dataset '%s'\n",
2611fa9e4066Sahrens 			    dstab.zone_dataset_name);
2612fa9e4066Sahrens 			zonecfg_fini_handle(handle);
2613fa9e4066Sahrens 			zfs_close(zhp);
261499653d4eSeschrock 			libzfs_fini(hdl);
2615fa9e4066Sahrens 			return (-1);
2616fa9e4066Sahrens 		}
2617fa9e4066Sahrens 
2618fa9e4066Sahrens 		zfs_close(zhp);
2619fa9e4066Sahrens 	}
2620fa9e4066Sahrens 	(void) zonecfg_enddsent(handle);
2621fa9e4066Sahrens 
2622fa9e4066Sahrens 	zonecfg_fini_handle(handle);
262399653d4eSeschrock 	libzfs_fini(hdl);
2624fa9e4066Sahrens 
2625fa9e4066Sahrens 	return (0);
2626fa9e4066Sahrens }
2627fa9e4066Sahrens 
2628fa9e4066Sahrens static int
26297c478bd9Sstevel@tonic-gate bind_to_pool(zlog_t *zlogp, zoneid_t zoneid)
26307c478bd9Sstevel@tonic-gate {
26317c478bd9Sstevel@tonic-gate 	pool_conf_t *poolconf;
26327c478bd9Sstevel@tonic-gate 	pool_t *pool;
26337c478bd9Sstevel@tonic-gate 	char poolname[MAXPATHLEN];
26347c478bd9Sstevel@tonic-gate 	int status;
26357c478bd9Sstevel@tonic-gate 	int error;
26367c478bd9Sstevel@tonic-gate 
26377c478bd9Sstevel@tonic-gate 	/*
26387c478bd9Sstevel@tonic-gate 	 * Find the pool mentioned in the zone configuration, and bind to it.
26397c478bd9Sstevel@tonic-gate 	 */
26407c478bd9Sstevel@tonic-gate 	error = get_zone_pool(zlogp, poolname, sizeof (poolname));
26417c478bd9Sstevel@tonic-gate 	if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) {
26427c478bd9Sstevel@tonic-gate 		/*
26437c478bd9Sstevel@tonic-gate 		 * The property is not set on the zone, so the pool
26447c478bd9Sstevel@tonic-gate 		 * should be bound to the default pool.  But that's
26457c478bd9Sstevel@tonic-gate 		 * already done by the kernel, so we can just return.
26467c478bd9Sstevel@tonic-gate 		 */
26477c478bd9Sstevel@tonic-gate 		return (0);
26487c478bd9Sstevel@tonic-gate 	}
26497c478bd9Sstevel@tonic-gate 	if (error != Z_OK) {
26507c478bd9Sstevel@tonic-gate 		/*
26517c478bd9Sstevel@tonic-gate 		 * Not an error, even though it shouldn't be happening.
26527c478bd9Sstevel@tonic-gate 		 */
26537c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
26547c478bd9Sstevel@tonic-gate 		    "WARNING: unable to retrieve default pool.");
26557c478bd9Sstevel@tonic-gate 		return (0);
26567c478bd9Sstevel@tonic-gate 	}
26577c478bd9Sstevel@tonic-gate 	/*
26587c478bd9Sstevel@tonic-gate 	 * Don't do anything if pools aren't enabled.
26597c478bd9Sstevel@tonic-gate 	 */
26607c478bd9Sstevel@tonic-gate 	if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) {
26617c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: pools facility not active; "
26627c478bd9Sstevel@tonic-gate 		    "zone will not be bound to pool '%s'.", poolname);
26637c478bd9Sstevel@tonic-gate 		return (0);
26647c478bd9Sstevel@tonic-gate 	}
26657c478bd9Sstevel@tonic-gate 	/*
26667c478bd9Sstevel@tonic-gate 	 * Try to provide a sane error message if the requested pool doesn't
26677c478bd9Sstevel@tonic-gate 	 * exist.
26687c478bd9Sstevel@tonic-gate 	 */
26697c478bd9Sstevel@tonic-gate 	if ((poolconf = pool_conf_alloc()) == NULL) {
26707c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_alloc");
26717c478bd9Sstevel@tonic-gate 		return (-1);
26727c478bd9Sstevel@tonic-gate 	}
26737c478bd9Sstevel@tonic-gate 	if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) !=
26747c478bd9Sstevel@tonic-gate 	    PO_SUCCESS) {
26757c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_open");
26767c478bd9Sstevel@tonic-gate 		pool_conf_free(poolconf);
26777c478bd9Sstevel@tonic-gate 		return (-1);
26787c478bd9Sstevel@tonic-gate 	}
26797c478bd9Sstevel@tonic-gate 	pool = pool_get_pool(poolconf, poolname);
26807c478bd9Sstevel@tonic-gate 	(void) pool_conf_close(poolconf);
26817c478bd9Sstevel@tonic-gate 	pool_conf_free(poolconf);
26827c478bd9Sstevel@tonic-gate 	if (pool == NULL) {
26837c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: pool '%s' not found; "
26847c478bd9Sstevel@tonic-gate 		    "using default pool.", poolname);
26857c478bd9Sstevel@tonic-gate 		return (0);
26867c478bd9Sstevel@tonic-gate 	}
26877c478bd9Sstevel@tonic-gate 	/*
26887c478bd9Sstevel@tonic-gate 	 * Bind the zone to the pool.
26897c478bd9Sstevel@tonic-gate 	 */
26907c478bd9Sstevel@tonic-gate 	if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) {
26917c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: unable to bind to pool '%s'; "
26927c478bd9Sstevel@tonic-gate 		    "using default pool.", poolname);
26937c478bd9Sstevel@tonic-gate 	}
26947c478bd9Sstevel@tonic-gate 	return (0);
26957c478bd9Sstevel@tonic-gate }
26967c478bd9Sstevel@tonic-gate 
269745916cd2Sjpk /*
269845916cd2Sjpk  * Mount lower level home directories into/from current zone
269945916cd2Sjpk  * Share exported directories specified in dfstab for zone
270045916cd2Sjpk  */
270145916cd2Sjpk static int
270245916cd2Sjpk tsol_mounts(zlog_t *zlogp, char *zone_name, char *rootpath)
270345916cd2Sjpk {
270445916cd2Sjpk 	zoneid_t *zids = NULL;
270545916cd2Sjpk 	priv_set_t *zid_privs;
270645916cd2Sjpk 	const priv_impl_info_t *ip = NULL;
270745916cd2Sjpk 	uint_t nzents_saved;
270845916cd2Sjpk 	uint_t nzents;
270945916cd2Sjpk 	int i;
271045916cd2Sjpk 	char readonly[] = "ro";
271145916cd2Sjpk 	struct zone_fstab lower_fstab;
271245916cd2Sjpk 	char *argv[4];
271345916cd2Sjpk 
271445916cd2Sjpk 	if (!is_system_labeled())
271545916cd2Sjpk 		return (0);
271645916cd2Sjpk 
271745916cd2Sjpk 	if (zid_label == NULL) {
271845916cd2Sjpk 		zid_label = m_label_alloc(MAC_LABEL);
271945916cd2Sjpk 		if (zid_label == NULL)
272045916cd2Sjpk 			return (-1);
272145916cd2Sjpk 	}
272245916cd2Sjpk 
272345916cd2Sjpk 	/* Make sure our zone has an /export/home dir */
272445916cd2Sjpk 	(void) make_one_dir(zlogp, rootpath, "/export/home",
272545916cd2Sjpk 	    DEFAULT_DIR_MODE);
272645916cd2Sjpk 
272745916cd2Sjpk 	lower_fstab.zone_fs_raw[0] = '\0';
272845916cd2Sjpk 	(void) strlcpy(lower_fstab.zone_fs_type, MNTTYPE_LOFS,
272945916cd2Sjpk 	    sizeof (lower_fstab.zone_fs_type));
273045916cd2Sjpk 	lower_fstab.zone_fs_options = NULL;
273145916cd2Sjpk 	(void) zonecfg_add_fs_option(&lower_fstab, readonly);
273245916cd2Sjpk 
273345916cd2Sjpk 	/*
273445916cd2Sjpk 	 * Get the list of zones from the kernel
273545916cd2Sjpk 	 */
273645916cd2Sjpk 	if (zone_list(NULL, &nzents) != 0) {
273745916cd2Sjpk 		zerror(zlogp, B_TRUE, "unable to list zones");
273845916cd2Sjpk 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
273945916cd2Sjpk 		return (-1);
274045916cd2Sjpk 	}
274145916cd2Sjpk again:
274245916cd2Sjpk 	if (nzents == 0) {
274345916cd2Sjpk 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
274445916cd2Sjpk 		return (-1);
274545916cd2Sjpk 	}
274645916cd2Sjpk 
274745916cd2Sjpk 	zids = malloc(nzents * sizeof (zoneid_t));
274845916cd2Sjpk 	if (zids == NULL) {
27493f2f09c1Sdp 		zerror(zlogp, B_TRUE, "memory allocation failed");
275045916cd2Sjpk 		return (-1);
275145916cd2Sjpk 	}
275245916cd2Sjpk 	nzents_saved = nzents;
275345916cd2Sjpk 
275445916cd2Sjpk 	if (zone_list(zids, &nzents) != 0) {
275545916cd2Sjpk 		zerror(zlogp, B_TRUE, "unable to list zones");
275645916cd2Sjpk 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
275745916cd2Sjpk 		free(zids);
275845916cd2Sjpk 		return (-1);
275945916cd2Sjpk 	}
276045916cd2Sjpk 	if (nzents != nzents_saved) {
276145916cd2Sjpk 		/* list changed, try again */
276245916cd2Sjpk 		free(zids);
276345916cd2Sjpk 		goto again;
276445916cd2Sjpk 	}
276545916cd2Sjpk 
276645916cd2Sjpk 	ip = getprivimplinfo();
276745916cd2Sjpk 	if ((zid_privs = priv_allocset()) == NULL) {
276845916cd2Sjpk 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
276945916cd2Sjpk 		zonecfg_free_fs_option_list(
277045916cd2Sjpk 		    lower_fstab.zone_fs_options);
277145916cd2Sjpk 		free(zids);
277245916cd2Sjpk 		return (-1);
277345916cd2Sjpk 	}
277445916cd2Sjpk 
277545916cd2Sjpk 	for (i = 0; i < nzents; i++) {
277645916cd2Sjpk 		char zid_name[ZONENAME_MAX];
277745916cd2Sjpk 		zone_state_t zid_state;
277845916cd2Sjpk 		char zid_rpath[MAXPATHLEN];
277945916cd2Sjpk 		struct stat stat_buf;
278045916cd2Sjpk 
278145916cd2Sjpk 		if (zids[i] == GLOBAL_ZONEID)
278245916cd2Sjpk 			continue;
278345916cd2Sjpk 
278445916cd2Sjpk 		if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1)
278545916cd2Sjpk 			continue;
278645916cd2Sjpk 
278745916cd2Sjpk 		/*
278845916cd2Sjpk 		 * Do special setup for the zone we are booting
278945916cd2Sjpk 		 */
279045916cd2Sjpk 		if (strcmp(zid_name, zone_name) == 0) {
279145916cd2Sjpk 			struct zone_fstab autofs_fstab;
279245916cd2Sjpk 			char map_path[MAXPATHLEN];
279345916cd2Sjpk 			int fd;
279445916cd2Sjpk 
279545916cd2Sjpk 			/*
279645916cd2Sjpk 			 * Create auto_home_<zone> map for this zone
279745916cd2Sjpk 			 * in the global zone. The local zone entry
279845916cd2Sjpk 			 * will be created by automount when the zone
279945916cd2Sjpk 			 * is booted.
280045916cd2Sjpk 			 */
280145916cd2Sjpk 
280245916cd2Sjpk 			(void) snprintf(autofs_fstab.zone_fs_special,
280345916cd2Sjpk 			    MAXPATHLEN, "auto_home_%s", zid_name);
280445916cd2Sjpk 
280545916cd2Sjpk 			(void) snprintf(autofs_fstab.zone_fs_dir, MAXPATHLEN,
280645916cd2Sjpk 			    "/zone/%s/home", zid_name);
280745916cd2Sjpk 
280845916cd2Sjpk 			(void) snprintf(map_path, sizeof (map_path),
280945916cd2Sjpk 			    "/etc/%s", autofs_fstab.zone_fs_special);
281045916cd2Sjpk 			/*
281145916cd2Sjpk 			 * If the map file doesn't exist create a template
281245916cd2Sjpk 			 */
281345916cd2Sjpk 			if ((fd = open(map_path, O_RDWR | O_CREAT | O_EXCL,
281445916cd2Sjpk 			    S_IRUSR | S_IWUSR | S_IRGRP| S_IROTH)) != -1) {
281545916cd2Sjpk 				int len;
281645916cd2Sjpk 				char map_rec[MAXPATHLEN];
281745916cd2Sjpk 
281845916cd2Sjpk 				len = snprintf(map_rec, sizeof (map_rec),
281945916cd2Sjpk 				    "+%s\n*\t-fstype=lofs\t:%s/export/home/&\n",
282045916cd2Sjpk 				    autofs_fstab.zone_fs_special, rootpath);
282145916cd2Sjpk 				(void) write(fd, map_rec, len);
282245916cd2Sjpk 				(void) close(fd);
282345916cd2Sjpk 			}
282445916cd2Sjpk 
282545916cd2Sjpk 			/*
282645916cd2Sjpk 			 * Mount auto_home_<zone> in the global zone if absent.
282745916cd2Sjpk 			 * If it's already of type autofs, then
282845916cd2Sjpk 			 * don't mount it again.
282945916cd2Sjpk 			 */
283045916cd2Sjpk 			if ((stat(autofs_fstab.zone_fs_dir, &stat_buf) == -1) ||
283145916cd2Sjpk 			    strcmp(stat_buf.st_fstype, MNTTYPE_AUTOFS) != 0) {
283245916cd2Sjpk 				char optstr[] = "indirect,ignore,nobrowse";
283345916cd2Sjpk 
283445916cd2Sjpk 				(void) make_one_dir(zlogp, "",
283545916cd2Sjpk 				    autofs_fstab.zone_fs_dir, DEFAULT_DIR_MODE);
283645916cd2Sjpk 
283745916cd2Sjpk 				/*
283845916cd2Sjpk 				 * Mount will fail if automounter has already
283945916cd2Sjpk 				 * processed the auto_home_<zonename> map
284045916cd2Sjpk 				 */
284145916cd2Sjpk 				(void) domount(zlogp, MNTTYPE_AUTOFS, optstr,
284245916cd2Sjpk 				    autofs_fstab.zone_fs_special,
284345916cd2Sjpk 				    autofs_fstab.zone_fs_dir);
284445916cd2Sjpk 			}
284545916cd2Sjpk 			continue;
284645916cd2Sjpk 		}
284745916cd2Sjpk 
284845916cd2Sjpk 
284945916cd2Sjpk 		if (zone_get_state(zid_name, &zid_state) != Z_OK ||
285048451833Scarlsonj 		    (zid_state != ZONE_STATE_READY &&
285148451833Scarlsonj 		    zid_state != ZONE_STATE_RUNNING))
285245916cd2Sjpk 			/* Skip over zones without mounted filesystems */
285345916cd2Sjpk 			continue;
285445916cd2Sjpk 
285545916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label,
285645916cd2Sjpk 		    sizeof (m_label_t)) < 0)
285745916cd2Sjpk 			/* Skip over zones with unspecified label */
285845916cd2Sjpk 			continue;
285945916cd2Sjpk 
286045916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath,
286145916cd2Sjpk 		    sizeof (zid_rpath)) == -1)
286245916cd2Sjpk 			/* Skip over zones with bad path */
286345916cd2Sjpk 			continue;
286445916cd2Sjpk 
286545916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_PRIVSET, zid_privs,
286645916cd2Sjpk 		    sizeof (priv_chunk_t) * ip->priv_setsize) == -1)
286745916cd2Sjpk 			/* Skip over zones with bad privs */
286845916cd2Sjpk 			continue;
286945916cd2Sjpk 
287045916cd2Sjpk 		/*
287145916cd2Sjpk 		 * Reading down is valid according to our label model
287245916cd2Sjpk 		 * but some customers want to disable it because it
287345916cd2Sjpk 		 * allows execute down and other possible attacks.
287445916cd2Sjpk 		 * Therefore, we restrict this feature to zones that
287545916cd2Sjpk 		 * have the NET_MAC_AWARE privilege which is required
287645916cd2Sjpk 		 * for NFS read-down semantics.
287745916cd2Sjpk 		 */
287845916cd2Sjpk 		if ((bldominates(zlabel, zid_label)) &&
287945916cd2Sjpk 		    (priv_ismember(zprivs, PRIV_NET_MAC_AWARE))) {
288045916cd2Sjpk 			/*
288145916cd2Sjpk 			 * Our zone dominates this one.
288245916cd2Sjpk 			 * Create a lofs mount from lower zone's /export/home
288345916cd2Sjpk 			 */
288445916cd2Sjpk 			(void) snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN,
288545916cd2Sjpk 			    "%s/zone/%s/export/home", rootpath, zid_name);
288645916cd2Sjpk 
288745916cd2Sjpk 			/*
288845916cd2Sjpk 			 * If the target is already an LOFS mount
288945916cd2Sjpk 			 * then don't do it again.
289045916cd2Sjpk 			 */
289145916cd2Sjpk 			if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) ||
289245916cd2Sjpk 			    strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) {
289345916cd2Sjpk 
289445916cd2Sjpk 				if (snprintf(lower_fstab.zone_fs_special,
289545916cd2Sjpk 				    MAXPATHLEN, "%s/export",
289645916cd2Sjpk 				    zid_rpath) > MAXPATHLEN)
289745916cd2Sjpk 					continue;
289845916cd2Sjpk 
289945916cd2Sjpk 				/*
290045916cd2Sjpk 				 * Make sure the lower-level home exists
290145916cd2Sjpk 				 */
290245916cd2Sjpk 				if (make_one_dir(zlogp,
290345916cd2Sjpk 				    lower_fstab.zone_fs_special,
290445916cd2Sjpk 				    "/home", DEFAULT_DIR_MODE) != 0)
290545916cd2Sjpk 					continue;
290645916cd2Sjpk 
290745916cd2Sjpk 				(void) strlcat(lower_fstab.zone_fs_special,
290845916cd2Sjpk 				    "/home", MAXPATHLEN);
290945916cd2Sjpk 
291045916cd2Sjpk 				/*
291145916cd2Sjpk 				 * Mount can fail because the lower-level
291245916cd2Sjpk 				 * zone may have already done a mount up.
291345916cd2Sjpk 				 */
291445916cd2Sjpk 				(void) mount_one(zlogp, &lower_fstab, "");
291545916cd2Sjpk 			}
291645916cd2Sjpk 		} else if ((bldominates(zid_label, zlabel)) &&
291745916cd2Sjpk 		    (priv_ismember(zid_privs, PRIV_NET_MAC_AWARE))) {
291845916cd2Sjpk 			/*
291945916cd2Sjpk 			 * This zone dominates our zone.
292045916cd2Sjpk 			 * Create a lofs mount from our zone's /export/home
292145916cd2Sjpk 			 */
292245916cd2Sjpk 			if (snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN,
292345916cd2Sjpk 			    "%s/zone/%s/export/home", zid_rpath,
292445916cd2Sjpk 			    zone_name) > MAXPATHLEN)
292545916cd2Sjpk 				continue;
292645916cd2Sjpk 
292745916cd2Sjpk 			/*
292845916cd2Sjpk 			 * If the target is already an LOFS mount
292945916cd2Sjpk 			 * then don't do it again.
293045916cd2Sjpk 			 */
293145916cd2Sjpk 			if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) ||
293245916cd2Sjpk 			    strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) {
293345916cd2Sjpk 
293445916cd2Sjpk 				(void) snprintf(lower_fstab.zone_fs_special,
293545916cd2Sjpk 				    MAXPATHLEN, "%s/export/home", rootpath);
293645916cd2Sjpk 
293745916cd2Sjpk 				/*
293845916cd2Sjpk 				 * Mount can fail because the higher-level
293945916cd2Sjpk 				 * zone may have already done a mount down.
294045916cd2Sjpk 				 */
294145916cd2Sjpk 				(void) mount_one(zlogp, &lower_fstab, "");
294245916cd2Sjpk 			}
294345916cd2Sjpk 		}
294445916cd2Sjpk 	}
294545916cd2Sjpk 	zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
294645916cd2Sjpk 	priv_freeset(zid_privs);
294745916cd2Sjpk 	free(zids);
294845916cd2Sjpk 
294945916cd2Sjpk 	/*
295045916cd2Sjpk 	 * Now share any exported directories from this zone.
295145916cd2Sjpk 	 * Each zone can have its own dfstab.
295245916cd2Sjpk 	 */
295345916cd2Sjpk 
295445916cd2Sjpk 	argv[0] = "zoneshare";
295545916cd2Sjpk 	argv[1] = "-z";
295645916cd2Sjpk 	argv[2] = zone_name;
295745916cd2Sjpk 	argv[3] = NULL;
295845916cd2Sjpk 
295945916cd2Sjpk 	(void) forkexec(zlogp, "/usr/lib/zones/zoneshare", argv);
296045916cd2Sjpk 	/* Don't check for errors since they don't affect the zone */
296145916cd2Sjpk 
296245916cd2Sjpk 	return (0);
296345916cd2Sjpk }
296445916cd2Sjpk 
296545916cd2Sjpk /*
296645916cd2Sjpk  * Unmount lofs mounts from higher level zones
296745916cd2Sjpk  * Unshare nfs exported directories
296845916cd2Sjpk  */
296945916cd2Sjpk static void
297045916cd2Sjpk tsol_unmounts(zlog_t *zlogp, char *zone_name)
297145916cd2Sjpk {
297245916cd2Sjpk 	zoneid_t *zids = NULL;
297345916cd2Sjpk 	uint_t nzents_saved;
297445916cd2Sjpk 	uint_t nzents;
297545916cd2Sjpk 	int i;
297645916cd2Sjpk 	char *argv[4];
297745916cd2Sjpk 	char path[MAXPATHLEN];
297845916cd2Sjpk 
297945916cd2Sjpk 	if (!is_system_labeled())
298045916cd2Sjpk 		return;
298145916cd2Sjpk 
298245916cd2Sjpk 	/*
298345916cd2Sjpk 	 * Get the list of zones from the kernel
298445916cd2Sjpk 	 */
298545916cd2Sjpk 	if (zone_list(NULL, &nzents) != 0) {
298645916cd2Sjpk 		return;
298745916cd2Sjpk 	}
298845916cd2Sjpk 
298945916cd2Sjpk 	if (zid_label == NULL) {
299045916cd2Sjpk 		zid_label = m_label_alloc(MAC_LABEL);
299145916cd2Sjpk 		if (zid_label == NULL)
299245916cd2Sjpk 			return;
299345916cd2Sjpk 	}
299445916cd2Sjpk 
299545916cd2Sjpk again:
299645916cd2Sjpk 	if (nzents == 0)
299745916cd2Sjpk 		return;
299845916cd2Sjpk 
299945916cd2Sjpk 	zids = malloc(nzents * sizeof (zoneid_t));
300045916cd2Sjpk 	if (zids == NULL) {
30013f2f09c1Sdp 		zerror(zlogp, B_TRUE, "memory allocation failed");
300245916cd2Sjpk 		return;
300345916cd2Sjpk 	}
300445916cd2Sjpk 	nzents_saved = nzents;
300545916cd2Sjpk 
300645916cd2Sjpk 	if (zone_list(zids, &nzents) != 0) {
300745916cd2Sjpk 		free(zids);
300845916cd2Sjpk 		return;
300945916cd2Sjpk 	}
301045916cd2Sjpk 	if (nzents != nzents_saved) {
301145916cd2Sjpk 		/* list changed, try again */
301245916cd2Sjpk 		free(zids);
301345916cd2Sjpk 		goto again;
301445916cd2Sjpk 	}
301545916cd2Sjpk 
301645916cd2Sjpk 	for (i = 0; i < nzents; i++) {
301745916cd2Sjpk 		char zid_name[ZONENAME_MAX];
301845916cd2Sjpk 		zone_state_t zid_state;
301945916cd2Sjpk 		char zid_rpath[MAXPATHLEN];
302045916cd2Sjpk 
302145916cd2Sjpk 		if (zids[i] == GLOBAL_ZONEID)
302245916cd2Sjpk 			continue;
302345916cd2Sjpk 
302445916cd2Sjpk 		if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1)
302545916cd2Sjpk 			continue;
302645916cd2Sjpk 
302745916cd2Sjpk 		/*
302845916cd2Sjpk 		 * Skip the zone we are halting
302945916cd2Sjpk 		 */
303045916cd2Sjpk 		if (strcmp(zid_name, zone_name) == 0)
303145916cd2Sjpk 			continue;
303245916cd2Sjpk 
303345916cd2Sjpk 		if ((zone_getattr(zids[i], ZONE_ATTR_STATUS, &zid_state,
303445916cd2Sjpk 		    sizeof (zid_state)) < 0) ||
303545916cd2Sjpk 		    (zid_state < ZONE_IS_READY))
303645916cd2Sjpk 			/* Skip over zones without mounted filesystems */
303745916cd2Sjpk 			continue;
303845916cd2Sjpk 
303945916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label,
304045916cd2Sjpk 		    sizeof (m_label_t)) < 0)
304145916cd2Sjpk 			/* Skip over zones with unspecified label */
304245916cd2Sjpk 			continue;
304345916cd2Sjpk 
304445916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath,
304545916cd2Sjpk 		    sizeof (zid_rpath)) == -1)
304645916cd2Sjpk 			/* Skip over zones with bad path */
304745916cd2Sjpk 			continue;
304845916cd2Sjpk 
304945916cd2Sjpk 		if (zlabel != NULL && bldominates(zid_label, zlabel)) {
305045916cd2Sjpk 			/*
305145916cd2Sjpk 			 * This zone dominates our zone.
305245916cd2Sjpk 			 * Unmount the lofs mount of our zone's /export/home
305345916cd2Sjpk 			 */
305445916cd2Sjpk 
305545916cd2Sjpk 			if (snprintf(path, MAXPATHLEN,
305645916cd2Sjpk 			    "%s/zone/%s/export/home", zid_rpath,
305745916cd2Sjpk 			    zone_name) > MAXPATHLEN)
305845916cd2Sjpk 				continue;
305945916cd2Sjpk 
306045916cd2Sjpk 			/* Skip over mount failures */
306145916cd2Sjpk 			(void) umount(path);
306245916cd2Sjpk 		}
306345916cd2Sjpk 	}
306445916cd2Sjpk 	free(zids);
306545916cd2Sjpk 
306645916cd2Sjpk 	/*
306745916cd2Sjpk 	 * Unmount global zone autofs trigger for this zone
306845916cd2Sjpk 	 */
306945916cd2Sjpk 	(void) snprintf(path, MAXPATHLEN, "/zone/%s/home", zone_name);
307045916cd2Sjpk 	/* Skip over mount failures */
307145916cd2Sjpk 	(void) umount(path);
307245916cd2Sjpk 
307345916cd2Sjpk 	/*
307445916cd2Sjpk 	 * Next unshare any exported directories from this zone.
307545916cd2Sjpk 	 */
307645916cd2Sjpk 
307745916cd2Sjpk 	argv[0] = "zoneunshare";
307845916cd2Sjpk 	argv[1] = "-z";
307945916cd2Sjpk 	argv[2] = zone_name;
308045916cd2Sjpk 	argv[3] = NULL;
308145916cd2Sjpk 
308245916cd2Sjpk 	(void) forkexec(zlogp, "/usr/lib/zones/zoneunshare", argv);
308345916cd2Sjpk 	/* Don't check for errors since they don't affect the zone */
308445916cd2Sjpk 
308545916cd2Sjpk 	/*
308645916cd2Sjpk 	 * Finally, deallocate any devices in the zone.
308745916cd2Sjpk 	 */
308845916cd2Sjpk 
308945916cd2Sjpk 	argv[0] = "deallocate";
309045916cd2Sjpk 	argv[1] = "-Isz";
309145916cd2Sjpk 	argv[2] = zone_name;
309245916cd2Sjpk 	argv[3] = NULL;
309345916cd2Sjpk 
309445916cd2Sjpk 	(void) forkexec(zlogp, "/usr/sbin/deallocate", argv);
309545916cd2Sjpk 	/* Don't check for errors since they don't affect the zone */
309645916cd2Sjpk }
309745916cd2Sjpk 
309845916cd2Sjpk /*
309945916cd2Sjpk  * Fetch the Trusted Extensions label and multi-level ports (MLPs) for
310045916cd2Sjpk  * this zone.
310145916cd2Sjpk  */
310245916cd2Sjpk static tsol_zcent_t *
310345916cd2Sjpk get_zone_label(zlog_t *zlogp, priv_set_t *privs)
310445916cd2Sjpk {
310545916cd2Sjpk 	FILE *fp;
310645916cd2Sjpk 	tsol_zcent_t *zcent = NULL;
310745916cd2Sjpk 	char line[MAXTNZLEN];
310845916cd2Sjpk 
310945916cd2Sjpk 	if ((fp = fopen(TNZONECFG_PATH, "r")) == NULL) {
311045916cd2Sjpk 		zerror(zlogp, B_TRUE, "%s", TNZONECFG_PATH);
311145916cd2Sjpk 		return (NULL);
311245916cd2Sjpk 	}
311345916cd2Sjpk 
311445916cd2Sjpk 	while (fgets(line, sizeof (line), fp) != NULL) {
311545916cd2Sjpk 		/*
311645916cd2Sjpk 		 * Check for malformed database
311745916cd2Sjpk 		 */
311845916cd2Sjpk 		if (strlen(line) == MAXTNZLEN - 1)
311945916cd2Sjpk 			break;
312045916cd2Sjpk 		if ((zcent = tsol_sgetzcent(line, NULL, NULL)) == NULL)
312145916cd2Sjpk 			continue;
312245916cd2Sjpk 		if (strcmp(zcent->zc_name, zone_name) == 0)
312345916cd2Sjpk 			break;
312445916cd2Sjpk 		tsol_freezcent(zcent);
312545916cd2Sjpk 		zcent = NULL;
312645916cd2Sjpk 	}
312745916cd2Sjpk 	(void) fclose(fp);
312845916cd2Sjpk 
312945916cd2Sjpk 	if (zcent == NULL) {
313045916cd2Sjpk 		zerror(zlogp, B_FALSE, "zone requires a label assignment. "
313145916cd2Sjpk 		    "See tnzonecfg(4)");
313245916cd2Sjpk 	} else {
313345916cd2Sjpk 		if (zlabel == NULL)
313445916cd2Sjpk 			zlabel = m_label_alloc(MAC_LABEL);
313545916cd2Sjpk 		/*
313645916cd2Sjpk 		 * Save this zone's privileges for later read-down processing
313745916cd2Sjpk 		 */
313845916cd2Sjpk 		if ((zprivs = priv_allocset()) == NULL) {
313945916cd2Sjpk 			zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
314045916cd2Sjpk 			return (NULL);
314145916cd2Sjpk 		} else {
314245916cd2Sjpk 			priv_copyset(privs, zprivs);
314345916cd2Sjpk 		}
314445916cd2Sjpk 	}
314545916cd2Sjpk 	return (zcent);
314645916cd2Sjpk }
314745916cd2Sjpk 
314845916cd2Sjpk /*
314945916cd2Sjpk  * Add the Trusted Extensions multi-level ports for this zone.
315045916cd2Sjpk  */
315145916cd2Sjpk static void
315245916cd2Sjpk set_mlps(zlog_t *zlogp, zoneid_t zoneid, tsol_zcent_t *zcent)
315345916cd2Sjpk {
315445916cd2Sjpk 	tsol_mlp_t *mlp;
315545916cd2Sjpk 	tsol_mlpent_t tsme;
315645916cd2Sjpk 
315745916cd2Sjpk 	if (!is_system_labeled())
315845916cd2Sjpk 		return;
315945916cd2Sjpk 
316045916cd2Sjpk 	tsme.tsme_zoneid = zoneid;
316145916cd2Sjpk 	tsme.tsme_flags = 0;
316245916cd2Sjpk 	for (mlp = zcent->zc_private_mlp; !TSOL_MLP_END(mlp); mlp++) {
316345916cd2Sjpk 		tsme.tsme_mlp = *mlp;
316445916cd2Sjpk 		if (tnmlp(TNDB_LOAD, &tsme) != 0) {
316545916cd2Sjpk 			zerror(zlogp, B_TRUE, "cannot set zone-specific MLP "
316645916cd2Sjpk 			    "on %d-%d/%d", mlp->mlp_port,
316745916cd2Sjpk 			    mlp->mlp_port_upper, mlp->mlp_ipp);
316845916cd2Sjpk 		}
316945916cd2Sjpk 	}
317045916cd2Sjpk 
317145916cd2Sjpk 	tsme.tsme_flags = TSOL_MEF_SHARED;
317245916cd2Sjpk 	for (mlp = zcent->zc_shared_mlp; !TSOL_MLP_END(mlp); mlp++) {
317345916cd2Sjpk 		tsme.tsme_mlp = *mlp;
317445916cd2Sjpk 		if (tnmlp(TNDB_LOAD, &tsme) != 0) {
317545916cd2Sjpk 			zerror(zlogp, B_TRUE, "cannot set shared MLP "
317645916cd2Sjpk 			    "on %d-%d/%d", mlp->mlp_port,
317745916cd2Sjpk 			    mlp->mlp_port_upper, mlp->mlp_ipp);
317845916cd2Sjpk 		}
317945916cd2Sjpk 	}
318045916cd2Sjpk }
318145916cd2Sjpk 
318245916cd2Sjpk static void
318345916cd2Sjpk remove_mlps(zlog_t *zlogp, zoneid_t zoneid)
318445916cd2Sjpk {
318545916cd2Sjpk 	tsol_mlpent_t tsme;
318645916cd2Sjpk 
318745916cd2Sjpk 	if (!is_system_labeled())
318845916cd2Sjpk 		return;
318945916cd2Sjpk 
319045916cd2Sjpk 	(void) memset(&tsme, 0, sizeof (tsme));
319145916cd2Sjpk 	tsme.tsme_zoneid = zoneid;
319245916cd2Sjpk 	if (tnmlp(TNDB_FLUSH, &tsme) != 0)
319345916cd2Sjpk 		zerror(zlogp, B_TRUE, "cannot flush MLPs");
319445916cd2Sjpk }
319545916cd2Sjpk 
31967c478bd9Sstevel@tonic-gate int
31977c478bd9Sstevel@tonic-gate prtmount(const char *fs, void *x) {
31987c478bd9Sstevel@tonic-gate 	zerror((zlog_t *)x, B_FALSE, "  %s", fs);
31997c478bd9Sstevel@tonic-gate 	return (0);
32007c478bd9Sstevel@tonic-gate }
32017c478bd9Sstevel@tonic-gate 
3202108322fbScarlsonj /*
3203108322fbScarlsonj  * Look for zones running on the main system that are using this root (or any
3204108322fbScarlsonj  * subdirectory of it).  Return B_TRUE and print an error if a conflicting zone
3205108322fbScarlsonj  * is found or if we can't tell.
3206108322fbScarlsonj  */
3207108322fbScarlsonj static boolean_t
3208108322fbScarlsonj duplicate_zone_root(zlog_t *zlogp, const char *rootpath)
32097c478bd9Sstevel@tonic-gate {
3210108322fbScarlsonj 	zoneid_t *zids = NULL;
3211108322fbScarlsonj 	uint_t nzids = 0;
3212108322fbScarlsonj 	boolean_t retv;
3213108322fbScarlsonj 	int rlen, zlen;
3214108322fbScarlsonj 	char zroot[MAXPATHLEN];
3215108322fbScarlsonj 	char zonename[ZONENAME_MAX];
3216108322fbScarlsonj 
3217108322fbScarlsonj 	for (;;) {
3218108322fbScarlsonj 		nzids += 10;
3219108322fbScarlsonj 		zids = malloc(nzids * sizeof (*zids));
3220108322fbScarlsonj 		if (zids == NULL) {
32213f2f09c1Sdp 			zerror(zlogp, B_TRUE, "memory allocation failed");
3222108322fbScarlsonj 			return (B_TRUE);
3223108322fbScarlsonj 		}
3224108322fbScarlsonj 		if (zone_list(zids, &nzids) == 0)
3225108322fbScarlsonj 			break;
3226108322fbScarlsonj 		free(zids);
3227108322fbScarlsonj 	}
3228108322fbScarlsonj 	retv = B_FALSE;
3229108322fbScarlsonj 	rlen = strlen(rootpath);
3230108322fbScarlsonj 	while (nzids > 0) {
3231108322fbScarlsonj 		/*
3232108322fbScarlsonj 		 * Ignore errors; they just mean that the zone has disappeared
3233108322fbScarlsonj 		 * while we were busy.
3234108322fbScarlsonj 		 */
3235108322fbScarlsonj 		if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot,
3236108322fbScarlsonj 		    sizeof (zroot)) == -1)
3237108322fbScarlsonj 			continue;
3238108322fbScarlsonj 		zlen = strlen(zroot);
3239108322fbScarlsonj 		if (zlen > rlen)
3240108322fbScarlsonj 			zlen = rlen;
3241108322fbScarlsonj 		if (strncmp(rootpath, zroot, zlen) == 0 &&
3242108322fbScarlsonj 		    (zroot[zlen] == '\0' || zroot[zlen] == '/') &&
3243108322fbScarlsonj 		    (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) {
3244108322fbScarlsonj 			if (getzonenamebyid(zids[nzids], zonename,
3245108322fbScarlsonj 			    sizeof (zonename)) == -1)
3246108322fbScarlsonj 				(void) snprintf(zonename, sizeof (zonename),
3247108322fbScarlsonj 				    "id %d", (int)zids[nzids]);
3248108322fbScarlsonj 			zerror(zlogp, B_FALSE,
3249108322fbScarlsonj 			    "zone root %s already in use by zone %s",
3250108322fbScarlsonj 			    rootpath, zonename);
3251108322fbScarlsonj 			retv = B_TRUE;
3252108322fbScarlsonj 			break;
3253108322fbScarlsonj 		}
3254108322fbScarlsonj 	}
3255108322fbScarlsonj 	free(zids);
3256108322fbScarlsonj 	return (retv);
3257108322fbScarlsonj }
3258108322fbScarlsonj 
3259108322fbScarlsonj /*
3260108322fbScarlsonj  * Search for loopback mounts that use this same source node (same device and
3261108322fbScarlsonj  * inode).  Return B_TRUE if there is one or if we can't tell.
3262108322fbScarlsonj  */
3263108322fbScarlsonj static boolean_t
3264108322fbScarlsonj duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
3265108322fbScarlsonj {
3266108322fbScarlsonj 	struct stat64 rst, zst;
3267108322fbScarlsonj 	struct mnttab *mnp;
3268108322fbScarlsonj 
3269108322fbScarlsonj 	if (stat64(rootpath, &rst) == -1) {
3270108322fbScarlsonj 		zerror(zlogp, B_TRUE, "can't stat %s", rootpath);
3271108322fbScarlsonj 		return (B_TRUE);
3272108322fbScarlsonj 	}
3273108322fbScarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
3274108322fbScarlsonj 		return (B_TRUE);
3275108322fbScarlsonj 	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
3276108322fbScarlsonj 		if (mnp->mnt_fstype == NULL ||
3277108322fbScarlsonj 		    strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0)
3278108322fbScarlsonj 			continue;
3279108322fbScarlsonj 		/* We're looking at a loopback mount.  Stat it. */
3280108322fbScarlsonj 		if (mnp->mnt_special != NULL &&
3281108322fbScarlsonj 		    stat64(mnp->mnt_special, &zst) != -1 &&
3282108322fbScarlsonj 		    rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
3283108322fbScarlsonj 			zerror(zlogp, B_FALSE,
3284108322fbScarlsonj 			    "zone root %s is reachable through %s",
3285108322fbScarlsonj 			    rootpath, mnp->mnt_mountp);
3286108322fbScarlsonj 			return (B_TRUE);
3287108322fbScarlsonj 		}
3288108322fbScarlsonj 	}
3289108322fbScarlsonj 	return (B_FALSE);
3290108322fbScarlsonj }
3291108322fbScarlsonj 
3292108322fbScarlsonj zoneid_t
3293108322fbScarlsonj vplat_create(zlog_t *zlogp, boolean_t mount_cmd)
3294108322fbScarlsonj {
3295108322fbScarlsonj 	zoneid_t rval = -1;
32967c478bd9Sstevel@tonic-gate 	priv_set_t *privs;
32977c478bd9Sstevel@tonic-gate 	char rootpath[MAXPATHLEN];
32987c478bd9Sstevel@tonic-gate 	char *rctlbuf = NULL;
3299108322fbScarlsonj 	size_t rctlbufsz = 0;
3300fa9e4066Sahrens 	char *zfsbuf = NULL;
3301fa9e4066Sahrens 	size_t zfsbufsz = 0;
3302108322fbScarlsonj 	zoneid_t zoneid = -1;
33037c478bd9Sstevel@tonic-gate 	int xerr;
3304108322fbScarlsonj 	char *kzone;
3305108322fbScarlsonj 	FILE *fp = NULL;
330645916cd2Sjpk 	tsol_zcent_t *zcent = NULL;
330745916cd2Sjpk 	int match = 0;
330845916cd2Sjpk 	int doi = 0;
33097c478bd9Sstevel@tonic-gate 
33107c478bd9Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
33117c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone root");
33127c478bd9Sstevel@tonic-gate 		return (-1);
33137c478bd9Sstevel@tonic-gate 	}
3314108322fbScarlsonj 	if (zonecfg_in_alt_root())
3315108322fbScarlsonj 		resolve_lofs(zlogp, rootpath, sizeof (rootpath));
33167c478bd9Sstevel@tonic-gate 
33177c478bd9Sstevel@tonic-gate 	if ((privs = priv_allocset()) == NULL) {
33187c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
33197c478bd9Sstevel@tonic-gate 		return (-1);
33207c478bd9Sstevel@tonic-gate 	}
33217c478bd9Sstevel@tonic-gate 	priv_emptyset(privs);
3322ffbafc53Scomay 	if (get_privset(zlogp, privs, mount_cmd) != 0)
33237c478bd9Sstevel@tonic-gate 		goto error;
3324ffbafc53Scomay 
3325108322fbScarlsonj 	if (!mount_cmd && get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
33267c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "Unable to get list of rctls");
33277c478bd9Sstevel@tonic-gate 		goto error;
33287c478bd9Sstevel@tonic-gate 	}
3329ffbafc53Scomay 
3330fa9e4066Sahrens 	if (get_datasets(zlogp, &zfsbuf, &zfsbufsz) != 0) {
3331fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "Unable to get list of ZFS datasets");
3332fa9e4066Sahrens 		goto error;
3333fa9e4066Sahrens 	}
33347c478bd9Sstevel@tonic-gate 
333548451833Scarlsonj 	if (!mount_cmd && is_system_labeled()) {
333645916cd2Sjpk 		zcent = get_zone_label(zlogp, privs);
333748451833Scarlsonj 		if (zcent != NULL) {
333845916cd2Sjpk 			match = zcent->zc_match;
333945916cd2Sjpk 			doi = zcent->zc_doi;
334045916cd2Sjpk 			*zlabel = zcent->zc_label;
334145916cd2Sjpk 		} else {
334245916cd2Sjpk 			goto error;
334345916cd2Sjpk 		}
334445916cd2Sjpk 	}
334545916cd2Sjpk 
3346108322fbScarlsonj 	kzone = zone_name;
3347108322fbScarlsonj 
3348108322fbScarlsonj 	/*
3349108322fbScarlsonj 	 * We must do this scan twice.  First, we look for zones running on the
3350108322fbScarlsonj 	 * main system that are using this root (or any subdirectory of it).
3351108322fbScarlsonj 	 * Next, we reduce to the shortest path and search for loopback mounts
3352108322fbScarlsonj 	 * that use this same source node (same device and inode).
3353108322fbScarlsonj 	 */
3354108322fbScarlsonj 	if (duplicate_zone_root(zlogp, rootpath))
3355108322fbScarlsonj 		goto error;
3356108322fbScarlsonj 	if (duplicate_reachable_path(zlogp, rootpath))
3357108322fbScarlsonj 		goto error;
3358108322fbScarlsonj 
3359108322fbScarlsonj 	if (mount_cmd) {
3360108322fbScarlsonj 		root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE);
3361108322fbScarlsonj 
3362108322fbScarlsonj 		/*
3363108322fbScarlsonj 		 * Forge up a special root for this zone.  When a zone is
3364108322fbScarlsonj 		 * mounted, we can't let the zone have its own root because the
3365108322fbScarlsonj 		 * tools that will be used in this "scratch zone" need access
3366108322fbScarlsonj 		 * to both the zone's resources and the running machine's
3367108322fbScarlsonj 		 * executables.
3368108322fbScarlsonj 		 *
3369108322fbScarlsonj 		 * Note that the mkdir here also catches read-only filesystems.
3370108322fbScarlsonj 		 */
3371108322fbScarlsonj 		if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) {
3372108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", rootpath);
3373108322fbScarlsonj 			goto error;
3374108322fbScarlsonj 		}
3375108322fbScarlsonj 		if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0)
3376108322fbScarlsonj 			goto error;
3377108322fbScarlsonj 	}
3378108322fbScarlsonj 
3379108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
3380108322fbScarlsonj 		/*
3381108322fbScarlsonj 		 * If we are mounting up a zone in an alternate root partition,
3382108322fbScarlsonj 		 * then we have some additional work to do before starting the
3383108322fbScarlsonj 		 * zone.  First, resolve the root path down so that we're not
3384108322fbScarlsonj 		 * fooled by duplicates.  Then forge up an internal name for
3385108322fbScarlsonj 		 * the zone.
3386108322fbScarlsonj 		 */
3387108322fbScarlsonj 		if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) {
3388108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot open mapfile");
3389108322fbScarlsonj 			goto error;
3390108322fbScarlsonj 		}
3391108322fbScarlsonj 		if (zonecfg_lock_scratch(fp) != 0) {
3392108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot lock mapfile");
3393108322fbScarlsonj 			goto error;
3394108322fbScarlsonj 		}
3395108322fbScarlsonj 		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
3396108322fbScarlsonj 		    NULL, 0) == 0) {
3397108322fbScarlsonj 			zerror(zlogp, B_FALSE, "scratch zone already running");
3398108322fbScarlsonj 			goto error;
3399108322fbScarlsonj 		}
3400108322fbScarlsonj 		/* This is the preferred name */
3401108322fbScarlsonj 		(void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s",
3402108322fbScarlsonj 		    zone_name);
3403108322fbScarlsonj 		srandom(getpid());
3404108322fbScarlsonj 		while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL,
3405108322fbScarlsonj 		    0) == 0) {
3406108322fbScarlsonj 			/* This is just an arbitrary name; note "." usage */
3407108322fbScarlsonj 			(void) snprintf(kernzone, sizeof (kernzone),
3408108322fbScarlsonj 			    "SUNWlu.%08lX%08lX", random(), random());
3409108322fbScarlsonj 		}
3410108322fbScarlsonj 		kzone = kernzone;
3411108322fbScarlsonj 	}
3412108322fbScarlsonj 
34137c478bd9Sstevel@tonic-gate 	xerr = 0;
3414108322fbScarlsonj 	if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
341545916cd2Sjpk 	    rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel)) == -1) {
34167c478bd9Sstevel@tonic-gate 		if (xerr == ZE_AREMOUNTS) {
34177c478bd9Sstevel@tonic-gate 			if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
34187c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
34197c478bd9Sstevel@tonic-gate 				    "An unknown file-system is mounted on "
34207c478bd9Sstevel@tonic-gate 				    "a subdirectory of %s", rootpath);
34217c478bd9Sstevel@tonic-gate 			} else {
34227c478bd9Sstevel@tonic-gate 
34237c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
34247c478bd9Sstevel@tonic-gate 				    "These file-systems are mounted on "
34257c478bd9Sstevel@tonic-gate 				    "subdirectories of %s:", rootpath);
34267c478bd9Sstevel@tonic-gate 				(void) zonecfg_find_mounts(rootpath,
34277c478bd9Sstevel@tonic-gate 				    prtmount, zlogp);
34287c478bd9Sstevel@tonic-gate 			}
34297c478bd9Sstevel@tonic-gate 		} else if (xerr == ZE_CHROOTED) {
34307c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s: "
34317c478bd9Sstevel@tonic-gate 			    "cannot create a zone from a chrooted "
34327c478bd9Sstevel@tonic-gate 			    "environment", "zone_create");
34337c478bd9Sstevel@tonic-gate 		} else {
34347c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s failed", "zone_create");
34357c478bd9Sstevel@tonic-gate 		}
34367c478bd9Sstevel@tonic-gate 		goto error;
34377c478bd9Sstevel@tonic-gate 	}
3438108322fbScarlsonj 
3439108322fbScarlsonj 	if (zonecfg_in_alt_root() &&
3440108322fbScarlsonj 	    zonecfg_add_scratch(fp, zone_name, kernzone,
3441108322fbScarlsonj 	    zonecfg_get_root()) == -1) {
3442108322fbScarlsonj 		zerror(zlogp, B_TRUE, "cannot add mapfile entry");
3443108322fbScarlsonj 		goto error;
3444108322fbScarlsonj 	}
3445108322fbScarlsonj 
34467c478bd9Sstevel@tonic-gate 	/*
3447108322fbScarlsonj 	 * The following is a warning, not an error, and is not performed when
3448108322fbScarlsonj 	 * merely mounting a zone for administrative use.
34497c478bd9Sstevel@tonic-gate 	 */
3450108322fbScarlsonj 	if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0)
34517c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to "
34527c478bd9Sstevel@tonic-gate 		    "requested pool; using default pool.");
345348451833Scarlsonj 	if (!mount_cmd)
345445916cd2Sjpk 		set_mlps(zlogp, zoneid, zcent);
3455108322fbScarlsonj 	rval = zoneid;
3456108322fbScarlsonj 	zoneid = -1;
3457108322fbScarlsonj 
34587c478bd9Sstevel@tonic-gate error:
3459108322fbScarlsonj 	if (zoneid != -1)
3460108322fbScarlsonj 		(void) zone_destroy(zoneid);
34617c478bd9Sstevel@tonic-gate 	if (rctlbuf != NULL)
34627c478bd9Sstevel@tonic-gate 		free(rctlbuf);
34637c478bd9Sstevel@tonic-gate 	priv_freeset(privs);
3464108322fbScarlsonj 	if (fp != NULL)
3465108322fbScarlsonj 		zonecfg_close_scratch(fp);
3466108322fbScarlsonj 	lofs_discard_mnttab();
346745916cd2Sjpk 	if (zcent != NULL)
346845916cd2Sjpk 		tsol_freezcent(zcent);
34697c478bd9Sstevel@tonic-gate 	return (rval);
34707c478bd9Sstevel@tonic-gate }
34717c478bd9Sstevel@tonic-gate 
3472555afedfScarlsonj /*
3473555afedfScarlsonj  * Enter the zone and write a /etc/zones/index file there.  This allows
3474555afedfScarlsonj  * libzonecfg (and thus zoneadm) to report the UUID and potentially other zone
3475555afedfScarlsonj  * details from inside the zone.
3476555afedfScarlsonj  */
3477555afedfScarlsonj static void
3478555afedfScarlsonj write_index_file(zoneid_t zoneid)
3479555afedfScarlsonj {
3480555afedfScarlsonj 	FILE *zef;
3481555afedfScarlsonj 	FILE *zet;
3482555afedfScarlsonj 	struct zoneent *zep;
3483555afedfScarlsonj 	pid_t child;
3484555afedfScarlsonj 	int tmpl_fd;
3485555afedfScarlsonj 	ctid_t ct;
3486555afedfScarlsonj 	int fd;
3487555afedfScarlsonj 	char uuidstr[UUID_PRINTABLE_STRING_LENGTH];
3488555afedfScarlsonj 
3489555afedfScarlsonj 	/* Locate the zone entry in the global zone's index file */
3490555afedfScarlsonj 	if ((zef = setzoneent()) == NULL)
3491555afedfScarlsonj 		return;
3492555afedfScarlsonj 	while ((zep = getzoneent_private(zef)) != NULL) {
3493555afedfScarlsonj 		if (strcmp(zep->zone_name, zone_name) == 0)
3494555afedfScarlsonj 			break;
3495555afedfScarlsonj 		free(zep);
3496555afedfScarlsonj 	}
3497555afedfScarlsonj 	endzoneent(zef);
3498555afedfScarlsonj 	if (zep == NULL)
3499555afedfScarlsonj 		return;
3500555afedfScarlsonj 
3501555afedfScarlsonj 	if ((tmpl_fd = init_template()) == -1) {
3502555afedfScarlsonj 		free(zep);
3503555afedfScarlsonj 		return;
3504555afedfScarlsonj 	}
3505555afedfScarlsonj 
3506555afedfScarlsonj 	if ((child = fork()) == -1) {
3507555afedfScarlsonj 		(void) ct_tmpl_clear(tmpl_fd);
3508555afedfScarlsonj 		(void) close(tmpl_fd);
3509555afedfScarlsonj 		free(zep);
3510555afedfScarlsonj 		return;
3511555afedfScarlsonj 	}
3512555afedfScarlsonj 
3513555afedfScarlsonj 	/* parent waits for child to finish */
3514555afedfScarlsonj 	if (child != 0) {
3515555afedfScarlsonj 		free(zep);
3516555afedfScarlsonj 		if (contract_latest(&ct) == -1)
3517555afedfScarlsonj 			ct = -1;
3518555afedfScarlsonj 		(void) ct_tmpl_clear(tmpl_fd);
3519555afedfScarlsonj 		(void) close(tmpl_fd);
3520555afedfScarlsonj 		(void) waitpid(child, NULL, 0);
3521555afedfScarlsonj 		(void) contract_abandon_id(ct);
3522555afedfScarlsonj 		return;
3523555afedfScarlsonj 	}
3524555afedfScarlsonj 
3525555afedfScarlsonj 	/* child enters zone and sets up index file */
3526555afedfScarlsonj 	(void) ct_tmpl_clear(tmpl_fd);
3527555afedfScarlsonj 	if (zone_enter(zoneid) != -1) {
3528555afedfScarlsonj 		(void) mkdir(ZONE_CONFIG_ROOT, ZONE_CONFIG_MODE);
3529555afedfScarlsonj 		(void) chown(ZONE_CONFIG_ROOT, ZONE_CONFIG_UID,
3530555afedfScarlsonj 		    ZONE_CONFIG_GID);
3531555afedfScarlsonj 		fd = open(ZONE_INDEX_FILE, O_WRONLY|O_CREAT|O_TRUNC,
3532555afedfScarlsonj 		    ZONE_INDEX_MODE);
3533555afedfScarlsonj 		if (fd != -1 && (zet = fdopen(fd, "w")) != NULL) {
3534555afedfScarlsonj 			(void) fchown(fd, ZONE_INDEX_UID, ZONE_INDEX_GID);
3535555afedfScarlsonj 			if (uuid_is_null(zep->zone_uuid))
3536555afedfScarlsonj 				uuidstr[0] = '\0';
3537555afedfScarlsonj 			else
3538555afedfScarlsonj 				uuid_unparse(zep->zone_uuid, uuidstr);
3539555afedfScarlsonj 			(void) fprintf(zet, "%s:%s:/:%s\n", zep->zone_name,
3540555afedfScarlsonj 			    zone_state_str(zep->zone_state),
3541555afedfScarlsonj 			    uuidstr);
3542555afedfScarlsonj 			(void) fclose(zet);
3543555afedfScarlsonj 		}
3544555afedfScarlsonj 	}
3545555afedfScarlsonj 	_exit(0);
3546555afedfScarlsonj }
3547555afedfScarlsonj 
35487c478bd9Sstevel@tonic-gate int
3549555afedfScarlsonj vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd, zoneid_t zoneid)
35507c478bd9Sstevel@tonic-gate {
35515749802bSdp 
3552fa9e4066Sahrens 	if (!mount_cmd && validate_datasets(zlogp) != 0) {
3553fa9e4066Sahrens 		lofs_discard_mnttab();
3554fa9e4066Sahrens 		return (-1);
3555fa9e4066Sahrens 	}
3556fa9e4066Sahrens 
3557facf4a8dSllai1 	if (mount_filesystems(zlogp, mount_cmd) != 0) {
3558108322fbScarlsonj 		lofs_discard_mnttab();
35597c478bd9Sstevel@tonic-gate 		return (-1);
3560108322fbScarlsonj 	}
3561facf4a8dSllai1 
3562facf4a8dSllai1 	/* mount /dev for zone (both normal and scratch zone) */
3563facf4a8dSllai1 	if (vplat_mount_dev(zlogp) != 0) {
3564facf4a8dSllai1 		lofs_discard_mnttab();
3565facf4a8dSllai1 		return (-1);
3566facf4a8dSllai1 	}
3567facf4a8dSllai1 
3568facf4a8dSllai1 	if (!mount_cmd && configure_network_interfaces(zlogp) != 0) {
3569108322fbScarlsonj 		lofs_discard_mnttab();
35707c478bd9Sstevel@tonic-gate 		return (-1);
3571108322fbScarlsonj 	}
3572555afedfScarlsonj 
3573555afedfScarlsonj 	write_index_file(zoneid);
3574555afedfScarlsonj 
3575108322fbScarlsonj 	lofs_discard_mnttab();
35767c478bd9Sstevel@tonic-gate 	return (0);
35777c478bd9Sstevel@tonic-gate }
35787c478bd9Sstevel@tonic-gate 
3579108322fbScarlsonj static int
3580108322fbScarlsonj lu_root_teardown(zlog_t *zlogp)
35817c478bd9Sstevel@tonic-gate {
3582108322fbScarlsonj 	char zroot[MAXPATHLEN];
3583108322fbScarlsonj 
3584108322fbScarlsonj 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
3585108322fbScarlsonj 		zerror(zlogp, B_FALSE, "unable to determine zone root");
3586108322fbScarlsonj 		return (-1);
3587108322fbScarlsonj 	}
3588108322fbScarlsonj 	root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
3589108322fbScarlsonj 
3590108322fbScarlsonj 	/*
3591108322fbScarlsonj 	 * At this point, the processes are gone, the filesystems (save the
3592108322fbScarlsonj 	 * root) are unmounted, and the zone is on death row.  But there may
3593108322fbScarlsonj 	 * still be creds floating about in the system that reference the
3594108322fbScarlsonj 	 * zone_t, and which pin down zone_rootvp causing this call to fail
3595108322fbScarlsonj 	 * with EBUSY.  Thus, we try for a little while before just giving up.
3596108322fbScarlsonj 	 * (How I wish this were not true, and umount2 just did the right
3597108322fbScarlsonj 	 * thing, or tmpfs supported MS_FORCE This is a gross hack.)
3598108322fbScarlsonj 	 */
3599108322fbScarlsonj 	if (umount2(zroot, MS_FORCE) != 0) {
3600108322fbScarlsonj 		if (errno == ENOTSUP && umount2(zroot, 0) == 0)
3601108322fbScarlsonj 			goto unmounted;
3602108322fbScarlsonj 		if (errno == EBUSY) {
3603108322fbScarlsonj 			int tries = 10;
3604108322fbScarlsonj 
3605108322fbScarlsonj 			while (--tries >= 0) {
3606108322fbScarlsonj 				(void) sleep(1);
3607108322fbScarlsonj 				if (umount2(zroot, 0) == 0)
3608108322fbScarlsonj 					goto unmounted;
3609108322fbScarlsonj 				if (errno != EBUSY)
3610108322fbScarlsonj 					break;
3611108322fbScarlsonj 			}
3612108322fbScarlsonj 		}
3613108322fbScarlsonj 		zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot);
3614108322fbScarlsonj 		return (-1);
3615108322fbScarlsonj 	}
3616108322fbScarlsonj unmounted:
3617108322fbScarlsonj 
3618108322fbScarlsonj 	/*
3619108322fbScarlsonj 	 * Only zones in an alternate root environment have scratch zone
3620108322fbScarlsonj 	 * entries.
3621108322fbScarlsonj 	 */
3622108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
3623108322fbScarlsonj 		FILE *fp;
3624108322fbScarlsonj 		int retv;
3625108322fbScarlsonj 
3626108322fbScarlsonj 		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
3627108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot open mapfile");
3628108322fbScarlsonj 			return (-1);
3629108322fbScarlsonj 		}
3630108322fbScarlsonj 		retv = -1;
3631108322fbScarlsonj 		if (zonecfg_lock_scratch(fp) != 0)
3632108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot lock mapfile");
3633108322fbScarlsonj 		else if (zonecfg_delete_scratch(fp, kernzone) != 0)
3634108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot delete map entry");
3635108322fbScarlsonj 		else
3636108322fbScarlsonj 			retv = 0;
3637108322fbScarlsonj 		zonecfg_close_scratch(fp);
3638108322fbScarlsonj 		return (retv);
3639108322fbScarlsonj 	} else {
3640108322fbScarlsonj 		return (0);
3641108322fbScarlsonj 	}
3642108322fbScarlsonj }
3643108322fbScarlsonj 
3644108322fbScarlsonj int
3645108322fbScarlsonj vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd)
3646108322fbScarlsonj {
3647108322fbScarlsonj 	char *kzone;
36487c478bd9Sstevel@tonic-gate 	zoneid_t zoneid;
36497c478bd9Sstevel@tonic-gate 
3650108322fbScarlsonj 	kzone = zone_name;
3651108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
3652108322fbScarlsonj 		FILE *fp;
3653108322fbScarlsonj 
3654108322fbScarlsonj 		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
3655108322fbScarlsonj 			zerror(zlogp, B_TRUE, "unable to open map file");
3656108322fbScarlsonj 			goto error;
3657108322fbScarlsonj 		}
3658108322fbScarlsonj 		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
3659108322fbScarlsonj 		    kernzone, sizeof (kernzone)) != 0) {
3660108322fbScarlsonj 			zerror(zlogp, B_FALSE, "unable to find scratch zone");
3661108322fbScarlsonj 			zonecfg_close_scratch(fp);
3662108322fbScarlsonj 			goto error;
3663108322fbScarlsonj 		}
3664108322fbScarlsonj 		zonecfg_close_scratch(fp);
3665108322fbScarlsonj 		kzone = kernzone;
3666108322fbScarlsonj 	}
3667108322fbScarlsonj 
3668108322fbScarlsonj 	if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) {
36697c478bd9Sstevel@tonic-gate 		if (!bringup_failure_recovery)
36707c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "unable to get zoneid");
3671108322fbScarlsonj 		if (unmount_cmd)
3672108322fbScarlsonj 			(void) lu_root_teardown(zlogp);
36737c478bd9Sstevel@tonic-gate 		goto error;
36747c478bd9Sstevel@tonic-gate 	}
36757c478bd9Sstevel@tonic-gate 
36767c478bd9Sstevel@tonic-gate 	if (zone_shutdown(zoneid) != 0) {
36777c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to shutdown zone");
36787c478bd9Sstevel@tonic-gate 		goto error;
36797c478bd9Sstevel@tonic-gate 	}
36807c478bd9Sstevel@tonic-gate 
3681108322fbScarlsonj 	if (!unmount_cmd &&
3682108322fbScarlsonj 	    unconfigure_network_interfaces(zlogp, zoneid) != 0) {
36837c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
36847c478bd9Sstevel@tonic-gate 		    "unable to unconfigure network interfaces in zone");
36857c478bd9Sstevel@tonic-gate 		goto error;
36867c478bd9Sstevel@tonic-gate 	}
36877c478bd9Sstevel@tonic-gate 
3688108322fbScarlsonj 	if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) {
36897c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to abort TCP connections");
36907c478bd9Sstevel@tonic-gate 		goto error;
36917c478bd9Sstevel@tonic-gate 	}
36927c478bd9Sstevel@tonic-gate 
3693facf4a8dSllai1 	/* destroy zconsole before umount /dev */
3694facf4a8dSllai1 	if (!unmount_cmd)
3695facf4a8dSllai1 		destroy_console_slave();
3696facf4a8dSllai1 
3697108322fbScarlsonj 	if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) {
36987c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
36997c478bd9Sstevel@tonic-gate 		    "unable to unmount file systems in zone");
37007c478bd9Sstevel@tonic-gate 		goto error;
37017c478bd9Sstevel@tonic-gate 	}
37027c478bd9Sstevel@tonic-gate 
370345916cd2Sjpk 	remove_mlps(zlogp, zoneid);
370445916cd2Sjpk 
37057c478bd9Sstevel@tonic-gate 	if (zone_destroy(zoneid) != 0) {
37067c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to destroy zone");
37077c478bd9Sstevel@tonic-gate 		goto error;
37087c478bd9Sstevel@tonic-gate 	}
3709108322fbScarlsonj 
3710108322fbScarlsonj 	/*
3711108322fbScarlsonj 	 * Special teardown for alternate boot environments: remove the tmpfs
3712108322fbScarlsonj 	 * root for the zone and then remove it from the map file.
3713108322fbScarlsonj 	 */
3714108322fbScarlsonj 	if (unmount_cmd && lu_root_teardown(zlogp) != 0)
3715108322fbScarlsonj 		goto error;
3716108322fbScarlsonj 
3717108322fbScarlsonj 	lofs_discard_mnttab();
37187c478bd9Sstevel@tonic-gate 	return (0);
37197c478bd9Sstevel@tonic-gate 
37207c478bd9Sstevel@tonic-gate error:
3721108322fbScarlsonj 	lofs_discard_mnttab();
37227c478bd9Sstevel@tonic-gate 	return (-1);
37237c478bd9Sstevel@tonic-gate }
3724facf4a8dSllai1 
3725facf4a8dSllai1 /*
3726facf4a8dSllai1  * Apply the standard lists of devices/symlinks/mappings and the user-specified
3727facf4a8dSllai1  * list of devices (via zonecfg) to the /dev filesystem.  The filesystem will
3728facf4a8dSllai1  * use these as a profile/filter to determine what exists in /dev.
3729facf4a8dSllai1  */
3730facf4a8dSllai1 static int
3731facf4a8dSllai1 vplat_mount_dev(zlog_t *zlogp)
3732facf4a8dSllai1 {
3733facf4a8dSllai1 	char			zonedevpath[MAXPATHLEN];
3734facf4a8dSllai1 	zone_dochandle_t	handle = NULL;
3735facf4a8dSllai1 	struct zone_devtab	ztab;
3736facf4a8dSllai1 	zone_fsopt_t		opt_attr;
3737facf4a8dSllai1 	di_prof_t		prof = NULL;
3738facf4a8dSllai1 	int			i, err, len;
3739facf4a8dSllai1 	int			retval = -1;
3740facf4a8dSllai1 
3741facf4a8dSllai1 	struct zone_fstab devtab = {
3742facf4a8dSllai1 		"/dev",
3743facf4a8dSllai1 		"/dev",
3744facf4a8dSllai1 		MNTTYPE_DEV,
3745facf4a8dSllai1 		NULL,
3746facf4a8dSllai1 		""
3747facf4a8dSllai1 	};
3748facf4a8dSllai1 
3749facf4a8dSllai1 	if (err = zone_get_devroot(zone_name, zonedevpath,
3750facf4a8dSllai1 	    sizeof (zonedevpath))) {
3751facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't get zone dev: %s",
3752facf4a8dSllai1 		    zonecfg_strerror(err));
3753facf4a8dSllai1 		return (-1);
3754facf4a8dSllai1 	}
3755facf4a8dSllai1 
3756facf4a8dSllai1 	/*
3757facf4a8dSllai1 	 * The old /dev was a lofs mount from <zonepath>/dev, with
3758facf4a8dSllai1 	 * dev fs, that becomes a mount on <zonepath>/root/dev.
3759facf4a8dSllai1 	 * However, we need to preserve device permission bits during
3760facf4a8dSllai1 	 * upgrade.  What we should do is migrate the attribute directory
3761facf4a8dSllai1 	 * on upgrade, but for now, preserve it at <zonepath>/dev.
3762facf4a8dSllai1 	 */
3763facf4a8dSllai1 	(void) strcpy(opt_attr.zone_fsopt_opt, "attrdir=");
3764facf4a8dSllai1 	len = strlen(opt_attr.zone_fsopt_opt);
3765facf4a8dSllai1 	if (err = zone_get_zonepath(zone_name,
3766facf4a8dSllai1 	    opt_attr.zone_fsopt_opt + len, MAX_MNTOPT_STR - len)) {
3767facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't get zone path: %s",
3768facf4a8dSllai1 		    zonecfg_strerror(err));
3769facf4a8dSllai1 		return (-1);
3770facf4a8dSllai1 	}
3771facf4a8dSllai1 
3772facf4a8dSllai1 	if (make_one_dir(zlogp, opt_attr.zone_fsopt_opt + len, "/dev",
3773facf4a8dSllai1 	    DEFAULT_DIR_MODE) != 0)
3774facf4a8dSllai1 		return (-1);
3775facf4a8dSllai1 
3776facf4a8dSllai1 	(void) strlcat(opt_attr.zone_fsopt_opt, "/dev", MAX_MNTOPT_STR);
3777facf4a8dSllai1 	devtab.zone_fs_options = &opt_attr;
3778facf4a8dSllai1 	opt_attr.zone_fsopt_next = NULL;
3779facf4a8dSllai1 
3780facf4a8dSllai1 	/* mount /dev inside the zone */
3781facf4a8dSllai1 	i = strlen(zonedevpath);
3782facf4a8dSllai1 	if (mount_one(zlogp, &devtab, zonedevpath))
3783facf4a8dSllai1 		return (-1);
3784facf4a8dSllai1 
3785facf4a8dSllai1 	(void) strlcat(zonedevpath, "/dev", sizeof (zonedevpath));
3786facf4a8dSllai1 	if (di_prof_init(zonedevpath, &prof)) {
3787facf4a8dSllai1 		zerror(zlogp, B_TRUE, "failed to initialize profile");
3788facf4a8dSllai1 		goto cleanup;
3789facf4a8dSllai1 	}
3790facf4a8dSllai1 
3791facf4a8dSllai1 	/* Add the standard devices and directories */
3792facf4a8dSllai1 	for (i = 0; standard_devs[i] != NULL; ++i) {
3793facf4a8dSllai1 		if (di_prof_add_dev(prof, standard_devs[i])) {
3794facf4a8dSllai1 			zerror(zlogp, B_TRUE, "failed to add "
3795facf4a8dSllai1 			    "standard device");
3796facf4a8dSllai1 			goto cleanup;
3797facf4a8dSllai1 		}
3798facf4a8dSllai1 	}
3799facf4a8dSllai1 
3800facf4a8dSllai1 	/* Add the standard symlinks */
3801facf4a8dSllai1 	for (i = 0; standard_devlinks[i].source != NULL; ++i) {
3802facf4a8dSllai1 		if (di_prof_add_symlink(prof,
3803facf4a8dSllai1 		    standard_devlinks[i].source,
3804facf4a8dSllai1 		    standard_devlinks[i].target)) {
3805facf4a8dSllai1 			zerror(zlogp, B_TRUE, "failed to add "
3806facf4a8dSllai1 			    "standard symlink");
3807facf4a8dSllai1 			goto cleanup;
3808facf4a8dSllai1 		}
3809facf4a8dSllai1 	}
3810facf4a8dSllai1 
3811facf4a8dSllai1 	/* Add user-specified devices and directories */
3812facf4a8dSllai1 	if ((handle = zonecfg_init_handle()) == NULL) {
3813facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't initialize zone handle");
3814facf4a8dSllai1 		goto cleanup;
3815facf4a8dSllai1 	}
3816facf4a8dSllai1 	if (err = zonecfg_get_handle(zone_name, handle)) {
3817facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't get handle for zone "
3818facf4a8dSllai1 		    "%s: %s", zone_name, zonecfg_strerror(err));
3819facf4a8dSllai1 		goto cleanup;
3820facf4a8dSllai1 	}
3821facf4a8dSllai1 	if (err = zonecfg_setdevent(handle)) {
3822facf4a8dSllai1 		zerror(zlogp, B_FALSE, "%s: %s", zone_name,
3823facf4a8dSllai1 		    zonecfg_strerror(err));
3824facf4a8dSllai1 		goto cleanup;
3825facf4a8dSllai1 	}
3826facf4a8dSllai1 	while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
3827facf4a8dSllai1 		if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
3828facf4a8dSllai1 			zerror(zlogp, B_TRUE, "failed to add "
3829facf4a8dSllai1 			    "user-specified device");
3830facf4a8dSllai1 			goto cleanup;
3831facf4a8dSllai1 		}
3832facf4a8dSllai1 	}
3833facf4a8dSllai1 	(void) zonecfg_enddevent(handle);
3834facf4a8dSllai1 
3835facf4a8dSllai1 	/* Send profile to kernel */
3836facf4a8dSllai1 	if (di_prof_commit(prof)) {
3837facf4a8dSllai1 		zerror(zlogp, B_TRUE, "failed to commit profile");
3838facf4a8dSllai1 		goto cleanup;
3839facf4a8dSllai1 	}
3840facf4a8dSllai1 
3841facf4a8dSllai1 	retval = 0;
3842facf4a8dSllai1 
3843facf4a8dSllai1 cleanup:
3844facf4a8dSllai1 	if (handle)
3845facf4a8dSllai1 		zonecfg_fini_handle(handle);
3846facf4a8dSllai1 	if (prof)
3847facf4a8dSllai1 		di_prof_fini(prof);
3848facf4a8dSllai1 	return (retval);
3849facf4a8dSllai1 }
3850