xref: /titanic_52/usr/src/uts/common/fs/mntfs/mntvnops.c (revision 1a5e258f5471356ca102c7176637cdce45bac147)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5aa59c4cbSrsb  * Common Development and Distribution License (the "License").
6aa59c4cbSrsb  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
225010b7f7SLori Alt  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
237c478bd9Sstevel@tonic-gate  */
247c478bd9Sstevel@tonic-gate 
257c478bd9Sstevel@tonic-gate #include <sys/file.h>
267c478bd9Sstevel@tonic-gate #include <sys/stat.h>
277c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
287c478bd9Sstevel@tonic-gate #include <sys/mntio.h>
297c478bd9Sstevel@tonic-gate #include <sys/mnttab.h>
307c478bd9Sstevel@tonic-gate #include <sys/mount.h>
317c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
327c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
337c478bd9Sstevel@tonic-gate #include <sys/systm.h>
347c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
35aa59c4cbSrsb #include <sys/vfs_opreg.h>
367c478bd9Sstevel@tonic-gate #include <sys/fs/mntdata.h>
377c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h>
387c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
397c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h>
40835ee219SRobert Harris #include <sys/time.h>
41835ee219SRobert Harris #include <sys/ksynch.h>
42835ee219SRobert Harris #include <sys/sdt.h>
437c478bd9Sstevel@tonic-gate 
447c478bd9Sstevel@tonic-gate #define	MNTROOTINO	2
457c478bd9Sstevel@tonic-gate 
467c478bd9Sstevel@tonic-gate static mntnode_t *mntgetnode(vnode_t *);
477c478bd9Sstevel@tonic-gate 
487c478bd9Sstevel@tonic-gate vnodeops_t *mntvnodeops;
49df2381bfSpraks extern void vfs_mnttab_readop(void);
507c478bd9Sstevel@tonic-gate 
517c478bd9Sstevel@tonic-gate /*
527c478bd9Sstevel@tonic-gate  * Design of kernel mnttab accounting.
537c478bd9Sstevel@tonic-gate  *
54835ee219SRobert Harris  * mntfs provides two methods of reading the in-kernel mnttab, i.e. the state of
55835ee219SRobert Harris  * the mounted resources: the read-only file /etc/mnttab, and a collection of
56835ee219SRobert Harris  * ioctl() commands. Most of these interfaces are public and are described in
57835ee219SRobert Harris  * mnttab(4). Three private ioctl() commands, MNTIOC_GETMNTENT,
58835ee219SRobert Harris  * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY, provide for the getmntent(3C)
59835ee219SRobert Harris  * family of functions, allowing them to support white space in mount names.
607c478bd9Sstevel@tonic-gate  *
61835ee219SRobert Harris  * A significant feature of mntfs is that it provides a file descriptor with a
62835ee219SRobert Harris  * snapshot once it begins to consume mnttab data. Thus, as the process
63835ee219SRobert Harris  * continues to consume data, its view of the in-kernel mnttab does not change
64835ee219SRobert Harris  * even if resources are mounted or unmounted. The intent is to ensure that
65835ee219SRobert Harris  * processes are guaranteed to read self-consistent data even as the system
66835ee219SRobert Harris  * changes.
677c478bd9Sstevel@tonic-gate  *
68835ee219SRobert Harris  * The snapshot is implemented by a "database", unique to each zone, that
69835ee219SRobert Harris  * comprises a linked list of mntelem_ts. The database is identified by
70835ee219SRobert Harris  * zone_mntfs_db and is protected by zone_mntfs_db_lock. Each element contains
71835ee219SRobert Harris  * the text entry in /etc/mnttab for a mounted resource, i.e. a vfs_t, and is
72835ee219SRobert Harris  * marked with its time of "birth", i.e. creation. An element is "killed", and
73835ee219SRobert Harris  * marked with its time of death, when it is found to be out of date, e.g. when
74835ee219SRobert Harris  * the corresponding resource has been unmounted.
75835ee219SRobert Harris  *
76835ee219SRobert Harris  * When a process performs the first read() or ioctl() for a file descriptor for
77835ee219SRobert Harris  * /etc/mnttab, the database is updated by a call to mntfs_snapshot() to ensure
78835ee219SRobert Harris  * that an element exists for each currently mounted resource. Following this,
79835ee219SRobert Harris  * the current time is written into a snapshot structure, a mntsnap_t, embedded
80835ee219SRobert Harris  * in the descriptor's mntnode_t.
81835ee219SRobert Harris  *
82835ee219SRobert Harris  * mntfs is able to enumerate the /etc/mnttab entries corresponding to a
83835ee219SRobert Harris  * particular file descriptor by searching the database for entries that were
84835ee219SRobert Harris  * born before the appropriate snapshot and that either are still alive or died
85835ee219SRobert Harris  * after the snapshot was created. Consumers use the iterator function
86835ee219SRobert Harris  * mntfs_get_next_elem() to identify the next suitable element in the database.
87835ee219SRobert Harris  *
88835ee219SRobert Harris  * Each snapshot has a hold on its corresponding database elements, effected by
89835ee219SRobert Harris  * a per-element reference count. At last close(), a snapshot is destroyed in
90835ee219SRobert Harris  * mntfs_freesnap() by releasing all of its holds; an element is destroyed if
91835ee219SRobert Harris  * its reference count becomes zero. Therefore the database never exists unless
92835ee219SRobert Harris  * there is at least one active consumer of /etc/mnttab.
93835ee219SRobert Harris  *
94835ee219SRobert Harris  * getmntent(3C) et al. "do not open, close or rewind the file." This implies
95835ee219SRobert Harris  * that getmntent() and read() must be able to operate without interaction on
96835ee219SRobert Harris  * the same file descriptor; this is accomplished by the use of separate
97835ee219SRobert Harris  * mntsnap_ts for both read() and ioctl().
98835ee219SRobert Harris  *
995545576aSRobert Harris  * mntfs observes the following lock-ordering:
1005545576aSRobert Harris  *
1015545576aSRobert Harris  *	mnp->mnt_contents -> vfslist -> zonep->zone_mntfs_db_lock
1025545576aSRobert Harris  *
1037c478bd9Sstevel@tonic-gate  * NOTE: The following variable enables the generation of the "dev=xxx"
1047c478bd9Sstevel@tonic-gate  * in the option string for a mounted file system.  Really this should
1057c478bd9Sstevel@tonic-gate  * be gotten rid of altogether, but for the sake of backwards compatibility
1067c478bd9Sstevel@tonic-gate  * we had to leave it in.  It is defined as a 32-bit device number.  This
1077c478bd9Sstevel@tonic-gate  * means that when 64-bit device numbers are in use, if either the major or
1087c478bd9Sstevel@tonic-gate  * minor part of the device number will not fit in a 16 bit quantity, the
1097c478bd9Sstevel@tonic-gate  * "dev=" will be set to NODEV (0x7fffffff).  See PSARC 1999/566 and
1107c478bd9Sstevel@tonic-gate  * 1999/131 for details.  The cmpldev() function used to generate the 32-bit
1117c478bd9Sstevel@tonic-gate  * device number handles this check and assigns the proper value.
1127c478bd9Sstevel@tonic-gate  */
1137c478bd9Sstevel@tonic-gate int mntfs_enabledev = 1;	/* enable old "dev=xxx" option */
1147c478bd9Sstevel@tonic-gate 
115835ee219SRobert Harris extern void vfs_mono_time(timespec_t *);
116835ee219SRobert Harris enum { MNTFS_FIRST, MNTFS_SECOND, MNTFS_NEITHER };
117835ee219SRobert Harris 
/*
 * Evaluate to non-zero when the tab-delimited /etc/mnttab field starting at x
 * holds real content, and to zero when the field is just the placeholder "-"
 * followed immediately by a tab. The time field is never a placeholder, which
 * is why only a tab delimiter needs to be recognised here.
 */
#define	MNTFS_REAL_FIELD(x)	((x)[0] != '-' || (x)[1] != '\t')
124835ee219SRobert Harris 
1257c478bd9Sstevel@tonic-gate static int
1267c478bd9Sstevel@tonic-gate mntfs_devsize(struct vfs *vfsp)
1277c478bd9Sstevel@tonic-gate {
1287c478bd9Sstevel@tonic-gate 	dev32_t odev;
1297c478bd9Sstevel@tonic-gate 
1307c478bd9Sstevel@tonic-gate 	(void) cmpldev(&odev, vfsp->vfs_dev);
1317c478bd9Sstevel@tonic-gate 	return (snprintf(NULL, 0, "dev=%x", odev));
1327c478bd9Sstevel@tonic-gate }
1337c478bd9Sstevel@tonic-gate 
1347c478bd9Sstevel@tonic-gate static int
1357c478bd9Sstevel@tonic-gate mntfs_devprint(struct vfs *vfsp, char *buf)
1367c478bd9Sstevel@tonic-gate {
1377c478bd9Sstevel@tonic-gate 	dev32_t odev;
1387c478bd9Sstevel@tonic-gate 
1397c478bd9Sstevel@tonic-gate 	(void) cmpldev(&odev, vfsp->vfs_dev);
1407c478bd9Sstevel@tonic-gate 	return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev));
1417c478bd9Sstevel@tonic-gate }
1427c478bd9Sstevel@tonic-gate 
143835ee219SRobert Harris /* Identify which, if either, of two supplied timespec structs is newer. */
144835ee219SRobert Harris static int
145835ee219SRobert Harris mntfs_newest(timespec_t *a, timespec_t *b)
146835ee219SRobert Harris {
147835ee219SRobert Harris 	if (a->tv_sec == b->tv_sec &&
148835ee219SRobert Harris 	    a->tv_nsec == b->tv_nsec) {
149835ee219SRobert Harris 		return (MNTFS_NEITHER);
150835ee219SRobert Harris 	} else if (b->tv_sec > a->tv_sec ||
151835ee219SRobert Harris 	    (b->tv_sec == a->tv_sec &&
152835ee219SRobert Harris 	    b->tv_nsec > a->tv_nsec)) {
153835ee219SRobert Harris 		return (MNTFS_SECOND);
154835ee219SRobert Harris 	} else {
155835ee219SRobert Harris 		return (MNTFS_FIRST);
156835ee219SRobert Harris 	}
157835ee219SRobert Harris }
158835ee219SRobert Harris 
1597c478bd9Sstevel@tonic-gate static int
1607c478bd9Sstevel@tonic-gate mntfs_optsize(struct vfs *vfsp)
1617c478bd9Sstevel@tonic-gate {
1627c478bd9Sstevel@tonic-gate 	int i, size = 0;
1637c478bd9Sstevel@tonic-gate 	mntopt_t *mop;
1647c478bd9Sstevel@tonic-gate 
1657c478bd9Sstevel@tonic-gate 	for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) {
1667c478bd9Sstevel@tonic-gate 		mop = &vfsp->vfs_mntopts.mo_list[i];
1677c478bd9Sstevel@tonic-gate 		if (mop->mo_flags & MO_NODISPLAY)
1687c478bd9Sstevel@tonic-gate 			continue;
1697c478bd9Sstevel@tonic-gate 		if (mop->mo_flags & MO_SET) {
1707c478bd9Sstevel@tonic-gate 			if (size)
1717c478bd9Sstevel@tonic-gate 				size++; /* space for comma */
1727c478bd9Sstevel@tonic-gate 			size += strlen(mop->mo_name);
1737c478bd9Sstevel@tonic-gate 			/*
1747c478bd9Sstevel@tonic-gate 			 * count option value if there is one
1757c478bd9Sstevel@tonic-gate 			 */
1767c478bd9Sstevel@tonic-gate 			if (mop->mo_arg != NULL) {
1777c478bd9Sstevel@tonic-gate 				size += strlen(mop->mo_arg) + 1;
1787c478bd9Sstevel@tonic-gate 			}
1797c478bd9Sstevel@tonic-gate 		}
1807c478bd9Sstevel@tonic-gate 	}
1817c478bd9Sstevel@tonic-gate 	if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) {
1827c478bd9Sstevel@tonic-gate 		/*
1837c478bd9Sstevel@tonic-gate 		 * Add space for "zone=<zone_name>" if required.
1847c478bd9Sstevel@tonic-gate 		 */
1857c478bd9Sstevel@tonic-gate 		if (size)
1867c478bd9Sstevel@tonic-gate 			size++;	/* space for comma */
1877c478bd9Sstevel@tonic-gate 		size += sizeof ("zone=") - 1;
1887c478bd9Sstevel@tonic-gate 		size += strlen(vfsp->vfs_zone->zone_name);
1897c478bd9Sstevel@tonic-gate 	}
1907c478bd9Sstevel@tonic-gate 	if (mntfs_enabledev) {
1917c478bd9Sstevel@tonic-gate 		if (size != 0)
1927c478bd9Sstevel@tonic-gate 			size++; /* space for comma */
1937c478bd9Sstevel@tonic-gate 		size += mntfs_devsize(vfsp);
1947c478bd9Sstevel@tonic-gate 	}
1957c478bd9Sstevel@tonic-gate 	if (size == 0)
1967c478bd9Sstevel@tonic-gate 		size = strlen("-");
1977c478bd9Sstevel@tonic-gate 	return (size);
1987c478bd9Sstevel@tonic-gate }
1997c478bd9Sstevel@tonic-gate 
2007c478bd9Sstevel@tonic-gate static int
2017c478bd9Sstevel@tonic-gate mntfs_optprint(struct vfs *vfsp, char *buf)
2027c478bd9Sstevel@tonic-gate {
2037c478bd9Sstevel@tonic-gate 	int i, optinbuf = 0;
2047c478bd9Sstevel@tonic-gate 	mntopt_t *mop;
2057c478bd9Sstevel@tonic-gate 	char *origbuf = buf;
2067c478bd9Sstevel@tonic-gate 
2077c478bd9Sstevel@tonic-gate 	for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) {
2087c478bd9Sstevel@tonic-gate 		mop = &vfsp->vfs_mntopts.mo_list[i];
2097c478bd9Sstevel@tonic-gate 		if (mop->mo_flags & MO_NODISPLAY)
2107c478bd9Sstevel@tonic-gate 			continue;
2117c478bd9Sstevel@tonic-gate 		if (mop->mo_flags & MO_SET) {
2127c478bd9Sstevel@tonic-gate 			if (optinbuf)
2137c478bd9Sstevel@tonic-gate 				*buf++ = ',';
2147c478bd9Sstevel@tonic-gate 			else
2157c478bd9Sstevel@tonic-gate 				optinbuf = 1;
2167c478bd9Sstevel@tonic-gate 			buf += snprintf(buf, MAX_MNTOPT_STR,
2177c478bd9Sstevel@tonic-gate 			    "%s", mop->mo_name);
2187c478bd9Sstevel@tonic-gate 			/*
2197c478bd9Sstevel@tonic-gate 			 * print option value if there is one
2207c478bd9Sstevel@tonic-gate 			 */
2217c478bd9Sstevel@tonic-gate 			if (mop->mo_arg != NULL) {
2227c478bd9Sstevel@tonic-gate 				buf += snprintf(buf, MAX_MNTOPT_STR, "=%s",
2237c478bd9Sstevel@tonic-gate 				    mop->mo_arg);
2247c478bd9Sstevel@tonic-gate 			}
2257c478bd9Sstevel@tonic-gate 		}
2267c478bd9Sstevel@tonic-gate 	}
2277c478bd9Sstevel@tonic-gate 	if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) {
2287c478bd9Sstevel@tonic-gate 		if (optinbuf)
2297c478bd9Sstevel@tonic-gate 			*buf++ = ',';
2307c478bd9Sstevel@tonic-gate 		else
2317c478bd9Sstevel@tonic-gate 			optinbuf = 1;
2327c478bd9Sstevel@tonic-gate 		buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s",
2337c478bd9Sstevel@tonic-gate 		    vfsp->vfs_zone->zone_name);
2347c478bd9Sstevel@tonic-gate 	}
2357c478bd9Sstevel@tonic-gate 	if (mntfs_enabledev) {
2367c478bd9Sstevel@tonic-gate 		if (optinbuf++)
2377c478bd9Sstevel@tonic-gate 			*buf++ = ',';
2387c478bd9Sstevel@tonic-gate 		buf += mntfs_devprint(vfsp, buf);
2397c478bd9Sstevel@tonic-gate 	}
2407c478bd9Sstevel@tonic-gate 	if (!optinbuf) {
2417c478bd9Sstevel@tonic-gate 		buf += snprintf(buf, MAX_MNTOPT_STR, "-");
2427c478bd9Sstevel@tonic-gate 	}
2437c478bd9Sstevel@tonic-gate 	return (buf - origbuf);
2447c478bd9Sstevel@tonic-gate }
2457c478bd9Sstevel@tonic-gate 
246835ee219SRobert Harris void
247835ee219SRobert Harris mntfs_populate_text(vfs_t *vfsp, zone_t *zonep, mntelem_t *elemp)
248835ee219SRobert Harris {
249835ee219SRobert Harris 	struct extmnttab *tabp = &elemp->mnte_tab;
250835ee219SRobert Harris 	const char *resource, *mntpt;
251835ee219SRobert Harris 	char *cp = elemp->mnte_text;
252835ee219SRobert Harris 	mntpt = refstr_value(vfsp->vfs_mntpt);
253835ee219SRobert Harris 	resource = refstr_value(vfsp->vfs_resource);
254835ee219SRobert Harris 
255835ee219SRobert Harris 	tabp->mnt_special = 0;
256835ee219SRobert Harris 	if (resource != NULL && resource[0] != '\0') {
257835ee219SRobert Harris 		if (resource[0] != '/') {
258835ee219SRobert Harris 			cp += snprintf(cp, MAXPATHLEN, "%s\t", resource);
259835ee219SRobert Harris 		} else if (!ZONE_PATH_VISIBLE(resource, zonep)) {
260835ee219SRobert Harris 			/*
261835ee219SRobert Harris 			 * Use the mount point as the resource.
262835ee219SRobert Harris 			 */
263835ee219SRobert Harris 			cp += snprintf(cp, MAXPATHLEN, "%s\t",
264835ee219SRobert Harris 			    ZONE_PATH_TRANSLATE(mntpt, zonep));
265835ee219SRobert Harris 		} else {
266835ee219SRobert Harris 			cp += snprintf(cp, MAXPATHLEN, "%s\t",
267835ee219SRobert Harris 			    ZONE_PATH_TRANSLATE(resource, zonep));
268835ee219SRobert Harris 		}
269835ee219SRobert Harris 	} else {
270835ee219SRobert Harris 		cp += snprintf(cp, MAXPATHLEN, "-\t");
271835ee219SRobert Harris 	}
272835ee219SRobert Harris 
273835ee219SRobert Harris 	tabp->mnt_mountp = (char *)(cp - elemp->mnte_text);
274835ee219SRobert Harris 	if (mntpt != NULL && mntpt[0] != '\0') {
275835ee219SRobert Harris 		/*
276835ee219SRobert Harris 		 * We know the mount point is visible from within the zone,
277835ee219SRobert Harris 		 * otherwise it wouldn't be on the zone's vfs list.
278835ee219SRobert Harris 		 */
279835ee219SRobert Harris 		cp += snprintf(cp, MAXPATHLEN, "%s\t",
280835ee219SRobert Harris 		    ZONE_PATH_TRANSLATE(mntpt, zonep));
281835ee219SRobert Harris 	} else {
282835ee219SRobert Harris 		cp += snprintf(cp, MAXPATHLEN, "-\t");
283835ee219SRobert Harris 	}
284835ee219SRobert Harris 
285835ee219SRobert Harris 	tabp->mnt_fstype = (char *)(cp - elemp->mnte_text);
286835ee219SRobert Harris 	cp += snprintf(cp, MAXPATHLEN, "%s\t",
287835ee219SRobert Harris 	    vfssw[vfsp->vfs_fstype].vsw_name);
288835ee219SRobert Harris 
289835ee219SRobert Harris 	tabp->mnt_mntopts = (char *)(cp - elemp->mnte_text);
290835ee219SRobert Harris 	cp += mntfs_optprint(vfsp, cp);
291835ee219SRobert Harris 	*cp++ = '\t';
292835ee219SRobert Harris 
293835ee219SRobert Harris 	tabp->mnt_time = (char *)(cp - elemp->mnte_text);
294835ee219SRobert Harris 	cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime);
295835ee219SRobert Harris 	*cp++ = '\n'; /* over-write snprintf's trailing null-byte */
296835ee219SRobert Harris 
297835ee219SRobert Harris 	tabp->mnt_major = getmajor(vfsp->vfs_dev);
298835ee219SRobert Harris 	tabp->mnt_minor = getminor(vfsp->vfs_dev);
299835ee219SRobert Harris 
300835ee219SRobert Harris 	elemp->mnte_text_size = cp - elemp->mnte_text;
301835ee219SRobert Harris 	elemp->mnte_vfs_ctime = vfsp->vfs_hrctime;
302835ee219SRobert Harris 	elemp->mnte_hidden = vfsp->vfs_flag & VFS_NOMNTTAB;
303835ee219SRobert Harris }
304835ee219SRobert Harris 
305835ee219SRobert Harris /* Determine the length of the /etc/mnttab entry for this vfs_t. */
3067c478bd9Sstevel@tonic-gate static size_t
307835ee219SRobert Harris mntfs_text_len(vfs_t *vfsp, zone_t *zone)
3087c478bd9Sstevel@tonic-gate {
3097c478bd9Sstevel@tonic-gate 	size_t size = 0;
3107c478bd9Sstevel@tonic-gate 	const char *resource, *mntpt;
311835ee219SRobert Harris 	size_t mntsize;
3127c478bd9Sstevel@tonic-gate 
3137c478bd9Sstevel@tonic-gate 	mntpt = refstr_value(vfsp->vfs_mntpt);
3147c478bd9Sstevel@tonic-gate 	if (mntpt != NULL && mntpt[0] != '\0') {
315835ee219SRobert Harris 		mntsize = strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1;
3167c478bd9Sstevel@tonic-gate 	} else {
317835ee219SRobert Harris 		mntsize = 2;	/* "-\t" */
3187c478bd9Sstevel@tonic-gate 	}
319835ee219SRobert Harris 	size += mntsize;
3207c478bd9Sstevel@tonic-gate 
3217c478bd9Sstevel@tonic-gate 	resource = refstr_value(vfsp->vfs_resource);
3227c478bd9Sstevel@tonic-gate 	if (resource != NULL && resource[0] != '\0') {
3237c478bd9Sstevel@tonic-gate 		if (resource[0] != '/') {
3247c478bd9Sstevel@tonic-gate 			size += strlen(resource) + 1;
3257c478bd9Sstevel@tonic-gate 		} else if (!ZONE_PATH_VISIBLE(resource, zone)) {
3267c478bd9Sstevel@tonic-gate 			/*
3277c478bd9Sstevel@tonic-gate 			 * Same as the zone's view of the mount point.
3287c478bd9Sstevel@tonic-gate 			 */
329835ee219SRobert Harris 			size += mntsize;
3307c478bd9Sstevel@tonic-gate 		} else {
3317c478bd9Sstevel@tonic-gate 			size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1;
3327c478bd9Sstevel@tonic-gate 		}
3337c478bd9Sstevel@tonic-gate 	} else {
334835ee219SRobert Harris 		size += 2;	/* "-\t" */
3357c478bd9Sstevel@tonic-gate 	}
3367c478bd9Sstevel@tonic-gate 	size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1;
3377c478bd9Sstevel@tonic-gate 	size += mntfs_optsize(vfsp);
3387c478bd9Sstevel@tonic-gate 	size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime);
3397c478bd9Sstevel@tonic-gate 	return (size);
3407c478bd9Sstevel@tonic-gate }
3417c478bd9Sstevel@tonic-gate 
342835ee219SRobert Harris /* Destroy the resources associated with a snapshot element. */
3437c478bd9Sstevel@tonic-gate static void
344835ee219SRobert Harris mntfs_destroy_elem(mntelem_t *elemp)
3457c478bd9Sstevel@tonic-gate {
346835ee219SRobert Harris 	kmem_free(elemp->mnte_text, elemp->mnte_text_size);
347835ee219SRobert Harris 	kmem_free(elemp, sizeof (mntelem_t));
3487c478bd9Sstevel@tonic-gate }
3497c478bd9Sstevel@tonic-gate 
3507c478bd9Sstevel@tonic-gate /*
351835ee219SRobert Harris  * Return 1 if the given snapshot is in the range of the given element; return
352835ee219SRobert Harris  * 0 otherwise.
3537c478bd9Sstevel@tonic-gate  */
3547c478bd9Sstevel@tonic-gate static int
355835ee219SRobert Harris mntfs_elem_in_range(mntsnap_t *snapp, mntelem_t *elemp)
3567c478bd9Sstevel@tonic-gate {
357835ee219SRobert Harris 	timespec_t	*stimep = &snapp->mnts_time;
358835ee219SRobert Harris 	timespec_t	*btimep = &elemp->mnte_birth;
359835ee219SRobert Harris 	timespec_t	*dtimep = &elemp->mnte_death;
3607c478bd9Sstevel@tonic-gate 
3617c478bd9Sstevel@tonic-gate 	/*
362835ee219SRobert Harris 	 * If a snapshot is in range of an element then the snapshot must have
363835ee219SRobert Harris 	 * been created after the birth of the element, and either the element
364835ee219SRobert Harris 	 * is still alive or it died after the snapshot was created.
3657c478bd9Sstevel@tonic-gate 	 */
366835ee219SRobert Harris 	if (mntfs_newest(btimep, stimep) == MNTFS_SECOND &&
367835ee219SRobert Harris 	    (MNTFS_ELEM_IS_ALIVE(elemp) ||
368835ee219SRobert Harris 	    mntfs_newest(stimep, dtimep) == MNTFS_SECOND))
369835ee219SRobert Harris 		return (1);
370835ee219SRobert Harris 	else
3717c478bd9Sstevel@tonic-gate 		return (0);
3727c478bd9Sstevel@tonic-gate }
3737c478bd9Sstevel@tonic-gate 
3747c478bd9Sstevel@tonic-gate /*
375835ee219SRobert Harris  * Return the next valid database element, after the one provided, for a given
376835ee219SRobert Harris  * snapshot; return NULL if none exists. The caller must hold the zone's
377835ee219SRobert Harris  * database lock as a reader before calling this function.
3787c478bd9Sstevel@tonic-gate  */
379835ee219SRobert Harris static mntelem_t *
380835ee219SRobert Harris mntfs_get_next_elem(mntsnap_t *snapp, mntelem_t *elemp)
381835ee219SRobert Harris {
382835ee219SRobert Harris 	int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN;
3837c478bd9Sstevel@tonic-gate 
384835ee219SRobert Harris 	do {
385835ee219SRobert Harris 		elemp = elemp->mnte_next;
386835ee219SRobert Harris 	} while (elemp &&
387835ee219SRobert Harris 	    (!mntfs_elem_in_range(snapp, elemp) ||
388835ee219SRobert Harris 	    (!show_hidden && elemp->mnte_hidden)));
389835ee219SRobert Harris 	return (elemp);
3907c478bd9Sstevel@tonic-gate }
3917c478bd9Sstevel@tonic-gate 
392835ee219SRobert Harris /*
393835ee219SRobert Harris  * This function frees the resources associated with a mntsnap_t. It walks
394835ee219SRobert Harris  * through the database, decrementing the reference count of any element that
395835ee219SRobert Harris  * satisfies the snapshot. If the reference count of an element becomes zero
396835ee219SRobert Harris  * then it is removed from the database.
397835ee219SRobert Harris  */
398835ee219SRobert Harris static void
399835ee219SRobert Harris mntfs_freesnap(mntnode_t *mnp, mntsnap_t *snapp)
400835ee219SRobert Harris {
401a19609f8Sjv227347 	zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone;
402835ee219SRobert Harris 	krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
403835ee219SRobert Harris 	mntelem_t **elempp = &zonep->zone_mntfs_db;
404835ee219SRobert Harris 	mntelem_t *elemp;
405835ee219SRobert Harris 	int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN;
406835ee219SRobert Harris 	size_t number_decremented = 0;
4077c478bd9Sstevel@tonic-gate 
408835ee219SRobert Harris 	ASSERT(RW_WRITE_HELD(&mnp->mnt_contents));
409835ee219SRobert Harris 
410835ee219SRobert Harris 	/* Ignore an uninitialised snapshot. */
411835ee219SRobert Harris 	if (snapp->mnts_nmnts == 0)
412835ee219SRobert Harris 		return;
413835ee219SRobert Harris 
414835ee219SRobert Harris 	/* Drop the holds on any matching database elements. */
415835ee219SRobert Harris 	rw_enter(dblockp, RW_WRITER);
416835ee219SRobert Harris 	while ((elemp = *elempp) != NULL) {
417835ee219SRobert Harris 		if (mntfs_elem_in_range(snapp, elemp) &&
418835ee219SRobert Harris 		    (!elemp->mnte_hidden || show_hidden) &&
419835ee219SRobert Harris 		    ++number_decremented && --elemp->mnte_refcnt == 0) {
420835ee219SRobert Harris 			if ((*elempp = elemp->mnte_next) != NULL)
421835ee219SRobert Harris 				(*elempp)->mnte_prev = elemp->mnte_prev;
422835ee219SRobert Harris 			mntfs_destroy_elem(elemp);
4237c478bd9Sstevel@tonic-gate 		} else {
424835ee219SRobert Harris 			elempp = &elemp->mnte_next;
425835ee219SRobert Harris 		}
426835ee219SRobert Harris 	}
427835ee219SRobert Harris 	rw_exit(dblockp);
428835ee219SRobert Harris 	ASSERT(number_decremented == snapp->mnts_nmnts);
429835ee219SRobert Harris 
430835ee219SRobert Harris 	/* Clear the snapshot data. */
431835ee219SRobert Harris 	bzero(snapp, sizeof (mntsnap_t));
4327c478bd9Sstevel@tonic-gate }
4337c478bd9Sstevel@tonic-gate 
434835ee219SRobert Harris /* Insert the new database element newp after the existing element prevp. */
435835ee219SRobert Harris static void
436835ee219SRobert Harris mntfs_insert_after(mntelem_t *newp, mntelem_t *prevp)
437835ee219SRobert Harris {
438835ee219SRobert Harris 	newp->mnte_prev = prevp;
439835ee219SRobert Harris 	newp->mnte_next = prevp->mnte_next;
440835ee219SRobert Harris 	prevp->mnte_next = newp;
441835ee219SRobert Harris 	if (newp->mnte_next != NULL)
442835ee219SRobert Harris 		newp->mnte_next->mnte_prev = newp;
4437c478bd9Sstevel@tonic-gate }
4447c478bd9Sstevel@tonic-gate 
445835ee219SRobert Harris /* Create and return a copy of a given database element. */
446835ee219SRobert Harris static mntelem_t *
447835ee219SRobert Harris mntfs_copy(mntelem_t *origp)
448835ee219SRobert Harris {
449835ee219SRobert Harris 	mntelem_t *copyp;
4507c478bd9Sstevel@tonic-gate 
451835ee219SRobert Harris 	copyp = kmem_zalloc(sizeof (mntelem_t), KM_SLEEP);
452835ee219SRobert Harris 	copyp->mnte_vfs_ctime = origp->mnte_vfs_ctime;
453835ee219SRobert Harris 	copyp->mnte_text_size = origp->mnte_text_size;
454835ee219SRobert Harris 	copyp->mnte_text = kmem_alloc(copyp->mnte_text_size, KM_SLEEP);
455835ee219SRobert Harris 	bcopy(origp->mnte_text, copyp->mnte_text, copyp->mnte_text_size);
456835ee219SRobert Harris 	copyp->mnte_tab = origp->mnte_tab;
457835ee219SRobert Harris 	copyp->mnte_hidden = origp->mnte_hidden;
458835ee219SRobert Harris 
459835ee219SRobert Harris 	return (copyp);
4607c478bd9Sstevel@tonic-gate }
461835ee219SRobert Harris 
462835ee219SRobert Harris /*
463835ee219SRobert Harris  * Compare two database elements and determine whether or not the vfs_t payload
464835ee219SRobert Harris  * data of each are the same. Return 1 if so and 0 otherwise.
465835ee219SRobert Harris  */
466835ee219SRobert Harris static int
467835ee219SRobert Harris mntfs_is_same_element(mntelem_t *a, mntelem_t *b)
468835ee219SRobert Harris {
469835ee219SRobert Harris 	if (a->mnte_hidden == b->mnte_hidden &&
470835ee219SRobert Harris 	    a->mnte_text_size == b->mnte_text_size &&
471835ee219SRobert Harris 	    bcmp(a->mnte_text, b->mnte_text, a->mnte_text_size) == 0 &&
472835ee219SRobert Harris 	    bcmp(&a->mnte_tab, &b->mnte_tab, sizeof (struct extmnttab)) == 0)
473835ee219SRobert Harris 		return (1);
474835ee219SRobert Harris 	else
4757c478bd9Sstevel@tonic-gate 		return (0);
4767c478bd9Sstevel@tonic-gate }
4777c478bd9Sstevel@tonic-gate 
4787c478bd9Sstevel@tonic-gate /*
479835ee219SRobert Harris  * mntfs_snapshot() updates the database, creating it if necessary, so that it
480835ee219SRobert Harris  * accurately reflects the state of the in-kernel mnttab. It also increments
481835ee219SRobert Harris  * the reference count on all database elements that correspond to currently-
482835ee219SRobert Harris  * mounted resources. Finally, it initialises the appropriate snapshot
483835ee219SRobert Harris  * structure.
484835ee219SRobert Harris  *
485835ee219SRobert Harris  * Each vfs_t is given a high-resolution time stamp, for the benefit of mntfs,
486835ee219SRobert Harris  * when it is inserted into the in-kernel mnttab. This time stamp is copied into
487835ee219SRobert Harris  * the corresponding database element when it is created, allowing the element
488835ee219SRobert Harris  * and the vfs_t to be identified as a pair. It is possible that some file
489835ee219SRobert Harris  * systems may make unadvertised changes to, for example, a resource's mount
490835ee219SRobert Harris  * options. Therefore, in order to determine whether a database element is an
491835ee219SRobert Harris  * up-to-date representation of a given vfs_t, it is compared with a temporary
492835ee219SRobert Harris  * element generated for this purpose. Although less efficient, this is safer
493835ee219SRobert Harris  * than implementing an mtime for a vfs_t.
494835ee219SRobert Harris  *
495835ee219SRobert Harris  * Some mounted resources are marked as "hidden" with a VFS_NOMNTTAB flag. These
496835ee219SRobert Harris  * are considered invisible unless the user has already set the MNT_SHOWHIDDEN
497835ee219SRobert Harris  * flag in the vnode using the MNTIOC_SHOWHIDDEN ioctl.
498835ee219SRobert Harris  */
499835ee219SRobert Harris static void
500835ee219SRobert Harris mntfs_snapshot(mntnode_t *mnp, mntsnap_t *snapp)
501835ee219SRobert Harris {
5025545576aSRobert Harris 	mntdata_t	*mnd = MTOD(mnp);
503a19609f8Sjv227347 	zone_t		*zonep = mnd->mnt_zone_ref.zref_zone;
504835ee219SRobert Harris 	int		is_global_zone = (zonep == global_zone);
505835ee219SRobert Harris 	int		show_hidden = mnp->mnt_flags & MNT_SHOWHIDDEN;
506835ee219SRobert Harris 	vfs_t		*vfsp, *firstvfsp, *lastvfsp;
507835ee219SRobert Harris 	vfs_t		dummyvfs;
508835ee219SRobert Harris 	vfs_t		*dummyvfsp = NULL;
509835ee219SRobert Harris 	krwlock_t	*dblockp = &zonep->zone_mntfs_db_lock;
510835ee219SRobert Harris 	mntelem_t	**headpp = &zonep->zone_mntfs_db;
511835ee219SRobert Harris 	mntelem_t	*elemp;
512835ee219SRobert Harris 	mntelem_t	*prevp = NULL;
513835ee219SRobert Harris 	int		order;
514835ee219SRobert Harris 	mntelem_t	*tempelemp;
515835ee219SRobert Harris 	mntelem_t	*newp;
516835ee219SRobert Harris 	mntelem_t	*firstp = NULL;
517835ee219SRobert Harris 	size_t		nmnts = 0;
5185545576aSRobert Harris 	size_t		total_text_size = 0;
5195545576aSRobert Harris 	size_t		normal_text_size = 0;
520835ee219SRobert Harris 	int		insert_before;
521835ee219SRobert Harris 	timespec_t	last_mtime;
522835ee219SRobert Harris 	size_t		entry_length, new_entry_length;
523835ee219SRobert Harris 
524835ee219SRobert Harris 
525835ee219SRobert Harris 	ASSERT(RW_WRITE_HELD(&mnp->mnt_contents));
526835ee219SRobert Harris 	vfs_list_read_lock();
527835ee219SRobert Harris 	vfs_mnttab_modtime(&last_mtime);
528835ee219SRobert Harris 
529835ee219SRobert Harris 	/*
530835ee219SRobert Harris 	 * If this snapshot already exists then we must have been asked to
531835ee219SRobert Harris 	 * rewind the file, i.e. discard the snapshot and create a new one in
532835ee219SRobert Harris 	 * its place. In this case we first see if the in-kernel mnttab has
533835ee219SRobert Harris 	 * advertised a change; if not then we simply reinitialise the metadata.
534835ee219SRobert Harris 	 */
535835ee219SRobert Harris 	if (snapp->mnts_nmnts) {
536835ee219SRobert Harris 		if (mntfs_newest(&last_mtime, &snapp->mnts_last_mtime) ==
537835ee219SRobert Harris 		    MNTFS_NEITHER) {
538835ee219SRobert Harris 			/*
539835ee219SRobert Harris 			 * An unchanged mtime is no guarantee that the
540835ee219SRobert Harris 			 * in-kernel mnttab is unchanged; for example, a
541835ee219SRobert Harris 			 * concurrent remount may be between calls to
542835ee219SRobert Harris 			 * vfs_setmntopt_nolock() and vfs_mnttab_modtimeupd().
543835ee219SRobert Harris 			 * It follows that the database may have changed, and
544835ee219SRobert Harris 			 * in particular that some elements in this snapshot
545835ee219SRobert Harris 			 * may have been killed by another call to
546835ee219SRobert Harris 			 * mntfs_snapshot(). It is therefore not merely
547835ee219SRobert Harris 			 * unnecessary to update the snapshot's time but in
548835ee219SRobert Harris 			 * fact dangerous; it needs to be left alone.
549835ee219SRobert Harris 			 */
550835ee219SRobert Harris 			snapp->mnts_next = snapp->mnts_first;
551835ee219SRobert Harris 			snapp->mnts_flags &= ~MNTS_REWIND;
552835ee219SRobert Harris 			snapp->mnts_foffset = snapp->mnts_ieoffset = 0;
553835ee219SRobert Harris 			vfs_list_unlock();
554835ee219SRobert Harris 			return;
555835ee219SRobert Harris 		} else {
556835ee219SRobert Harris 			mntfs_freesnap(mnp, snapp);
557835ee219SRobert Harris 		}
558835ee219SRobert Harris 	}
559835ee219SRobert Harris 
560835ee219SRobert Harris 	/*
561835ee219SRobert Harris 	 * Create a temporary database element. For each vfs_t, the temporary
562835ee219SRobert Harris 	 * element will be populated with the corresponding text. If the vfs_t
563835ee219SRobert Harris 	 * does not have a corresponding element within the database, or if
564835ee219SRobert Harris 	 * there is such an element but it is stale, a copy of the temporary
565835ee219SRobert Harris 	 * element is inserted into the database at the appropriate location.
566835ee219SRobert Harris 	 */
567835ee219SRobert Harris 	tempelemp = kmem_alloc(sizeof (mntelem_t), KM_SLEEP);
568835ee219SRobert Harris 	entry_length = MNT_LINE_MAX;
569835ee219SRobert Harris 	tempelemp->mnte_text = kmem_alloc(entry_length, KM_SLEEP);
570835ee219SRobert Harris 
571835ee219SRobert Harris 	/* Find the first and last vfs_t for the given zone. */
572835ee219SRobert Harris 	if (is_global_zone) {
573835ee219SRobert Harris 		firstvfsp = rootvfs;
574835ee219SRobert Harris 		lastvfsp = firstvfsp->vfs_prev;
575835ee219SRobert Harris 	} else {
576835ee219SRobert Harris 		firstvfsp = zonep->zone_vfslist;
577835ee219SRobert Harris 		/*
578835ee219SRobert Harris 		 * If there isn't already a vfs_t for root then we create a
579835ee219SRobert Harris 		 * dummy which will be used as the head of the list (which will
580835ee219SRobert Harris 		 * therefore no longer be circular).
581835ee219SRobert Harris 		 */
582835ee219SRobert Harris 		if (firstvfsp == NULL ||
583835ee219SRobert Harris 		    strcmp(refstr_value(firstvfsp->vfs_mntpt),
584835ee219SRobert Harris 		    zonep->zone_rootpath) != 0) {
585835ee219SRobert Harris 			/*
586835ee219SRobert Harris 			 * The zone's vfs_ts will have mount points relative to
587835ee219SRobert Harris 			 * the zone's root path. The vfs_t for the zone's
588835ee219SRobert Harris 			 * root file system would therefore have a mount point
589835ee219SRobert Harris 			 * equal to the zone's root path. Since the zone's root
590835ee219SRobert Harris 			 * path isn't a mount point, we copy the vfs_t of the
591835ee219SRobert Harris 			 * zone's root vnode, and provide it with a fake mount
5925010b7f7SLori Alt 			 * and resource. However, if the zone's root is a
5935010b7f7SLori Alt 			 * zfs dataset, use the dataset name as the resource.
594835ee219SRobert Harris 			 *
595835ee219SRobert Harris 			 * Note that by cloning another vfs_t we also acquire
596835ee219SRobert Harris 			 * its high-resolution ctime. This might appear to
597835ee219SRobert Harris 			 * violate the requirement that the ctimes in the list
598835ee219SRobert Harris 			 * of vfs_ts are unique and monotonically increasing;
599835ee219SRobert Harris 			 * this is not the case. The dummy vfs_t appears in only
600835ee219SRobert Harris 			 * a non-global zone's vfs_t list, where the cloned
601835ee219SRobert Harris 			 * vfs_t would not ordinarily be visible; the ctimes are
602835ee219SRobert Harris 			 * therefore unique. The zone's root path must be
603835ee219SRobert Harris 			 * available before the zone boots, and so its root
604835ee219SRobert Harris 			 * vnode's vfs_t's ctime must be lower than those of any
605835ee219SRobert Harris 			 * resources subsequently mounted by the zone. The
606835ee219SRobert Harris 			 * ctimes are therefore monotonically increasing.
607835ee219SRobert Harris 			 */
608835ee219SRobert Harris 			dummyvfs = *zonep->zone_rootvp->v_vfsp;
609835ee219SRobert Harris 			dummyvfs.vfs_mntpt = refstr_alloc(zonep->zone_rootpath);
6105010b7f7SLori Alt 			if (strcmp(vfssw[dummyvfs.vfs_fstype].vsw_name, "zfs")
6115010b7f7SLori Alt 			    != 0)
612835ee219SRobert Harris 				dummyvfs.vfs_resource = dummyvfs.vfs_mntpt;
613835ee219SRobert Harris 			dummyvfsp = &dummyvfs;
614835ee219SRobert Harris 			if (firstvfsp == NULL) {
615835ee219SRobert Harris 				lastvfsp = dummyvfsp;
616835ee219SRobert Harris 			} else {
617835ee219SRobert Harris 				lastvfsp = firstvfsp->vfs_zone_prev;
618835ee219SRobert Harris 				dummyvfsp->vfs_zone_next = firstvfsp;
619835ee219SRobert Harris 			}
620835ee219SRobert Harris 			firstvfsp = dummyvfsp;
621835ee219SRobert Harris 		} else {
622835ee219SRobert Harris 			lastvfsp = firstvfsp->vfs_zone_prev;
623835ee219SRobert Harris 		}
624835ee219SRobert Harris 	}
625835ee219SRobert Harris 
626835ee219SRobert Harris 	/*
627835ee219SRobert Harris 	 * Now walk through all the vfs_ts for this zone. For each one, find the
628835ee219SRobert Harris 	 * corresponding database element, creating it first if necessary, and
629835ee219SRobert Harris 	 * increment its reference count.
630835ee219SRobert Harris 	 */
631835ee219SRobert Harris 	rw_enter(dblockp, RW_WRITER);
632835ee219SRobert Harris 	elemp = zonep->zone_mntfs_db;
633835ee219SRobert Harris 	/* CSTYLED */
634835ee219SRobert Harris 	for (vfsp = firstvfsp;;
635835ee219SRobert Harris 	    vfsp = is_global_zone ? vfsp->vfs_next : vfsp->vfs_zone_next) {
636835ee219SRobert Harris 		DTRACE_PROBE1(new__vfs, vfs_t *, vfsp);
637835ee219SRobert Harris 		/* Consider only visible entries. */
638835ee219SRobert Harris 		if ((vfsp->vfs_flag & VFS_NOMNTTAB) == 0 || show_hidden) {
639835ee219SRobert Harris 			/*
640835ee219SRobert Harris 			 * Walk through the existing database looking for either
641835ee219SRobert Harris 			 * an element that matches the current vfs_t, or for the
642835ee219SRobert Harris 			 * correct place in which to insert a new element.
643835ee219SRobert Harris 			 */
644835ee219SRobert Harris 			insert_before = 0;
645835ee219SRobert Harris 			for (; elemp; prevp = elemp, elemp = elemp->mnte_next) {
646835ee219SRobert Harris 				DTRACE_PROBE1(considering__elem, mntelem_t *,
647835ee219SRobert Harris 				    elemp);
648835ee219SRobert Harris 
649835ee219SRobert Harris 				/* Compare the vfs_t with the element. */
650835ee219SRobert Harris 				order = mntfs_newest(&elemp->mnte_vfs_ctime,
651835ee219SRobert Harris 				    &vfsp->vfs_hrctime);
652835ee219SRobert Harris 
653835ee219SRobert Harris 				/*
654835ee219SRobert Harris 				 * If we encounter a database element newer than
655835ee219SRobert Harris 				 * this vfs_t then we've stepped over a gap
656835ee219SRobert Harris 				 * where the element for this vfs_t must be
657835ee219SRobert Harris 				 * inserted.
658835ee219SRobert Harris 				 */
659835ee219SRobert Harris 				if (order == MNTFS_FIRST) {
660835ee219SRobert Harris 					insert_before = 1;
661835ee219SRobert Harris 					break;
662835ee219SRobert Harris 				}
663835ee219SRobert Harris 
664835ee219SRobert Harris 				/* Dead elements no longer interest us. */
665835ee219SRobert Harris 				if (MNTFS_ELEM_IS_DEAD(elemp))
666835ee219SRobert Harris 					continue;
667835ee219SRobert Harris 
668835ee219SRobert Harris 				/*
669835ee219SRobert Harris 				 * If the time stamps are the same then the
670835ee219SRobert Harris 				 * element is potential match for the vfs_t,
671835ee219SRobert Harris 				 * although it may later prove to be stale.
672835ee219SRobert Harris 				 */
673835ee219SRobert Harris 				if (order == MNTFS_NEITHER)
674835ee219SRobert Harris 					break;
675835ee219SRobert Harris 
676835ee219SRobert Harris 				/*
677835ee219SRobert Harris 				 * This element must be older than the vfs_t.
678835ee219SRobert Harris 				 * It must, therefore, correspond to a vfs_t
679835ee219SRobert Harris 				 * that has been unmounted. Since the element is
680835ee219SRobert Harris 				 * still alive, we kill it if it is visible.
681835ee219SRobert Harris 				 */
682835ee219SRobert Harris 				if (!elemp->mnte_hidden || show_hidden)
683835ee219SRobert Harris 					vfs_mono_time(&elemp->mnte_death);
684835ee219SRobert Harris 			}
685835ee219SRobert Harris 			DTRACE_PROBE2(possible__match, vfs_t *, vfsp,
686835ee219SRobert Harris 			    mntelem_t *, elemp);
687835ee219SRobert Harris 
688835ee219SRobert Harris 			/* Create a new database element if required. */
689835ee219SRobert Harris 			new_entry_length = mntfs_text_len(vfsp, zonep);
690835ee219SRobert Harris 			if (new_entry_length > entry_length) {
691835ee219SRobert Harris 				kmem_free(tempelemp->mnte_text, entry_length);
692835ee219SRobert Harris 				tempelemp->mnte_text =
693835ee219SRobert Harris 				    kmem_alloc(new_entry_length, KM_SLEEP);
694835ee219SRobert Harris 				entry_length = new_entry_length;
695835ee219SRobert Harris 			}
696835ee219SRobert Harris 			mntfs_populate_text(vfsp, zonep, tempelemp);
697835ee219SRobert Harris 			ASSERT(tempelemp->mnte_text_size == new_entry_length);
698835ee219SRobert Harris 			if (elemp == NULL) {
699835ee219SRobert Harris 				/*
700835ee219SRobert Harris 				 * We ran off the end of the database. Insert a
701835ee219SRobert Harris 				 * new element at the end.
702835ee219SRobert Harris 				 */
703835ee219SRobert Harris 				newp = mntfs_copy(tempelemp);
704835ee219SRobert Harris 				vfs_mono_time(&newp->mnte_birth);
705835ee219SRobert Harris 				if (prevp) {
706835ee219SRobert Harris 					mntfs_insert_after(newp, prevp);
707835ee219SRobert Harris 				} else {
708835ee219SRobert Harris 					newp->mnte_next = NULL;
709835ee219SRobert Harris 					newp->mnte_prev = NULL;
710835ee219SRobert Harris 					ASSERT(*headpp == NULL);
711835ee219SRobert Harris 					*headpp = newp;
712835ee219SRobert Harris 				}
713835ee219SRobert Harris 				elemp = newp;
714835ee219SRobert Harris 			} else if (insert_before) {
715835ee219SRobert Harris 				/*
716835ee219SRobert Harris 				 * Insert a new element before the current one.
717835ee219SRobert Harris 				 */
718835ee219SRobert Harris 				newp = mntfs_copy(tempelemp);
719835ee219SRobert Harris 				vfs_mono_time(&newp->mnte_birth);
720835ee219SRobert Harris 				if (prevp) {
721835ee219SRobert Harris 					mntfs_insert_after(newp, prevp);
722835ee219SRobert Harris 				} else {
723835ee219SRobert Harris 					newp->mnte_next = elemp;
724835ee219SRobert Harris 					newp->mnte_prev = NULL;
725835ee219SRobert Harris 					elemp->mnte_prev = newp;
726835ee219SRobert Harris 					ASSERT(*headpp == elemp);
727835ee219SRobert Harris 					*headpp = newp;
728835ee219SRobert Harris 				}
729835ee219SRobert Harris 				elemp = newp;
730835ee219SRobert Harris 			} else if (!mntfs_is_same_element(elemp, tempelemp)) {
731835ee219SRobert Harris 				/*
732835ee219SRobert Harris 				 * The element corresponds to the vfs_t, but the
733835ee219SRobert Harris 				 * vfs_t has changed; it must have been
734835ee219SRobert Harris 				 * remounted. Kill the old element and insert a
735835ee219SRobert Harris 				 * new one after it.
736835ee219SRobert Harris 				 */
737835ee219SRobert Harris 				vfs_mono_time(&elemp->mnte_death);
738835ee219SRobert Harris 				newp = mntfs_copy(tempelemp);
739835ee219SRobert Harris 				vfs_mono_time(&newp->mnte_birth);
740835ee219SRobert Harris 				mntfs_insert_after(newp, elemp);
741835ee219SRobert Harris 				elemp = newp;
742835ee219SRobert Harris 			}
743835ee219SRobert Harris 
744835ee219SRobert Harris 			/* We've found the corresponding element. Hold it. */
745835ee219SRobert Harris 			DTRACE_PROBE1(incrementing, mntelem_t *, elemp);
746835ee219SRobert Harris 			elemp->mnte_refcnt++;
747835ee219SRobert Harris 
748835ee219SRobert Harris 			/*
749835ee219SRobert Harris 			 * Update the parameters used to initialise the
750835ee219SRobert Harris 			 * snapshot.
751835ee219SRobert Harris 			 */
752835ee219SRobert Harris 			nmnts++;
7535545576aSRobert Harris 			total_text_size += elemp->mnte_text_size;
7545545576aSRobert Harris 			if (!elemp->mnte_hidden)
7555545576aSRobert Harris 				normal_text_size += elemp->mnte_text_size;
756835ee219SRobert Harris 			if (!firstp)
757835ee219SRobert Harris 				firstp = elemp;
758835ee219SRobert Harris 
759835ee219SRobert Harris 			prevp = elemp;
760835ee219SRobert Harris 			elemp = elemp->mnte_next;
761835ee219SRobert Harris 		}
762835ee219SRobert Harris 
763835ee219SRobert Harris 		if (vfsp == lastvfsp)
764835ee219SRobert Harris 			break;
765835ee219SRobert Harris 	}
766835ee219SRobert Harris 
767835ee219SRobert Harris 	/*
768835ee219SRobert Harris 	 * Any remaining visible database elements that are still alive must be
769835ee219SRobert Harris 	 * killed now, because their corresponding vfs_ts must have been
770835ee219SRobert Harris 	 * unmounted.
771835ee219SRobert Harris 	 */
772835ee219SRobert Harris 	for (; elemp; elemp = elemp->mnte_next) {
773835ee219SRobert Harris 		if (MNTFS_ELEM_IS_ALIVE(elemp) &&
774835ee219SRobert Harris 		    (!elemp->mnte_hidden || show_hidden))
775835ee219SRobert Harris 			vfs_mono_time(&elemp->mnte_death);
776835ee219SRobert Harris 	}
777835ee219SRobert Harris 
778835ee219SRobert Harris 	/* Initialise the snapshot. */
779835ee219SRobert Harris 	vfs_mono_time(&snapp->mnts_time);
780835ee219SRobert Harris 	snapp->mnts_last_mtime = last_mtime;
781835ee219SRobert Harris 	snapp->mnts_first = snapp->mnts_next = firstp;
782835ee219SRobert Harris 	snapp->mnts_flags = show_hidden ? MNTS_SHOWHIDDEN : 0;
783835ee219SRobert Harris 	snapp->mnts_nmnts = nmnts;
7845545576aSRobert Harris 	snapp->mnts_text_size = total_text_size;
785835ee219SRobert Harris 	snapp->mnts_foffset = snapp->mnts_ieoffset = 0;
786835ee219SRobert Harris 
7875545576aSRobert Harris 	/*
7885545576aSRobert Harris 	 * Record /etc/mnttab's current size and mtime for possible future use
7895545576aSRobert Harris 	 * by mntgetattr().
7905545576aSRobert Harris 	 */
7915545576aSRobert Harris 	mnd->mnt_size = normal_text_size;
7925545576aSRobert Harris 	mnd->mnt_mtime = last_mtime;
7935545576aSRobert Harris 	if (show_hidden) {
7945545576aSRobert Harris 		mnd->mnt_hidden_size = total_text_size;
7955545576aSRobert Harris 		mnd->mnt_hidden_mtime = last_mtime;
7965545576aSRobert Harris 	}
7975545576aSRobert Harris 
798835ee219SRobert Harris 	/* Clean up. */
799835ee219SRobert Harris 	rw_exit(dblockp);
800835ee219SRobert Harris 	vfs_list_unlock();
801835ee219SRobert Harris 	if (dummyvfsp != NULL)
802835ee219SRobert Harris 		refstr_rele(dummyvfsp->vfs_mntpt);
803835ee219SRobert Harris 	kmem_free(tempelemp->mnte_text, entry_length);
804835ee219SRobert Harris 	kmem_free(tempelemp, sizeof (mntelem_t));
805835ee219SRobert Harris }
806835ee219SRobert Harris 
807835ee219SRobert Harris /*
8087c478bd9Sstevel@tonic-gate  * Public function to convert vfs_mntopts into a string.
8097c478bd9Sstevel@tonic-gate  * A buffer of sufficient size is allocated, which is returned via bufp,
8107c478bd9Sstevel@tonic-gate  * and whose length is returned via lenp.
8117c478bd9Sstevel@tonic-gate  */
8127c478bd9Sstevel@tonic-gate void
8137c478bd9Sstevel@tonic-gate mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp)
8147c478bd9Sstevel@tonic-gate {
8157c478bd9Sstevel@tonic-gate 	size_t len;
8167c478bd9Sstevel@tonic-gate 	char *buf;
8177c478bd9Sstevel@tonic-gate 
8187c478bd9Sstevel@tonic-gate 	vfs_list_read_lock();
8197c478bd9Sstevel@tonic-gate 
8207c478bd9Sstevel@tonic-gate 	len = mntfs_optsize(vfsp) + 1;
8217c478bd9Sstevel@tonic-gate 	buf = kmem_alloc(len, KM_NOSLEEP);
8227c478bd9Sstevel@tonic-gate 	if (buf == NULL) {
8237c478bd9Sstevel@tonic-gate 		*bufp = NULL;
8247c478bd9Sstevel@tonic-gate 		vfs_list_unlock();
8257c478bd9Sstevel@tonic-gate 		return;
8267c478bd9Sstevel@tonic-gate 	}
8277c478bd9Sstevel@tonic-gate 	buf[len - 1] = '\0';
8287c478bd9Sstevel@tonic-gate 	(void) mntfs_optprint(vfsp, buf);
8297c478bd9Sstevel@tonic-gate 	ASSERT(buf[len - 1] == '\0');
8307c478bd9Sstevel@tonic-gate 
8317c478bd9Sstevel@tonic-gate 	vfs_list_unlock();
8327c478bd9Sstevel@tonic-gate 	*bufp = buf;
8337c478bd9Sstevel@tonic-gate 	*lenp = len;
8347c478bd9Sstevel@tonic-gate }
8357c478bd9Sstevel@tonic-gate 
8367c478bd9Sstevel@tonic-gate /* ARGSUSED */
8377c478bd9Sstevel@tonic-gate static int
838da6c28aaSamw mntopen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
8397c478bd9Sstevel@tonic-gate {
8407c478bd9Sstevel@tonic-gate 	vnode_t *vp = *vpp;
8417c478bd9Sstevel@tonic-gate 	mntnode_t *nmnp;
8427c478bd9Sstevel@tonic-gate 
8437c478bd9Sstevel@tonic-gate 	/*
8447c478bd9Sstevel@tonic-gate 	 * Not allowed to open for writing, return error.
8457c478bd9Sstevel@tonic-gate 	 */
8467c478bd9Sstevel@tonic-gate 	if (flag & FWRITE)
8477c478bd9Sstevel@tonic-gate 		return (EPERM);
8487c478bd9Sstevel@tonic-gate 	/*
8497c478bd9Sstevel@tonic-gate 	 * Create a new mnt/vnode for each open, this will give us a handle to
8507c478bd9Sstevel@tonic-gate 	 * hang the snapshot on.
8517c478bd9Sstevel@tonic-gate 	 */
8527c478bd9Sstevel@tonic-gate 	nmnp = mntgetnode(vp);
8537c478bd9Sstevel@tonic-gate 
8547c478bd9Sstevel@tonic-gate 	*vpp = MTOV(nmnp);
855*1a5e258fSJosef 'Jeff' Sipek 	atomic_inc_32(&MTOD(nmnp)->mnt_nopen);
8567c478bd9Sstevel@tonic-gate 	VN_RELE(vp);
8577c478bd9Sstevel@tonic-gate 	return (0);
8587c478bd9Sstevel@tonic-gate }
8597c478bd9Sstevel@tonic-gate 
8607c478bd9Sstevel@tonic-gate /* ARGSUSED */
8617c478bd9Sstevel@tonic-gate static int
862da6c28aaSamw mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
863da6c28aaSamw 	caller_context_t *ct)
8647c478bd9Sstevel@tonic-gate {
8657c478bd9Sstevel@tonic-gate 	mntnode_t *mnp = VTOM(vp);
8667c478bd9Sstevel@tonic-gate 
8677c478bd9Sstevel@tonic-gate 	/* Clean up any locks or shares held by the current process */
8687c478bd9Sstevel@tonic-gate 	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
8697c478bd9Sstevel@tonic-gate 	cleanshares(vp, ttoproc(curthread)->p_pid);
8707c478bd9Sstevel@tonic-gate 
8717c478bd9Sstevel@tonic-gate 	if (count > 1)
8727c478bd9Sstevel@tonic-gate 		return (0);
8737c478bd9Sstevel@tonic-gate 	if (vp->v_count == 1) {
874835ee219SRobert Harris 		rw_enter(&mnp->mnt_contents, RW_WRITER);
875835ee219SRobert Harris 		mntfs_freesnap(mnp, &mnp->mnt_read);
876835ee219SRobert Harris 		mntfs_freesnap(mnp, &mnp->mnt_ioctl);
877835ee219SRobert Harris 		rw_exit(&mnp->mnt_contents);
878*1a5e258fSJosef 'Jeff' Sipek 		atomic_dec_32(&MTOD(mnp)->mnt_nopen);
8797c478bd9Sstevel@tonic-gate 	}
8807c478bd9Sstevel@tonic-gate 	return (0);
8817c478bd9Sstevel@tonic-gate }
8827c478bd9Sstevel@tonic-gate 
8837c478bd9Sstevel@tonic-gate /* ARGSUSED */
8847c478bd9Sstevel@tonic-gate static int
8857c478bd9Sstevel@tonic-gate mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct)
8867c478bd9Sstevel@tonic-gate {
887835ee219SRobert Harris 	mntnode_t *mnp = VTOM(vp);
888a19609f8Sjv227347 	zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone;
889835ee219SRobert Harris 	mntsnap_t *snapp = &mnp->mnt_read;
8907c478bd9Sstevel@tonic-gate 	off_t off = uio->uio_offset;
8917c478bd9Sstevel@tonic-gate 	size_t len = uio->uio_resid;
892835ee219SRobert Harris 	char *bufferp;
893835ee219SRobert Harris 	size_t available, copylen;
894835ee219SRobert Harris 	size_t written = 0;
895835ee219SRobert Harris 	mntelem_t *elemp;
896835ee219SRobert Harris 	krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
897835ee219SRobert Harris 	int error = 0;
898835ee219SRobert Harris 	off_t	ieoffset;
8997c478bd9Sstevel@tonic-gate 
90054c529d4SViswanathan Kannappan 	rw_enter(&mnp->mnt_contents, RW_WRITER);
901835ee219SRobert Harris 	if (snapp->mnts_nmnts == 0 || (off == (off_t)0))
902835ee219SRobert Harris 		mntfs_snapshot(mnp, snapp);
9037c478bd9Sstevel@tonic-gate 
904835ee219SRobert Harris 	if ((size_t)(off + len) > snapp->mnts_text_size)
905835ee219SRobert Harris 		len = snapp->mnts_text_size - off;
906835ee219SRobert Harris 
907835ee219SRobert Harris 	if (off < 0 || len > snapp->mnts_text_size) {
90854c529d4SViswanathan Kannappan 		rw_exit(&mnp->mnt_contents);
9097c478bd9Sstevel@tonic-gate 		return (EFAULT);
91054c529d4SViswanathan Kannappan 	}
9117c478bd9Sstevel@tonic-gate 
91254c529d4SViswanathan Kannappan 	if (len == 0) {
91354c529d4SViswanathan Kannappan 		rw_exit(&mnp->mnt_contents);
9147c478bd9Sstevel@tonic-gate 		return (0);
91554c529d4SViswanathan Kannappan 	}
9167c478bd9Sstevel@tonic-gate 
9177c478bd9Sstevel@tonic-gate 	/*
918835ee219SRobert Harris 	 * For the file offset provided, locate the corresponding database
919835ee219SRobert Harris 	 * element and calculate the corresponding offset within its text. If
920835ee219SRobert Harris 	 * the file offset is the same as that reached during the last read(2)
921835ee219SRobert Harris 	 * then use the saved element and intra-element offset.
9227c478bd9Sstevel@tonic-gate 	 */
923835ee219SRobert Harris 	rw_enter(dblockp, RW_READER);
924835ee219SRobert Harris 	if (off == 0 || (off == snapp->mnts_foffset)) {
925835ee219SRobert Harris 		elemp = snapp->mnts_next;
926835ee219SRobert Harris 		ieoffset = snapp->mnts_ieoffset;
927835ee219SRobert Harris 	} else {
928835ee219SRobert Harris 		off_t total_off;
929835ee219SRobert Harris 		/*
930835ee219SRobert Harris 		 * Find the element corresponding to the requested file offset
931835ee219SRobert Harris 		 * by walking through the database and summing the text sizes
932835ee219SRobert Harris 		 * of the individual elements. If the requested file offset is
933835ee219SRobert Harris 		 * greater than that reached on the last visit then we can start
934835ee219SRobert Harris 		 * at the last seen element; otherwise, we have to start at the
935835ee219SRobert Harris 		 * beginning.
936835ee219SRobert Harris 		 */
937835ee219SRobert Harris 		if (off > snapp->mnts_foffset) {
938835ee219SRobert Harris 			elemp = snapp->mnts_next;
939835ee219SRobert Harris 			total_off = snapp->mnts_foffset - snapp->mnts_ieoffset;
940835ee219SRobert Harris 		} else {
941835ee219SRobert Harris 			elemp = snapp->mnts_first;
942835ee219SRobert Harris 			total_off = 0;
9437c478bd9Sstevel@tonic-gate 		}
944835ee219SRobert Harris 		while (off > total_off + elemp->mnte_text_size) {
945835ee219SRobert Harris 			total_off += elemp->mnte_text_size;
946835ee219SRobert Harris 			elemp = mntfs_get_next_elem(snapp, elemp);
947835ee219SRobert Harris 			ASSERT(elemp != NULL);
948835ee219SRobert Harris 		}
949835ee219SRobert Harris 		/* Calculate the intra-element offset. */
950835ee219SRobert Harris 		if (off > total_off)
951835ee219SRobert Harris 			ieoffset = off - total_off;
952835ee219SRobert Harris 		else
953835ee219SRobert Harris 			ieoffset = 0;
9547c478bd9Sstevel@tonic-gate 	}
9557c478bd9Sstevel@tonic-gate 
956835ee219SRobert Harris 	/*
957835ee219SRobert Harris 	 * Create a buffer and populate it with the text from successive
958835ee219SRobert Harris 	 * database elements until it is full.
959835ee219SRobert Harris 	 */
960835ee219SRobert Harris 	bufferp = kmem_alloc(len, KM_SLEEP);
961835ee219SRobert Harris 	while (written < len) {
962835ee219SRobert Harris 		available = elemp->mnte_text_size - ieoffset;
963835ee219SRobert Harris 		copylen = MIN(len - written, available);
964835ee219SRobert Harris 		bcopy(elemp->mnte_text + ieoffset, bufferp + written, copylen);
965835ee219SRobert Harris 		written += copylen;
966835ee219SRobert Harris 		if (copylen == available) {
967835ee219SRobert Harris 			elemp = mntfs_get_next_elem(snapp, elemp);
968835ee219SRobert Harris 			ASSERT(elemp != NULL || written == len);
969835ee219SRobert Harris 			ieoffset = 0;
970835ee219SRobert Harris 		} else {
971835ee219SRobert Harris 			ieoffset += copylen;
972835ee219SRobert Harris 		}
973835ee219SRobert Harris 	}
974835ee219SRobert Harris 	rw_exit(dblockp);
975835ee219SRobert Harris 
976835ee219SRobert Harris 	/*
977835ee219SRobert Harris 	 * Write the populated buffer, update the snapshot's state if
978835ee219SRobert Harris 	 * successful and then advertise our read.
979835ee219SRobert Harris 	 */
980835ee219SRobert Harris 	error = uiomove(bufferp, len, UIO_READ, uio);
981835ee219SRobert Harris 	if (error == 0) {
982835ee219SRobert Harris 		snapp->mnts_next = elemp;
983835ee219SRobert Harris 		snapp->mnts_foffset = off + len;
984835ee219SRobert Harris 		snapp->mnts_ieoffset = ieoffset;
985835ee219SRobert Harris 	}
986835ee219SRobert Harris 	vfs_mnttab_readop();
987835ee219SRobert Harris 	rw_exit(&mnp->mnt_contents);
988835ee219SRobert Harris 
989835ee219SRobert Harris 	/* Clean up. */
990835ee219SRobert Harris 	kmem_free(bufferp, len);
991835ee219SRobert Harris 	return (error);
992835ee219SRobert Harris }
9937c478bd9Sstevel@tonic-gate 
9947c478bd9Sstevel@tonic-gate static int
995da6c28aaSamw mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
996da6c28aaSamw 	caller_context_t *ct)
9977c478bd9Sstevel@tonic-gate {
9985545576aSRobert Harris 	int mask = vap->va_mask;
9997c478bd9Sstevel@tonic-gate 	int error;
10005545576aSRobert Harris 	mntnode_t *mnp = VTOM(vp);
10015545576aSRobert Harris 	timespec_t mtime, old_mtime;
10025545576aSRobert Harris 	size_t size, old_size;
10037c478bd9Sstevel@tonic-gate 	mntdata_t *mntdata = MTOD(VTOM(vp));
10045545576aSRobert Harris 	mntsnap_t *rsnapp, *isnapp;
10055545576aSRobert Harris 	extern timespec_t vfs_mnttab_ctime;
10067c478bd9Sstevel@tonic-gate 
10075545576aSRobert Harris 
10085545576aSRobert Harris 	/* AT_MODE, AT_UID and AT_GID are derived from the underlying file. */
10095545576aSRobert Harris 	if (mask & AT_MODE|AT_UID|AT_GID) {
10105545576aSRobert Harris 		if (error = VOP_GETATTR(mnp->mnt_mountvp, vap, flags, cr, ct))
10117c478bd9Sstevel@tonic-gate 			return (error);
1012835ee219SRobert Harris 	}
10137c478bd9Sstevel@tonic-gate 
10147c478bd9Sstevel@tonic-gate 	/*
10155545576aSRobert Harris 	 * There are some minor subtleties in the determination of
10165545576aSRobert Harris 	 * /etc/mnttab's size and mtime. We wish to avoid any condition in
10175545576aSRobert Harris 	 * which, in the vicinity of a change to the in-kernel mnttab, we
10185545576aSRobert Harris 	 * return an old value for one but a new value for the other. We cannot
10195545576aSRobert Harris 	 * simply hold vfslist for the entire calculation because we might need
10205545576aSRobert Harris 	 * to call mntfs_snapshot(), which calls vfs_list_read_lock().
10217c478bd9Sstevel@tonic-gate 	 */
10225545576aSRobert Harris 	if (mask & AT_SIZE|AT_NBLOCKS) {
10235545576aSRobert Harris 		rw_enter(&mnp->mnt_contents, RW_WRITER);
10245545576aSRobert Harris 
10255545576aSRobert Harris 		vfs_list_read_lock();
10265545576aSRobert Harris 		vfs_mnttab_modtime(&mtime);
10275545576aSRobert Harris 		if (mnp->mnt_flags & MNT_SHOWHIDDEN) {
10285545576aSRobert Harris 			old_mtime = mntdata->mnt_hidden_mtime;
10295545576aSRobert Harris 			old_size = mntdata->mnt_hidden_size;
10305545576aSRobert Harris 		} else {
10315545576aSRobert Harris 			old_mtime = mntdata->mnt_mtime;
10325545576aSRobert Harris 			old_size = mntdata->mnt_size;
10335545576aSRobert Harris 		}
10345545576aSRobert Harris 		vfs_list_unlock();
10355545576aSRobert Harris 
10365545576aSRobert Harris 		rsnapp = &mnp->mnt_read;
10375545576aSRobert Harris 		isnapp = &mnp->mnt_ioctl;
10385545576aSRobert Harris 		if (rsnapp->mnts_nmnts || isnapp->mnts_nmnts) {
10397c478bd9Sstevel@tonic-gate 			/*
10405545576aSRobert Harris 			 * The mntnode already has at least one snapshot from
10415545576aSRobert Harris 			 * which to take the size; the user will understand from
10425545576aSRobert Harris 			 * mnttab(4) that the current size of the in-kernel
10435545576aSRobert Harris 			 * mnttab is irrelevant.
10447c478bd9Sstevel@tonic-gate 			 */
10455545576aSRobert Harris 			size = rsnapp->mnts_nmnts ? rsnapp->mnts_text_size :
10465545576aSRobert Harris 			    isnapp->mnts_text_size;
10475545576aSRobert Harris 		} else if (mntfs_newest(&mtime, &old_mtime) == MNTFS_NEITHER) {
10485545576aSRobert Harris 			/*
10495545576aSRobert Harris 			 * There is no existing valid snapshot but the in-kernel
10505545576aSRobert Harris 			 * mnttab has not changed since the time that the last
10515545576aSRobert Harris 			 * one was generated. Use the old file size; note that
10525545576aSRobert Harris 			 * it is guaranteed to be consistent with mtime, which
10535545576aSRobert Harris 			 * may be returned to the user later.
10545545576aSRobert Harris 			 */
10555545576aSRobert Harris 			size = old_size;
10565545576aSRobert Harris 		} else {
10575545576aSRobert Harris 			/*
10585545576aSRobert Harris 			 * There is no snapshot and the in-kernel mnttab has
10595545576aSRobert Harris 			 * changed since the last one was created. We generate a
10605545576aSRobert Harris 			 * new snapshot which we use for not only the size but
10615545576aSRobert Harris 			 * also the mtime, thereby ensuring that the two are
10625545576aSRobert Harris 			 * consistent.
10635545576aSRobert Harris 			 */
10645545576aSRobert Harris 			mntfs_snapshot(mnp, rsnapp);
10655545576aSRobert Harris 			size = rsnapp->mnts_text_size;
10665545576aSRobert Harris 			mtime = rsnapp->mnts_last_mtime;
10675545576aSRobert Harris 			mntfs_freesnap(mnp, rsnapp);
10685545576aSRobert Harris 		}
10695545576aSRobert Harris 
10705545576aSRobert Harris 		rw_exit(&mnp->mnt_contents);
10715545576aSRobert Harris 	} else if (mask & AT_ATIME|AT_MTIME) {
10725545576aSRobert Harris 		vfs_list_read_lock();
10735545576aSRobert Harris 		vfs_mnttab_modtime(&mtime);
10745545576aSRobert Harris 		vfs_list_unlock();
10755545576aSRobert Harris 	}
10765545576aSRobert Harris 
10775545576aSRobert Harris 	/* Always look like a regular file. */
10785545576aSRobert Harris 	if (mask & AT_TYPE)
10795545576aSRobert Harris 		vap->va_type = VREG;
10805545576aSRobert Harris 	/* Mode should basically be read only. */
10815545576aSRobert Harris 	if (mask & AT_MODE)
10827c478bd9Sstevel@tonic-gate 		vap->va_mode &= 07444;
10835545576aSRobert Harris 	if (mask & AT_FSID)
10847c478bd9Sstevel@tonic-gate 		vap->va_fsid = vp->v_vfsp->vfs_dev;
10855545576aSRobert Harris 	/* Nodeid is always ROOTINO. */
10865545576aSRobert Harris 	if (mask & AT_NODEID)
10875545576aSRobert Harris 		vap->va_nodeid = (ino64_t)MNTROOTINO;
10887c478bd9Sstevel@tonic-gate 	/*
10897c478bd9Sstevel@tonic-gate 	 * Set nlink to the number of open vnodes for mnttab info
10907c478bd9Sstevel@tonic-gate 	 * plus one for existing.
10917c478bd9Sstevel@tonic-gate 	 */
10925545576aSRobert Harris 	if (mask & AT_NLINK)
10937c478bd9Sstevel@tonic-gate 		vap->va_nlink = mntdata->mnt_nopen + 1;
10945545576aSRobert Harris 	if (mask & AT_SIZE)
10955545576aSRobert Harris 		vap->va_size = size;
10965545576aSRobert Harris 	if (mask & AT_ATIME)
10975545576aSRobert Harris 		vap->va_atime = mtime;
10985545576aSRobert Harris 	if (mask & AT_MTIME)
10995545576aSRobert Harris 		vap->va_mtime = mtime;
11005545576aSRobert Harris 	if (mask & AT_CTIME)
11017c478bd9Sstevel@tonic-gate 		vap->va_ctime = vfs_mnttab_ctime;
11025545576aSRobert Harris 	if (mask & AT_RDEV)
11035545576aSRobert Harris 		vap->va_rdev = 0;
11045545576aSRobert Harris 	if (mask & AT_BLKSIZE)
11055545576aSRobert Harris 		vap->va_blksize = DEV_BSIZE;
11065545576aSRobert Harris 	if (mask & AT_NBLOCKS)
11075545576aSRobert Harris 		vap->va_nblocks = btod(size);
11085545576aSRobert Harris 	if (mask & AT_SEQ)
11095545576aSRobert Harris 		vap->va_seq = 0;
11105545576aSRobert Harris 
11117c478bd9Sstevel@tonic-gate 	return (0);
11127c478bd9Sstevel@tonic-gate }
11137c478bd9Sstevel@tonic-gate 
11147c478bd9Sstevel@tonic-gate static int
1115da6c28aaSamw mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr,
1116da6c28aaSamw 	caller_context_t *ct)
11177c478bd9Sstevel@tonic-gate {
11187c478bd9Sstevel@tonic-gate 	mntnode_t *mnp = VTOM(vp);
11197c478bd9Sstevel@tonic-gate 
11207c478bd9Sstevel@tonic-gate 	if (mode & (VWRITE|VEXEC))
11217c478bd9Sstevel@tonic-gate 		return (EROFS);
11227c478bd9Sstevel@tonic-gate 
11237c478bd9Sstevel@tonic-gate 	/*
11247c478bd9Sstevel@tonic-gate 	 * Do access check on the underlying directory vnode.
11257c478bd9Sstevel@tonic-gate 	 */
1126da6c28aaSamw 	return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr, ct));
11277c478bd9Sstevel@tonic-gate }
11287c478bd9Sstevel@tonic-gate 
11297c478bd9Sstevel@tonic-gate 
11307c478bd9Sstevel@tonic-gate /*
11317c478bd9Sstevel@tonic-gate  * New /mntfs vnode required; allocate it and fill in most of the fields.
11327c478bd9Sstevel@tonic-gate  */
11337c478bd9Sstevel@tonic-gate static mntnode_t *
11347c478bd9Sstevel@tonic-gate mntgetnode(vnode_t *dp)
11357c478bd9Sstevel@tonic-gate {
11367c478bd9Sstevel@tonic-gate 	mntnode_t *mnp;
11377c478bd9Sstevel@tonic-gate 	vnode_t *vp;
11387c478bd9Sstevel@tonic-gate 
11397c478bd9Sstevel@tonic-gate 	mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP);
11407c478bd9Sstevel@tonic-gate 	mnp->mnt_vnode = vn_alloc(KM_SLEEP);
11417c478bd9Sstevel@tonic-gate 	mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp;
114254c529d4SViswanathan Kannappan 	rw_init(&mnp->mnt_contents, NULL, RW_DEFAULT, NULL);
11437c478bd9Sstevel@tonic-gate 	vp = MTOV(mnp);
11447c478bd9Sstevel@tonic-gate 	vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
11457c478bd9Sstevel@tonic-gate 	vn_setops(vp, mntvnodeops);
11467c478bd9Sstevel@tonic-gate 	vp->v_vfsp = dp->v_vfsp;
11477c478bd9Sstevel@tonic-gate 	vp->v_type = VREG;
11487c478bd9Sstevel@tonic-gate 	vp->v_data = (caddr_t)mnp;
11497c478bd9Sstevel@tonic-gate 
11507c478bd9Sstevel@tonic-gate 	return (mnp);
11517c478bd9Sstevel@tonic-gate }
11527c478bd9Sstevel@tonic-gate 
11537c478bd9Sstevel@tonic-gate /*
11547c478bd9Sstevel@tonic-gate  * Free the storage obtained from mntgetnode().
11557c478bd9Sstevel@tonic-gate  */
11567c478bd9Sstevel@tonic-gate static void
11577c478bd9Sstevel@tonic-gate mntfreenode(mntnode_t *mnp)
11587c478bd9Sstevel@tonic-gate {
11597c478bd9Sstevel@tonic-gate 	vnode_t *vp = MTOV(mnp);
11607c478bd9Sstevel@tonic-gate 
116154c529d4SViswanathan Kannappan 	rw_destroy(&mnp->mnt_contents);
11627c478bd9Sstevel@tonic-gate 	vn_invalid(vp);
11637c478bd9Sstevel@tonic-gate 	vn_free(vp);
11647c478bd9Sstevel@tonic-gate 	kmem_free(mnp, sizeof (*mnp));
11657c478bd9Sstevel@tonic-gate }
11667c478bd9Sstevel@tonic-gate 
11677c478bd9Sstevel@tonic-gate 
11687c478bd9Sstevel@tonic-gate /* ARGSUSED */
11697c478bd9Sstevel@tonic-gate static int
1170da6c28aaSamw mntfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
11717c478bd9Sstevel@tonic-gate {
11727c478bd9Sstevel@tonic-gate 	return (0);
11737c478bd9Sstevel@tonic-gate }
11747c478bd9Sstevel@tonic-gate 
11757c478bd9Sstevel@tonic-gate /* ARGSUSED */
11767c478bd9Sstevel@tonic-gate static void
1177da6c28aaSamw mntinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
11787c478bd9Sstevel@tonic-gate {
11797c478bd9Sstevel@tonic-gate 	mntnode_t *mnp = VTOM(vp);
11807c478bd9Sstevel@tonic-gate 
11817c478bd9Sstevel@tonic-gate 	mntfreenode(mnp);
11827c478bd9Sstevel@tonic-gate }
11837c478bd9Sstevel@tonic-gate 
1184835ee219SRobert Harris /*
11855545576aSRobert Harris  * lseek(2) is supported only to rewind the file by resetmnttab(3C). Rewinding
11865545576aSRobert Harris  * has a special meaning for /etc/mnttab: it forces mntfs to refresh the
11875545576aSRobert Harris  * snapshot at the next ioctl().
1188835ee219SRobert Harris  *
11895545576aSRobert Harris  * mnttab(4) explains that "the snapshot...is taken any time a read(2) is
11905545576aSRobert Harris  * performed at offset 0". We therefore ignore the read snapshot here.
1191835ee219SRobert Harris  */
11927c478bd9Sstevel@tonic-gate /* ARGSUSED */
11937c478bd9Sstevel@tonic-gate static int
1194835ee219SRobert Harris mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
11957c478bd9Sstevel@tonic-gate {
119654c529d4SViswanathan Kannappan 	mntnode_t *mnp = VTOM(vp);
119754c529d4SViswanathan Kannappan 
119854c529d4SViswanathan Kannappan 	if (*noffp == 0) {
119954c529d4SViswanathan Kannappan 		rw_enter(&mnp->mnt_contents, RW_WRITER);
1200835ee219SRobert Harris 		mnp->mnt_ioctl.mnts_flags |= MNTS_REWIND;
120154c529d4SViswanathan Kannappan 		rw_exit(&mnp->mnt_contents);
120254c529d4SViswanathan Kannappan 	}
12037c478bd9Sstevel@tonic-gate 
12047c478bd9Sstevel@tonic-gate 	return (0);
12057c478bd9Sstevel@tonic-gate }
12067c478bd9Sstevel@tonic-gate 
12077c478bd9Sstevel@tonic-gate /*
12087c478bd9Sstevel@tonic-gate  * Return the answer requested to poll().
12097c478bd9Sstevel@tonic-gate  * POLLRDBAND will return when the mtime of the mnttab
12107c478bd9Sstevel@tonic-gate  * information is newer than the latest one read for this open.
12117c478bd9Sstevel@tonic-gate  */
12127c478bd9Sstevel@tonic-gate /* ARGSUSED */
12137c478bd9Sstevel@tonic-gate static int
1214da6c28aaSamw mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp,
1215da6c28aaSamw 	caller_context_t *ct)
12167c478bd9Sstevel@tonic-gate {
12177c478bd9Sstevel@tonic-gate 	mntnode_t *mnp = VTOM(vp);
1218835ee219SRobert Harris 	mntsnap_t *snapp;
12197c478bd9Sstevel@tonic-gate 
122054c529d4SViswanathan Kannappan 	rw_enter(&mnp->mnt_contents, RW_READER);
1221835ee219SRobert Harris 	if (mntfs_newest(&mnp->mnt_ioctl.mnts_last_mtime,
1222835ee219SRobert Harris 	    &mnp->mnt_read.mnts_last_mtime) == MNTFS_FIRST)
1223835ee219SRobert Harris 		snapp = &mnp->mnt_ioctl;
1224835ee219SRobert Harris 	else
1225835ee219SRobert Harris 		snapp = &mnp->mnt_read;
12267c478bd9Sstevel@tonic-gate 
12277c478bd9Sstevel@tonic-gate 	*revp = 0;
12287c478bd9Sstevel@tonic-gate 	*phpp = (pollhead_t *)NULL;
12297c478bd9Sstevel@tonic-gate 	if (ev & POLLIN)
12307c478bd9Sstevel@tonic-gate 		*revp |= POLLIN;
12317c478bd9Sstevel@tonic-gate 
12327c478bd9Sstevel@tonic-gate 	if (ev & POLLRDNORM)
12337c478bd9Sstevel@tonic-gate 		*revp |= POLLRDNORM;
12347c478bd9Sstevel@tonic-gate 
12357c478bd9Sstevel@tonic-gate 	if (ev & POLLRDBAND) {
1236835ee219SRobert Harris 		vfs_mnttab_poll(&snapp->mnts_last_mtime, phpp);
12377c478bd9Sstevel@tonic-gate 		if (*phpp == (pollhead_t *)NULL)
12387c478bd9Sstevel@tonic-gate 			*revp |= POLLRDBAND;
12397c478bd9Sstevel@tonic-gate 	}
124054c529d4SViswanathan Kannappan 	rw_exit(&mnp->mnt_contents);
124154c529d4SViswanathan Kannappan 
12427c478bd9Sstevel@tonic-gate 	if (*revp || *phpp != NULL || any) {
12437c478bd9Sstevel@tonic-gate 		return (0);
12447c478bd9Sstevel@tonic-gate 	}
12457c478bd9Sstevel@tonic-gate 	/*
12467c478bd9Sstevel@tonic-gate 	 * If someone is polling an unsupported poll events (e.g.
12477c478bd9Sstevel@tonic-gate 	 * POLLOUT, POLLPRI, etc.), just return POLLERR revents.
12487c478bd9Sstevel@tonic-gate 	 * That way we will ensure that we don't return a 0
12497c478bd9Sstevel@tonic-gate 	 * revents with a NULL pollhead pointer.
12507c478bd9Sstevel@tonic-gate 	 */
12517c478bd9Sstevel@tonic-gate 	*revp = POLLERR;
12527c478bd9Sstevel@tonic-gate 	return (0);
12537c478bd9Sstevel@tonic-gate }
1254835ee219SRobert Harris 
1255835ee219SRobert Harris /*
1256835ee219SRobert Harris  * mntfs_same_word() returns 1 if two words are the same in the context of
1257835ee219SRobert Harris  * MNTIOC_GETMNTANY and 0 otherwise.
1258835ee219SRobert Harris  *
1259835ee219SRobert Harris  * worda is a memory address that lies somewhere in the buffer bufa; it cannot
1260835ee219SRobert Harris  * be NULL since this is used to indicate to getmntany(3C) that the user does
1261835ee219SRobert Harris  * not wish to match a particular field. The text to which worda points is
1262835ee219SRobert Harris  * supplied by the user; if it is not null-terminated then it cannot match.
1263835ee219SRobert Harris  *
1264835ee219SRobert Harris  * Buffer bufb contains a line from /etc/mnttab, in which the fields are
1265835ee219SRobert Harris  * delimited by tab or new-line characters. offb is the offset of the second
1266835ee219SRobert Harris  * word within this buffer.
1267835ee219SRobert Harris  *
1268835ee219SRobert Harris  * mntfs_same_word() returns 1 if the words are the same and 0 otherwise.
1269835ee219SRobert Harris  */
1270835ee219SRobert Harris int
1271835ee219SRobert Harris mntfs_same_word(char *worda, char *bufa, size_t sizea, off_t offb, char *bufb,
1272835ee219SRobert Harris     size_t sizeb)
1273835ee219SRobert Harris {
1274835ee219SRobert Harris 	char *wordb = bufb + offb;
1275835ee219SRobert Harris 	int bytes_remaining;
1276835ee219SRobert Harris 
1277835ee219SRobert Harris 	ASSERT(worda != NULL);
1278835ee219SRobert Harris 
1279835ee219SRobert Harris 	bytes_remaining = MIN(((bufa + sizea) - worda),
1280835ee219SRobert Harris 	    ((bufb + sizeb) - wordb));
1281835ee219SRobert Harris 	while (bytes_remaining && *worda == *wordb) {
1282835ee219SRobert Harris 		worda++;
1283835ee219SRobert Harris 		wordb++;
1284835ee219SRobert Harris 		bytes_remaining--;
1285835ee219SRobert Harris 	}
1286835ee219SRobert Harris 	if (bytes_remaining &&
1287835ee219SRobert Harris 	    *worda == '\0' && (*wordb == '\t' || *wordb == '\n'))
1288835ee219SRobert Harris 		return (1);
1289835ee219SRobert Harris 	else
1290835ee219SRobert Harris 		return (0);
1291835ee219SRobert Harris }
1292835ee219SRobert Harris 
1293835ee219SRobert Harris /*
1294835ee219SRobert Harris  * mntfs_special_info_string() returns which, if either, of VBLK or VCHR
1295835ee219SRobert Harris  * corresponds to a supplied path. If the path is a special device then the
1296835ee219SRobert Harris  * function optionally sets the major and minor numbers.
1297835ee219SRobert Harris  */
1298835ee219SRobert Harris vtype_t
1299835ee219SRobert Harris mntfs_special_info_string(char *path, uint_t *major, uint_t *minor, cred_t *cr)
1300835ee219SRobert Harris {
1301835ee219SRobert Harris 	vattr_t vattr;
1302835ee219SRobert Harris 	vnode_t *vp;
1303835ee219SRobert Harris 	vtype_t type;
1304835ee219SRobert Harris 	int error;
1305835ee219SRobert Harris 
1306835ee219SRobert Harris 	if (path == NULL || *path != '/' ||
1307835ee219SRobert Harris 	    lookupnameat(path + 1, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir))
1308835ee219SRobert Harris 		return (0);
1309835ee219SRobert Harris 
1310835ee219SRobert Harris 	vattr.va_mask = AT_TYPE | AT_RDEV;
1311835ee219SRobert Harris 	error = VOP_GETATTR(vp, &vattr, ATTR_REAL, cr, NULL);
1312835ee219SRobert Harris 	VN_RELE(vp);
1313835ee219SRobert Harris 
1314835ee219SRobert Harris 	if (error == 0 && ((type = vattr.va_type) == VBLK || type == VCHR)) {
1315835ee219SRobert Harris 		if (major && minor) {
1316835ee219SRobert Harris 			*major = getmajor(vattr.va_rdev);
1317835ee219SRobert Harris 			*minor = getminor(vattr.va_rdev);
1318835ee219SRobert Harris 		}
1319835ee219SRobert Harris 		return (type);
1320835ee219SRobert Harris 	} else {
1321835ee219SRobert Harris 		return (0);
1322835ee219SRobert Harris 	}
1323835ee219SRobert Harris }
1324835ee219SRobert Harris 
1325835ee219SRobert Harris /*
1326835ee219SRobert Harris  * mntfs_special_info_element() extracts the name of the mounted resource
1327835ee219SRobert Harris  * for a given element and copies it into a null-terminated string, which it
1328835ee219SRobert Harris  * then passes to mntfs_special_info_string().
1329835ee219SRobert Harris  */
1330835ee219SRobert Harris vtype_t
1331835ee219SRobert Harris mntfs_special_info_element(mntelem_t *elemp, cred_t *cr)
1332835ee219SRobert Harris {
1333835ee219SRobert Harris 	char *newpath;
1334835ee219SRobert Harris 	vtype_t type;
1335835ee219SRobert Harris 
1336835ee219SRobert Harris 	newpath = kmem_alloc(elemp->mnte_text_size, KM_SLEEP);
1337835ee219SRobert Harris 	bcopy(elemp->mnte_text, newpath, (off_t)(elemp->mnte_tab.mnt_mountp));
1338835ee219SRobert Harris 	*(newpath + (off_t)elemp->mnte_tab.mnt_mountp - 1) = '\0';
1339835ee219SRobert Harris 	type = mntfs_special_info_string(newpath, NULL, NULL, cr);
1340835ee219SRobert Harris 	kmem_free(newpath, elemp->mnte_text_size);
1341835ee219SRobert Harris 
1342835ee219SRobert Harris 	return (type);
1343835ee219SRobert Harris }
1344835ee219SRobert Harris 
/*
 * Convert an address that points to a byte within a user buffer into an
 * address that points to the corresponding offset within a kernel buffer.
 *
 * uaddr is the user address to convert, ubufp the start of the user buffer,
 * kbufp the start of the kernel copy and bufsize the size of both. If uaddr
 * lies within [ubufp, ubufp + bufsize) the result is kbufp plus the same
 * offset; any other address is converted to NULL.
 */
char *
mntfs_import_addr(char *uaddr, char *ubufp, char *kbufp, size_t bufsize)
{
	if (uaddr >= ubufp && uaddr < ubufp + bufsize)
		return (kbufp + (uaddr - ubufp));

	return (NULL);
}
1359835ee219SRobert Harris 
/*
 * These 32-bit versions are to support STRUCT_DECL(9F) etc. in
 * mntfs_copyout_elem() and mntioctl(). Fields that hold user addresses are
 * stored as uint32_t; the remaining fields are plain unsigned integers.
 */
#ifdef _SYSCALL32_IMPL
/* 32-bit layout used for "extmnttab" structure handles. */
typedef struct extmnttab32 {
	uint32_t	mnt_special;	/* user address of resource text */
	uint32_t	mnt_mountp;	/* user address of mount point text */
	uint32_t	mnt_fstype;	/* user address of fs type text */
	uint32_t	mnt_mntopts;	/* user address of options text */
	uint32_t	mnt_time;	/* user address of mount time text */
	uint_t		mnt_major;	/* device major number */
	uint_t		mnt_minor;	/* device minor number */
} extmnttab32_t;

/* 32-bit layout used for "mnttab" structure handles. */
typedef struct mnttab32 {
	uint32_t	mnt_special;	/* user address of resource text */
	uint32_t	mnt_mountp;	/* user address of mount point text */
	uint32_t	mnt_fstype;	/* user address of fs type text */
	uint32_t	mnt_mntopts;	/* user address of options text */
	uint32_t	mnt_time;	/* user address of mount time text */
} mnttab32_t;

/* 32-bit layout used for "mntentbuf" structure handles. */
struct mntentbuf32 {
	uint32_t	mbuf_emp;	/* user address of the entry struct */
	uint_t		mbuf_bufsize;	/* size of the user's text buffer */
	uint32_t	mbuf_buf;	/* user address of the text buffer */
};
#endif	/* _SYSCALL32_IMPL */
1389835ee219SRobert Harris 
1390835ee219SRobert Harris /*
1391835ee219SRobert Harris  * mntfs_copyout_element() is common code for the MNTIOC_GETMNTENT,
1392835ee219SRobert Harris  * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY ioctls. Having identifed the
1393835ee219SRobert Harris  * database element desired by the user, this function copies out the text and
1394835ee219SRobert Harris  * the pointers to the relevant userland addresses. It returns 0 on success
1395835ee219SRobert Harris  * and non-zero otherwise.
1396835ee219SRobert Harris  */
1397835ee219SRobert Harris int
1398835ee219SRobert Harris mntfs_copyout_elem(mntelem_t *elemp, struct extmnttab *uemp,
1399835ee219SRobert Harris     char *ubufp, int cmd, int datamodel)
1400835ee219SRobert Harris {
1401835ee219SRobert Harris 		STRUCT_DECL(extmnttab, ktab);
1402835ee219SRobert Harris 		char *dbbufp = elemp->mnte_text;
1403835ee219SRobert Harris 		size_t dbbufsize = elemp->mnte_text_size;
1404835ee219SRobert Harris 		struct extmnttab *dbtabp = &elemp->mnte_tab;
1405835ee219SRobert Harris 		size_t ssize;
1406835ee219SRobert Harris 		char *kbufp;
1407835ee219SRobert Harris 		int error = 0;
1408835ee219SRobert Harris 
1409835ee219SRobert Harris 
1410835ee219SRobert Harris 		/*
1411835ee219SRobert Harris 		 * We create a struct extmnttab within the kernel of the size
1412835ee219SRobert Harris 		 * determined by the user's data model. We then populate its
1413835ee219SRobert Harris 		 * fields by combining the start address of the text buffer
1414835ee219SRobert Harris 		 * supplied by the user, ubufp, with the offsets stored for
1415835ee219SRobert Harris 		 * this database element within dbtabp, a pointer to a struct
1416835ee219SRobert Harris 		 * extmnttab.
1417835ee219SRobert Harris 		 *
1418835ee219SRobert Harris 		 * Note that if the corresponding field is "-" this signifies
1419835ee219SRobert Harris 		 * no real content, and we set the address to NULL. This does
1420835ee219SRobert Harris 		 * not apply to mnt_time.
1421835ee219SRobert Harris 		 */
1422835ee219SRobert Harris 		STRUCT_INIT(ktab, datamodel);
1423835ee219SRobert Harris 		STRUCT_FSETP(ktab, mnt_special,
1424835ee219SRobert Harris 		    MNTFS_REAL_FIELD(dbbufp) ? ubufp : NULL);
1425835ee219SRobert Harris 		STRUCT_FSETP(ktab, mnt_mountp,
1426835ee219SRobert Harris 		    MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mountp) ?
1427835ee219SRobert Harris 		    ubufp + (off_t)dbtabp->mnt_mountp : NULL);
1428835ee219SRobert Harris 		STRUCT_FSETP(ktab, mnt_fstype,
1429835ee219SRobert Harris 		    MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_fstype) ?
1430835ee219SRobert Harris 		    ubufp + (off_t)dbtabp->mnt_fstype : NULL);
1431835ee219SRobert Harris 		STRUCT_FSETP(ktab, mnt_mntopts,
1432835ee219SRobert Harris 		    MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mntopts) ?
1433835ee219SRobert Harris 		    ubufp + (off_t)dbtabp->mnt_mntopts : NULL);
1434835ee219SRobert Harris 		STRUCT_FSETP(ktab, mnt_time,
1435835ee219SRobert Harris 		    ubufp + (off_t)dbtabp->mnt_time);
1436835ee219SRobert Harris 		if (cmd == MNTIOC_GETEXTMNTENT) {
1437835ee219SRobert Harris 			STRUCT_FSETP(ktab, mnt_major, dbtabp->mnt_major);
1438835ee219SRobert Harris 			STRUCT_FSETP(ktab, mnt_minor, dbtabp->mnt_minor);
1439835ee219SRobert Harris 			ssize = SIZEOF_STRUCT(extmnttab, datamodel);
1440835ee219SRobert Harris 		} else {
1441835ee219SRobert Harris 			ssize = SIZEOF_STRUCT(mnttab, datamodel);
1442835ee219SRobert Harris 		}
1443835ee219SRobert Harris 		if (copyout(STRUCT_BUF(ktab), uemp, ssize))
1444835ee219SRobert Harris 			return (EFAULT);
1445835ee219SRobert Harris 
1446835ee219SRobert Harris 		/*
1447835ee219SRobert Harris 		 * We create a text buffer in the kernel into which we copy the
1448835ee219SRobert Harris 		 * /etc/mnttab entry for this element. We change the tab and
1449835ee219SRobert Harris 		 * new-line delimiters to null bytes before copying out the
1450835ee219SRobert Harris 		 * buffer.
1451835ee219SRobert Harris 		 */
1452835ee219SRobert Harris 		kbufp = kmem_alloc(dbbufsize, KM_SLEEP);
1453835ee219SRobert Harris 		bcopy(elemp->mnte_text, kbufp, dbbufsize);
1454835ee219SRobert Harris 		*(kbufp + (off_t)dbtabp->mnt_mountp - 1) =
1455835ee219SRobert Harris 		    *(kbufp + (off_t)dbtabp->mnt_fstype - 1) =
1456835ee219SRobert Harris 		    *(kbufp + (off_t)dbtabp->mnt_mntopts - 1) =
1457835ee219SRobert Harris 		    *(kbufp + (off_t)dbtabp->mnt_time - 1) =
1458835ee219SRobert Harris 		    *(kbufp + dbbufsize - 1) = '\0';
1459835ee219SRobert Harris 		if (copyout(kbufp, ubufp, dbbufsize))
1460835ee219SRobert Harris 			error = EFAULT;
1461835ee219SRobert Harris 
1462835ee219SRobert Harris 		kmem_free(kbufp, dbbufsize);
1463835ee219SRobert Harris 		return (error);
1464835ee219SRobert Harris }
1465835ee219SRobert Harris 
14667c478bd9Sstevel@tonic-gate /* ARGSUSED */
14677c478bd9Sstevel@tonic-gate static int
1468835ee219SRobert Harris mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
1469835ee219SRobert Harris     int *rvalp, caller_context_t *ct)
14707c478bd9Sstevel@tonic-gate {
14717c478bd9Sstevel@tonic-gate 	uint_t *up = (uint_t *)arg;
14727c478bd9Sstevel@tonic-gate 	mntnode_t *mnp = VTOM(vp);
1473835ee219SRobert Harris 	mntsnap_t *snapp = &mnp->mnt_ioctl;
1474835ee219SRobert Harris 	int error = 0;
1475a19609f8Sjv227347 	zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone;
1476835ee219SRobert Harris 	krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
1477835ee219SRobert Harris 	model_t datamodel = flag & DATAMODEL_MASK;
14787c478bd9Sstevel@tonic-gate 
14797c478bd9Sstevel@tonic-gate 	switch (cmd) {
14807c478bd9Sstevel@tonic-gate 
1481835ee219SRobert Harris 	case MNTIOC_NMNTS:  		/* get no. of mounted resources */
1482835ee219SRobert Harris 	{
1483835ee219SRobert Harris 		rw_enter(&mnp->mnt_contents, RW_READER);
1484835ee219SRobert Harris 		if (snapp->mnts_nmnts == 0 ||
1485835ee219SRobert Harris 		    (snapp->mnts_flags & MNTS_REWIND)) {
148654c529d4SViswanathan Kannappan 			if (!rw_tryupgrade(&mnp->mnt_contents)) {
148754c529d4SViswanathan Kannappan 				rw_exit(&mnp->mnt_contents);
148854c529d4SViswanathan Kannappan 				rw_enter(&mnp->mnt_contents, RW_WRITER);
148954c529d4SViswanathan Kannappan 			}
1490835ee219SRobert Harris 			if (snapp->mnts_nmnts == 0 ||
1491835ee219SRobert Harris 			    (snapp->mnts_flags & MNTS_REWIND))
1492835ee219SRobert Harris 				mntfs_snapshot(mnp, snapp);
1493835ee219SRobert Harris 		}
149454c529d4SViswanathan Kannappan 		rw_exit(&mnp->mnt_contents);
1495835ee219SRobert Harris 
1496835ee219SRobert Harris 		if (suword32(up, snapp->mnts_nmnts) != 0)
14977c478bd9Sstevel@tonic-gate 			error = EFAULT;
14987c478bd9Sstevel@tonic-gate 		break;
14997c478bd9Sstevel@tonic-gate 	}
15007c478bd9Sstevel@tonic-gate 
1501835ee219SRobert Harris 	case MNTIOC_GETDEVLIST:  	/* get mounted device major/minor nos */
1502835ee219SRobert Harris 	{
15037c478bd9Sstevel@tonic-gate 		size_t len;
1504835ee219SRobert Harris 		uint_t *devlist;
1505835ee219SRobert Harris 		mntelem_t *elemp;
1506835ee219SRobert Harris 		int i = 0;
15077c478bd9Sstevel@tonic-gate 
1508835ee219SRobert Harris 		rw_enter(&mnp->mnt_contents, RW_READER);
1509835ee219SRobert Harris 		if (snapp->mnts_nmnts == 0 ||
1510835ee219SRobert Harris 		    (snapp->mnts_flags & MNTS_REWIND)) {
151154c529d4SViswanathan Kannappan 			if (!rw_tryupgrade(&mnp->mnt_contents)) {
151254c529d4SViswanathan Kannappan 				rw_exit(&mnp->mnt_contents);
151354c529d4SViswanathan Kannappan 				rw_enter(&mnp->mnt_contents, RW_WRITER);
151454c529d4SViswanathan Kannappan 			}
1515835ee219SRobert Harris 			if (snapp->mnts_nmnts == 0 ||
1516835ee219SRobert Harris 			    (snapp->mnts_flags & MNTS_REWIND))
1517835ee219SRobert Harris 				mntfs_snapshot(mnp, snapp);
151854c529d4SViswanathan Kannappan 			rw_downgrade(&mnp->mnt_contents);
151954c529d4SViswanathan Kannappan 		}
15207c478bd9Sstevel@tonic-gate 
1521835ee219SRobert Harris 		/* Create a local buffer to hold the device numbers. */
1522835ee219SRobert Harris 		len = 2 * snapp->mnts_nmnts * sizeof (uint_t);
15237c478bd9Sstevel@tonic-gate 		devlist = kmem_alloc(len, KM_SLEEP);
15247c478bd9Sstevel@tonic-gate 
1525835ee219SRobert Harris 		/*
1526835ee219SRobert Harris 		 * Walk the database elements for this snapshot and add their
1527835ee219SRobert Harris 		 * major and minor numbers.
1528835ee219SRobert Harris 		 */
1529835ee219SRobert Harris 		rw_enter(dblockp, RW_READER);
1530835ee219SRobert Harris 		for (elemp = snapp->mnts_first; elemp;
1531835ee219SRobert Harris 		    elemp = mntfs_get_next_elem(snapp, elemp)) {
1532835ee219SRobert Harris 				devlist[2 * i] = elemp->mnte_tab.mnt_major;
1533835ee219SRobert Harris 				devlist[2 * i + 1] = elemp->mnte_tab.mnt_minor;
1534835ee219SRobert Harris 				i++;
15357c478bd9Sstevel@tonic-gate 		}
1536835ee219SRobert Harris 		rw_exit(dblockp);
1537835ee219SRobert Harris 		ASSERT(i == snapp->mnts_nmnts);
1538835ee219SRobert Harris 		rw_exit(&mnp->mnt_contents);
15397c478bd9Sstevel@tonic-gate 
15407c478bd9Sstevel@tonic-gate 		error = xcopyout(devlist, up, len);
15417c478bd9Sstevel@tonic-gate 		kmem_free(devlist, len);
15427c478bd9Sstevel@tonic-gate 		break;
15437c478bd9Sstevel@tonic-gate 	}
15447c478bd9Sstevel@tonic-gate 
15457c478bd9Sstevel@tonic-gate 	case MNTIOC_SETTAG:		/* set tag on mounted file system */
15467c478bd9Sstevel@tonic-gate 	case MNTIOC_CLRTAG:		/* clear tag on mounted file system */
15477c478bd9Sstevel@tonic-gate 	{
15487c478bd9Sstevel@tonic-gate 		struct mnttagdesc *dp = (struct mnttagdesc *)arg;
15497c478bd9Sstevel@tonic-gate 		STRUCT_DECL(mnttagdesc, tagdesc);
15507c478bd9Sstevel@tonic-gate 		char *cptr;
15517c478bd9Sstevel@tonic-gate 		uint32_t major, minor;
15527c478bd9Sstevel@tonic-gate 		char tagbuf[MAX_MNTOPT_TAG];
15537c478bd9Sstevel@tonic-gate 		char *pbuf;
15547c478bd9Sstevel@tonic-gate 		size_t len;
15557c478bd9Sstevel@tonic-gate 		uint_t start = 0;
15567c478bd9Sstevel@tonic-gate 		mntdata_t *mntdata = MTOD(mnp);
1557a19609f8Sjv227347 		zone_t *zone = mntdata->mnt_zone_ref.zref_zone;
15587c478bd9Sstevel@tonic-gate 
15597c478bd9Sstevel@tonic-gate 		STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK);
15607c478bd9Sstevel@tonic-gate 		if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) {
15617c478bd9Sstevel@tonic-gate 			error = EFAULT;
15627c478bd9Sstevel@tonic-gate 			break;
15637c478bd9Sstevel@tonic-gate 		}
15647c478bd9Sstevel@tonic-gate 		pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
15657c478bd9Sstevel@tonic-gate 		if (zone != global_zone) {
15667c478bd9Sstevel@tonic-gate 			(void) strcpy(pbuf, zone->zone_rootpath);
15677c478bd9Sstevel@tonic-gate 			/* truncate "/" and nul */
15687c478bd9Sstevel@tonic-gate 			start = zone->zone_rootpathlen - 2;
15697c478bd9Sstevel@tonic-gate 			ASSERT(pbuf[start] == '/');
15707c478bd9Sstevel@tonic-gate 		}
15717c478bd9Sstevel@tonic-gate 		cptr = STRUCT_FGETP(tagdesc, mtd_mntpt);
15727c478bd9Sstevel@tonic-gate 		error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len);
15737c478bd9Sstevel@tonic-gate 		if (error) {
15747c478bd9Sstevel@tonic-gate 			kmem_free(pbuf, MAXPATHLEN);
15757c478bd9Sstevel@tonic-gate 			break;
15767c478bd9Sstevel@tonic-gate 		}
15777c478bd9Sstevel@tonic-gate 		if (start != 0 && pbuf[start] != '/') {
15787c478bd9Sstevel@tonic-gate 			kmem_free(pbuf, MAXPATHLEN);
15797c478bd9Sstevel@tonic-gate 			error = EINVAL;
15807c478bd9Sstevel@tonic-gate 			break;
15817c478bd9Sstevel@tonic-gate 		}
15827c478bd9Sstevel@tonic-gate 		cptr = STRUCT_FGETP(tagdesc, mtd_tag);
15837c478bd9Sstevel@tonic-gate 		if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) {
15847c478bd9Sstevel@tonic-gate 			kmem_free(pbuf, MAXPATHLEN);
15857c478bd9Sstevel@tonic-gate 			break;
15867c478bd9Sstevel@tonic-gate 		}
15877c478bd9Sstevel@tonic-gate 		major = STRUCT_FGET(tagdesc, mtd_major);
15887c478bd9Sstevel@tonic-gate 		minor = STRUCT_FGET(tagdesc, mtd_minor);
15897c478bd9Sstevel@tonic-gate 		if (cmd == MNTIOC_SETTAG)
15907c478bd9Sstevel@tonic-gate 			error = vfs_settag(major, minor, pbuf, tagbuf, cr);
15917c478bd9Sstevel@tonic-gate 		else
15927c478bd9Sstevel@tonic-gate 			error = vfs_clrtag(major, minor, pbuf, tagbuf, cr);
15937c478bd9Sstevel@tonic-gate 		kmem_free(pbuf, MAXPATHLEN);
15947c478bd9Sstevel@tonic-gate 		break;
15957c478bd9Sstevel@tonic-gate 	}
15967c478bd9Sstevel@tonic-gate 
15977c478bd9Sstevel@tonic-gate 	case MNTIOC_SHOWHIDDEN:
15987c478bd9Sstevel@tonic-gate 	{
15995545576aSRobert Harris 		rw_enter(&mnp->mnt_contents, RW_WRITER);
16007c478bd9Sstevel@tonic-gate 		mnp->mnt_flags |= MNT_SHOWHIDDEN;
16015545576aSRobert Harris 		rw_exit(&mnp->mnt_contents);
16027c478bd9Sstevel@tonic-gate 		break;
16037c478bd9Sstevel@tonic-gate 	}
16047c478bd9Sstevel@tonic-gate 
1605835ee219SRobert Harris 	case MNTIOC_GETMNTANY:
16067c478bd9Sstevel@tonic-gate 	{
1607835ee219SRobert Harris 		STRUCT_DECL(mntentbuf, embuf);	/* Our copy of user's embuf */
1608835ee219SRobert Harris 		STRUCT_DECL(extmnttab, ktab);	/* Out copy of user's emp */
1609835ee219SRobert Harris 		struct extmnttab *uemp;		/* uaddr of user's emp */
1610835ee219SRobert Harris 		char *ubufp;			/* uaddr of user's text buf */
1611835ee219SRobert Harris 		size_t ubufsize;		/* size of the above */
1612835ee219SRobert Harris 		struct extmnttab preftab;	/* our version of user's emp */
1613835ee219SRobert Harris 		char *prefbuf;			/* our copy of user's text */
1614835ee219SRobert Harris 		mntelem_t *elemp;		/* a database element */
1615835ee219SRobert Harris 		struct extmnttab *dbtabp;	/* element's extmnttab */
1616835ee219SRobert Harris 		char *dbbufp;			/* element's text buf */
1617835ee219SRobert Harris 		size_t dbbufsize;		/* size of the above */
1618835ee219SRobert Harris 		vtype_t type;			/* type, if any, of special */
16197c478bd9Sstevel@tonic-gate 
1620835ee219SRobert Harris 
16217c478bd9Sstevel@tonic-gate 		/*
1622835ee219SRobert Harris 		 * embuf is a struct embuf within the kernel. We copy into it
1623835ee219SRobert Harris 		 * the struct embuf supplied by the user.
16247c478bd9Sstevel@tonic-gate 		 */
1625835ee219SRobert Harris 		STRUCT_INIT(embuf, datamodel);
1626835ee219SRobert Harris 		if (copyin((void *) arg, STRUCT_BUF(embuf),
1627835ee219SRobert Harris 		    STRUCT_SIZE(embuf))) {
1628835ee219SRobert Harris 			error = EFAULT;
1629835ee219SRobert Harris 			break;
1630835ee219SRobert Harris 		}
1631835ee219SRobert Harris 		uemp = STRUCT_FGETP(embuf, mbuf_emp);
1632835ee219SRobert Harris 		ubufp = STRUCT_FGETP(embuf, mbuf_buf);
1633835ee219SRobert Harris 		ubufsize = STRUCT_FGET(embuf, mbuf_bufsize);
1634835ee219SRobert Harris 
1635835ee219SRobert Harris 		/*
1636835ee219SRobert Harris 		 * Check that the text buffer offered by the user is the
1637835ee219SRobert Harris 		 * agreed size.
1638835ee219SRobert Harris 		 */
1639835ee219SRobert Harris 		if (ubufsize != MNT_LINE_MAX) {
1640835ee219SRobert Harris 			error = EINVAL;
1641835ee219SRobert Harris 			break;
16427c478bd9Sstevel@tonic-gate 		}
16437c478bd9Sstevel@tonic-gate 
1644835ee219SRobert Harris 		/* Copy the user-supplied entry into a local buffer. */
1645835ee219SRobert Harris 		prefbuf = kmem_alloc(MNT_LINE_MAX, KM_SLEEP);
1646835ee219SRobert Harris 		if (copyin(ubufp, prefbuf, MNT_LINE_MAX)) {
1647835ee219SRobert Harris 			kmem_free(prefbuf, MNT_LINE_MAX);
1648835ee219SRobert Harris 			error = EFAULT;
1649835ee219SRobert Harris 			break;
1650835ee219SRobert Harris 		}
1651835ee219SRobert Harris 
1652835ee219SRobert Harris 		/* Ensure that any string within it is null-terminated. */
1653835ee219SRobert Harris 		*(prefbuf + MNT_LINE_MAX - 1) = 0;
1654835ee219SRobert Harris 
1655835ee219SRobert Harris 		/* Copy in the user-supplied mpref */
1656835ee219SRobert Harris 		STRUCT_INIT(ktab, datamodel);
1657835ee219SRobert Harris 		if (copyin(uemp, STRUCT_BUF(ktab),
1658835ee219SRobert Harris 		    SIZEOF_STRUCT(mnttab, datamodel))) {
1659835ee219SRobert Harris 			kmem_free(prefbuf, MNT_LINE_MAX);
1660835ee219SRobert Harris 			error = EFAULT;
1661835ee219SRobert Harris 			break;
1662835ee219SRobert Harris 		}
1663835ee219SRobert Harris 
1664835ee219SRobert Harris 		/*
1665835ee219SRobert Harris 		 * Copy the members of the user's pref struct into a local
1666835ee219SRobert Harris 		 * struct. The pointers need to be offset and verified to
1667835ee219SRobert Harris 		 * ensure that they lie within the bounds of the buffer.
1668835ee219SRobert Harris 		 */
1669835ee219SRobert Harris 		preftab.mnt_special = mntfs_import_addr(STRUCT_FGETP(ktab,
1670835ee219SRobert Harris 		    mnt_special), ubufp, prefbuf, MNT_LINE_MAX);
1671835ee219SRobert Harris 		preftab.mnt_mountp = mntfs_import_addr(STRUCT_FGETP(ktab,
1672835ee219SRobert Harris 		    mnt_mountp), ubufp, prefbuf, MNT_LINE_MAX);
1673835ee219SRobert Harris 		preftab.mnt_fstype = mntfs_import_addr(STRUCT_FGETP(ktab,
1674835ee219SRobert Harris 		    mnt_fstype), ubufp, prefbuf, MNT_LINE_MAX);
1675835ee219SRobert Harris 		preftab.mnt_mntopts = mntfs_import_addr(STRUCT_FGETP(ktab,
1676835ee219SRobert Harris 		    mnt_mntopts), ubufp, prefbuf, MNT_LINE_MAX);
1677835ee219SRobert Harris 		preftab.mnt_time = mntfs_import_addr(STRUCT_FGETP(ktab,
1678835ee219SRobert Harris 		    mnt_time), ubufp, prefbuf, MNT_LINE_MAX);
1679835ee219SRobert Harris 
1680835ee219SRobert Harris 		/*
1681835ee219SRobert Harris 		 * If the user specifies a mounted resource that is a special
1682835ee219SRobert Harris 		 * device then we capture its mode and major and minor numbers;
16835545576aSRobert Harris 		 * cf. the block comment below.
1684835ee219SRobert Harris 		 */
1685835ee219SRobert Harris 		type = mntfs_special_info_string(preftab.mnt_special,
1686835ee219SRobert Harris 		    &preftab.mnt_major, &preftab.mnt_minor, cr);
1687835ee219SRobert Harris 
1688835ee219SRobert Harris 		rw_enter(&mnp->mnt_contents, RW_WRITER);
1689835ee219SRobert Harris 		if (snapp->mnts_nmnts == 0 ||
1690835ee219SRobert Harris 		    (snapp->mnts_flags & MNTS_REWIND))
1691835ee219SRobert Harris 			mntfs_snapshot(mnp, snapp);
1692835ee219SRobert Harris 
1693835ee219SRobert Harris 		/*
1694835ee219SRobert Harris 		 * This is the core functionality that implements getmntany().
1695835ee219SRobert Harris 		 * We walk through the mntfs database until we find an element
1696835ee219SRobert Harris 		 * matching the user's preferences that are contained in
1697835ee219SRobert Harris 		 * preftab. Typically, this means checking that the text
1698835ee219SRobert Harris 		 * matches. However, the mounted resource is special: if the
1699835ee219SRobert Harris 		 * user is looking for a special device then we must find a
1700835ee219SRobert Harris 		 * database element with the same major and minor numbers and
1701835ee219SRobert Harris 		 * the same type, i.e. VBLK or VCHR. The type is not recorded
1702835ee219SRobert Harris 		 * in the element because it cannot be inferred from the vfs_t.
1703835ee219SRobert Harris 		 * We therefore check the type of suitable candidates via
1704835ee219SRobert Harris 		 * mntfs_special_info_element(); since this calls into the
1705835ee219SRobert Harris 		 * underlying file system we make sure to drop the database lock
1706835ee219SRobert Harris 		 * first.
1707835ee219SRobert Harris 		 */
1708835ee219SRobert Harris 		elemp = snapp->mnts_next;
1709835ee219SRobert Harris 		rw_enter(dblockp, RW_READER);
1710835ee219SRobert Harris 		for (;;) {
1711835ee219SRobert Harris 			for (; elemp; elemp = mntfs_get_next_elem(snapp,
1712835ee219SRobert Harris 			    elemp)) {
1713835ee219SRobert Harris 				dbtabp = &elemp->mnte_tab;
1714835ee219SRobert Harris 				dbbufp = elemp->mnte_text;
1715835ee219SRobert Harris 				dbbufsize = elemp->mnte_text_size;
1716835ee219SRobert Harris 
1717835ee219SRobert Harris 				if (((type &&
1718835ee219SRobert Harris 				    dbtabp->mnt_major == preftab.mnt_major &&
1719835ee219SRobert Harris 				    dbtabp->mnt_minor == preftab.mnt_minor &&
1720835ee219SRobert Harris 				    MNTFS_REAL_FIELD(dbbufp)) ||
1721835ee219SRobert Harris 				    (!type && (!preftab.mnt_special ||
1722835ee219SRobert Harris 				    mntfs_same_word(preftab.mnt_special,
1723835ee219SRobert Harris 				    prefbuf, MNT_LINE_MAX, (off_t)0, dbbufp,
1724835ee219SRobert Harris 				    dbbufsize)))) &&
1725835ee219SRobert Harris 
1726835ee219SRobert Harris 				    (!preftab.mnt_mountp || mntfs_same_word(
1727835ee219SRobert Harris 				    preftab.mnt_mountp, prefbuf, MNT_LINE_MAX,
1728835ee219SRobert Harris 				    (off_t)dbtabp->mnt_mountp, dbbufp,
1729835ee219SRobert Harris 				    dbbufsize)) &&
1730835ee219SRobert Harris 
1731835ee219SRobert Harris 				    (!preftab.mnt_fstype || mntfs_same_word(
1732835ee219SRobert Harris 				    preftab.mnt_fstype, prefbuf, MNT_LINE_MAX,
1733835ee219SRobert Harris 				    (off_t)dbtabp->mnt_fstype, dbbufp,
1734835ee219SRobert Harris 				    dbbufsize)) &&
1735835ee219SRobert Harris 
1736835ee219SRobert Harris 				    (!preftab.mnt_mntopts || mntfs_same_word(
1737835ee219SRobert Harris 				    preftab.mnt_mntopts, prefbuf, MNT_LINE_MAX,
1738835ee219SRobert Harris 				    (off_t)dbtabp->mnt_mntopts, dbbufp,
1739835ee219SRobert Harris 				    dbbufsize)) &&
1740835ee219SRobert Harris 
1741835ee219SRobert Harris 				    (!preftab.mnt_time || mntfs_same_word(
1742835ee219SRobert Harris 				    preftab.mnt_time, prefbuf, MNT_LINE_MAX,
1743835ee219SRobert Harris 				    (off_t)dbtabp->mnt_time, dbbufp,
1744835ee219SRobert Harris 				    dbbufsize)))
1745835ee219SRobert Harris 					break;
1746835ee219SRobert Harris 			}
1747835ee219SRobert Harris 			rw_exit(dblockp);
1748835ee219SRobert Harris 
1749835ee219SRobert Harris 			if (elemp == NULL || type == 0 ||
1750835ee219SRobert Harris 			    type == mntfs_special_info_element(elemp, cr))
1751835ee219SRobert Harris 				break;
1752835ee219SRobert Harris 
1753835ee219SRobert Harris 			rw_enter(dblockp, RW_READER);
1754835ee219SRobert Harris 			elemp = mntfs_get_next_elem(snapp, elemp);
1755835ee219SRobert Harris 		}
1756835ee219SRobert Harris 
1757835ee219SRobert Harris 		kmem_free(prefbuf, MNT_LINE_MAX);
1758835ee219SRobert Harris 
1759835ee219SRobert Harris 		/* If we failed to find a match then return EOF. */
1760835ee219SRobert Harris 		if (elemp == NULL) {
1761835ee219SRobert Harris 			rw_exit(&mnp->mnt_contents);
1762835ee219SRobert Harris 			*rvalp = MNTFS_EOF;
1763835ee219SRobert Harris 			break;
1764835ee219SRobert Harris 		}
1765835ee219SRobert Harris 
1766835ee219SRobert Harris 		/*
1767835ee219SRobert Harris 		 * Check that the text buffer offered by the user will be large
1768835ee219SRobert Harris 		 * enough to accommodate the text for this entry.
1769835ee219SRobert Harris 		 */
1770835ee219SRobert Harris 		if (elemp->mnte_text_size > MNT_LINE_MAX) {
1771835ee219SRobert Harris 			rw_exit(&mnp->mnt_contents);
1772835ee219SRobert Harris 			*rvalp = MNTFS_TOOLONG;
1773835ee219SRobert Harris 			break;
1774835ee219SRobert Harris 		}
1775835ee219SRobert Harris 
1776835ee219SRobert Harris 		/*
1777835ee219SRobert Harris 		 * Populate the user's struct mnttab and text buffer using the
1778835ee219SRobert Harris 		 * element's contents.
1779835ee219SRobert Harris 		 */
1780835ee219SRobert Harris 		if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) {
1781835ee219SRobert Harris 			error = EFAULT;
17827c478bd9Sstevel@tonic-gate 		} else {
1783835ee219SRobert Harris 			rw_enter(dblockp, RW_READER);
1784835ee219SRobert Harris 			elemp = mntfs_get_next_elem(snapp, elemp);
1785835ee219SRobert Harris 			rw_exit(dblockp);
1786835ee219SRobert Harris 			snapp->mnts_next = elemp;
17877c478bd9Sstevel@tonic-gate 		}
178854c529d4SViswanathan Kannappan 		rw_exit(&mnp->mnt_contents);
1789835ee219SRobert Harris 		break;
179054c529d4SViswanathan Kannappan 	}
17917c478bd9Sstevel@tonic-gate 
1792835ee219SRobert Harris 	case MNTIOC_GETMNTENT:
1793835ee219SRobert Harris 	case MNTIOC_GETEXTMNTENT:
1794835ee219SRobert Harris 	{
1795835ee219SRobert Harris 		STRUCT_DECL(mntentbuf, embuf);	/* Our copy of user's embuf */
1796835ee219SRobert Harris 		struct extmnttab *uemp;		/* uaddr of user's emp */
1797835ee219SRobert Harris 		char *ubufp;			/* uaddr of user's text buf */
1798835ee219SRobert Harris 		size_t ubufsize;		/* size of the above */
1799835ee219SRobert Harris 		mntelem_t *elemp;		/* a database element */
1800835ee219SRobert Harris 
1801835ee219SRobert Harris 
1802835ee219SRobert Harris 		rw_enter(&mnp->mnt_contents, RW_WRITER);
1803835ee219SRobert Harris 		if (snapp->mnts_nmnts == 0 ||
1804835ee219SRobert Harris 		    (snapp->mnts_flags & MNTS_REWIND))
1805835ee219SRobert Harris 			mntfs_snapshot(mnp, snapp);
1806835ee219SRobert Harris 		if ((elemp = snapp->mnts_next) == NULL) {
1807835ee219SRobert Harris 			rw_exit(&mnp->mnt_contents);
1808835ee219SRobert Harris 			*rvalp = MNTFS_EOF;
1809835ee219SRobert Harris 			break;
1810835ee219SRobert Harris 		}
1811835ee219SRobert Harris 
1812835ee219SRobert Harris 		/*
1813835ee219SRobert Harris 		 * embuf is a struct mntentbuf within the kernel. We copy into
1814835ee219SRobert Harris 		 * it the struct mntentbuf supplied by the user.
1815835ee219SRobert Harris 		 */
1816835ee219SRobert Harris 		STRUCT_INIT(embuf, datamodel);
1817835ee219SRobert Harris 		if (copyin((void *) arg, STRUCT_BUF(embuf),
1818835ee219SRobert Harris 		    STRUCT_SIZE(embuf))) {
1819835ee219SRobert Harris 			rw_exit(&mnp->mnt_contents);
1820835ee219SRobert Harris 			error = EFAULT;
1821835ee219SRobert Harris 			break;
1822835ee219SRobert Harris 		}
1823835ee219SRobert Harris 		uemp = STRUCT_FGETP(embuf, mbuf_emp);
1824835ee219SRobert Harris 		ubufp = STRUCT_FGETP(embuf, mbuf_buf);
1825835ee219SRobert Harris 		ubufsize = STRUCT_FGET(embuf, mbuf_bufsize);
1826835ee219SRobert Harris 
1827835ee219SRobert Harris 		/*
1828835ee219SRobert Harris 		 * Check that the text buffer offered by the user will be large
1829835ee219SRobert Harris 		 * enough to accommodate the text for this entry.
1830835ee219SRobert Harris 		 */
1831835ee219SRobert Harris 		if (elemp->mnte_text_size > ubufsize) {
1832835ee219SRobert Harris 			rw_exit(&mnp->mnt_contents);
1833835ee219SRobert Harris 			*rvalp = MNTFS_TOOLONG;
1834835ee219SRobert Harris 			break;
1835835ee219SRobert Harris 		}
1836835ee219SRobert Harris 
1837835ee219SRobert Harris 		/*
1838835ee219SRobert Harris 		 * Populate the user's struct mnttab and text buffer using the
1839835ee219SRobert Harris 		 * element's contents.
1840835ee219SRobert Harris 		 */
1841835ee219SRobert Harris 		if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) {
1842835ee219SRobert Harris 			error = EFAULT;
1843835ee219SRobert Harris 		} else {
1844835ee219SRobert Harris 			rw_enter(dblockp, RW_READER);
1845835ee219SRobert Harris 			elemp = mntfs_get_next_elem(snapp, elemp);
1846835ee219SRobert Harris 			rw_exit(dblockp);
1847835ee219SRobert Harris 			snapp->mnts_next = elemp;
1848835ee219SRobert Harris 		}
1849835ee219SRobert Harris 		rw_exit(&mnp->mnt_contents);
18507c478bd9Sstevel@tonic-gate 		break;
18517c478bd9Sstevel@tonic-gate 	}
18527c478bd9Sstevel@tonic-gate 
18537c478bd9Sstevel@tonic-gate 	default:
18547c478bd9Sstevel@tonic-gate 		error = EINVAL;
18557c478bd9Sstevel@tonic-gate 		break;
18567c478bd9Sstevel@tonic-gate 	}
18577c478bd9Sstevel@tonic-gate 
18587c478bd9Sstevel@tonic-gate 	return (error);
18597c478bd9Sstevel@tonic-gate }
18607c478bd9Sstevel@tonic-gate 
18617c478bd9Sstevel@tonic-gate /*
18625545576aSRobert Harris  * mntfs provides a new vnode for each open(2). Two vnodes will represent the
18635545576aSRobert Harris  * same instance of /etc/mnttab if they share the same (zone-specific) vfs.
18645545576aSRobert Harris  */
18655545576aSRobert Harris /* ARGSUSED */
18665545576aSRobert Harris int
18675545576aSRobert Harris mntcmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
18685545576aSRobert Harris {
18695545576aSRobert Harris 	return (vp1 != NULL && vp2 != NULL && vp1->v_vfsp == vp2->v_vfsp);
18705545576aSRobert Harris }
18715545576aSRobert Harris 
18725545576aSRobert Harris /*
18737c478bd9Sstevel@tonic-gate  * /mntfs vnode operations vector
18747c478bd9Sstevel@tonic-gate  */
18757c478bd9Sstevel@tonic-gate const fs_operation_def_t mnt_vnodeops_template[] = {
1876aa59c4cbSrsb 	VOPNAME_OPEN,		{ .vop_open = mntopen },
1877aa59c4cbSrsb 	VOPNAME_CLOSE,		{ .vop_close = mntclose },
1878aa59c4cbSrsb 	VOPNAME_READ,		{ .vop_read = mntread },
1879aa59c4cbSrsb 	VOPNAME_IOCTL,		{ .vop_ioctl = mntioctl },
1880aa59c4cbSrsb 	VOPNAME_GETATTR,	{ .vop_getattr = mntgetattr },
1881aa59c4cbSrsb 	VOPNAME_ACCESS,		{ .vop_access = mntaccess },
1882aa59c4cbSrsb 	VOPNAME_FSYNC,		{ .vop_fsync = mntfsync },
1883aa59c4cbSrsb 	VOPNAME_INACTIVE,	{ .vop_inactive = mntinactive },
1884aa59c4cbSrsb 	VOPNAME_SEEK,		{ .vop_seek = mntseek },
1885aa59c4cbSrsb 	VOPNAME_POLL,		{ .vop_poll = mntpoll },
18865545576aSRobert Harris 	VOPNAME_CMP,		{ .vop_cmp = mntcmp },
1887aa59c4cbSrsb 	VOPNAME_DISPOSE,	{ .error = fs_error },
1888aa59c4cbSrsb 	VOPNAME_SHRLOCK,	{ .error = fs_error },
18897c478bd9Sstevel@tonic-gate 	NULL,			NULL
18907c478bd9Sstevel@tonic-gate };
1891