/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MNTROOTINO 2 static mntnode_t *mntgetnode(vnode_t *); vnodeops_t *mntvnodeops; extern void vfs_mnttab_readop(void); /* * Design of kernel mnttab accounting. * * mntfs provides two methods of reading the in-kernel mnttab, i.e. the state of * the mounted resources: the read-only file /etc/mnttab, and a collection of * ioctl() commands. Most of these interfaces are public and are described in * mnttab(4). Three private ioctl() commands, MNTIOC_GETMNTENT, * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY, provide for the getmntent(3C) * family of functions, allowing them to support white space in mount names. * * A significant feature of mntfs is that it provides a file descriptor with a * snapshot once it begins to consume mnttab data. Thus, as the process * continues to consume data, its view of the in-kernel mnttab does not change * even if resources are mounted or unmounted. 
The intent is to ensure that * processes are guaranteed to read self-consistent data even as the system * changes. * * The snapshot is implemented by a "database", unique to each zone, that * comprises a linked list of mntelem_ts. The database is identified by * zone_mntfs_db and is protected by zone_mntfs_db_lock. Each element contains * the text entry in /etc/mnttab for a mounted resource, i.e. a vfs_t, and is * marked with its time of "birth", i.e. creation. An element is "killed", and * marked with its time of death, when it is found to be out of date, e.g. when * the corresponding resource has been unmounted. * * When a process performs the first read() or ioctl() for a file descriptor for * /etc/mnttab, the database is updated by a call to mntfs_snapshot() to ensure * that an element exists for each currently mounted resource. Following this, * the current time is written into a snapshot structure, a mntsnap_t, embedded * in the descriptor's mntnode_t. * * mntfs is able to enumerate the /etc/mnttab entries corresponding to a * particular file descriptor by searching the database for entries that were * born before the appropriate snapshot and that either are still alive or died * after the snapshot was created. Consumers use the iterator function * mntfs_get_next_elem() to identify the next suitable element in the database. * * Each snapshot has a hold on its corresponding database elements, effected by * a per-element reference count. At last close(), a snapshot is destroyed in * mntfs_freesnap() by releasing all of its holds; an element is destroyed if * its reference count becomes zero. Therefore the database never exists unless * there is at least one active consumer of /etc/mnttab. * * getmntent(3C) et al. "do not open, close or rewind the file." This implies * that getmntent() and read() must be able to operate without interaction on * the same file descriptor; this is accomplished by the use of separate * mntsnap_ts for both read() and ioctl(). 
* * mntfs observes the following lock-ordering: * * mnp->mnt_contents -> vfslist -> zonep->zone_mntfs_db_lock * * NOTE: The following variable enables the generation of the "dev=xxx" * in the option string for a mounted file system. Really this should * be gotten rid of altogether, but for the sake of backwards compatibility * we had to leave it in. It is defined as a 32-bit device number. This * means that when 64-bit device numbers are in use, if either the major or * minor part of the device number will not fit in a 16 bit quantity, the * "dev=" will be set to NODEV (0x7fffffff). See PSARC 1999/566 and * 1999/131 for details. The cmpldev() function used to generate the 32-bit * device number handles this check and assigns the proper value. */ int mntfs_enabledev = 1; /* enable old "dev=xxx" option */ extern void vfs_mono_time(timespec_t *); enum { MNTFS_FIRST, MNTFS_SECOND, MNTFS_NEITHER }; /* * Determine whether a field within a line from /etc/mnttab contains actual * content or simply the marker string "-". This never applies to the time, * therefore the delimiter must be a tab. */ #define MNTFS_REAL_FIELD(x) (*(x) != '-' || *((x) + 1) != '\t') static int mntfs_devsize(struct vfs *vfsp) { dev32_t odev; (void) cmpldev(&odev, vfsp->vfs_dev); return (snprintf(NULL, 0, "dev=%x", odev)); } static int mntfs_devprint(struct vfs *vfsp, char *buf) { dev32_t odev; (void) cmpldev(&odev, vfsp->vfs_dev); return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev)); } /* Identify which, if either, of two supplied timespec structs is newer. 
*/ static int mntfs_newest(timespec_t *a, timespec_t *b) { if (a->tv_sec == b->tv_sec && a->tv_nsec == b->tv_nsec) { return (MNTFS_NEITHER); } else if (b->tv_sec > a->tv_sec || (b->tv_sec == a->tv_sec && b->tv_nsec > a->tv_nsec)) { return (MNTFS_SECOND); } else { return (MNTFS_FIRST); } } static int mntfs_optsize(struct vfs *vfsp) { int i, size = 0; mntopt_t *mop; for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { mop = &vfsp->vfs_mntopts.mo_list[i]; if (mop->mo_flags & MO_NODISPLAY) continue; if (mop->mo_flags & MO_SET) { if (size) size++; /* space for comma */ size += strlen(mop->mo_name); /* * count option value if there is one */ if (mop->mo_arg != NULL) { size += strlen(mop->mo_arg) + 1; } } } if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { /* * Add space for "zone=" if required. */ if (size) size++; /* space for comma */ size += sizeof ("zone=") - 1; size += strlen(vfsp->vfs_zone->zone_name); } if (mntfs_enabledev) { if (size != 0) size++; /* space for comma */ size += mntfs_devsize(vfsp); } if (size == 0) size = strlen("-"); return (size); } static int mntfs_optprint(struct vfs *vfsp, char *buf) { int i, optinbuf = 0; mntopt_t *mop; char *origbuf = buf; for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { mop = &vfsp->vfs_mntopts.mo_list[i]; if (mop->mo_flags & MO_NODISPLAY) continue; if (mop->mo_flags & MO_SET) { if (optinbuf) *buf++ = ','; else optinbuf = 1; buf += snprintf(buf, MAX_MNTOPT_STR, "%s", mop->mo_name); /* * print option value if there is one */ if (mop->mo_arg != NULL) { buf += snprintf(buf, MAX_MNTOPT_STR, "=%s", mop->mo_arg); } } } if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { if (optinbuf) *buf++ = ','; else optinbuf = 1; buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s", vfsp->vfs_zone->zone_name); } if (mntfs_enabledev) { if (optinbuf++) *buf++ = ','; buf += mntfs_devprint(vfsp, buf); } if (!optinbuf) { buf += snprintf(buf, MAX_MNTOPT_STR, "-"); } return (buf - origbuf); } void mntfs_populate_text(vfs_t *vfsp, 
zone_t *zonep, mntelem_t *elemp) { struct extmnttab *tabp = &elemp->mnte_tab; const char *resource, *mntpt; char *cp = elemp->mnte_text; mntpt = refstr_value(vfsp->vfs_mntpt); resource = refstr_value(vfsp->vfs_resource); tabp->mnt_special = 0; if (resource != NULL && resource[0] != '\0') { if (resource[0] != '/') { cp += snprintf(cp, MAXPATHLEN, "%s\t", resource); } else if (!ZONE_PATH_VISIBLE(resource, zonep)) { /* * Use the mount point as the resource. */ cp += snprintf(cp, MAXPATHLEN, "%s\t", ZONE_PATH_TRANSLATE(mntpt, zonep)); } else { cp += snprintf(cp, MAXPATHLEN, "%s\t", ZONE_PATH_TRANSLATE(resource, zonep)); } } else { cp += snprintf(cp, MAXPATHLEN, "-\t"); } tabp->mnt_mountp = (char *)(cp - elemp->mnte_text); if (mntpt != NULL && mntpt[0] != '\0') { /* * We know the mount point is visible from within the zone, * otherwise it wouldn't be on the zone's vfs list. */ cp += snprintf(cp, MAXPATHLEN, "%s\t", ZONE_PATH_TRANSLATE(mntpt, zonep)); } else { cp += snprintf(cp, MAXPATHLEN, "-\t"); } tabp->mnt_fstype = (char *)(cp - elemp->mnte_text); cp += snprintf(cp, MAXPATHLEN, "%s\t", vfssw[vfsp->vfs_fstype].vsw_name); tabp->mnt_mntopts = (char *)(cp - elemp->mnte_text); cp += mntfs_optprint(vfsp, cp); *cp++ = '\t'; tabp->mnt_time = (char *)(cp - elemp->mnte_text); cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime); *cp++ = '\n'; /* over-write snprintf's trailing null-byte */ tabp->mnt_major = getmajor(vfsp->vfs_dev); tabp->mnt_minor = getminor(vfsp->vfs_dev); elemp->mnte_text_size = cp - elemp->mnte_text; elemp->mnte_vfs_ctime = vfsp->vfs_hrctime; elemp->mnte_hidden = vfsp->vfs_flag & VFS_NOMNTTAB; } /* Determine the length of the /etc/mnttab entry for this vfs_t. 
*/ static size_t mntfs_text_len(vfs_t *vfsp, zone_t *zone) { size_t size = 0; const char *resource, *mntpt; size_t mntsize; mntpt = refstr_value(vfsp->vfs_mntpt); if (mntpt != NULL && mntpt[0] != '\0') { mntsize = strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; } else { mntsize = 2; /* "-\t" */ } size += mntsize; resource = refstr_value(vfsp->vfs_resource); if (resource != NULL && resource[0] != '\0') { if (resource[0] != '/') { size += strlen(resource) + 1; } else if (!ZONE_PATH_VISIBLE(resource, zone)) { /* * Same as the zone's view of the mount point. */ size += mntsize; } else { size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1; } } else { size += 2; /* "-\t" */ } size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1; size += mntfs_optsize(vfsp); size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime); return (size); } /* Destroy the resources associated with a snapshot element. */ static void mntfs_destroy_elem(mntelem_t *elemp) { kmem_free(elemp->mnte_text, elemp->mnte_text_size); kmem_free(elemp, sizeof (mntelem_t)); } /* * Return 1 if the given snapshot is in the range of the given element; return * 0 otherwise. */ static int mntfs_elem_in_range(mntsnap_t *snapp, mntelem_t *elemp) { timespec_t *stimep = &snapp->mnts_time; timespec_t *btimep = &elemp->mnte_birth; timespec_t *dtimep = &elemp->mnte_death; /* * If a snapshot is in range of an element then the snapshot must have * been created after the birth of the element, and either the element * is still alive or it died after the snapshot was created. */ if (mntfs_newest(btimep, stimep) == MNTFS_SECOND && (MNTFS_ELEM_IS_ALIVE(elemp) || mntfs_newest(stimep, dtimep) == MNTFS_SECOND)) return (1); else return (0); } /* * Return the next valid database element, after the one provided, for a given * snapshot; return NULL if none exists. The caller must hold the zone's * database lock as a reader before calling this function. 
*/ static mntelem_t * mntfs_get_next_elem(mntsnap_t *snapp, mntelem_t *elemp) { int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN; do { elemp = elemp->mnte_next; } while (elemp && (!mntfs_elem_in_range(snapp, elemp) || (!show_hidden && elemp->mnte_hidden))); return (elemp); } /* * This function frees the resources associated with a mntsnap_t. It walks * through the database, decrementing the reference count of any element that * satisfies the snapshot. If the reference count of an element becomes zero * then it is removed from the database. */ static void mntfs_freesnap(mntnode_t *mnp, mntsnap_t *snapp) { zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone; krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; mntelem_t **elempp = &zonep->zone_mntfs_db; mntelem_t *elemp; int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN; size_t number_decremented = 0; ASSERT(RW_WRITE_HELD(&mnp->mnt_contents)); /* Ignore an uninitialised snapshot. */ if (snapp->mnts_nmnts == 0) return; /* Drop the holds on any matching database elements. */ rw_enter(dblockp, RW_WRITER); while ((elemp = *elempp) != NULL) { if (mntfs_elem_in_range(snapp, elemp) && (!elemp->mnte_hidden || show_hidden) && ++number_decremented && --elemp->mnte_refcnt == 0) { if ((*elempp = elemp->mnte_next) != NULL) (*elempp)->mnte_prev = elemp->mnte_prev; mntfs_destroy_elem(elemp); } else { elempp = &elemp->mnte_next; } } rw_exit(dblockp); ASSERT(number_decremented == snapp->mnts_nmnts); /* Clear the snapshot data. */ bzero(snapp, sizeof (mntsnap_t)); } /* Insert the new database element newp after the existing element prevp. */ static void mntfs_insert_after(mntelem_t *newp, mntelem_t *prevp) { newp->mnte_prev = prevp; newp->mnte_next = prevp->mnte_next; prevp->mnte_next = newp; if (newp->mnte_next != NULL) newp->mnte_next->mnte_prev = newp; } /* Create and return a copy of a given database element. 
*/
static mntelem_t *
mntfs_copy(mntelem_t *origp)
{
	size_t text_size = origp->mnte_text_size;
	mntelem_t *dupp;

	/*
	 * Only the payload is duplicated; every other member of the new
	 * element is left zeroed by kmem_zalloc().
	 */
	dupp = kmem_zalloc(sizeof (*dupp), KM_SLEEP);

	dupp->mnte_text = kmem_alloc(text_size, KM_SLEEP);
	bcopy(origp->mnte_text, dupp->mnte_text, text_size);
	dupp->mnte_text_size = text_size;

	dupp->mnte_tab = origp->mnte_tab;
	dupp->mnte_vfs_ctime = origp->mnte_vfs_ctime;
	dupp->mnte_hidden = origp->mnte_hidden;

	return (dupp);
}

/*
 * Report whether two database elements carry the same vfs_t payload, i.e.
 * the same hidden flag, the same text and the same extmnttab contents.
 * Returns 1 if they do and 0 if they do not.
 */
static int
mntfs_is_same_element(mntelem_t *a, mntelem_t *b)
{
	if (a->mnte_hidden != b->mnte_hidden)
		return (0);

	/* The sizes must agree before the text can be compared. */
	if (a->mnte_text_size != b->mnte_text_size)
		return (0);

	if (bcmp(a->mnte_text, b->mnte_text, a->mnte_text_size) != 0)
		return (0);

	return (bcmp(&a->mnte_tab, &b->mnte_tab,
	    sizeof (struct extmnttab)) == 0);
}

/*
 * mntfs_snapshot() updates the database, creating it if necessary, so that it
 * accurately reflects the state of the in-kernel mnttab. It also increments
 * the reference count on all database elements that correspond to currently-
 * mounted resources. Finally, it initialises the appropriate snapshot
 * structure.
 *
 * Each vfs_t is given a high-resolution time stamp, for the benefit of mntfs,
 * when it is inserted into the in-kernel mnttab. This time stamp is copied into
 * the corresponding database element when it is created, allowing the element
 * and the vfs_t to be identified as a pair. It is possible that some file
 * systems may make unadvertised changes to, for example, a resource's mount
 * options. Therefore, in order to determine whether a database element is an
 * up-to-date representation of a given vfs_t, it is compared with a temporary
 * element generated for this purpose. Although less efficient, this is safer
 * than implementing an mtime for a vfs_t.
 *
 * Some mounted resources are marked as "hidden" with a VFS_NOMNTTAB flag.
These are considered invisible unless the user has already set the
 * MNT_SHOWHIDDEN flag in the vnode using the MNTIOC_SHOWHIDDEN ioctl.
 *
 * mnp is the mntnode_t that owns the snapshot and snapp is the snapshot to
 * (re)initialise. The caller must hold mnp->mnt_contents as a writer. The
 * vfs list lock and the zone's database lock are acquired, in that order,
 * and released again before this function returns.
 */
static void
mntfs_snapshot(mntnode_t *mnp, mntsnap_t *snapp)
{
	mntdata_t *mnd = MTOD(mnp);
	zone_t *zonep = mnd->mnt_zone_ref.zref_zone;
	int is_global_zone = (zonep == global_zone);
	int show_hidden = mnp->mnt_flags & MNT_SHOWHIDDEN;
	vfs_t *vfsp, *firstvfsp, *lastvfsp;
	vfs_t dummyvfs;
	vfs_t *dummyvfsp = NULL;
	krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
	mntelem_t **headpp = &zonep->zone_mntfs_db;
	mntelem_t *elemp;
	mntelem_t *prevp = NULL;
	int order;
	mntelem_t *tempelemp;
	mntelem_t *newp;
	mntelem_t *firstp = NULL;
	size_t nmnts = 0;
	size_t total_text_size = 0;
	size_t normal_text_size = 0;
	int insert_before;
	timespec_t last_mtime;
	size_t entry_length, new_entry_length;

	ASSERT(RW_WRITE_HELD(&mnp->mnt_contents));
	vfs_list_read_lock();
	vfs_mnttab_modtime(&last_mtime);

	/*
	 * If this snapshot already exists then we must have been asked to
	 * rewind the file, i.e. discard the snapshot and create a new one in
	 * its place. In this case we first see if the in-kernel mnttab has
	 * advertised a change; if not then we simply reinitialise the metadata.
	 */
	if (snapp->mnts_nmnts) {
		if (mntfs_newest(&last_mtime, &snapp->mnts_last_mtime) ==
		    MNTFS_NEITHER) {
			/*
			 * An unchanged mtime is no guarantee that the
			 * in-kernel mnttab is unchanged; for example, a
			 * concurrent remount may be between calls to
			 * vfs_setmntopt_nolock() and vfs_mnttab_modtimeupd().
			 * It follows that the database may have changed, and
			 * in particular that some elements in this snapshot
			 * may have been killed by another call to
			 * mntfs_snapshot(). It is therefore not merely
			 * unnecessary to update the snapshot's time but in
			 * fact dangerous; it needs to be left alone.
			 */
			snapp->mnts_next = snapp->mnts_first;
			snapp->mnts_flags &= ~MNTS_REWIND;
			snapp->mnts_foffset = snapp->mnts_ieoffset = 0;
			vfs_list_unlock();
			return;
		} else {
			/* The mnttab has changed: start again from scratch. */
			mntfs_freesnap(mnp, snapp);
		}
	}

	/*
	 * Create a temporary database element. For each vfs_t, the temporary
	 * element will be populated with the corresponding text. If the vfs_t
	 * does not have a corresponding element within the database, or if
	 * there is such an element but it is stale, a copy of the temporary
	 * element is inserted into the database at the appropriate location.
	 *
	 * entry_length tracks the allocated size of the temporary text buffer,
	 * which is grown on demand below and freed with that size at the end.
	 */
	tempelemp = kmem_alloc(sizeof (mntelem_t), KM_SLEEP);
	entry_length = MNT_LINE_MAX;
	tempelemp->mnte_text = kmem_alloc(entry_length, KM_SLEEP);

	/* Find the first and last vfs_t for the given zone. */
	if (is_global_zone) {
		firstvfsp = rootvfs;
		lastvfsp = firstvfsp->vfs_prev;
	} else {
		firstvfsp = zonep->zone_vfslist;
		/*
		 * If there isn't already a vfs_t for root then we create a
		 * dummy which will be used as the head of the list (which will
		 * therefore no longer be circular).
		 */
		if (firstvfsp == NULL ||
		    strcmp(refstr_value(firstvfsp->vfs_mntpt),
		    zonep->zone_rootpath) != 0) {
			/*
			 * The zone's vfs_ts will have mount points relative to
			 * the zone's root path. The vfs_t for the zone's
			 * root file system would therefore have a mount point
			 * equal to the zone's root path. Since the zone's root
			 * path isn't a mount point, we copy the vfs_t of the
			 * zone's root vnode, and provide it with a fake mount
			 * and resource. However, if the zone's root is a
			 * zfs dataset, use the dataset name as the resource.
			 *
			 * Note that by cloning another vfs_t we also acquire
			 * its high-resolution ctime. This might appear to
			 * violate the requirement that the ctimes in the list
			 * of vfs_ts are unique and monotonically increasing;
			 * this is not the case. The dummy vfs_t appears in only
			 * a non-global zone's vfs_t list, where the cloned
			 * vfs_t would not ordinarily be visible; the ctimes are
			 * therefore unique. The zone's root path must be
			 * available before the zone boots, and so its root
			 * vnode's vfs_t's ctime must be lower than those of any
			 * resources subsequently mounted by the zone. The
			 * ctimes are therefore monotonically increasing.
			 */
			dummyvfs = *zonep->zone_rootvp->v_vfsp;
			/* This refstr is released in the clean-up below. */
			dummyvfs.vfs_mntpt = refstr_alloc(zonep->zone_rootpath);
			if (strcmp(vfssw[dummyvfs.vfs_fstype].vsw_name, "zfs")
			    != 0)
				dummyvfs.vfs_resource = dummyvfs.vfs_mntpt;
			dummyvfsp = &dummyvfs;
			if (firstvfsp == NULL) {
				/* The dummy is the only entry in the list. */
				lastvfsp = dummyvfsp;
			} else {
				lastvfsp = firstvfsp->vfs_zone_prev;
				dummyvfsp->vfs_zone_next = firstvfsp;
			}
			firstvfsp = dummyvfsp;
		} else {
			lastvfsp = firstvfsp->vfs_zone_prev;
		}
	}

	/*
	 * Now walk through all the vfs_ts for this zone. For each one, find the
	 * corresponding database element, creating it first if necessary, and
	 * increment its reference count.
	 *
	 * elemp is a cursor into the database that only ever moves forwards;
	 * it is not reset between vfs_ts.
	 */
	rw_enter(dblockp, RW_WRITER);
	elemp = zonep->zone_mntfs_db;
	/* CSTYLED */
	for (vfsp = firstvfsp;;
	    vfsp = is_global_zone ? vfsp->vfs_next : vfsp->vfs_zone_next) {
		DTRACE_PROBE1(new__vfs, vfs_t *, vfsp);
		/* Consider only visible entries. */
		if ((vfsp->vfs_flag & VFS_NOMNTTAB) == 0 || show_hidden) {
			/*
			 * Walk through the existing database looking for either
			 * an element that matches the current vfs_t, or for the
			 * correct place in which to insert a new element.
			 */
			insert_before = 0;
			for (; elemp; prevp = elemp, elemp = elemp->mnte_next) {
				DTRACE_PROBE1(considering__elem, mntelem_t *,
				    elemp);

				/* Compare the vfs_t with the element. */
				order = mntfs_newest(&elemp->mnte_vfs_ctime,
				    &vfsp->vfs_hrctime);

				/*
				 * If we encounter a database element newer than
				 * this vfs_t then we've stepped over a gap
				 * where the element for this vfs_t must be
				 * inserted.
				 */
				if (order == MNTFS_FIRST) {
					insert_before = 1;
					break;
				}

				/* Dead elements no longer interest us. */
				if (MNTFS_ELEM_IS_DEAD(elemp))
					continue;

				/*
				 * If the time stamps are the same then the
				 * element is a potential match for the vfs_t,
				 * although it may later prove to be stale.
				 */
				if (order == MNTFS_NEITHER)
					break;

				/*
				 * This element must be older than the vfs_t.
				 * It must, therefore, correspond to a vfs_t
				 * that has been unmounted. Since the element is
				 * still alive, we kill it if it is visible.
				 */
				if (!elemp->mnte_hidden || show_hidden)
					vfs_mono_time(&elemp->mnte_death);
			}
			DTRACE_PROBE2(possible__match, vfs_t *, vfsp,
			    mntelem_t *, elemp);

			/*
			 * Create a new database element if required. The
			 * temporary text buffer is enlarged first if this
			 * vfs_t's line would not fit in it.
			 */
			new_entry_length = mntfs_text_len(vfsp, zonep);
			if (new_entry_length > entry_length) {
				kmem_free(tempelemp->mnte_text, entry_length);
				tempelemp->mnte_text =
				    kmem_alloc(new_entry_length, KM_SLEEP);
				entry_length = new_entry_length;
			}
			mntfs_populate_text(vfsp, zonep, tempelemp);
			ASSERT(tempelemp->mnte_text_size == new_entry_length);
			if (elemp == NULL) {
				/*
				 * We ran off the end of the database. Insert a
				 * new element at the end.
				 */
				newp = mntfs_copy(tempelemp);
				vfs_mono_time(&newp->mnte_birth);
				if (prevp) {
					mntfs_insert_after(newp, prevp);
				} else {
					/* The database was empty. */
					newp->mnte_next = NULL;
					newp->mnte_prev = NULL;
					ASSERT(*headpp == NULL);
					*headpp = newp;
				}
				elemp = newp;
			} else if (insert_before) {
				/*
				 * Insert a new element before the current one.
				 */
				newp = mntfs_copy(tempelemp);
				vfs_mono_time(&newp->mnte_birth);
				if (prevp) {
					mntfs_insert_after(newp, prevp);
				} else {
					/* The new element becomes the head. */
					newp->mnte_next = elemp;
					newp->mnte_prev = NULL;
					elemp->mnte_prev = newp;
					ASSERT(*headpp == elemp);
					*headpp = newp;
				}
				elemp = newp;
			} else if (!mntfs_is_same_element(elemp, tempelemp)) {
				/*
				 * The element corresponds to the vfs_t, but the
				 * vfs_t has changed; it must have been
				 * remounted. Kill the old element and insert a
				 * new one after it.
				 */
				vfs_mono_time(&elemp->mnte_death);
				newp = mntfs_copy(tempelemp);
				vfs_mono_time(&newp->mnte_birth);
				mntfs_insert_after(newp, elemp);
				elemp = newp;
			}

			/* We've found the corresponding element. Hold it. */
			DTRACE_PROBE1(incrementing, mntelem_t *, elemp);
			elemp->mnte_refcnt++;

			/*
			 * Update the parameters used to initialise the
			 * snapshot.
			 */
			nmnts++;
			total_text_size += elemp->mnte_text_size;
			if (!elemp->mnte_hidden)
				normal_text_size += elemp->mnte_text_size;
			if (!firstp)
				firstp = elemp;

			/* Step the database cursor past the held element. */
			prevp = elemp;
			elemp = elemp->mnte_next;
		}

		/* The loop has no condition of its own; stop after the last. */
		if (vfsp == lastvfsp)
			break;
	}

	/*
	 * Any remaining visible database elements that are still alive must be
	 * killed now, because their corresponding vfs_ts must have been
	 * unmounted.
	 */
	for (; elemp; elemp = elemp->mnte_next) {
		if (MNTFS_ELEM_IS_ALIVE(elemp) && (!elemp->mnte_hidden ||
		    show_hidden))
			vfs_mono_time(&elemp->mnte_death);
	}

	/* Initialise the snapshot. */
	vfs_mono_time(&snapp->mnts_time);
	snapp->mnts_last_mtime = last_mtime;
	snapp->mnts_first = snapp->mnts_next = firstp;
	snapp->mnts_flags = show_hidden ? MNTS_SHOWHIDDEN : 0;
	snapp->mnts_nmnts = nmnts;
	snapp->mnts_text_size = total_text_size;
	snapp->mnts_foffset = snapp->mnts_ieoffset = 0;

	/*
	 * Record /etc/mnttab's current size and mtime for possible future use
	 * by mntgetattr().
	 */
	mnd->mnt_size = normal_text_size;
	mnd->mnt_mtime = last_mtime;
	if (show_hidden) {
		mnd->mnt_hidden_size = total_text_size;
		mnd->mnt_hidden_mtime = last_mtime;
	}

	/* Clean up. */
	rw_exit(dblockp);
	vfs_list_unlock();
	if (dummyvfsp != NULL)
		refstr_rele(dummyvfsp->vfs_mntpt);
	kmem_free(tempelemp->mnte_text, entry_length);
	kmem_free(tempelemp, sizeof (mntelem_t));
}

/*
 * Public function to convert vfs_mntopts into a string.
 * A buffer of sufficient size is allocated, which is returned via bufp,
 * and whose length is returned via lenp.
*/ void mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp) { size_t len; char *buf; vfs_list_read_lock(); len = mntfs_optsize(vfsp) + 1; buf = kmem_alloc(len, KM_NOSLEEP); if (buf == NULL) { *bufp = NULL; vfs_list_unlock(); return; } buf[len - 1] = '\0'; (void) mntfs_optprint(vfsp, buf); ASSERT(buf[len - 1] == '\0'); vfs_list_unlock(); *bufp = buf; *lenp = len; } /* ARGSUSED */ static int mntopen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) { vnode_t *vp = *vpp; mntnode_t *nmnp; /* * Not allowed to open for writing, return error. */ if (flag & FWRITE) return (EPERM); /* * Create a new mnt/vnode for each open, this will give us a handle to * hang the snapshot on. */ nmnp = mntgetnode(vp); *vpp = MTOV(nmnp); atomic_add_32(&MTOD(nmnp)->mnt_nopen, 1); VN_RELE(vp); return (0); } /* ARGSUSED */ static int mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, caller_context_t *ct) { mntnode_t *mnp = VTOM(vp); /* Clean up any locks or shares held by the current process */ cleanlocks(vp, ttoproc(curthread)->p_pid, 0); cleanshares(vp, ttoproc(curthread)->p_pid); if (count > 1) return (0); if (vp->v_count == 1) { rw_enter(&mnp->mnt_contents, RW_WRITER); mntfs_freesnap(mnp, &mnp->mnt_read); mntfs_freesnap(mnp, &mnp->mnt_ioctl); rw_exit(&mnp->mnt_contents); atomic_add_32(&MTOD(mnp)->mnt_nopen, -1); } return (0); } /* ARGSUSED */ static int mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct) { mntnode_t *mnp = VTOM(vp); zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone; mntsnap_t *snapp = &mnp->mnt_read; off_t off = uio->uio_offset; size_t len = uio->uio_resid; char *bufferp; size_t available, copylen; size_t written = 0; mntelem_t *elemp; krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; int error = 0; off_t ieoffset; rw_enter(&mnp->mnt_contents, RW_WRITER); if (snapp->mnts_nmnts == 0 || (off == (off_t)0)) mntfs_snapshot(mnp, snapp); if ((size_t)(off + len) > snapp->mnts_text_size) len = 
snapp->mnts_text_size - off; if (off < 0 || len > snapp->mnts_text_size) { rw_exit(&mnp->mnt_contents); return (EFAULT); } if (len == 0) { rw_exit(&mnp->mnt_contents); return (0); } /* * For the file offset provided, locate the corresponding database * element and calculate the corresponding offset within its text. If * the file offset is the same as that reached during the last read(2) * then use the saved element and intra-element offset. */ rw_enter(dblockp, RW_READER); if (off == 0 || (off == snapp->mnts_foffset)) { elemp = snapp->mnts_next; ieoffset = snapp->mnts_ieoffset; } else { off_t total_off; /* * Find the element corresponding to the requested file offset * by walking through the database and summing the text sizes * of the individual elements. If the requested file offset is * greater than that reached on the last visit then we can start * at the last seen element; otherwise, we have to start at the * beginning. */ if (off > snapp->mnts_foffset) { elemp = snapp->mnts_next; total_off = snapp->mnts_foffset - snapp->mnts_ieoffset; } else { elemp = snapp->mnts_first; total_off = 0; } while (off > total_off + elemp->mnte_text_size) { total_off += elemp->mnte_text_size; elemp = mntfs_get_next_elem(snapp, elemp); ASSERT(elemp != NULL); } /* Calculate the intra-element offset. */ if (off > total_off) ieoffset = off - total_off; else ieoffset = 0; } /* * Create a buffer and populate it with the text from successive * database elements until it is full. */ bufferp = kmem_alloc(len, KM_SLEEP); while (written < len) { available = elemp->mnte_text_size - ieoffset; copylen = MIN(len - written, available); bcopy(elemp->mnte_text + ieoffset, bufferp + written, copylen); written += copylen; if (copylen == available) { elemp = mntfs_get_next_elem(snapp, elemp); ASSERT(elemp != NULL || written == len); ieoffset = 0; } else { ieoffset += copylen; } } rw_exit(dblockp); /* * Write the populated buffer, update the snapshot's state if * successful and then advertise our read. 
*/ error = uiomove(bufferp, len, UIO_READ, uio); if (error == 0) { snapp->mnts_next = elemp; snapp->mnts_foffset = off + len; snapp->mnts_ieoffset = ieoffset; } vfs_mnttab_readop(); rw_exit(&mnp->mnt_contents); /* Clean up. */ kmem_free(bufferp, len); return (error); } static int mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, caller_context_t *ct) { int mask = vap->va_mask; int error; mntnode_t *mnp = VTOM(vp); timespec_t mtime, old_mtime; size_t size, old_size; mntdata_t *mntdata = MTOD(VTOM(vp)); mntsnap_t *rsnapp, *isnapp; extern timespec_t vfs_mnttab_ctime; /* AT_MODE, AT_UID and AT_GID are derived from the underlying file. */ if (mask & AT_MODE|AT_UID|AT_GID) { if (error = VOP_GETATTR(mnp->mnt_mountvp, vap, flags, cr, ct)) return (error); } /* * There are some minor subtleties in the determination of * /etc/mnttab's size and mtime. We wish to avoid any condition in * which, in the vicinity of a change to the in-kernel mnttab, we * return an old value for one but a new value for the other. We cannot * simply hold vfslist for the entire calculation because we might need * to call mntfs_snapshot(), which calls vfs_list_read_lock(). */ if (mask & AT_SIZE|AT_NBLOCKS) { rw_enter(&mnp->mnt_contents, RW_WRITER); vfs_list_read_lock(); vfs_mnttab_modtime(&mtime); if (mnp->mnt_flags & MNT_SHOWHIDDEN) { old_mtime = mntdata->mnt_hidden_mtime; old_size = mntdata->mnt_hidden_size; } else { old_mtime = mntdata->mnt_mtime; old_size = mntdata->mnt_size; } vfs_list_unlock(); rsnapp = &mnp->mnt_read; isnapp = &mnp->mnt_ioctl; if (rsnapp->mnts_nmnts || isnapp->mnts_nmnts) { /* * The mntnode already has at least one snapshot from * which to take the size; the user will understand from * mnttab(4) that the current size of the in-kernel * mnttab is irrelevant. */ size = rsnapp->mnts_nmnts ? 
rsnapp->mnts_text_size : isnapp->mnts_text_size; } else if (mntfs_newest(&mtime, &old_mtime) == MNTFS_NEITHER) { /* * There is no existing valid snapshot but the in-kernel * mnttab has not changed since the time that the last * one was generated. Use the old file size; note that * it is guaranteed to be consistent with mtime, which * may be returned to the user later. */ size = old_size; } else { /* * There is no snapshot and the in-kernel mnttab has * changed since the last one was created. We generate a * new snapshot which we use for not only the size but * also the mtime, thereby ensuring that the two are * consistent. */ mntfs_snapshot(mnp, rsnapp); size = rsnapp->mnts_text_size; mtime = rsnapp->mnts_last_mtime; mntfs_freesnap(mnp, rsnapp); } rw_exit(&mnp->mnt_contents); } else if (mask & AT_ATIME|AT_MTIME) { vfs_list_read_lock(); vfs_mnttab_modtime(&mtime); vfs_list_unlock(); } /* Always look like a regular file. */ if (mask & AT_TYPE) vap->va_type = VREG; /* Mode should basically be read only. */ if (mask & AT_MODE) vap->va_mode &= 07444; if (mask & AT_FSID) vap->va_fsid = vp->v_vfsp->vfs_dev; /* Nodeid is always ROOTINO. */ if (mask & AT_NODEID) vap->va_nodeid = (ino64_t)MNTROOTINO; /* * Set nlink to the number of open vnodes for mnttab info * plus one for existing. */ if (mask & AT_NLINK) vap->va_nlink = mntdata->mnt_nopen + 1; if (mask & AT_SIZE) vap->va_size = size; if (mask & AT_ATIME) vap->va_atime = mtime; if (mask & AT_MTIME) vap->va_mtime = mtime; if (mask & AT_CTIME) vap->va_ctime = vfs_mnttab_ctime; if (mask & AT_RDEV) vap->va_rdev = 0; if (mask & AT_BLKSIZE) vap->va_blksize = DEV_BSIZE; if (mask & AT_NBLOCKS) vap->va_nblocks = btod(size); if (mask & AT_SEQ) vap->va_seq = 0; return (0); } static int mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct) { mntnode_t *mnp = VTOM(vp); if (mode & (VWRITE|VEXEC)) return (EROFS); /* * Do access check on the underlying directory vnode. 
*/ return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr, ct)); } /* * New /mntfs vnode required; allocate it and fill in most of the fields. */ static mntnode_t * mntgetnode(vnode_t *dp) { mntnode_t *mnp; vnode_t *vp; mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP); mnp->mnt_vnode = vn_alloc(KM_SLEEP); mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp; rw_init(&mnp->mnt_contents, NULL, RW_DEFAULT, NULL); vp = MTOV(mnp); vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT; vn_setops(vp, mntvnodeops); vp->v_vfsp = dp->v_vfsp; vp->v_type = VREG; vp->v_data = (caddr_t)mnp; return (mnp); } /* * Free the storage obtained from mntgetnode(). */ static void mntfreenode(mntnode_t *mnp) { vnode_t *vp = MTOV(mnp); rw_destroy(&mnp->mnt_contents); vn_invalid(vp); vn_free(vp); kmem_free(mnp, sizeof (*mnp)); } /* ARGSUSED */ static int mntfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) { return (0); } /* ARGSUSED */ static void mntinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) { mntnode_t *mnp = VTOM(vp); mntfreenode(mnp); } /* * lseek(2) is supported only to rewind the file by resetmnttab(3C). Rewinding * has a special meaning for /etc/mnttab: it forces mntfs to refresh the * snapshot at the next ioctl(). * * mnttab(4) explains that "the snapshot...is taken any time a read(2) is * performed at offset 0". We therefore ignore the read snapshot here. */ /* ARGSUSED */ static int mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) { mntnode_t *mnp = VTOM(vp); if (*noffp == 0) { rw_enter(&mnp->mnt_contents, RW_WRITER); mnp->mnt_ioctl.mnts_flags |= MNTS_REWIND; rw_exit(&mnp->mnt_contents); } return (0); } /* * Return the answer requested to poll(). * POLLRDBAND will return when the mtime of the mnttab * information is newer than the latest one read for this open. 
*/
/* ARGSUSED */
static int
mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp,
    caller_context_t *ct)
{
	mntnode_t *node = VTOM(vp);
	mntsnap_t *latest;

	rw_enter(&node->mnt_contents, RW_READER);

	/* Poll against whichever of the two snapshots is the more recent. */
	latest = (mntfs_newest(&node->mnt_ioctl.mnts_last_mtime,
	    &node->mnt_read.mnts_last_mtime) == MNTFS_FIRST) ?
	    &node->mnt_ioctl : &node->mnt_read;

	*phpp = (pollhead_t *)NULL;

	/* Ordinary read events are reported whenever they are asked for. */
	*revp = ev & (POLLIN | POLLRDNORM);

	if ((ev & POLLRDBAND) != 0) {
		vfs_mnttab_poll(&latest->mnts_last_mtime, phpp);
		if (*phpp == (pollhead_t *)NULL)
			*revp |= POLLRDBAND;
	}
	rw_exit(&node->mnt_contents);

	/*
	 * If someone is polling an unsupported poll events (e.g.
	 * POLLOUT, POLLPRI, etc.), just return POLLERR revents.
	 * That way we will ensure that we don't return a 0
	 * revents with a NULL pollhead pointer.
	 */
	if (*revp == 0 && *phpp == NULL && !any)
		*revp = POLLERR;

	return (0);
}

/*
 * mntfs_same_word() returns 1 if two words are the same in the context of
 * MNTIOC_GETMNTANY and 0 otherwise.
 *
 * worda is a memory address that lies somewhere in the buffer bufa; it cannot
 * be NULL since this is used to indicate to getmntany(3C) that the user does
 * not wish to match a particular field. The text to which worda points is
 * supplied by the user; if it is not null-terminated then it cannot match.
 *
 * Buffer bufb contains a line from /etc/mnttab, in which the fields are
 * delimited by tab or new-line characters. offb is the offset of the second
 * word within this buffer.
 *
 * mntfs_same_word() returns 1 if the words are the same and 0 otherwise.
*/ int mntfs_same_word(char *worda, char *bufa, size_t sizea, off_t offb, char *bufb, size_t sizeb) { char *wordb = bufb + offb; int bytes_remaining; ASSERT(worda != NULL); bytes_remaining = MIN(((bufa + sizea) - worda), ((bufb + sizeb) - wordb)); while (bytes_remaining && *worda == *wordb) { worda++; wordb++; bytes_remaining--; } if (bytes_remaining && *worda == '\0' && (*wordb == '\t' || *wordb == '\n')) return (1); else return (0); } /* * mntfs_special_info_string() returns which, if either, of VBLK or VCHR * corresponds to a supplied path. If the path is a special device then the * function optionally sets the major and minor numbers. */ vtype_t mntfs_special_info_string(char *path, uint_t *major, uint_t *minor, cred_t *cr) { vattr_t vattr; vnode_t *vp; vtype_t type; int error; if (path == NULL || *path != '/' || lookupnameat(path + 1, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) return (0); vattr.va_mask = AT_TYPE | AT_RDEV; error = VOP_GETATTR(vp, &vattr, ATTR_REAL, cr, NULL); VN_RELE(vp); if (error == 0 && ((type = vattr.va_type) == VBLK || type == VCHR)) { if (major && minor) { *major = getmajor(vattr.va_rdev); *minor = getminor(vattr.va_rdev); } return (type); } else { return (0); } } /* * mntfs_special_info_element() extracts the name of the mounted resource * for a given element and copies it into a null-terminated string, which it * then passes to mntfs_special_info_string(). */ vtype_t mntfs_special_info_element(mntelem_t *elemp, cred_t *cr) { char *newpath; vtype_t type; newpath = kmem_alloc(elemp->mnte_text_size, KM_SLEEP); bcopy(elemp->mnte_text, newpath, (off_t)(elemp->mnte_tab.mnt_mountp)); *(newpath + (off_t)elemp->mnte_tab.mnt_mountp - 1) = '\0'; type = mntfs_special_info_string(newpath, NULL, NULL, cr); kmem_free(newpath, elemp->mnte_text_size); return (type); } /* * Convert an address that points to a byte within a user buffer into an * address that points to the corresponding offset within a kernel buffer. 
If the user address is NULL then make no conversion. If the address does not
 * lie within the buffer then reset it to NULL.
 *
 * uaddr is the user address to convert, ubufp the user address of the start
 * of the buffer, kbufp the kernel copy of that buffer and bufsize the size
 * of both.
 */
char *
mntfs_import_addr(char *uaddr, char *ubufp, char *kbufp, size_t bufsize)
{
	if (uaddr < ubufp || uaddr >= ubufp + bufsize)
		return (NULL);
	else
		return (kbufp + (uaddr - ubufp));
}

/*
 * These 32-bit versions are to support STRUCT_DECL(9F) etc. in
 * mntfs_copyout_elem() and mntioctl().
 */
#ifdef _SYSCALL32_IMPL

typedef struct extmnttab32 {
	uint32_t	mnt_special;
	uint32_t	mnt_mountp;
	uint32_t	mnt_fstype;
	uint32_t	mnt_mntopts;
	uint32_t	mnt_time;
	uint_t		mnt_major;
	uint_t		mnt_minor;
} extmnttab32_t;

typedef struct mnttab32 {
	uint32_t	mnt_special;
	uint32_t	mnt_mountp;
	uint32_t	mnt_fstype;
	uint32_t	mnt_mntopts;
	uint32_t	mnt_time;
} mnttab32_t;

struct mntentbuf32 {
	uint32_t	mbuf_emp;
	uint_t		mbuf_bufsize;
	uint32_t	mbuf_buf;
};

#endif

/*
 * mntfs_copyout_elem() is common code for the MNTIOC_GETMNTENT,
 * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY ioctls. Having identified the
 * database element desired by the user, this function copies out the text and
 * the pointers to the relevant userland addresses. It returns 0 on success
 * and non-zero otherwise.
 *
 * elemp is the database element, uemp the user address of the structure to
 * fill in, ubufp the user address of the text buffer, cmd the ioctl command
 * and datamodel the data model of the caller. The caller is responsible for
 * checking that the user's text buffer can hold mnte_text_size bytes.
 */
int
mntfs_copyout_elem(mntelem_t *elemp, struct extmnttab *uemp,
    char *ubufp, int cmd, int datamodel)
{
	STRUCT_DECL(extmnttab, ktab);
	char *dbbufp = elemp->mnte_text;
	size_t dbbufsize = elemp->mnte_text_size;
	struct extmnttab *dbtabp = &elemp->mnte_tab;
	size_t ssize;
	char *kbufp;
	int error = 0;

	/*
	 * We create a struct extmnttab within the kernel of the size
	 * determined by the user's data model. We then populate its
	 * fields by combining the start address of the text buffer
	 * supplied by the user, ubufp, with the offsets stored for
	 * this database element within dbtabp, a pointer to a struct
	 * extmnttab.
	 *
	 * Note that if the corresponding field is "-" this signifies
	 * no real content, and we set the address to NULL. This does
	 * not apply to mnt_time.
	 */
	STRUCT_INIT(ktab, datamodel);
	STRUCT_FSETP(ktab, mnt_special,
	    MNTFS_REAL_FIELD(dbbufp) ? ubufp : NULL);
	STRUCT_FSETP(ktab, mnt_mountp,
	    MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mountp) ?
	    ubufp + (off_t)dbtabp->mnt_mountp : NULL);
	STRUCT_FSETP(ktab, mnt_fstype,
	    MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_fstype) ?
	    ubufp + (off_t)dbtabp->mnt_fstype : NULL);
	STRUCT_FSETP(ktab, mnt_mntopts,
	    MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mntopts) ?
	    ubufp + (off_t)dbtabp->mnt_mntopts : NULL);
	STRUCT_FSETP(ktab, mnt_time,
	    ubufp + (off_t)dbtabp->mnt_time);

	/*
	 * Only MNTIOC_GETEXTMNTENT copies out the major and minor numbers;
	 * the other commands copy out the shorter struct mnttab.
	 */
	if (cmd == MNTIOC_GETEXTMNTENT) {
		STRUCT_FSETP(ktab, mnt_major, dbtabp->mnt_major);
		STRUCT_FSETP(ktab, mnt_minor, dbtabp->mnt_minor);
		ssize = SIZEOF_STRUCT(extmnttab, datamodel);
	} else {
		ssize = SIZEOF_STRUCT(mnttab, datamodel);
	}
	if (copyout(STRUCT_BUF(ktab), uemp, ssize))
		return (EFAULT);

	/*
	 * We create a text buffer in the kernel into which we copy the
	 * /etc/mnttab entry for this element. We change the tab and
	 * new-line delimiters to null bytes before copying out the
	 * buffer.
	 */
	kbufp = kmem_alloc(dbbufsize, KM_SLEEP);
	bcopy(elemp->mnte_text, kbufp, dbbufsize);
	*(kbufp + (off_t)dbtabp->mnt_mountp - 1) =
	    *(kbufp + (off_t)dbtabp->mnt_fstype - 1) =
	    *(kbufp + (off_t)dbtabp->mnt_mntopts - 1) =
	    *(kbufp + (off_t)dbtabp->mnt_time - 1) =
	    *(kbufp + dbbufsize - 1) = '\0';
	if (copyout(kbufp, ubufp, dbbufsize))
		error = EFAULT;

	kmem_free(kbufp, dbbufsize);
	return (error);
}

/*
 * ioctl(2) entry point for an mntfs vnode.
 *
 * All commands operate on the ioctl snapshot, mnt_ioctl, of the vnode's
 * mntnode_t; a new snapshot is taken first if none exists (mnts_nmnts is 0)
 * or if MNTS_REWIND has been set. The commands handled are:
 *
 *	MNTIOC_NMNTS		copy out the snapshot's number of mounts
 *	MNTIOC_GETDEVLIST	copy out a major/minor pair for each element
 *	MNTIOC_SETTAG,
 *	MNTIOC_CLRTAG		set or clear a tag via vfs_settag()/vfs_clrtag()
 *	MNTIOC_SHOWHIDDEN	set MNT_SHOWHIDDEN in the node's flags
 *	MNTIOC_GETMNTANY	copy out the next element matching the user's
 *				preferences
 *	MNTIOC_GETMNTENT,
 *	MNTIOC_GETEXTMNTENT	copy out the next element
 *
 * The GETMNT* commands report MNTFS_EOF and MNTFS_TOOLONG through *rvalp
 * rather than through the return value. Any other command fails with EINVAL.
 */
/* ARGSUSED */
static int
mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
    int *rvalp, caller_context_t *ct)
{
	uint_t *up = (uint_t *)arg;
	mntnode_t *mnp = VTOM(vp);
	mntsnap_t *snapp = &mnp->mnt_ioctl;
	int error = 0;
	zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone;
	krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
	model_t datamodel = flag & DATAMODEL_MASK;

	switch (cmd) {
	case MNTIOC_NMNTS:		/* get no. of mounted resources */
	{
		rw_enter(&mnp->mnt_contents, RW_READER);
		if (snapp->mnts_nmnts == 0 ||
		    (snapp->mnts_flags & MNTS_REWIND)) {
			/*
			 * If the upgrade fails the lock is dropped and taken
			 * again as a writer, so the condition is re-checked
			 * before a snapshot is taken.
			 */
			if (!rw_tryupgrade(&mnp->mnt_contents)) {
				rw_exit(&mnp->mnt_contents);
				rw_enter(&mnp->mnt_contents, RW_WRITER);
			}
			if (snapp->mnts_nmnts == 0 ||
			    (snapp->mnts_flags & MNTS_REWIND))
				mntfs_snapshot(mnp, snapp);
		}
		rw_exit(&mnp->mnt_contents);

		if (suword32(up, snapp->mnts_nmnts) != 0)
			error = EFAULT;
		break;
	}

	case MNTIOC_GETDEVLIST:		/* get mounted device major/minor nos */
	{
		size_t len;
		uint_t *devlist;
		mntelem_t *elemp;
		int i = 0;

		rw_enter(&mnp->mnt_contents, RW_READER);
		if (snapp->mnts_nmnts == 0 ||
		    (snapp->mnts_flags & MNTS_REWIND)) {
			/* As above: re-check after a failed upgrade. */
			if (!rw_tryupgrade(&mnp->mnt_contents)) {
				rw_exit(&mnp->mnt_contents);
				rw_enter(&mnp->mnt_contents, RW_WRITER);
			}
			if (snapp->mnts_nmnts == 0 ||
			    (snapp->mnts_flags & MNTS_REWIND))
				mntfs_snapshot(mnp, snapp);
			/* Only read access is needed from here on. */
			rw_downgrade(&mnp->mnt_contents);
		}

		/* Create a local buffer to hold the device numbers. */
		len = 2 * snapp->mnts_nmnts * sizeof (uint_t);
		devlist = kmem_alloc(len, KM_SLEEP);

		/*
		 * Walk the database elements for this snapshot and add their
		 * major and minor numbers.
		 */
		rw_enter(dblockp, RW_READER);
		for (elemp = snapp->mnts_first; elemp;
		    elemp = mntfs_get_next_elem(snapp, elemp)) {
			devlist[2 * i] = elemp->mnte_tab.mnt_major;
			devlist[2 * i + 1] = elemp->mnte_tab.mnt_minor;
			i++;
		}
		rw_exit(dblockp);
		ASSERT(i == snapp->mnts_nmnts);
		rw_exit(&mnp->mnt_contents);

		error = xcopyout(devlist, up, len);
		kmem_free(devlist, len);
		break;
	}

	case MNTIOC_SETTAG:		/* set tag on mounted file system */
	case MNTIOC_CLRTAG:		/* clear tag on mounted file system */
	{
		struct mnttagdesc *dp = (struct mnttagdesc *)arg;
		STRUCT_DECL(mnttagdesc, tagdesc);
		char *cptr;
		uint32_t major, minor;
		char tagbuf[MAX_MNTOPT_TAG];
		char *pbuf;
		size_t len;
		uint_t start = 0;
		mntdata_t *mntdata = MTOD(mnp);
		zone_t *zone = mntdata->mnt_zone_ref.zref_zone;

		STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK);
		if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) {
			error = EFAULT;
			break;
		}

		/*
		 * Build the mount point path in pbuf. In a non-global zone the
		 * zone's root path comes first and the user's path is copied
		 * in over its trailing "/".
		 */
		pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		if (zone != global_zone) {
			(void) strcpy(pbuf, zone->zone_rootpath);
			/* truncate "/" and nul */
			start = zone->zone_rootpathlen - 2;
			ASSERT(pbuf[start] == '/');
		}
		cptr = STRUCT_FGETP(tagdesc, mtd_mntpt);
		error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len);
		if (error) {
			kmem_free(pbuf, MAXPATHLEN);
			break;
		}
		/* Within a zone the user's path must itself begin with "/". */
		if (start != 0 && pbuf[start] != '/') {
			kmem_free(pbuf, MAXPATHLEN);
			error = EINVAL;
			break;
		}
		cptr = STRUCT_FGETP(tagdesc, mtd_tag);
		if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) {
			kmem_free(pbuf, MAXPATHLEN);
			break;
		}
		major = STRUCT_FGET(tagdesc, mtd_major);
		minor = STRUCT_FGET(tagdesc, mtd_minor);
		if (cmd == MNTIOC_SETTAG)
			error = vfs_settag(major, minor, pbuf, tagbuf, cr);
		else
			error = vfs_clrtag(major, minor, pbuf, tagbuf, cr);
		kmem_free(pbuf, MAXPATHLEN);
		break;
	}

	case MNTIOC_SHOWHIDDEN:
	{
		rw_enter(&mnp->mnt_contents, RW_WRITER);
		mnp->mnt_flags |= MNT_SHOWHIDDEN;
		rw_exit(&mnp->mnt_contents);
		break;
	}

	case MNTIOC_GETMNTANY:
	{
		STRUCT_DECL(mntentbuf, embuf);	/* Our copy of user's embuf */
		STRUCT_DECL(extmnttab, ktab);	/* Our copy of user's emp */
		struct extmnttab *uemp;		/* uaddr of user's emp */
		char *ubufp;			/* uaddr of user's text buf */
		size_t ubufsize;		/* size of the above */
		struct extmnttab preftab;	/* our version of user's emp */
		char *prefbuf;			/* our copy of user's text */
		mntelem_t *elemp;		/* a database element */
		struct extmnttab *dbtabp;	/* element's extmnttab */
		char *dbbufp;			/* element's text buf */
		size_t dbbufsize;		/* size of the above */
		vtype_t type;			/* type, if any, of special */


		/*
		 * embuf is a struct mntentbuf within the kernel. We copy into
		 * it the struct mntentbuf supplied by the user.
		 */
		STRUCT_INIT(embuf, datamodel);
		if (copyin((void *) arg, STRUCT_BUF(embuf),
		    STRUCT_SIZE(embuf))) {
			error = EFAULT;
			break;
		}
		uemp = STRUCT_FGETP(embuf, mbuf_emp);
		ubufp = STRUCT_FGETP(embuf, mbuf_buf);
		ubufsize = STRUCT_FGET(embuf, mbuf_bufsize);

		/*
		 * Check that the text buffer offered by the user is the
		 * agreed size.
		 */
		if (ubufsize != MNT_LINE_MAX) {
			error = EINVAL;
			break;
		}

		/* Copy the user-supplied entry into a local buffer. */
		prefbuf = kmem_alloc(MNT_LINE_MAX, KM_SLEEP);
		if (copyin(ubufp, prefbuf, MNT_LINE_MAX)) {
			kmem_free(prefbuf, MNT_LINE_MAX);
			error = EFAULT;
			break;
		}

		/* Ensure that any string within it is null-terminated. */
		*(prefbuf + MNT_LINE_MAX - 1) = 0;

		/* Copy in the user-supplied mpref */
		STRUCT_INIT(ktab, datamodel);
		if (copyin(uemp, STRUCT_BUF(ktab),
		    SIZEOF_STRUCT(mnttab, datamodel))) {
			kmem_free(prefbuf, MNT_LINE_MAX);
			error = EFAULT;
			break;
		}

		/*
		 * Copy the members of the user's pref struct into a local
		 * struct. The pointers need to be offset and verified to
		 * ensure that they lie within the bounds of the buffer.
		 */
		preftab.mnt_special = mntfs_import_addr(STRUCT_FGETP(ktab,
		    mnt_special), ubufp, prefbuf, MNT_LINE_MAX);
		preftab.mnt_mountp = mntfs_import_addr(STRUCT_FGETP(ktab,
		    mnt_mountp), ubufp, prefbuf, MNT_LINE_MAX);
		preftab.mnt_fstype = mntfs_import_addr(STRUCT_FGETP(ktab,
		    mnt_fstype), ubufp, prefbuf, MNT_LINE_MAX);
		preftab.mnt_mntopts = mntfs_import_addr(STRUCT_FGETP(ktab,
		    mnt_mntopts), ubufp, prefbuf, MNT_LINE_MAX);
		preftab.mnt_time = mntfs_import_addr(STRUCT_FGETP(ktab,
		    mnt_time), ubufp, prefbuf, MNT_LINE_MAX);

		/*
		 * If the user specifies a mounted resource that is a special
		 * device then we capture its mode and major and minor numbers;
		 * cf. the block comment below.
		 */
		type = mntfs_special_info_string(preftab.mnt_special,
		    &preftab.mnt_major, &preftab.mnt_minor, cr);

		rw_enter(&mnp->mnt_contents, RW_WRITER);
		if (snapp->mnts_nmnts == 0 ||
		    (snapp->mnts_flags & MNTS_REWIND))
			mntfs_snapshot(mnp, snapp);

		/*
		 * This is the core functionality that implements getmntany().
		 * We walk through the mntfs database until we find an element
		 * matching the user's preferences that are contained in
		 * preftab. Typically, this means checking that the text
		 * matches. However, the mounted resource is special: if the
		 * user is looking for a special device then we must find a
		 * database element with the same major and minor numbers and
		 * the same type, i.e. VBLK or VCHR. The type is not recorded
		 * in the element because it cannot be inferred from the vfs_t.
		 * We therefore check the type of suitable candidates via
		 * mntfs_special_info_element(); since this calls into the
		 * underlying file system we make sure to drop the database lock
		 * first.
		 */
		elemp = snapp->mnts_next;
		rw_enter(dblockp, RW_READER);
		for (;;) {
			/*
			 * Inner loop: stop at the first element whose fields
			 * satisfy every preference, or at the end of the list.
			 */
			for (; elemp; elemp = mntfs_get_next_elem(snapp,
			    elemp)) {
				dbtabp = &elemp->mnte_tab;
				dbbufp = elemp->mnte_text;
				dbbufsize = elemp->mnte_text_size;

				if (((type &&
				    dbtabp->mnt_major == preftab.mnt_major &&
				    dbtabp->mnt_minor == preftab.mnt_minor &&
				    MNTFS_REAL_FIELD(dbbufp)) ||
				    (!type && (!preftab.mnt_special ||
				    mntfs_same_word(preftab.mnt_special,
				    prefbuf, MNT_LINE_MAX, (off_t)0, dbbufp,
				    dbbufsize)))) &&

				    (!preftab.mnt_mountp || mntfs_same_word(
				    preftab.mnt_mountp, prefbuf, MNT_LINE_MAX,
				    (off_t)dbtabp->mnt_mountp, dbbufp,
				    dbbufsize)) &&

				    (!preftab.mnt_fstype || mntfs_same_word(
				    preftab.mnt_fstype, prefbuf, MNT_LINE_MAX,
				    (off_t)dbtabp->mnt_fstype, dbbufp,
				    dbbufsize)) &&

				    (!preftab.mnt_mntopts || mntfs_same_word(
				    preftab.mnt_mntopts, prefbuf, MNT_LINE_MAX,
				    (off_t)dbtabp->mnt_mntopts, dbbufp,
				    dbbufsize)) &&

				    (!preftab.mnt_time || mntfs_same_word(
				    preftab.mnt_time, prefbuf, MNT_LINE_MAX,
				    (off_t)dbtabp->mnt_time, dbbufp,
				    dbbufsize)))
					break;
			}
			rw_exit(dblockp);

			/*
			 * Finish if the list is exhausted, if no device type
			 * is being sought, or if the candidate's type matches.
			 * Otherwise resume the walk after this candidate.
			 */
			if (elemp == NULL || type == 0 ||
			    type == mntfs_special_info_element(elemp, cr))
				break;

			rw_enter(dblockp, RW_READER);
			elemp = mntfs_get_next_elem(snapp, elemp);
		}

		kmem_free(prefbuf, MNT_LINE_MAX);

		/* If we failed to find a match then return EOF. */
		if (elemp == NULL) {
			rw_exit(&mnp->mnt_contents);
			*rvalp = MNTFS_EOF;
			break;
		}

		/*
		 * Check that the text buffer offered by the user will be large
		 * enough to accommodate the text for this entry.
		 */
		if (elemp->mnte_text_size > MNT_LINE_MAX) {
			rw_exit(&mnp->mnt_contents);
			*rvalp = MNTFS_TOOLONG;
			break;
		}

		/*
		 * Populate the user's struct mnttab and text buffer using the
		 * element's contents.
		 */
		if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) {
			error = EFAULT;
		} else {
			/* Advance the snapshot only after a good copyout. */
			rw_enter(dblockp, RW_READER);
			elemp = mntfs_get_next_elem(snapp, elemp);
			rw_exit(dblockp);
			snapp->mnts_next = elemp;
		}
		rw_exit(&mnp->mnt_contents);
		break;
	}

	case MNTIOC_GETMNTENT:
	case MNTIOC_GETEXTMNTENT:
	{
		STRUCT_DECL(mntentbuf, embuf);	/* Our copy of user's embuf */
		struct extmnttab *uemp;		/* uaddr of user's emp */
		char *ubufp;			/* uaddr of user's text buf */
		size_t ubufsize;		/* size of the above */
		mntelem_t *elemp;		/* a database element */


		rw_enter(&mnp->mnt_contents, RW_WRITER);
		if (snapp->mnts_nmnts == 0 ||
		    (snapp->mnts_flags & MNTS_REWIND))
			mntfs_snapshot(mnp, snapp);
		if ((elemp = snapp->mnts_next) == NULL) {
			rw_exit(&mnp->mnt_contents);
			*rvalp = MNTFS_EOF;
			break;
		}

		/*
		 * embuf is a struct mntentbuf within the kernel. We copy into
		 * it the struct mntentbuf supplied by the user.
		 */
		STRUCT_INIT(embuf, datamodel);
		if (copyin((void *) arg, STRUCT_BUF(embuf),
		    STRUCT_SIZE(embuf))) {
			rw_exit(&mnp->mnt_contents);
			error = EFAULT;
			break;
		}
		uemp = STRUCT_FGETP(embuf, mbuf_emp);
		ubufp = STRUCT_FGETP(embuf, mbuf_buf);
		ubufsize = STRUCT_FGET(embuf, mbuf_bufsize);

		/*
		 * Check that the text buffer offered by the user will be large
		 * enough to accommodate the text for this entry.
		 */
		if (elemp->mnte_text_size > ubufsize) {
			rw_exit(&mnp->mnt_contents);
			*rvalp = MNTFS_TOOLONG;
			break;
		}

		/*
		 * Populate the user's struct mnttab and text buffer using the
		 * element's contents.
		 */
		if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) {
			error = EFAULT;
		} else {
			/* Advance the snapshot only after a good copyout. */
			rw_enter(dblockp, RW_READER);
			elemp = mntfs_get_next_elem(snapp, elemp);
			rw_exit(dblockp);
			snapp->mnts_next = elemp;
		}
		rw_exit(&mnp->mnt_contents);
		break;
	}

	default:
		error = EINVAL;
		break;
	}

	return (error);
}

/*
 * mntfs provides a new vnode for each open(2). Two vnodes will represent the
 * same instance of /etc/mnttab if they share the same (zone-specific) vfs.
*/ /* ARGSUSED */ int mntcmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct) { return (vp1 != NULL && vp2 != NULL && vp1->v_vfsp == vp2->v_vfsp); } /* * /mntfs vnode operations vector */ const fs_operation_def_t mnt_vnodeops_template[] = { VOPNAME_OPEN, { .vop_open = mntopen }, VOPNAME_CLOSE, { .vop_close = mntclose }, VOPNAME_READ, { .vop_read = mntread }, VOPNAME_IOCTL, { .vop_ioctl = mntioctl }, VOPNAME_GETATTR, { .vop_getattr = mntgetattr }, VOPNAME_ACCESS, { .vop_access = mntaccess }, VOPNAME_FSYNC, { .vop_fsync = mntfsync }, VOPNAME_INACTIVE, { .vop_inactive = mntinactive }, VOPNAME_SEEK, { .vop_seek = mntseek }, VOPNAME_POLL, { .vop_poll = mntpoll }, VOPNAME_CMP, { .vop_cmp = mntcmp }, VOPNAME_DISPOSE, { .error = fs_error }, VOPNAME_SHRLOCK, { .error = fs_error }, NULL, NULL };