/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved. */ /* * utility routines for the /dev fs */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEBUG int sdev_debug = 0x00000001; int sdev_debug_cache_flags = 0; #endif /* * globals */ /* prototype memory vattrs */ vattr_t sdev_vattr_dir = { AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ VDIR, /* va_type */ SDEV_DIRMODE_DEFAULT, /* va_mode */ SDEV_UID_DEFAULT, /* va_uid */ SDEV_GID_DEFAULT, /* va_gid */ 0, /* va_fsid */ 0, /* va_nodeid */ 0, /* va_nlink */ 0, /* va_size */ 0, /* va_atime */ 0, /* va_mtime */ 0, /* va_ctime */ 0, /* va_rdev */ 0, /* va_blksize */ 0, /* va_nblocks */ 0 /* va_vcode */ }; vattr_t sdev_vattr_lnk = { AT_TYPE|AT_MODE, /* va_mask */ VLNK, /* va_type */ SDEV_LNKMODE_DEFAULT, /* va_mode */ SDEV_UID_DEFAULT, /* va_uid */ SDEV_GID_DEFAULT, /* va_gid */ 0, /* 
va_fsid */ 0, /* va_nodeid */ 0, /* va_nlink */ 0, /* va_size */ 0, /* va_atime */ 0, /* va_mtime */ 0, /* va_ctime */ 0, /* va_rdev */ 0, /* va_blksize */ 0, /* va_nblocks */ 0 /* va_vcode */ }; vattr_t sdev_vattr_blk = { AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ VBLK, /* va_type */ S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */ SDEV_UID_DEFAULT, /* va_uid */ SDEV_GID_DEFAULT, /* va_gid */ 0, /* va_fsid */ 0, /* va_nodeid */ 0, /* va_nlink */ 0, /* va_size */ 0, /* va_atime */ 0, /* va_mtime */ 0, /* va_ctime */ 0, /* va_rdev */ 0, /* va_blksize */ 0, /* va_nblocks */ 0 /* va_vcode */ }; vattr_t sdev_vattr_chr = { AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ VCHR, /* va_type */ S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */ SDEV_UID_DEFAULT, /* va_uid */ SDEV_GID_DEFAULT, /* va_gid */ 0, /* va_fsid */ 0, /* va_nodeid */ 0, /* va_nlink */ 0, /* va_size */ 0, /* va_atime */ 0, /* va_mtime */ 0, /* va_ctime */ 0, /* va_rdev */ 0, /* va_blksize */ 0, /* va_nblocks */ 0 /* va_vcode */ }; kmem_cache_t *sdev_node_cache; /* sdev_node cache */ int devtype; /* fstype */ /* static */ static struct vnodeops *sdev_get_vop(struct sdev_node *); static void sdev_set_no_negcache(struct sdev_node *); static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []); static void sdev_free_vtab(fs_operation_def_t *); static void sdev_prof_free(struct sdev_node *dv) { ASSERT(!SDEV_IS_GLOBAL(dv)); nvlist_free(dv->sdev_prof.dev_name); nvlist_free(dv->sdev_prof.dev_map); nvlist_free(dv->sdev_prof.dev_symlink); nvlist_free(dv->sdev_prof.dev_glob_incdir); nvlist_free(dv->sdev_prof.dev_glob_excdir); bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); } /* sdev_node cache constructor */ /*ARGSUSED1*/ static int i_sdev_node_ctor(void *buf, void *cfarg, int flag) { struct sdev_node *dv = (struct sdev_node *)buf; struct vnode *vp; bzero(buf, sizeof (struct sdev_node)); vp = dv->sdev_vnode = vn_alloc(flag); if (vp == NULL) { return (-1); } vp->v_data = dv; rw_init(&dv->sdev_contents, NULL, 
RW_DEFAULT, NULL); return (0); } /* sdev_node cache destructor */ /*ARGSUSED1*/ static void i_sdev_node_dtor(void *buf, void *arg) { struct sdev_node *dv = (struct sdev_node *)buf; struct vnode *vp = SDEVTOV(dv); rw_destroy(&dv->sdev_contents); vn_free(vp); } /* initialize sdev_node cache */ void sdev_node_cache_init() { int flags = 0; #ifdef DEBUG flags = sdev_debug_cache_flags; if (flags) sdcmn_err(("cache debug flags 0x%x\n", flags)); #endif /* DEBUG */ ASSERT(sdev_node_cache == NULL); sdev_node_cache = kmem_cache_create("sdev_node_cache", sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor, NULL, NULL, NULL, flags); } /* destroy sdev_node cache */ void sdev_node_cache_fini() { ASSERT(sdev_node_cache != NULL); kmem_cache_destroy(sdev_node_cache); sdev_node_cache = NULL; } /* * Compare two nodes lexographically to balance avl tree */ static int sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2) { int rv; if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0) return (0); return ((rv < 0) ? -1 : 1); } void sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state) { ASSERT(dv); ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); dv->sdev_state = state; } static void sdev_attr_update(struct sdev_node *dv, vattr_t *vap) { timestruc_t now; struct vattr *attrp; uint_t mask; ASSERT(dv->sdev_attr); ASSERT(vap); attrp = dv->sdev_attr; mask = vap->va_mask; if (mask & AT_TYPE) attrp->va_type = vap->va_type; if (mask & AT_MODE) attrp->va_mode = vap->va_mode; if (mask & AT_UID) attrp->va_uid = vap->va_uid; if (mask & AT_GID) attrp->va_gid = vap->va_gid; if (mask & AT_RDEV) attrp->va_rdev = vap->va_rdev; gethrestime(&now); attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now; attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now; attrp->va_ctime = (mask & AT_CTIME) ? 
vap->va_ctime : now; } static void sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap) { ASSERT(dv->sdev_attr == NULL); ASSERT(vap->va_mask & AT_TYPE); ASSERT(vap->va_mask & AT_MODE); dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP); sdev_attr_update(dv, vap); } /* alloc and initialize a sdev_node */ int sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, vattr_t *vap) { struct sdev_node *dv = NULL; struct vnode *vp; size_t nmlen, len; devname_handle_t *dhl; nmlen = strlen(nm) + 1; if (nmlen > MAXNAMELEN) { sdcmn_err9(("sdev_nodeinit: node name %s" " too long\n", nm)); *newdv = NULL; return (ENAMETOOLONG); } dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP); bcopy(nm, dv->sdev_name, nmlen); dv->sdev_namelen = nmlen - 1; /* '\0' not included */ len = strlen(ddv->sdev_path) + strlen(nm) + 2; dv->sdev_path = kmem_alloc(len, KM_SLEEP); (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm); /* overwritten for VLNK nodes */ dv->sdev_symlink = NULL; vp = SDEVTOV(dv); vn_reinit(vp); vp->v_vfsp = SDEVTOV(ddv)->v_vfsp; if (vap) vp->v_type = vap->va_type; /* * initialized to the parent's vnodeops. 
* maybe overwriten for a VDIR */ vn_setops(vp, vn_getops(SDEVTOV(ddv))); vn_exists(vp); dv->sdev_dotdot = NULL; dv->sdev_attrvp = NULL; if (vap) { sdev_attr_alloc(dv, vap); } else { dv->sdev_attr = NULL; } dv->sdev_ino = sdev_mkino(dv); dv->sdev_nlink = 0; /* updated on insert */ dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */ dv->sdev_flags |= SDEV_BUILD; mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); if (SDEV_IS_GLOBAL(ddv)) { dv->sdev_flags |= SDEV_GLOBAL; dhl = &(dv->sdev_handle); dhl->dh_data = dv; dhl->dh_args = NULL; sdev_set_no_negcache(dv); dv->sdev_gdir_gen = 0; } else { dv->sdev_flags &= ~SDEV_GLOBAL; dv->sdev_origin = NULL; /* set later */ bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); dv->sdev_ldir_gen = 0; dv->sdev_devtree_gen = 0; } rw_enter(&dv->sdev_contents, RW_WRITER); sdev_set_nodestate(dv, SDEV_INIT); rw_exit(&dv->sdev_contents); *newdv = dv; return (0); } /* * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the * caller to transition the node to the SDEV_ZOMBIE state. 
*/
/*
 * dv    - node to make ready; must not already be SDEV_READY
 * vap   - attributes for the node; va_type and va_rdev are applied to the
 *         vnode
 * avp   - optional attribute (backing store) vnode
 * args  - symlink target for VLNK nodes; also stored as the origin node
 *         for non-global nodes
 * cred  - credentials passed to sdev_shadow_node()
 *
 * Returns 0 on success, or the error returned by sdev_shadow_node().
 */
int
sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
    void *args, struct cred *cred)
{
	struct vnode *nvp = SDEVTOV(dv);
	vtype_t ntype;
	int rv = 0;

	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);

	ntype = vap->va_type;
	nvp->v_type = ntype;
	nvp->v_rdev = vap->va_rdev;

	rw_enter(&dv->sdev_contents, RW_WRITER);

	switch (ntype) {
	case VDIR:
		dv->sdev_nlink = 2;
		/* start clean; sdev_get_vop() may set these flags again */
		dv->sdev_flags &= ~(SDEV_PERSIST | SDEV_DYNAMIC);
		vn_setops(nvp, sdev_get_vop(dv)); /* from internal vtab */
		ASSERT(dv->sdev_dotdot);
		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
		/* a directory takes its rdev from the parent directory */
		nvp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
		avl_create(&dv->sdev_entries,
		    (int (*)(const void *, const void *))sdev_compare_nodes,
		    sizeof (struct sdev_node),
		    offsetof(struct sdev_node, sdev_avllink));
		break;
	case VLNK:
		ASSERT(args);
		dv->sdev_nlink = 1;
		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
		break;
	default:
		dv->sdev_nlink = 1;
		break;
	}

	if (!(SDEV_IS_GLOBAL(dv))) {
		dv->sdev_origin = (struct sdev_node *)args;
		dv->sdev_flags &= ~SDEV_PERSIST;
	}

	/*
	 * shadow node is created here OR
	 * if failed (indicated by dv->sdev_attrvp == NULL),
	 * created later in sdev_setattr
	 */
	if (avp) {
		dv->sdev_attrvp = avp;
	} else {
		if (dv->sdev_attr != NULL)
			sdev_attr_update(dv, vap);
		else
			sdev_attr_alloc(dv, vap);

		if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
			rv = sdev_shadow_node(dv, cred);
	}

	/* transition to READY state only when nothing failed */
	if (rv == 0) {
		sdev_set_nodestate(dv, SDEV_READY);
		sdev_nc_node_exists(dv);
	}

	rw_exit(&dv->sdev_contents);
	return (rv);
}

/*
 * Build the VROOT sdev_node.
*/
/*
 * vfsp    - vfs the root belongs to; its mount point (when set) becomes the
 *           node name
 * devdev  - device number stored in the root vnode's v_rdev
 * mvp     - unused here
 * avp     - attribute vnode for the root; must be non-NULL
 * cred    - unused here
 *
 * Returns the new root node in SDEV_READY state.  The node is flagged
 * global and persistent only when its name is exactly "/dev".
 */
/*ARGSUSED*/
struct sdev_node *
sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
    struct vnode *avp, struct cred *cred)
{
	struct sdev_node *dv;
	struct vnode *vp;
	char devdir[] = "/dev";

	ASSERT(sdev_node_cache != NULL);
	ASSERT(avp);
	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_flag |= VROOT;
	vp->v_vfsp = vfsp;
	vp->v_type = VDIR;
	vp->v_rdev = devdev;
	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
	vn_exists(vp);

	if (vfsp->vfs_mntpt)
		dv->sdev_name = i_ddi_strdup(
		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
	else
		/* vfs_mountdev1 set mount point later */
		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
	/* the path is always "/dev", whatever the mount point name is */
	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
	dv->sdev_ino = SDEV_ROOTINO;
	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
	dv->sdev_dotdot = dv;		/* .. == self */
	dv->sdev_attrvp = avp;
	dv->sdev_attr = NULL;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	if (strcmp(dv->sdev_name, "/dev") == 0) {
		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
		dv->sdev_gdir_gen = 0;
	} else {
		dv->sdev_flags = SDEV_BUILD;
		dv->sdev_flags &= ~SDEV_PERSIST;
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	avl_create(&dv->sdev_entries,
	    (int (*)(const void *, const void *))sdev_compare_nodes,
	    sizeof (struct sdev_node),
	    offsetof(struct sdev_node, sdev_avllink));

	/* sdev_set_nodestate() asserts that the writer lock is held */
	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_READY);
	rw_exit(&dv->sdev_contents);
	sdev_nc_node_exists(dv);
	return (dv);
}

/* directory dependent vop table */
struct sdev_vop_table {
	char *vt_name;				/* subdirectory name */
	const fs_operation_def_t *vt_service;	/* vnodeops table */
	struct vnodeops *vt_vops;		/* constructed vop */
	struct vnodeops **vt_global_vops;	/* global container for vop */
	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
	int vt_flags;				/* SDEV_* flags for the dir */
};

/*
 * A nice improvement would be to provide a plug-in mechanism
 * for this table instead of a const table.
 *
 * Entries are matched by name against the path below /dev; the table is
 * terminated by an entry whose vt_name is NULL.
 */
static struct sdev_vop_table vtab[] = {
	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
	devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },

	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },

	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
	devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },

	/*
	 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
	 * lofi driver controls child nodes.
	 *
	 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
	 * stale nodes (e.g. from devfsadm -R).
	 *
	 * In addition, devfsadm knows not to attempt a rmdir: a zone
	 * may hold a reference, which would zombify the node,
	 * preventing a mkdir.
	 */

	{ "lofi", NULL, NULL, NULL, NULL,
	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
	{ "rlofi", NULL, NULL, NULL, NULL,
	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },

	{ NULL, NULL, NULL, NULL, NULL, 0}
};

/*
 * We need to match off of the sdev_path, not the sdev_name. We are only allowed
 * to exist directly under /dev.
*/ struct sdev_vop_table * sdev_match(struct sdev_node *dv) { int vlen; int i; const char *path; if (strlen(dv->sdev_path) <= 5) return (NULL); if (strncmp(dv->sdev_path, "/dev/", 5) != 0) return (NULL); path = dv->sdev_path + 5; for (i = 0; vtab[i].vt_name; i++) { if (strcmp(vtab[i].vt_name, path) == 0) return (&vtab[i]); if (vtab[i].vt_flags & SDEV_SUBDIR) { vlen = strlen(vtab[i].vt_name); if ((strncmp(vtab[i].vt_name, path, vlen - 1) == 0) && path[vlen] == '/') return (&vtab[i]); } } return (NULL); } /* * sets a directory's vnodeops if the directory is in the vtab; */ static struct vnodeops * sdev_get_vop(struct sdev_node *dv) { struct sdev_vop_table *vtp; char *path; path = dv->sdev_path; ASSERT(path); /* gets the relative path to /dev/ */ path += 5; /* gets the vtab entry it matches */ if ((vtp = sdev_match(dv)) != NULL) { dv->sdev_flags |= vtp->vt_flags; if (SDEV_IS_PERSIST(dv->sdev_dotdot) && (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv))) dv->sdev_flags |= SDEV_PERSIST; if (vtp->vt_vops) { if (vtp->vt_global_vops) *(vtp->vt_global_vops) = vtp->vt_vops; return (vtp->vt_vops); } if (vtp->vt_service) { fs_operation_def_t *templ; templ = sdev_merge_vtab(vtp->vt_service); if (vn_make_ops(vtp->vt_name, (const fs_operation_def_t *)templ, &vtp->vt_vops) != 0) { cmn_err(CE_PANIC, "%s: malformed vnode ops\n", vtp->vt_name); /*NOTREACHED*/ } if (vtp->vt_global_vops) { *(vtp->vt_global_vops) = vtp->vt_vops; } sdev_free_vtab(templ); return (vtp->vt_vops); } return (sdev_vnodeops); } /* child inherits the persistence of the parent */ if (SDEV_IS_PERSIST(dv->sdev_dotdot)) dv->sdev_flags |= SDEV_PERSIST; return (sdev_vnodeops); } static void sdev_set_no_negcache(struct sdev_node *dv) { int i; char *path; ASSERT(dv->sdev_path); path = dv->sdev_path + strlen("/dev/"); for (i = 0; vtab[i].vt_name; i++) { if (strcmp(vtab[i].vt_name, path) == 0) { if (vtab[i].vt_flags & SDEV_NO_NCACHE) dv->sdev_flags |= SDEV_NO_NCACHE; break; } } } void * sdev_get_vtor(struct sdev_node *dv) { 
struct sdev_vop_table *vtp; vtp = sdev_match(dv); if (vtp) return ((void *)vtp->vt_vtor); else return (NULL); } /* * Build the base root inode */ ino_t sdev_mkino(struct sdev_node *dv) { ino_t ino; /* * for now, follow the lead of tmpfs here * need to someday understand the requirements here */ ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3); ino += SDEV_ROOTINO + 1; return (ino); } int sdev_getlink(struct vnode *linkvp, char **link) { int err; char *buf; struct uio uio = {0}; struct iovec iov = {0}; if (linkvp == NULL) return (ENOENT); ASSERT(linkvp->v_type == VLNK); buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); iov.iov_base = buf; iov.iov_len = MAXPATHLEN; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_resid = MAXPATHLEN; uio.uio_segflg = UIO_SYSSPACE; uio.uio_llimit = MAXOFFSET_T; err = VOP_READLINK(linkvp, &uio, kcred, NULL); if (err) { cmn_err(CE_WARN, "readlink %s failed in dev\n", buf); kmem_free(buf, MAXPATHLEN); return (ENOENT); } /* mission complete */ *link = i_ddi_strdup(buf, KM_SLEEP); kmem_free(buf, MAXPATHLEN); return (0); } /* * A convenient wrapper to get the devfs node vnode for a device * minor functionality: readlink() of a /dev symlink * Place the link into dv->sdev_symlink */ static int sdev_follow_link(struct sdev_node *dv) { int err; struct vnode *linkvp; char *link = NULL; linkvp = SDEVTOV(dv); if (linkvp == NULL) return (ENOENT); ASSERT(linkvp->v_type == VLNK); err = sdev_getlink(linkvp, &link); if (err) { dv->sdev_symlink = NULL; return (ENOENT); } ASSERT(link != NULL); dv->sdev_symlink = link; return (0); } static int sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs) { vtype_t otype = SDEVTOV(dv)->v_type; /* * existing sdev_node has a different type. */ if (otype != nvap->va_type) { sdcmn_err9(("sdev_node_check: existing node " " %s type %d does not match new node type %d\n", dv->sdev_name, otype, nvap->va_type)); return (EEXIST); } /* * For a symlink, the target should be the same. 
*/ if (otype == VLNK) { ASSERT(nargs != NULL); ASSERT(dv->sdev_symlink != NULL); if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) { sdcmn_err9(("sdev_node_check: existing node " " %s has different symlink %s as new node " " %s\n", dv->sdev_name, dv->sdev_symlink, (char *)nargs)); return (EEXIST); } } return (0); } /* * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready() * * arguments: * - ddv (parent) * - nm (child name) * - newdv (sdev_node for nm is returned here) * - vap (vattr for the node to be created, va_type should be set. * - avp (attribute vnode) * the defaults should be used if unknown) * - cred * - args * . tnm (for VLNK) * . global sdev_node (for !SDEV_GLOBAL) * - state: SDEV_INIT, SDEV_READY * * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT) * * NOTE: directory contents writers lock needs to be held before * calling this routine. */ int sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, struct vattr *vap, struct vnode *avp, void *args, struct cred *cred, sdev_node_state_t state) { int error = 0; sdev_node_state_t node_state; struct sdev_node *dv = NULL; ASSERT(state != SDEV_ZOMBIE); ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); if (*newdv) { dv = *newdv; } else { /* allocate and initialize a sdev_node */ if (ddv->sdev_state == SDEV_ZOMBIE) { sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n", ddv->sdev_path)); return (ENOENT); } error = sdev_nodeinit(ddv, nm, &dv, vap); if (error != 0) { sdcmn_err9(("sdev_mknode: error %d," " name %s can not be initialized\n", error, nm)); return (error); } ASSERT(dv); /* insert into the directory cache */ sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD); } ASSERT(dv); node_state = dv->sdev_state; ASSERT(node_state != SDEV_ZOMBIE); if (state == SDEV_READY) { switch (node_state) { case SDEV_INIT: error = sdev_nodeready(dv, vap, avp, args, cred); if (error) { sdcmn_err9(("sdev_mknode: node %s can NOT" " be transitioned into READY state, " "error %d\n", nm, error)); } 
break; case SDEV_READY: /* * Do some sanity checking to make sure * the existing sdev_node is what has been * asked for. */ error = sdev_node_check(dv, vap, args); break; default: break; } } if (!error) { *newdv = dv; ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE); } else { sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); /* * We created this node, it wasn't passed into us. Therefore it * is up to us to delete it. */ if (*newdv == NULL) SDEV_SIMPLE_RELE(dv); *newdv = NULL; } return (error); } /* * convenient wrapper to change vp's ATIME, CTIME and MTIME */ void sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask) { struct vattr attr; timestruc_t now; int err; ASSERT(vp); gethrestime(&now); if (mask & AT_CTIME) attr.va_ctime = now; if (mask & AT_MTIME) attr.va_mtime = now; if (mask & AT_ATIME) attr.va_atime = now; attr.va_mask = (mask & AT_TIMES); err = VOP_SETATTR(vp, &attr, 0, cred, NULL); if (err && (err != EROFS)) { sdcmn_err(("update timestamps error %d\n", err)); } } /* * the backing store vnode is released here */ /*ARGSUSED1*/ void sdev_nodedestroy(struct sdev_node *dv, uint_t flags) { /* no references */ ASSERT(dv->sdev_nlink == 0); if (dv->sdev_attrvp != NULLVP) { VN_RELE(dv->sdev_attrvp); /* * reset the attrvp so that no more * references can be made on this already * vn_rele() vnode */ dv->sdev_attrvp = NULLVP; } if (dv->sdev_attr != NULL) { kmem_free(dv->sdev_attr, sizeof (struct vattr)); dv->sdev_attr = NULL; } if (dv->sdev_name != NULL) { kmem_free(dv->sdev_name, dv->sdev_namelen + 1); dv->sdev_name = NULL; } if (dv->sdev_symlink != NULL) { kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1); dv->sdev_symlink = NULL; } if (dv->sdev_path) { kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1); dv->sdev_path = NULL; } if (!SDEV_IS_GLOBAL(dv)) sdev_prof_free(dv); if (SDEVTOV(dv)->v_type == VDIR) { ASSERT(SDEV_FIRST_ENTRY(dv) == NULL); avl_destroy(&dv->sdev_entries); } mutex_destroy(&dv->sdev_lookup_lock); 
cv_destroy(&dv->sdev_lookup_cv); /* return node to initial state as per constructor */ (void) memset((void *)&dv->sdev_instance_data, 0, sizeof (dv->sdev_instance_data)); vn_invalid(SDEVTOV(dv)); kmem_cache_free(sdev_node_cache, dv); } /* * DIRECTORY CACHE lookup */ struct sdev_node * sdev_findbyname(struct sdev_node *ddv, char *nm) { struct sdev_node *dv; struct sdev_node dvtmp; avl_index_t where; ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); dvtmp.sdev_name = nm; dv = avl_find(&ddv->sdev_entries, &dvtmp, &where); if (dv) { ASSERT(dv->sdev_dotdot == ddv); ASSERT(strcmp(dv->sdev_name, nm) == 0); ASSERT(dv->sdev_state != SDEV_ZOMBIE); SDEV_HOLD(dv); return (dv); } return (NULL); } /* * Inserts a new sdev_node in a parent directory */ void sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv) { avl_index_t where; ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); ASSERT(SDEVTOV(ddv)->v_type == VDIR); ASSERT(ddv->sdev_nlink >= 2); ASSERT(dv->sdev_nlink == 0); ASSERT(dv->sdev_state != SDEV_ZOMBIE); dv->sdev_dotdot = ddv; VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL); avl_insert(&ddv->sdev_entries, dv, where); ddv->sdev_nlink++; } /* * The following check is needed because while sdev_nodes are linked * in SDEV_INIT state, they have their link counts incremented only * in SDEV_READY state. */ static void decr_link(struct sdev_node *dv) { VERIFY(RW_WRITE_HELD(&dv->sdev_contents)); if (dv->sdev_state != SDEV_INIT) { VERIFY(dv->sdev_nlink >= 1); dv->sdev_nlink--; } else { VERIFY(dv->sdev_nlink == 0); } } /* * Delete an existing dv from directory cache * * In the case of a node is still held by non-zero reference count, the node is * put into ZOMBIE state. The node is always unlinked from its parent, but it is * not destroyed via sdev_inactive until its reference count reaches "0". 
*/
static void
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
	struct vnode *vp;
	sdev_node_state_t os;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	vp = SDEVTOV(dv);
	/* lock order here: vnode lock first, then the node's contents lock */
	mutex_enter(&vp->v_lock);
	rw_enter(&dv->sdev_contents, RW_WRITER);
	os = dv->sdev_state;
	ASSERT(os != SDEV_ZOMBIE);
	dv->sdev_state = SDEV_ZOMBIE;

	/*
	 * unlink ourselves from the parent directory now to take care of the ..
	 * link. However, if we're a directory, we don't remove our reference to
	 * ourself eg. '.' until we are torn down in the inactive callback.
	 */
	decr_link(ddv);
	avl_remove(&ddv->sdev_entries, dv);
	/*
	 * sdev_inactive expects nodes to have a link to themselves when we're
	 * tearing them down. If we're transitioning from the initial state to
	 * zombie and not via ready, then we're not going to have this link that
	 * comes from the node being ready. As a result, we need to increment
	 * our link count by one to account for this.
	 */
	if (os == SDEV_INIT && dv->sdev_nlink == 0)
		dv->sdev_nlink++;
	rw_exit(&dv->sdev_contents);
	mutex_exit(&vp->v_lock);
}

/*
 * check if the source is in the path of the target
 *
 * source and target are different
 *
 * Walks the ".." chain upwards from tdv until the root (a node that is its
 * own parent) is reached.  Returns EINVAL if sdv is found among tdv's
 * ancestors, 0 otherwise.
 */
/*ARGSUSED2*/
static int
sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
{
	int error = 0;
	struct sdev_node *dotdot, *dir;

	dotdot = tdv->sdev_dotdot;
	ASSERT(dotdot);

	/* fs root */
	if (dotdot == tdv) {
		return (0);
	}

	for (;;) {
		/*
		 * avoid error cases like
		 *	mv a a/b
		 *	mv a a/b/c
		 *	etc.
		 */
		if (dotdot == sdv) {
			error = EINVAL;
			break;
		}

		dir = dotdot;
		dotdot = dir->sdev_dotdot;

		/* done checking because root is reached */
		if (dir == dotdot) {
			break;
		}
	}
	return (error);
}

/*
 * Rename node odv (in directory oddv) to name nnm in directory nddv.
 *
 * oddv  - source parent directory
 * odv   - node being renamed
 * nddv  - destination parent directory
 * ndvp  - in: existing destination node, or pointer to NULL if none;
 *         out: the newly created destination node
 * nnm   - destination name
 * cred  - caller credentials
 *
 * An existing destination is deleted first (and removed from the backing
 * store where applicable); a fresh node is then made from the source's
 * attributes.  For directories the children are renamed recursively into
 * the new directory.  Both parents' contents locks are taken as writer here
 * and dropped before returning.  Returns 0 or an errno value.
 */
int
sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
    struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
    struct cred *cred)
{
	int error = 0;
	struct vnode *ovp = SDEVTOV(odv);
	struct vnode *nvp;
	struct vattr vattr;
	int doingdir = (ovp->v_type == VDIR);
	char *link = NULL;
	int samedir = (oddv == nddv) ? 1 : 0;
	int bkstore = 0;
	struct sdev_node *idv = NULL;
	struct sdev_node *ndv = NULL;
	timestruc_t now;

	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
	if (error)
		return (error);

	/* when both parents are the same node, lock it only once */
	if (!samedir)
		rw_enter(&oddv->sdev_contents, RW_WRITER);
	rw_enter(&nddv->sdev_contents, RW_WRITER);

	/*
	 * the source may have been deleted by another thread before
	 * we get here.
	 */
	if (odv->sdev_state != SDEV_READY) {
		error = ENOENT;
		goto err_out;
	}

	/* a directory cannot be moved into itself */
	if (doingdir && (odv == nddv)) {
		error = EINVAL;
		goto err_out;
	}

	/*
	 * If renaming a directory, and the parents are different (".." must be
	 * changed) then the source dir must not be in the dir hierarchy above
	 * the target since it would orphan everything below the source dir.
	 */
	if (doingdir && (oddv != nddv)) {
		error = sdev_checkpath(odv, nddv, cred);
		if (error)
			goto err_out;
	}

	/* fix the source for a symlink */
	if (vattr.va_type == VLNK) {
		if (odv->sdev_symlink == NULL) {
			error = sdev_follow_link(odv);
			if (error) {
				/*
				 * The underlying symlink doesn't exist. This
				 * node probably shouldn't even exist. While
				 * it's a bit jarring to consumers, we're going
				 * to remove the node from /dev.
				 */
				/*
				 * NOTE(review): *ndvp is used here without
				 * the NULL check that the "destination
				 * existing" code below applies - confirm it
				 * cannot be NULL on this path.
				 */
				if (SDEV_IS_PERSIST((*ndvp)))
					bkstore = 1;
				sdev_dirdelete(oddv, odv);
				if (bkstore) {
					ASSERT(nddv->sdev_attrvp);
					error = VOP_REMOVE(nddv->sdev_attrvp,
					    nnm, cred, NULL, 0);
					if (error)
						goto err_out;
				}
				error = ENOENT;
				goto err_out;
			}
		}
		ASSERT(odv->sdev_symlink);
		/* private copy of the target; freed before returning */
		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
	}

	/* destination existing */
	if (*ndvp) {
		nvp = SDEVTOV(*ndvp);
		ASSERT(nvp);

		/* handling renaming to itself */
		if (odv == *ndvp) {
			error = 0;
			goto err_out;
		}

		if (nvp->v_type == VDIR) {
			if (!doingdir) {
				error = EISDIR;
				goto err_out;
			}

			if (vn_vfswlock(nvp)) {
				error = EBUSY;
				goto err_out;
			}

			if (vn_mountedvfs(nvp) != NULL) {
				vn_vfsunlock(nvp);
				error = EBUSY;
				goto err_out;
			}

			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
			if ((*ndvp)->sdev_nlink > 2) {
				vn_vfsunlock(nvp);
				error = EEXIST;
				goto err_out;
			}
			vn_vfsunlock(nvp);

			/*
			 * We did not place the hold on *ndvp, so even though
			 * we're deleting the node, we should not get rid of our
			 * reference.
			 */
			sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			ASSERT(nddv->sdev_attrvp);
			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
			    nddv->sdev_attrvp, cred, NULL, 0);
			if (error)
				goto err_out;
		} else {
			if (doingdir) {
				error = ENOTDIR;
				goto err_out;
			}

			if (SDEV_IS_PERSIST((*ndvp))) {
				bkstore = 1;
			}

			/*
			 * Get rid of the node from the directory cache note.
			 * Don't forget that it's not up to us to remove the vn
			 * ref on the sdev node, as we did not place it.
			 */
			sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			if (bkstore) {
				ASSERT(nddv->sdev_attrvp);
				error = VOP_REMOVE(nddv->sdev_attrvp,
				    nnm, cred, NULL, 0);
				if (error)
					goto err_out;
			}
		}
	}

	/*
	 * make a fresh node from the source attrs
	 */
	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
	    NULL, (void *)link, cred, SDEV_READY);

	if (link != NULL) {
		kmem_free(link, strlen(link) + 1);
		link = NULL;
	}

	if (error)
		goto err_out;
	ASSERT(*ndvp);
	ASSERT((*ndvp)->sdev_state == SDEV_READY);

	/* move dir contents */
	if (doingdir) {
		for (idv = SDEV_FIRST_ENTRY(odv); idv;
		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
			/* keep the child held across the recursive rename */
			SDEV_HOLD(idv);
			error = sdev_rnmnode(odv, idv,
			    (struct sdev_node *)(*ndvp), &ndv,
			    idv->sdev_name, cred);
			SDEV_RELE(idv);
			if (error)
				goto err_out;
			ndv = NULL;
		}
	}

	/* new node: ctime/atime, on the backing store or in memory */
	if ((*ndvp)->sdev_attrvp) {
		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
		    AT_CTIME|AT_ATIME);
	} else {
		ASSERT((*ndvp)->sdev_attr);
		gethrestime(&now);
		(*ndvp)->sdev_attr->va_ctime = now;
		(*ndvp)->sdev_attr->va_atime = now;
	}

	/* destination directory: mtime/atime */
	if (nddv->sdev_attrvp) {
		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
		    AT_MTIME|AT_ATIME);
	} else {
		ASSERT(nddv->sdev_attr);
		gethrestime(&now);
		nddv->sdev_attr->va_mtime = now;
		nddv->sdev_attr->va_atime = now;
	}
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);

	SDEV_RELE(*ndvp);
	return (error);

err_out:
	/* common exit: free the link copy and drop both parent locks */
	if (link != NULL) {
		kmem_free(link, strlen(link) + 1);
		link = NULL;
	}

	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);
	return (error);
}

/*
 * Merge sdev_node specific information into an attribute structure.
* * note: sdev_node is not locked here */ void sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap) { struct vnode *vp = SDEVTOV(dv); vap->va_nlink = dv->sdev_nlink; vap->va_nodeid = dv->sdev_ino; vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev; vap->va_type = vp->v_type; if (vp->v_type == VDIR) { vap->va_rdev = 0; vap->va_fsid = vp->v_rdev; } else if (vp->v_type == VLNK) { vap->va_rdev = 0; vap->va_mode &= ~S_IFMT; vap->va_mode |= S_IFLNK; } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) { vap->va_rdev = vp->v_rdev; vap->va_mode &= ~S_IFMT; if (vap->va_type == VCHR) vap->va_mode |= S_IFCHR; else vap->va_mode |= S_IFBLK; } else { vap->va_rdev = 0; } } struct vattr * sdev_getdefault_attr(enum vtype type) { if (type == VDIR) return (&sdev_vattr_dir); else if (type == VCHR) return (&sdev_vattr_chr); else if (type == VBLK) return (&sdev_vattr_blk); else if (type == VLNK) return (&sdev_vattr_lnk); else return (NULL); } int sdev_to_vp(struct sdev_node *dv, struct vnode **vpp) { int rv = 0; struct vnode *vp = SDEVTOV(dv); switch (vp->v_type) { case VCHR: case VBLK: /* * If vnode is a device, return special vnode instead * (though it knows all about -us- via sp->s_realvp) */ *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred); VN_RELE(vp); if (*vpp == NULLVP) rv = ENOSYS; break; default: /* most types are returned as is */ *vpp = vp; break; } return (rv); } /* * junction between devname and root file system, e.g. 
ufs */
/*
 * Look nm up in ddv's attribute (backing store) directory vnode using
 * kcred.  The result is returned through *rvp; the return value is that of
 * VOP_LOOKUP().
 */
int
devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
{
	struct vnode *rdvp = ddv->sdev_attrvp;
	int rval = 0;

	ASSERT(rdvp);

	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
	    NULL);
	return (rval);
}

/*
 * Populate ddv's directory cache from its backing store directory.
 *
 * ddv   - directory to fill; nothing is done when it has no attribute vnode
 *         or SDEV_BUILD is not set
 * dlen  - size of the buffer used for each VOP_READDIR() call
 * cred  - credentials for VOP_GETATTR() and sdev_mknode()
 *
 * Entries already cached, and "." / "..", are skipped.  For every other
 * entry a READY node is created with the backing vnode as its attribute
 * vnode.  The directory lock is upgraded to writer around sdev_mknode() and
 * downgraded again afterwards.  Returns the last error recorded.
 */
static int
sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
{
	struct sdev_node *dv = NULL;
	char	*nm;
	struct vnode *dirvp;
	int	error;
	vnode_t	*vp;
	int eof;
	struct iovec iov;
	struct uio uio;
	struct dirent64 *dp;
	dirent64_t *dbuf;
	size_t dbuflen;
	struct vattr vattr;
	char *link = NULL;

	if (ddv->sdev_attrvp == NULL)
		return (0);
	if (!(ddv->sdev_flags & SDEV_BUILD))
		return (0);

	dirvp = ddv->sdev_attrvp;
	VN_HOLD(dirvp);
	dbuf = kmem_zalloc(dlen, KM_SLEEP);

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_fmode = 0;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = 0;
	uio.uio_llimit = MAXOFFSET_T;

	eof = 0;
	error = 0;
	while (!error && !eof) {
		/* reset the buffer; uio_loffset carries over between reads */
		uio.uio_resid = dlen;
		iov.iov_base = (char *)dbuf;
		iov.iov_len = dlen;
		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);

		dbuflen = dlen - uio.uio_resid;
		if (error || dbuflen == 0)
			break;

		if (!(ddv->sdev_flags & SDEV_BUILD))
			break;

		for (dp = dbuf; ((intptr_t)dp <
		    (intptr_t)dbuf + dbuflen);
		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
			nm = dp->d_name;

			if (strcmp(nm, ".") == 0 ||
			    strcmp(nm, "..") == 0)
				continue;

			vp = NULLVP;
			dv = sdev_cache_lookup(ddv, nm);
			if (dv) {
				/* already cached: drop the lookup's hold */
				VERIFY(dv->sdev_state != SDEV_ZOMBIE);
				SDEV_SIMPLE_RELE(dv);
				continue;
			}

			/* refill the cache if not already */
			error = devname_backstore_lookup(ddv, nm, &vp);
			if (error)
				continue;

			/*
			 * NOTE(review): the two "continue" paths below skip
			 * the entry without a visible release of vp -
			 * confirm who owns the reference here.
			 */
			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
			if (error)
				continue;

			if (vattr.va_type == VLNK) {
				error = sdev_getlink(vp, &link);
				if (error) {
					continue;
				}
				ASSERT(link != NULL);
			}

			/* if the upgrade fails, drop the lock and retake it */
			if (!rw_tryupgrade(&ddv->sdev_contents)) {
				rw_exit(&ddv->sdev_contents);
				rw_enter(&ddv->sdev_contents, RW_WRITER);
			}
			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
			    cred, SDEV_READY);
			rw_downgrade(&ddv->sdev_contents);

			if (link != NULL) {
				kmem_free(link, strlen(link) + 1);
				link = NULL;
			}

			if (!error) {
				ASSERT(dv);
				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
				SDEV_SIMPLE_RELE(dv);
			}
			vp = NULL;
			dv = NULL;
		}
	}

done:
	VN_RELE(dirvp);
	kmem_free(dbuf, dlen);

	return (error);
}

/*
 * Create the dynamic subdirectories listed in vtab under ddv, using the
 * default directory attributes with all three timestamps set to the current
 * time.  Failures are logged and otherwise ignored.  The caller must hold
 * ddv->sdev_contents as writer, and SDEV_BUILD must be set on ddv.
 */
void
sdev_filldir_dynamic(struct sdev_node *ddv)
{
	int error;
	int i;
	struct vattr vattr;
	struct vattr *vap = &vattr;
	char *nm = NULL;
	struct sdev_node *dv = NULL;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	ASSERT((ddv->sdev_flags & SDEV_BUILD));

	*vap = *sdev_getdefault_attr(VDIR);	/* note structure copy here */
	gethrestime(&vap->va_atime);
	vap->va_mtime = vap->va_atime;
	vap->va_ctime = vap->va_atime;
	for (i = 0; vtab[i].vt_name != NULL; i++) {
		/*
		 * This early, we may be in a read-only /dev environment: leave
		 * the creation of any nodes we'd attempt to persist to
		 * devfsadm. Because /dev itself is normally persistent, any
		 * node which is not marked dynamic will end up being marked
		 * persistent. However, some nodes are both dynamic and
		 * persistent, mostly lofi and rlofi, so we need to be careful
		 * in our check.
		 */
		if ((vtab[i].vt_flags & SDEV_PERSIST) ||
		    !(vtab[i].vt_flags & SDEV_DYNAMIC))
			continue;
		nm = vtab[i].vt_name;
		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
		dv = NULL;
		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
		    NULL, kcred, SDEV_READY);
		if (error) {
			cmn_err(CE_WARN, "%s/%s: error %d\n",
			    ddv->sdev_name, nm, error);
		} else {
			ASSERT(dv);
			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
			SDEV_SIMPLE_RELE(dv);
		}
	}
}

/*
 * Creating a backing store entry based on sdev_attr.
 * This is called either as part of node creation in a persistent directory
 * or from setattr/setsecattr to persist access attributes across reboot.
*/ int sdev_shadow_node(struct sdev_node *dv, struct cred *cred) { int error = 0; struct vnode *dvp = SDEVTOV(dv->sdev_dotdot); struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp; struct vattr *vap = dv->sdev_attr; char *nm = dv->sdev_name; struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL; ASSERT(dv && dv->sdev_name && rdvp); ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL); lookup: /* try to find it in the backing store */ error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL, NULL); if (error == 0) { if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) { VN_HOLD(rrvp); VN_RELE(*rvp); *rvp = rrvp; } kmem_free(dv->sdev_attr, sizeof (vattr_t)); dv->sdev_attr = NULL; dv->sdev_attrvp = *rvp; return (0); } /* let's try to persist the node */ gethrestime(&vap->va_atime); vap->va_mtime = vap->va_atime; vap->va_ctime = vap->va_atime; vap->va_mask |= AT_TYPE|AT_MODE; switch (vap->va_type) { case VDIR: error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL); sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n", (void *)(*rvp), error)); if (!error) VN_RELE(*rvp); break; case VCHR: case VBLK: case VREG: case VDOOR: error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE, rvp, cred, 0, NULL, NULL); sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n", (void *)(*rvp), error)); if (!error) VN_RELE(*rvp); break; case VLNK: ASSERT(dv->sdev_symlink); error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred, NULL, 0); sdcmn_err9(("sdev_shadow_node: create symlink error %d\n", error)); break; default: cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node " "create\n", nm); /*NOTREACHED*/ } /* go back to lookup to factor out spec node and set attrvp */ if (error == 0) goto lookup; sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error)); return (error); } static void sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm) { struct sdev_node *dup = NULL; ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); if ((dup = sdev_findbyname(ddv, nm)) == NULL) 
{ sdev_direnter(ddv, *dv); } else { VERIFY(dup->sdev_state != SDEV_ZOMBIE); SDEV_SIMPLE_RELE(*dv); sdev_nodedestroy(*dv, 0); *dv = dup; } } static void sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv) { ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); sdev_dirdelete(ddv, *dv); } /* * update the in-core directory cache */ void sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm, sdev_cache_ops_t ops) { ASSERT((SDEV_HELD(*dv))); ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); switch (ops) { case SDEV_CACHE_ADD: sdev_cache_add(ddv, dv, nm); break; case SDEV_CACHE_DELETE: sdev_cache_delete(ddv, dv); break; default: break; } } /* * retrieve the named entry from the directory cache */ struct sdev_node * sdev_cache_lookup(struct sdev_node *ddv, char *nm) { struct sdev_node *dv = NULL; ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); dv = sdev_findbyname(ddv, nm); return (dv); } /* * Implicit reconfig for nodes constructed by a link generator * Start devfsadm if needed, or if devfsadm is in progress, * prepare to block on devfsadm either completing or * constructing the desired node. As devfsadmd is global * in scope, constructing all necessary nodes, we only * need to initiate it once. 
*/ static int sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm) { int error = 0; if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n", ddv->sdev_name, nm, devfsadm_state)); mutex_enter(&dv->sdev_lookup_lock); SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING)); mutex_exit(&dv->sdev_lookup_lock); error = 0; } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) { sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n", ddv->sdev_name, nm, devfsadm_state)); sdev_devfsadmd_thread(ddv, dv, kcred); mutex_enter(&dv->sdev_lookup_lock); SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING)); mutex_exit(&dv->sdev_lookup_lock); error = 0; } else { error = -1; } return (error); } /* * Support for specialized device naming construction mechanisms */ static int sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm, int (*callback)(struct sdev_node *, char *, void **, struct cred *, void *, char *), int flags, struct cred *cred) { int rv = 0; char *physpath = NULL; struct vattr vattr; struct vattr *vap = &vattr; struct sdev_node *dv = NULL; ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); if (flags & SDEV_VLINK) { physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); rv = callback(ddv, nm, (void *)&physpath, kcred, NULL, NULL); if (rv) { kmem_free(physpath, MAXPATHLEN); return (-1); } *vap = *sdev_getdefault_attr(VLNK); /* structure copy */ vap->va_size = strlen(physpath); gethrestime(&vap->va_atime); vap->va_mtime = vap->va_atime; vap->va_ctime = vap->va_atime; rv = sdev_mknode(ddv, nm, &dv, vap, NULL, (void *)physpath, cred, SDEV_READY); kmem_free(physpath, MAXPATHLEN); if (rv) return (rv); } else if (flags & SDEV_VATTR) { /* * /dev/pts * * callback is responsible to set the basic attributes, * e.g. 
va_type/va_uid/va_gid/ * dev_t if VCHR or VBLK/ */ ASSERT(callback); rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL); if (rv) { sdcmn_err3(("devname_lookup_func: SDEV_NONE " "callback failed \n")); return (-1); } rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, cred, SDEV_READY); if (rv) return (rv); } else { impossible(("lookup: %s/%s by %s not supported (%d)\n", SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm, __LINE__)); rv = -1; } *dvp = dv; return (rv); } static int is_devfsadm_thread(char *exec_name) { /* * note: because devfsadmd -> /usr/sbin/devfsadm * it is safe to use "devfsadm" to capture the lookups * from devfsadm and its daemon version. */ if (strcmp(exec_name, "devfsadm") == 0) return (1); return (0); } /* * Lookup Order: * sdev_node cache; * backing store (SDEV_PERSIST); * DBNR: a. dir_ops implemented in the loadable modules; * b. vnode ops in vtab. */ int devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp, struct cred *cred, int (*callback)(struct sdev_node *, char *, void **, struct cred *, void *, char *), int flags) { int rv = 0, nmlen; struct vnode *rvp = NULL; struct sdev_node *dv = NULL; int retried = 0; int error = 0; struct vattr vattr; char *lookup_thread = curproc->p_user.u_comm; int failed_flags = 0; int (*vtor)(struct sdev_node *) = NULL; int state; int parent_state; char *link = NULL; if (SDEVTOV(ddv)->v_type != VDIR) return (ENOTDIR); /* * Empty name or ., return node itself. 
*/ nmlen = strlen(nm); if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { *vpp = SDEVTOV(ddv); VN_HOLD(*vpp); return (0); } /* * .., return the parent directory */ if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { *vpp = SDEVTOV(ddv->sdev_dotdot); VN_HOLD(*vpp); return (0); } rw_enter(&ddv->sdev_contents, RW_READER); if (ddv->sdev_flags & SDEV_VTOR) { vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); ASSERT(vtor); } tryagain: /* * (a) directory cache lookup: */ ASSERT(RW_READ_HELD(&ddv->sdev_contents)); parent_state = ddv->sdev_state; dv = sdev_cache_lookup(ddv, nm); if (dv) { state = dv->sdev_state; switch (state) { case SDEV_INIT: if (is_devfsadm_thread(lookup_thread)) break; /* ZOMBIED parent won't allow node creation */ if (parent_state == SDEV_ZOMBIE) { SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); goto nolock_notfound; } mutex_enter(&dv->sdev_lookup_lock); /* compensate the threads started after devfsadm */ if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && !(SDEV_IS_LOOKUP(dv))) SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING)); if (SDEV_IS_LOOKUP(dv)) { failed_flags |= SLF_REBUILT; rw_exit(&ddv->sdev_contents); error = sdev_wait4lookup(dv, SDEV_LOOKUP); mutex_exit(&dv->sdev_lookup_lock); rw_enter(&ddv->sdev_contents, RW_READER); if (error != 0) { SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); goto nolock_notfound; } state = dv->sdev_state; if (state == SDEV_INIT) { SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); goto nolock_notfound; } else if (state == SDEV_READY) { goto found; } else if (state == SDEV_ZOMBIE) { rw_exit(&ddv->sdev_contents); SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); SDEV_RELE(dv); goto lookup_failed; } } else { mutex_exit(&dv->sdev_lookup_lock); } break; case SDEV_READY: goto found; case SDEV_ZOMBIE: rw_exit(&ddv->sdev_contents); SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); SDEV_RELE(dv); goto lookup_failed; default: rw_exit(&ddv->sdev_contents); SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); sdev_lookup_failed(ddv, nm, failed_flags); *vpp = NULLVP; 
return (ENOENT); } } ASSERT(RW_READ_HELD(&ddv->sdev_contents)); /* * ZOMBIED parent does not allow new node creation. * bail out early */ if (parent_state == SDEV_ZOMBIE) { rw_exit(&ddv->sdev_contents); *vpp = NULLVP; SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); return (ENOENT); } /* * (b0): backing store lookup * SDEV_PERSIST is default except: * 1) pts nodes * 2) non-chmod'ed local nodes * 3) zvol nodes */ if (SDEV_IS_PERSIST(ddv)) { error = devname_backstore_lookup(ddv, nm, &rvp); if (!error) { vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL); if (error) { rw_exit(&ddv->sdev_contents); if (dv) SDEV_RELE(dv); SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); sdev_lookup_failed(ddv, nm, failed_flags); *vpp = NULLVP; return (ENOENT); } if (vattr.va_type == VLNK) { error = sdev_getlink(rvp, &link); if (error) { rw_exit(&ddv->sdev_contents); if (dv) SDEV_RELE(dv); SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); sdev_lookup_failed(ddv, nm, failed_flags); *vpp = NULLVP; return (ENOENT); } ASSERT(link != NULL); } if (!rw_tryupgrade(&ddv->sdev_contents)) { rw_exit(&ddv->sdev_contents); rw_enter(&ddv->sdev_contents, RW_WRITER); } error = sdev_mknode(ddv, nm, &dv, &vattr, rvp, link, cred, SDEV_READY); rw_downgrade(&ddv->sdev_contents); if (link != NULL) { kmem_free(link, strlen(link) + 1); link = NULL; } if (error) { SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); rw_exit(&ddv->sdev_contents); if (dv) SDEV_RELE(dv); goto lookup_failed; } else { goto found; } } else if (retried) { rw_exit(&ddv->sdev_contents); sdcmn_err3(("retry of lookup of %s/%s: failed\n", ddv->sdev_name, nm)); if (dv) SDEV_RELE(dv); SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); sdev_lookup_failed(ddv, nm, failed_flags); *vpp = NULLVP; return (ENOENT); } } lookup_create_node: /* first thread that is doing the lookup on this node */ if (callback) { ASSERT(dv == NULL); if (!rw_tryupgrade(&ddv->sdev_contents)) { rw_exit(&ddv->sdev_contents); rw_enter(&ddv->sdev_contents, RW_WRITER); } 
error = sdev_call_dircallback(ddv, &dv, nm, callback, flags, cred); rw_downgrade(&ddv->sdev_contents); if (error == 0) { goto found; } else { SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); rw_exit(&ddv->sdev_contents); goto lookup_failed; } } if (!dv) { if (!rw_tryupgrade(&ddv->sdev_contents)) { rw_exit(&ddv->sdev_contents); rw_enter(&ddv->sdev_contents, RW_WRITER); } error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL, cred, SDEV_INIT); if (!dv) { rw_exit(&ddv->sdev_contents); SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); sdev_lookup_failed(ddv, nm, failed_flags); *vpp = NULLVP; return (ENOENT); } rw_downgrade(&ddv->sdev_contents); } /* * (b1) invoking devfsadm once per life time for devfsadm nodes */ ASSERT(SDEV_HELD(dv)); if (SDEV_IS_NO_NCACHE(dv)) failed_flags |= SLF_NO_NCACHE; if (sdev_reconfig_boot || !i_ddi_io_initialized() || SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) || ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) { ASSERT(SDEV_HELD(dv)); SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); goto nolock_notfound; } /* * filter out known non-existent devices recorded * during initial reconfiguration boot for which * reconfig should not be done and lookup may * be short-circuited now. 
*/ if (sdev_lookup_filter(ddv, nm)) { SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); goto nolock_notfound; } /* bypassing devfsadm internal nodes */ if (is_devfsadm_thread(lookup_thread)) { SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); goto nolock_notfound; } if (sdev_reconfig_disable) { SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); goto nolock_notfound; } error = sdev_call_devfsadmd(ddv, dv, nm); if (error == 0) { sdcmn_err8(("lookup of %s/%s by %s: reconfig\n", ddv->sdev_name, nm, curproc->p_user.u_comm)); if (sdev_reconfig_verbose) { cmn_err(CE_CONT, "?lookup of %s/%s by %s: reconfig\n", ddv->sdev_name, nm, curproc->p_user.u_comm); } retried = 1; failed_flags |= SLF_REBUILT; ASSERT(dv->sdev_state != SDEV_ZOMBIE); SDEV_SIMPLE_RELE(dv); goto tryagain; } else { SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); goto nolock_notfound; } found: ASSERT(dv->sdev_state == SDEV_READY); if (vtor) { /* * Check validity of returned node */ switch (vtor(dv)) { case SDEV_VTOR_VALID: break; case SDEV_VTOR_STALE: /* * The name exists, but the cache entry is * stale and needs to be re-created. 
*/ ASSERT(RW_READ_HELD(&ddv->sdev_contents)); if (rw_tryupgrade(&ddv->sdev_contents) == 0) { rw_exit(&ddv->sdev_contents); rw_enter(&ddv->sdev_contents, RW_WRITER); } sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); rw_downgrade(&ddv->sdev_contents); SDEV_RELE(dv); dv = NULL; goto lookup_create_node; /* FALLTHRU */ case SDEV_VTOR_INVALID: SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); sdcmn_err7(("lookup: destroy invalid " "node: %s(%p)\n", dv->sdev_name, (void *)dv)); goto nolock_notfound; case SDEV_VTOR_SKIP: sdcmn_err7(("lookup: node not applicable - " "skipping: %s(%p)\n", dv->sdev_name, (void *)dv)); rw_exit(&ddv->sdev_contents); SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); SDEV_RELE(dv); goto lookup_failed; default: cmn_err(CE_PANIC, "dev fs: validator failed: %s(%p)\n", dv->sdev_name, (void *)dv); break; } } rw_exit(&ddv->sdev_contents); rv = sdev_to_vp(dv, vpp); sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d " "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count, dv->sdev_state, nm, rv)); return (rv); nolock_notfound: /* * Destroy the node that is created for synchronization purposes. */ sdcmn_err3(("devname_lookup_func: %s with state %d\n", nm, dv->sdev_state)); ASSERT(RW_READ_HELD(&ddv->sdev_contents)); if (dv->sdev_state == SDEV_INIT) { if (!rw_tryupgrade(&ddv->sdev_contents)) { rw_exit(&ddv->sdev_contents); rw_enter(&ddv->sdev_contents, RW_WRITER); } /* * Node state may have changed during the lock * changes. Re-check. */ if (dv->sdev_state == SDEV_INIT) { sdev_dirdelete(ddv, dv); rw_exit(&ddv->sdev_contents); sdev_lookup_failed(ddv, nm, failed_flags); SDEV_RELE(dv); *vpp = NULL; return (ENOENT); } } rw_exit(&ddv->sdev_contents); SDEV_RELE(dv); lookup_failed: sdev_lookup_failed(ddv, nm, failed_flags); *vpp = NULL; return (ENOENT); } /* * Given a directory node, mark all nodes beneath as * STALE, i.e. nodes that don't exist as far as new * consumers are concerned. 
Remove them from the * list of directory entries so that no lookup or * directory traversal will find them. The node * not deallocated so existing holds are not affected. */ void sdev_stale(struct sdev_node *ddv) { struct sdev_node *dv; struct vnode *vp; ASSERT(SDEVTOV(ddv)->v_type == VDIR); rw_enter(&ddv->sdev_contents, RW_WRITER); while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) { vp = SDEVTOV(dv); SDEV_HOLD(dv); if (vp->v_type == VDIR) sdev_stale(dv); sdev_dirdelete(ddv, dv); SDEV_RELE(dv); } ddv->sdev_flags |= SDEV_BUILD; rw_exit(&ddv->sdev_contents); } /* * Given a directory node, clean out all the nodes beneath. * If expr is specified, clean node with names matching expr. * If SDEV_ENFORCE is specified in flags, busy nodes are made stale, * so they are excluded from future lookups. */ int sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) { int error = 0; int busy = 0; struct vnode *vp; struct sdev_node *dv, *next; int bkstore = 0; int len = 0; char *bks_name = NULL; ASSERT(SDEVTOV(ddv)->v_type == VDIR); /* * We try our best to destroy all unused sdev_node's */ rw_enter(&ddv->sdev_contents, RW_WRITER); for (dv = SDEV_FIRST_ENTRY(ddv); dv != NULL; dv = next) { next = SDEV_NEXT_ENTRY(ddv, dv); vp = SDEVTOV(dv); if (expr && gmatch(dv->sdev_name, expr) == 0) continue; if (vp->v_type == VDIR && sdev_cleandir(dv, NULL, flags) != 0) { sdcmn_err9(("sdev_cleandir: dir %s busy\n", dv->sdev_name)); busy++; continue; } if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) { sdcmn_err9(("sdev_cleandir: dir %s busy\n", dv->sdev_name)); busy++; continue; } /* * at this point, either dv is not held or SDEV_ENFORCE * is specified. In either case, dv needs to be deleted */ SDEV_HOLD(dv); bkstore = SDEV_IS_PERSIST(dv) ? 
1 : 0; if (bkstore && (vp->v_type == VDIR)) bkstore += 1; if (bkstore) { len = strlen(dv->sdev_name) + 1; bks_name = kmem_alloc(len, KM_SLEEP); bcopy(dv->sdev_name, bks_name, len); } sdev_dirdelete(ddv, dv); /* take care the backing store clean up */ if (bkstore) { ASSERT(bks_name); ASSERT(ddv->sdev_attrvp); if (bkstore == 1) { error = VOP_REMOVE(ddv->sdev_attrvp, bks_name, kcred, NULL, 0); } else if (bkstore == 2) { error = VOP_RMDIR(ddv->sdev_attrvp, bks_name, ddv->sdev_attrvp, kcred, NULL, 0); } /* do not propagate the backing store errors */ if (error) { sdcmn_err9(("sdev_cleandir: backing store" "not cleaned\n")); error = 0; } bkstore = 0; kmem_free(bks_name, len); bks_name = NULL; len = 0; } ddv->sdev_flags |= SDEV_BUILD; SDEV_RELE(dv); } ddv->sdev_flags |= SDEV_BUILD; rw_exit(&ddv->sdev_contents); if (busy) { error = EBUSY; } return (error); } /* * a convenient wrapper for readdir() funcs */ size_t add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off) { size_t reclen = DIRENT64_RECLEN(strlen(nm)); if (reclen > size) return (0); de->d_ino = (ino64_t)ino; de->d_off = (off64_t)off + 1; de->d_reclen = (ushort_t)reclen; (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen)); return (reclen); } /* * sdev_mount service routines */ int sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args) { int error; if (uap->datalen != sizeof (*args)) return (EINVAL); if (error = copyin(uap->dataptr, args, sizeof (*args))) { cmn_err(CE_WARN, "sdev_copyin_mountargs: can not" "get user data. 
error %d\n", error); return (EFAULT); } return (0); } #ifdef nextdp #undef nextdp #endif #define nextdp(dp) ((struct dirent64 *) \ (intptr_t)((char *)(dp) + (dp)->d_reclen)) /* * readdir helper func */ int devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp, int flags) { struct sdev_node *ddv = VTOSDEV(vp); struct sdev_node *dv; dirent64_t *dp; ulong_t outcount = 0; size_t namelen; ulong_t alloc_count; void *outbuf; struct iovec *iovp; int error = 0; size_t reclen; offset_t diroff; offset_t soff; int this_reclen; int (*vtor)(struct sdev_node *) = NULL; struct vattr attr; timestruc_t now; ASSERT(ddv->sdev_attr || ddv->sdev_attrvp); ASSERT(RW_READ_HELD(&ddv->sdev_contents)); if (uiop->uio_loffset >= MAXOFF_T) { if (eofp) *eofp = 1; return (0); } if (uiop->uio_iovcnt != 1) return (EINVAL); if (vp->v_type != VDIR) return (ENOTDIR); if (ddv->sdev_flags & SDEV_VTOR) { vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); ASSERT(vtor); } if (eofp != NULL) *eofp = 0; soff = uiop->uio_loffset; iovp = uiop->uio_iov; alloc_count = iovp->iov_len; dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP); outcount = 0; if (ddv->sdev_state == SDEV_ZOMBIE) goto get_cache; if (SDEV_IS_GLOBAL(ddv)) { if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) && !sdev_reconfig_boot && (flags & SDEV_BROWSE) && !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) && ((moddebug & MODDEBUG_FINI_EBUSY) == 0) && !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) && !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && !sdev_reconfig_disable) { /* * invoking "devfsadm" to do system device reconfig */ mutex_enter(&ddv->sdev_lookup_lock); SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING)); mutex_exit(&ddv->sdev_lookup_lock); sdcmn_err8(("readdir of %s by %s: reconfig\n", ddv->sdev_path, curproc->p_user.u_comm)); if (sdev_reconfig_verbose) { cmn_err(CE_CONT, "?readdir of %s by %s: reconfig\n", ddv->sdev_path, curproc->p_user.u_comm); } sdev_devfsadmd_thread(ddv, NULL, kcred); } else if 
(DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { /* * compensate the "ls" started later than "devfsadm" */ mutex_enter(&ddv->sdev_lookup_lock); SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING)); mutex_exit(&ddv->sdev_lookup_lock); } /* * release the contents lock so that * the cache may be updated by devfsadmd */ rw_exit(&ddv->sdev_contents); mutex_enter(&ddv->sdev_lookup_lock); if (SDEV_IS_READDIR(ddv)) (void) sdev_wait4lookup(ddv, SDEV_READDIR); mutex_exit(&ddv->sdev_lookup_lock); rw_enter(&ddv->sdev_contents, RW_READER); sdcmn_err4(("readdir of directory %s by %s\n", ddv->sdev_name, curproc->p_user.u_comm)); if (ddv->sdev_flags & SDEV_BUILD) { if (SDEV_IS_PERSIST(ddv)) { error = sdev_filldir_from_store(ddv, alloc_count, cred); } ddv->sdev_flags &= ~SDEV_BUILD; } } get_cache: /* handle "." and ".." */ diroff = 0; if (soff == 0) { /* first time */ this_reclen = DIRENT64_RECLEN(1); if (alloc_count < this_reclen) { error = EINVAL; goto done; } dp->d_ino = (ino64_t)ddv->sdev_ino; dp->d_off = (off64_t)1; dp->d_reclen = (ushort_t)this_reclen; (void) strncpy(dp->d_name, ".", DIRENT64_NAMELEN(this_reclen)); outcount += dp->d_reclen; dp = nextdp(dp); } diroff++; if (soff <= 1) { this_reclen = DIRENT64_RECLEN(2); if (alloc_count < outcount + this_reclen) { error = EINVAL; goto done; } dp->d_reclen = (ushort_t)this_reclen; dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino; dp->d_off = (off64_t)2; (void) strncpy(dp->d_name, "..", DIRENT64_NAMELEN(this_reclen)); outcount += dp->d_reclen; dp = nextdp(dp); } /* gets the cache */ diroff++; for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) { sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n", diroff, soff, dv->sdev_name)); /* bypassing pre-matured nodes */ if (diroff < soff || (dv->sdev_state != SDEV_READY)) { sdcmn_err3(("sdev_readdir: pre-mature node " "%s %d\n", dv->sdev_name, dv->sdev_state)); continue; } /* * Check validity of node * Drop invalid and nodes to be skipped. 
* A node the validator indicates as stale needs * to be returned as presumably the node name itself * is valid and the node data itself will be refreshed * on lookup. An application performing a readdir then * stat on each entry should thus always see consistent * data. In any case, it is not possible to synchronize * with dynamic kernel state, and any view we return can * never be anything more than a snapshot at a point in time. */ if (vtor) { switch (vtor(dv)) { case SDEV_VTOR_VALID: break; case SDEV_VTOR_INVALID: case SDEV_VTOR_SKIP: continue; case SDEV_VTOR_STALE: sdcmn_err3(("sdev_readir: %s stale\n", dv->sdev_name)); break; default: cmn_err(CE_PANIC, "dev fs: validator failed: %s(%p)\n", dv->sdev_name, (void *)dv); break; /*NOTREACHED*/ } } namelen = strlen(dv->sdev_name); reclen = DIRENT64_RECLEN(namelen); if (outcount + reclen > alloc_count) { goto full; } dp->d_reclen = (ushort_t)reclen; dp->d_ino = (ino64_t)dv->sdev_ino; dp->d_off = (off64_t)diroff + 1; (void) strncpy(dp->d_name, dv->sdev_name, DIRENT64_NAMELEN(reclen)); outcount += reclen; dp = nextdp(dp); } full: sdcmn_err4(("sdev_readdir: moving %lu bytes: " "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff, (void *)dv)); if (outcount) error = uiomove(outbuf, outcount, UIO_READ, uiop); if (!error) { uiop->uio_loffset = diroff; if (eofp) *eofp = dv ? 
0 : 1; } if (ddv->sdev_attrvp) { gethrestime(&now); attr.va_ctime = now; attr.va_atime = now; attr.va_mask = AT_CTIME|AT_ATIME; (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL); } done: kmem_free(outbuf, alloc_count); return (error); } static int sdev_modctl_lookup(const char *path, vnode_t **r_vp) { vnode_t *vp; vnode_t *cvp; struct sdev_node *svp; char *nm; struct pathname pn; int error; int persisted = 0; ASSERT(INGLOBALZONE(curproc)); if (error = pn_get((char *)path, UIO_SYSSPACE, &pn)) return (error); nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); vp = rootdir; VN_HOLD(vp); while (pn_pathleft(&pn)) { ASSERT(vp->v_type == VDIR || vp->v_type == VLNK); (void) pn_getcomponent(&pn, nm); /* * Deal with the .. special case where we may be * traversing up across a mount point, to the * root of this filesystem or global root. */ if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) { checkforroot: if (VN_CMP(vp, rootdir)) { nm[1] = 0; } else if (vp->v_flag & VROOT) { vfs_t *vfsp; cvp = vp; vfsp = cvp->v_vfsp; vfs_rlock_wait(vfsp); vp = cvp->v_vfsp->vfs_vnodecovered; if (vp == NULL || (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { vfs_unlock(vfsp); VN_RELE(cvp); error = EIO; break; } VN_HOLD(vp); vfs_unlock(vfsp); VN_RELE(cvp); cvp = NULL; goto checkforroot; } } error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL, NULL, NULL); if (error) { VN_RELE(vp); break; } /* traverse mount points encountered on our journey */ if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) { VN_RELE(vp); VN_RELE(cvp); break; } /* * symbolic link, can be either relative and absolute */ if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) { struct pathname linkpath; pn_alloc(&linkpath); if (error = pn_getsymlink(cvp, &linkpath, kcred)) { pn_free(&linkpath); break; } if (pn_pathleft(&linkpath) == 0) (void) pn_set(&linkpath, "."); error = pn_insert(&pn, &linkpath, strlen(nm)); pn_free(&linkpath); if (pn.pn_pathlen == 0) { VN_RELE(vp); return (ENOENT); } if (pn.pn_path[0] == '/') { pn_skipslash(&pn); 
VN_RELE(vp); VN_RELE(cvp); vp = rootdir; VN_HOLD(vp); } else { VN_RELE(cvp); } continue; } VN_RELE(vp); /* * Direct the operation to the persisting filesystem * underlying /dev. Bail if we encounter a * non-persistent dev entity here. */ if (cvp->v_vfsp->vfs_fstype == devtype) { if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) { error = ENOENT; VN_RELE(cvp); break; } if (VTOSDEV(cvp) == NULL) { error = ENOENT; VN_RELE(cvp); break; } svp = VTOSDEV(cvp); if ((vp = svp->sdev_attrvp) == NULL) { error = ENOENT; VN_RELE(cvp); break; } persisted = 1; VN_HOLD(vp); VN_RELE(cvp); cvp = vp; } vp = cvp; pn_skipslash(&pn); } kmem_free(nm, MAXNAMELEN); pn_free(&pn); if (error) return (error); /* * Only return persisted nodes in the filesystem underlying /dev. */ if (!persisted) { VN_RELE(vp); return (ENOENT); } *r_vp = vp; return (0); } int sdev_modctl_readdir(const char *dir, char ***dirlistp, int *npathsp, int *npathsp_alloc, int checking_empty) { char **pathlist = NULL; char **newlist = NULL; int npaths = 0; int npaths_alloc = 0; dirent64_t *dbuf = NULL; int n; char *s; int error; vnode_t *vp; int eof; struct iovec iov; struct uio uio; struct dirent64 *dp; size_t dlen; size_t dbuflen; int ndirents = 64; char *nm; error = sdev_modctl_lookup(dir, &vp); sdcmn_err11(("modctl readdir: %s by %s: %s\n", dir, curproc->p_user.u_comm, (error == 0) ? 
"ok" : "failed")); if (error) return (error); dlen = ndirents * (sizeof (*dbuf)); dbuf = kmem_alloc(dlen, KM_SLEEP); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_SYSSPACE; uio.uio_fmode = 0; uio.uio_extflg = UIO_COPY_CACHED; uio.uio_loffset = 0; uio.uio_llimit = MAXOFFSET_T; eof = 0; error = 0; while (!error && !eof) { uio.uio_resid = dlen; iov.iov_base = (char *)dbuf; iov.iov_len = dlen; (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0); VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); dbuflen = dlen - uio.uio_resid; if (error || dbuflen == 0) break; for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { nm = dp->d_name; if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) continue; if (npaths == npaths_alloc) { npaths_alloc += 64; newlist = (char **) kmem_zalloc((npaths_alloc + 1) * sizeof (char *), KM_SLEEP); if (pathlist) { bcopy(pathlist, newlist, npaths * sizeof (char *)); kmem_free(pathlist, (npaths + 1) * sizeof (char *)); } pathlist = newlist; } n = strlen(nm) + 1; s = kmem_alloc(n, KM_SLEEP); bcopy(nm, s, n); pathlist[npaths++] = s; sdcmn_err11((" %s/%s\n", dir, s)); /* if checking empty, one entry is as good as many */ if (checking_empty) { eof = 1; break; } } } exit: VN_RELE(vp); if (dbuf) kmem_free(dbuf, dlen); if (error) return (error); *dirlistp = pathlist; *npathsp = npaths; *npathsp_alloc = npaths_alloc; return (0); } void sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc) { int i, n; for (i = 0; i < npaths; i++) { n = strlen(pathlist[i]) + 1; kmem_free(pathlist[i], n); } kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *)); } int sdev_modctl_devexists(const char *path) { vnode_t *vp; int error; error = sdev_modctl_lookup(path, &vp); sdcmn_err11(("modctl dev exists: %s by %s: %s\n", path, curproc->p_user.u_comm, (error == 0) ? 
"ok" : "failed")); if (error == 0) VN_RELE(vp); return (error); } extern int sdev_vnodeops_tbl_size; /* * construct a new template with overrides from vtab */ static fs_operation_def_t * sdev_merge_vtab(const fs_operation_def_t tab[]) { fs_operation_def_t *new; const fs_operation_def_t *tab_entry; /* make a copy of standard vnode ops table */ new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP); bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size); /* replace the overrides from tab */ for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) { fs_operation_def_t *std_entry = new; while (std_entry->name) { if (strcmp(tab_entry->name, std_entry->name) == 0) { std_entry->func = tab_entry->func; break; } std_entry++; } if (std_entry->name == NULL) cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.", tab_entry->name); } return (new); } /* free memory allocated by sdev_merge_vtab */ static void sdev_free_vtab(fs_operation_def_t *new) { kmem_free(new, sdev_vnodeops_tbl_size); } /* * a generic setattr() function * * note: flags only supports AT_UID and AT_GID. * Future enhancements can be done for other types, e.g. AT_MODE */ int devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *, int), int protocol) { struct sdev_node *dv = VTOSDEV(vp); struct sdev_node *parent = dv->sdev_dotdot; struct vattr *get; uint_t mask = vap->va_mask; int error; /* some sanity checks */ if (vap->va_mask & AT_NOSET) return (EINVAL); if (vap->va_mask & AT_SIZE) { if (vp->v_type == VDIR) { return (EISDIR); } } /* no need to set attribute, but do not fail either */ ASSERT(parent); rw_enter(&parent->sdev_contents, RW_READER); if (dv->sdev_state == SDEV_ZOMBIE) { rw_exit(&parent->sdev_contents); return (0); } /* If backing store exists, just set it. 
*/ if (dv->sdev_attrvp) { rw_exit(&parent->sdev_contents); return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); } /* * Otherwise, for nodes with the persistence attribute, create it. */ ASSERT(dv->sdev_attr); if (SDEV_IS_PERSIST(dv) || ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) { sdev_vattr_merge(dv, vap); rw_enter(&dv->sdev_contents, RW_WRITER); error = sdev_shadow_node(dv, cred); rw_exit(&dv->sdev_contents); rw_exit(&parent->sdev_contents); if (error) return (error); return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); } /* * sdev_attr was allocated in sdev_mknode */ rw_enter(&dv->sdev_contents, RW_WRITER); error = secpolicy_vnode_setattr(cred, vp, vap, dv->sdev_attr, flags, sdev_unlocked_access, dv); if (error) { rw_exit(&dv->sdev_contents); rw_exit(&parent->sdev_contents); return (error); } get = dv->sdev_attr; if (mask & AT_MODE) { get->va_mode &= S_IFMT; get->va_mode |= vap->va_mode & ~S_IFMT; } if ((mask & AT_UID) || (mask & AT_GID)) { if (mask & AT_UID) get->va_uid = vap->va_uid; if (mask & AT_GID) get->va_gid = vap->va_gid; /* * a callback must be provided if the protocol is set */ if ((protocol & AT_UID) || (protocol & AT_GID)) { ASSERT(callback); error = callback(dv, get, protocol); if (error) { rw_exit(&dv->sdev_contents); rw_exit(&parent->sdev_contents); return (error); } } } if (mask & AT_ATIME) get->va_atime = vap->va_atime; if (mask & AT_MTIME) get->va_mtime = vap->va_mtime; if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) { gethrestime(&get->va_ctime); } sdev_vattr_merge(dv, get); rw_exit(&dv->sdev_contents); rw_exit(&parent->sdev_contents); return (0); } /* * a generic inactive() function */ /*ARGSUSED*/ void devname_inactive_func(struct vnode *vp, struct cred *cred, void (*callback)(struct vnode *)) { int clean; struct sdev_node *dv = VTOSDEV(vp); int state; mutex_enter(&vp->v_lock); ASSERT(vp->v_count >= 1); if (vp->v_count == 1 && callback != NULL) callback(vp); rw_enter(&dv->sdev_contents, RW_WRITER); 
state = dv->sdev_state; clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); /* * sdev is a rather bad public citizen. It violates the general * agreement that in memory nodes should always have a valid reference * count on their vnode. But that's not the case here. This means that * we do actually have to distinguish between getting inactive callbacks * for zombies and otherwise. This should probably be fixed. */ if (clean) { /* Remove the . entry to ourselves */ if (vp->v_type == VDIR) { decr_link(dv); } VERIFY(dv->sdev_nlink == 1); decr_link(dv); --vp->v_count; rw_exit(&dv->sdev_contents); mutex_exit(&vp->v_lock); sdev_nodedestroy(dv, 0); } else { --vp->v_count; rw_exit(&dv->sdev_contents); mutex_exit(&vp->v_lock); } }