/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright 2013, 2016 Joyent, Inc. All rights reserved. * Copyright (c) 2014 by Delphix. All rights reserved. */ /* vnode ops for the /dev/zvol directory */ #include #include #include #include #include #include #include #include #include #include #include #include #include struct vnodeops *devzvol_vnodeops; static major_t devzvol_major; static taskq_ent_t devzvol_zclist_task; static kmutex_t devzvol_mtx; /* Below are protected by devzvol_mtx */ static boolean_t devzvol_isopen; static boolean_t devzvol_zclist_task_running = B_FALSE; static uint64_t devzvol_gen = 0; static uint64_t devzvol_zclist; static size_t devzvol_zclist_size; static ldi_ident_t devzvol_li; static ldi_handle_t devzvol_lh; /* * we need to use ddi_mod* since fs/dev gets loaded early on in * startup(), and linking fs/dev to fs/zfs would drag in a lot of * other stuff (like drv/random) before the rest of the system is * ready to go */ ddi_modhandle_t zfs_mod; int (*szcm)(char *); int (*szn2m)(char *, minor_t *); /* * Enable/disable snapshots from being created in /dev/zvol. 
By default, * they are enabled, preserving the historic behavior. */ boolean_t devzvol_snaps_allowed = B_TRUE; int sdev_zvol_create_minor(char *dsname) { if (szcm == NULL) return (-1); return ((*szcm)(dsname)); } int sdev_zvol_name2minor(char *dsname, minor_t *minor) { if (szn2m == NULL) return (-1); return ((*szn2m)(dsname, minor)); } int devzvol_open_zfs() { int rc; dev_t dv; devzvol_li = ldi_ident_from_anon(); if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred, &devzvol_lh, devzvol_li)) return (-1); if (zfs_mod == NULL && ((zfs_mod = ddi_modopen("fs/zfs", KRTLD_MODE_FIRST, &rc)) == NULL)) { return (rc); } ASSERT(szcm == NULL && szn2m == NULL); if ((szcm = (int (*)(char *)) ddi_modsym(zfs_mod, "zvol_create_minor", &rc)) == NULL) { cmn_err(CE_WARN, "couldn't resolve zvol_create_minor"); return (rc); } if ((szn2m = (int(*)(char *, minor_t *)) ddi_modsym(zfs_mod, "zvol_name2minor", &rc)) == NULL) { cmn_err(CE_WARN, "couldn't resolve zvol_name2minor"); return (rc); } if (ldi_get_dev(devzvol_lh, &dv)) return (-1); devzvol_major = getmajor(dv); return (0); } void devzvol_close_zfs() { szcm = NULL; szn2m = NULL; (void) ldi_close(devzvol_lh, FREAD|FWRITE, kcred); ldi_ident_release(devzvol_li); if (zfs_mod != NULL) { (void) ddi_modclose(zfs_mod); zfs_mod = NULL; } } int devzvol_handle_ioctl(int cmd, zfs_cmd_t *zc, size_t *alloc_size) { uint64_t cookie; int size = 8000; int unused; int rc; if (cmd != ZFS_IOC_POOL_CONFIGS) mutex_enter(&devzvol_mtx); if (!devzvol_isopen) { if ((rc = devzvol_open_zfs()) == 0) { devzvol_isopen = B_TRUE; } else { if (cmd != ZFS_IOC_POOL_CONFIGS) mutex_exit(&devzvol_mtx); return (ENXIO); } } cookie = zc->zc_cookie; again: zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(size, KM_SLEEP); zc->zc_nvlist_dst_size = size; rc = ldi_ioctl(devzvol_lh, cmd, (intptr_t)zc, FKIOCTL, kcred, &unused); if (rc == ENOMEM) { int newsize; newsize = zc->zc_nvlist_dst_size; ASSERT(newsize > size); kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size); size = 
newsize; zc->zc_cookie = cookie; goto again; } if (alloc_size == NULL) kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size); else *alloc_size = size; if (cmd != ZFS_IOC_POOL_CONFIGS) mutex_exit(&devzvol_mtx); return (rc); } /* figures out if the objset exists and returns its type */ int devzvol_objset_check(char *dsname, dmu_objset_type_t *type) { boolean_t ispool, is_snapshot; zfs_cmd_t *zc; int rc; nvlist_t *nvl; size_t nvsz; ispool = (strchr(dsname, '/') == NULL); is_snapshot = (strchr(dsname, '@') != NULL); if (is_snapshot && !devzvol_snaps_allowed) return (ENOTSUP); zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); (void) strlcpy(zc->zc_name, dsname, MAXPATHLEN); nvl = fnvlist_alloc(); fnvlist_add_boolean_value(nvl, "cachedpropsonly", B_TRUE); zc->zc_nvlist_src = (uintptr_t)fnvlist_pack(nvl, &nvsz); zc->zc_nvlist_src_size = nvsz; fnvlist_free(nvl); rc = devzvol_handle_ioctl(ispool ? ZFS_IOC_POOL_STATS : ZFS_IOC_OBJSET_STATS, zc, NULL); if (type && rc == 0) *type = (ispool) ? DMU_OST_ZFS : zc->zc_objset_stats.dds_type; fnvlist_pack_free((char *)(uintptr_t)zc->zc_nvlist_src, nvsz); kmem_free(zc, sizeof (zfs_cmd_t)); return (rc); } /* * Returns what the zfs dataset name should be, given the /dev/zvol * path and an optional name (can be NULL). * * Note that if the name param is NULL, then path must be an * actual dataset's directory and not one of the top-level * /dev/zvol/{dsk,rdsk} dirs, as these do not correspond to a * specific dataset. 
*/ char * devzvol_make_dsname(const char *path, const char *name) { char *dsname; const char *ptr; int dslen; if (strcmp(path, ZVOL_DIR) == 0) return (NULL); if (name && (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)) return (NULL); ptr = path + strlen(ZVOL_DIR); if (strncmp(ptr, "/dsk", 4) == 0) ptr += strlen("/dsk"); else if (strncmp(ptr, "/rdsk", 5) == 0) ptr += strlen("/rdsk"); else return (NULL); if (*ptr == '/') ptr++; else if (name == NULL) return (NULL); dslen = strlen(ptr); if (dslen) dslen++; /* plus null */ if (name) dslen += strlen(name) + 1; /* plus slash */ dsname = kmem_zalloc(dslen, KM_SLEEP); if (*ptr) { (void) strlcpy(dsname, ptr, dslen); if (name) (void) strlcat(dsname, "/", dslen); } if (name) (void) strlcat(dsname, name, dslen); return (dsname); } /* * check if the zvol's sdev_node is still valid, which means make * sure the zvol is still valid. zvol minors aren't proactively * destroyed when the zvol is destroyed, so we use a validator to clean * these up (in other words, when such nodes are encountered during * subsequent lookup() and readdir() operations) so that only valid * nodes are returned. The ordering between devname_lookup_func and * devzvol_validate is a little inefficient in the case of invalid * or stale nodes because devname_lookup_func calls * devzvol_create_{dir, link}, then the validator says it's invalid, * and then the node gets cleaned up. */ int devzvol_validate(struct sdev_node *dv) { vnode_t *vn = SDEVTOV(dv); dmu_objset_type_t do_type; char *dsname; char *nm = dv->sdev_name; int rc; sdcmn_err13(("validating ('%s' '%s')", dv->sdev_path, nm)); /* * validate only READY nodes; if someone is sitting on the * directory of a dataset that just got destroyed we could * get a zombie node which we just skip. 
*/ if (dv->sdev_state != SDEV_READY) { sdcmn_err13(("skipping '%s'", nm)); return (SDEV_VTOR_SKIP); } if ((strcmp(dv->sdev_path, ZVOL_DIR "/dsk") == 0) || (strcmp(dv->sdev_path, ZVOL_DIR "/rdsk") == 0)) return (SDEV_VTOR_VALID); dsname = devzvol_make_dsname(dv->sdev_path, NULL); if (dsname == NULL) return (SDEV_VTOR_INVALID); /* * Leave any nodes alone that have been explicitly created by * sdev profiles. */ if (!(dv->sdev_flags & SDEV_GLOBAL) && dv->sdev_origin != NULL) { kmem_free(dsname, strlen(dsname) + 1); return (SDEV_VTOR_VALID); } rc = devzvol_objset_check(dsname, &do_type); sdcmn_err13((" '%s' rc %d", dsname, rc)); if (rc != 0) { sdev_node_t *parent = dv->sdev_dotdot; /* * Explicitly passed-through zvols in our sdev profile can't * be created as prof_* shadow nodes, because in the GZ they * are symlinks, but in the NGZ they are actual device files. * * The objset_check will fail on these as they are outside * any delegated dataset (zfs will not allow ioctl access to * them from this zone). We still want them to work, though. 
*/ if (!(parent->sdev_flags & SDEV_GLOBAL) && parent->sdev_origin != NULL && !(dv->sdev_flags & SDEV_GLOBAL) && (vn->v_type == VBLK || vn->v_type == VCHR) && prof_name_matched(nm, parent)) { do_type = DMU_OST_ZVOL; } else { kmem_free(dsname, strlen(dsname) + 1); return (SDEV_VTOR_INVALID); } } sdcmn_err13((" v_type %d do_type %d", vn->v_type, do_type)); if ((vn->v_type == VLNK && do_type != DMU_OST_ZVOL) || ((vn->v_type == VBLK || vn->v_type == VCHR) && do_type != DMU_OST_ZVOL) || (vn->v_type == VDIR && do_type == DMU_OST_ZVOL)) { kmem_free(dsname, strlen(dsname) + 1); return (SDEV_VTOR_STALE); } if (vn->v_type == VLNK) { char *ptr, *link; long val = 0; minor_t lminor, ominor; rc = sdev_getlink(vn, &link); ASSERT(rc == 0); ptr = strrchr(link, ':') + 1; rc = ddi_strtol(ptr, NULL, 10, &val); kmem_free(link, strlen(link) + 1); ASSERT(rc == 0 && val != 0); lminor = (minor_t)val; if (sdev_zvol_name2minor(dsname, &ominor) < 0 || ominor != lminor) { kmem_free(dsname, strlen(dsname) + 1); return (SDEV_VTOR_STALE); } } kmem_free(dsname, strlen(dsname) + 1); return (SDEV_VTOR_VALID); } /* * Taskq callback to update the devzvol_zclist. * * We need to defer this to the taskq to avoid it running with a user * context that might be associated with some non-global zone, and thus * not being able to list all of the pools on the entire system. */ /*ARGSUSED*/ static void devzvol_update_zclist_cb(void *arg) { zfs_cmd_t *zc; int rc; size_t size; zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); mutex_enter(&devzvol_mtx); zc->zc_cookie = devzvol_gen; rc = devzvol_handle_ioctl(ZFS_IOC_POOL_CONFIGS, zc, &size); switch (rc) { case 0: /* new generation */ ASSERT(devzvol_gen != zc->zc_cookie); devzvol_gen = zc->zc_cookie; if (devzvol_zclist) kmem_free((void *)(uintptr_t)devzvol_zclist, devzvol_zclist_size); devzvol_zclist = zc->zc_nvlist_dst; /* Keep the alloc'd size, not the nvlist size. 
*/ devzvol_zclist_size = size; break; default: /* * Either there was no change in pool configuration * since we last asked (rc == EEXIST) or we got a * catastrophic error. * * Give up memory and exit. */ kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size); break; } VERIFY(devzvol_zclist_task_running == B_TRUE); devzvol_zclist_task_running = B_FALSE; mutex_exit(&devzvol_mtx); kmem_free(zc, sizeof (zfs_cmd_t)); } static void devzvol_update_zclist(void) { mutex_enter(&devzvol_mtx); if (devzvol_zclist_task_running == B_TRUE) { mutex_exit(&devzvol_mtx); goto wait; } devzvol_zclist_task_running = B_TRUE; taskq_dispatch_ent(sdev_taskq, devzvol_update_zclist_cb, NULL, 0, &devzvol_zclist_task); mutex_exit(&devzvol_mtx); wait: taskq_wait(sdev_taskq); } /* * Creates sub-directories for each zpool as needed in response to a * readdir on one of the /dev/zvol/{dsk,rdsk} directories. */ void devzvol_create_pool_dirs(struct vnode *dvp) { nvlist_t *nv = NULL; nvpair_t *elem = NULL; int pools = 0; int rc; sdcmn_err13(("devzvol_create_pool_dirs")); devzvol_update_zclist(); mutex_enter(&devzvol_mtx); rc = nvlist_unpack((char *)(uintptr_t)devzvol_zclist, devzvol_zclist_size, &nv, 0); if (rc) { ASSERT(rc == 0); kmem_free((void *)(uintptr_t)devzvol_zclist, devzvol_zclist_size); devzvol_gen = 0; devzvol_zclist = NULL; devzvol_zclist_size = 0; goto out; } mutex_exit(&devzvol_mtx); while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) { struct vnode *vp; ASSERT(dvp->v_count > 0); rc = VOP_LOOKUP(dvp, nvpair_name(elem), &vp, NULL, 0, NULL, kcred, NULL, 0, NULL); /* should either work, or not be visible from a zone */ ASSERT(rc == 0 || rc == ENOENT); if (rc == 0) VN_RELE(vp); pools++; } nvlist_free(nv); mutex_enter(&devzvol_mtx); if (devzvol_isopen && pools == 0) { /* clean up so zfs can be unloaded */ devzvol_close_zfs(); devzvol_isopen = B_FALSE; } out: mutex_exit(&devzvol_mtx); } /*ARGSUSED3*/ static int devzvol_create_dir(struct sdev_node *ddv, char *nm, void **arg, cred_t *cred, void 
*whatever, char *whichever) { timestruc_t now; struct vattr *vap = (struct vattr *)arg; sdcmn_err13(("create_dir (%s) (%s) '%s'", ddv->sdev_name, ddv->sdev_path, nm)); ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR, strlen(ZVOL_DIR)) == 0); *vap = *sdev_getdefault_attr(VDIR); gethrestime(&now); vap->va_atime = now; vap->va_mtime = now; vap->va_ctime = now; return (0); } /*ARGSUSED3*/ static int devzvol_create_link(struct sdev_node *ddv, char *nm, void **arg, cred_t *cred, void *whatever, char *whichever) { minor_t minor; char *pathname = (char *)*arg; int rc; char *dsname; char *x; char str[MAXNAMELEN]; sdcmn_err13(("create_link (%s) (%s) '%s'", ddv->sdev_name, ddv->sdev_path, nm)); dsname = devzvol_make_dsname(ddv->sdev_path, nm); rc = sdev_zvol_create_minor(dsname); if ((rc != 0 && rc != EEXIST && rc != EBUSY) || sdev_zvol_name2minor(dsname, &minor)) { sdcmn_err13(("devzvol_create_link %d", rc)); kmem_free(dsname, strlen(dsname) + 1); return (-1); } kmem_free(dsname, strlen(dsname) + 1); /* * This is a valid zvol; create a symlink that points to the * minor which was created under /devices/pseudo/zfs@0 */ *pathname = '\0'; for (x = ddv->sdev_path; x = strchr(x, '/'); x++) (void) strcat(pathname, "../"); (void) snprintf(str, sizeof (str), ZVOL_PSEUDO_DEV "%u", minor); (void) strncat(pathname, str, MAXPATHLEN); if (strncmp(ddv->sdev_path, ZVOL_FULL_RDEV_DIR, strlen(ZVOL_FULL_RDEV_DIR)) == 0) (void) strcat(pathname, ",raw"); return (0); } /* Clean zvol sdev_nodes that are no longer valid. 
*/
/*
 * devzvol_prunedir() is entered with ddv->sdev_contents held as reader.
 * The lock is upgraded to writer for the scan (dropping and re-taking it
 * if the upgrade cannot be done in place) and downgraded back to reader
 * before returning.
 */
static void
devzvol_prunedir(struct sdev_node *ddv)
{
	struct sdev_node *node;

	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	sdcmn_err13(("prunedir '%s'", ddv->sdev_name));
	ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR, strlen(ZVOL_DIR)) == 0);
	if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
		rw_exit(&ddv->sdev_contents);
		rw_enter(&ddv->sdev_contents, RW_WRITER);
	}

	node = SDEV_FIRST_ENTRY(ddv);
	while (node) {
		int verdict;

		sdcmn_err13(("sdev_name '%s'", node->sdev_name));

		verdict = devzvol_validate(node);

		/* nodes that are fine, or not ours to judge, are kept */
		if (verdict == SDEV_VTOR_VALID ||
		    verdict == SDEV_VTOR_SKIP) {
			node = SDEV_NEXT_ENTRY(ddv, node);
			continue;
		}
		if (verdict == SDEV_VTOR_INVALID) {
			sdcmn_err7(("prunedir: destroy invalid "
			    "node: %s\n", node->sdev_name));
		}

		/* a directory that cannot be emptied is left in place */
		if ((SDEVTOV(node)->v_type == VDIR) &&
		    (sdev_cleandir(node, NULL, 0) != 0)) {
			node = SDEV_NEXT_ENTRY(ddv, node);
			continue;
		}

		SDEV_HOLD(node);
		/* remove the cache node */
		sdev_cache_update(ddv, &node, node->sdev_name,
		    SDEV_CACHE_DELETE);
		SDEV_RELE(node);

		/* the directory changed under us; rescan from the start */
		node = SDEV_FIRST_ENTRY(ddv);
	}
	rw_downgrade(&ddv->sdev_contents);
}

/*
 * This function is used to create a dir or dev inside a zone's /dev when the
 * zone has a zvol that is dynamically created within the zone (i.e. inside
 * of a delegated dataset).  Since there is no /devices tree within a zone,
 * we create the chr/blk devices directly inside the zone's /dev instead of
 * making symlinks.
*/ static int devzvol_mk_ngz_node(struct sdev_node *parent, char *nm) { struct vattr vattr; timestruc_t now; enum vtype expected_type = VDIR; dmu_objset_type_t do_type; struct sdev_node *dv = NULL; int res; char *dsname; bzero(&vattr, sizeof (vattr)); gethrestime(&now); vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; vattr.va_uid = SDEV_UID_DEFAULT; vattr.va_gid = SDEV_GID_DEFAULT; vattr.va_type = VNON; vattr.va_atime = now; vattr.va_mtime = now; vattr.va_ctime = now; if ((dsname = devzvol_make_dsname(parent->sdev_path, nm)) == NULL) return (ENOENT); if (devzvol_objset_check(dsname, &do_type) != 0) { /* * objset_check will succeed on any valid objset in the global * zone, and any valid delegated dataset. It will fail, however, * in non-global zones on explicitly whitelisted zvol devices * that are outside any delegated dataset. * * The directories leading up to the zvol device itself will be * created by prof for us in advance (and will always validate * because of the matching check in devzvol_validate). The zvol * device itself can't be created by prof though because in the * GZ it's a symlink, and in the NGZ it is not. So, we create * such zvol device files here. 
*/ if (!(parent->sdev_flags & SDEV_GLOBAL) && parent->sdev_origin != NULL && prof_name_matched(nm, parent)) { do_type = DMU_OST_ZVOL; } else { kmem_free(dsname, strlen(dsname) + 1); return (ENOENT); } } if (do_type == DMU_OST_ZVOL) expected_type = VBLK; if (expected_type == VDIR) { vattr.va_type = VDIR; vattr.va_mode = SDEV_DIRMODE_DEFAULT; } else { minor_t minor; dev_t devnum; int rc; rc = sdev_zvol_create_minor(dsname); if ((rc != 0 && rc != EEXIST && rc != EBUSY) || sdev_zvol_name2minor(dsname, &minor)) { kmem_free(dsname, strlen(dsname) + 1); return (ENOENT); } devnum = makedevice(devzvol_major, minor); vattr.va_rdev = devnum; if (strstr(parent->sdev_path, "/rdsk/") != NULL) vattr.va_type = VCHR; else vattr.va_type = VBLK; vattr.va_mode = SDEV_DEVMODE_DEFAULT; } kmem_free(dsname, strlen(dsname) + 1); rw_enter(&parent->sdev_contents, RW_WRITER); res = sdev_mknode(parent, nm, &dv, &vattr, NULL, NULL, kcred, SDEV_READY); rw_exit(&parent->sdev_contents); if (res != 0) return (ENOENT); SDEV_RELE(dv); return (0); } /*ARGSUSED*/ static int devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, caller_context_t *ct, int *direntflags, pathname_t *realpnp) { enum vtype expected_type = VDIR; struct sdev_node *parent = VTOSDEV(dvp); char *dsname; dmu_objset_type_t do_type; int error; sdcmn_err13(("devzvol_lookup '%s' '%s'", parent->sdev_path, nm)); *vpp = NULL; /* execute access is required to search the directory */ if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) return (error); rw_enter(&parent->sdev_contents, RW_READER); if (!SDEV_IS_GLOBAL(parent)) { int res; rw_exit(&parent->sdev_contents); /* * If we're in the global zone and reach down into a non-global * zone's /dev/zvol then this action could trigger the creation * of all of the zvol devices for every zone into the non-global * zone's /dev tree. This could be a big security hole. 
To * prevent this, disallow the global zone from looking inside * a non-global zones /dev/zvol. This behavior is similar to * delegated datasets, which cannot be used by the global zone. */ if (getzoneid() == GLOBAL_ZONEID) return (EPERM); res = prof_lookup(dvp, nm, vpp, cred); /* * We won't find a zvol that was dynamically created inside * a NGZ, within a delegated dataset, in the zone's dev profile * but prof_lookup will also find it via sdev_cache_lookup. */ if (res == ENOENT) { /* * We have to create the sdev node for the dymamically * created zvol. */ if (devzvol_mk_ngz_node(parent, nm) != 0) return (ENOENT); res = prof_lookup(dvp, nm, vpp, cred); } return (res); } /* * Don't let the global-zone style lookup succeed here when we're not * running in the global zone. This can happen because prof calls into * us (in prof_filldir) trying to create an explicitly passed-through * zvol device outside any delegated dataset. * * We have to stop this here or else we will create prof shadows of * the global zone symlink, which will make no sense at all in the * non-global zone (it has no /devices for the symlink to point at). * * These zvols will be created later (at access time) by mk_ngz_node * instead. The dirs leading up to them will be created by prof * internally. * * We have to return EPERM here, because ENOENT is given special * meaning by prof in this context. */ if (getzoneid() != GLOBAL_ZONEID) { rw_exit(&parent->sdev_contents); return (EPERM); } dsname = devzvol_make_dsname(parent->sdev_path, nm); rw_exit(&parent->sdev_contents); sdcmn_err13(("rvp dsname %s", dsname ? 
dsname : "(null)")); if (dsname) { error = devzvol_objset_check(dsname, &do_type); if (error != 0) { error = ENOENT; goto out; } if (do_type == DMU_OST_ZVOL) expected_type = VLNK; } /* * the callbacks expect: * * parent->sdev_path nm * /dev/zvol {r}dsk * /dev/zvol/{r}dsk * /dev/zvol/{r}dsk/ * * sdev_name is always last path component of sdev_path */ if (expected_type == VDIR) { error = devname_lookup_func(parent, nm, vpp, cred, devzvol_create_dir, SDEV_VATTR); } else { error = devname_lookup_func(parent, nm, vpp, cred, devzvol_create_link, SDEV_VLINK); } sdcmn_err13(("devzvol_lookup %d %d", expected_type, error)); ASSERT(error || ((*vpp)->v_type == expected_type)); out: if (dsname) kmem_free(dsname, strlen(dsname) + 1); sdcmn_err13(("devzvol_lookup %d", error)); return (error); } /* * We allow create to find existing nodes * - if the node doesn't exist - EROFS * - creating an existing dir read-only succeeds, otherwise EISDIR * - exclusive creates fail - EEXIST */ /*ARGSUSED2*/ static int devzvol_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, int mode, struct vnode **vpp, struct cred *cred, int flag, caller_context_t *ct, vsecattr_t *vsecp) { int error; struct vnode *vp; *vpp = NULL; error = devzvol_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); if (error == 0) { if (excl == EXCL) error = EEXIST; else if (vp->v_type == VDIR && (mode & VWRITE)) error = EISDIR; else error = VOP_ACCESS(vp, mode, 0, cred, ct); if (error) { VN_RELE(vp); } else *vpp = vp; } else if (error == ENOENT) { error = EROFS; } return (error); } void sdev_iter_snapshots(struct vnode *dvp, char *name); void sdev_iter_datasets(struct vnode *dvp, int arg, char *name) { zfs_cmd_t *zc; int rc; sdcmn_err13(("iter name is '%s' (arg %x)", name, arg)); zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); (void) strcpy(zc->zc_name, name); while ((rc = devzvol_handle_ioctl(arg, zc, B_FALSE)) == 0) { struct vnode *vpp; char *ptr; sdcmn_err13((" name %s", zc->zc_name)); if 
(strchr(zc->zc_name, '$') || strchr(zc->zc_name, '%')) goto skip; ptr = strrchr(zc->zc_name, '/') + 1; rc = devzvol_lookup(dvp, ptr, &vpp, NULL, 0, NULL, kcred, NULL, NULL, NULL); if (rc == 0) { VN_RELE(vpp); } else if (rc == ENOENT) { goto skip; } else { /* * EBUSY == problem with zvols's dmu holds? * EPERM when in a NGZ and traversing up and out. */ goto skip; } if (arg == ZFS_IOC_DATASET_LIST_NEXT && zc->zc_objset_stats.dds_type == DMU_OST_ZVOL && devzvol_snaps_allowed) sdev_iter_snapshots(dvp, zc->zc_name); skip: (void) strcpy(zc->zc_name, name); } kmem_free(zc, sizeof (zfs_cmd_t)); } void sdev_iter_snapshots(struct vnode *dvp, char *name) { sdev_iter_datasets(dvp, ZFS_IOC_SNAPSHOT_LIST_NEXT, name); } /*ARGSUSED4*/ static int devzvol_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp, caller_context_t *ct_unused, int flags_unused) { struct sdev_node *sdvp = VTOSDEV(dvp); char *ptr; sdcmn_err13(("zv readdir of '%s' %s'", sdvp->sdev_path, sdvp->sdev_name)); if (strcmp(sdvp->sdev_path, ZVOL_DIR) == 0) { struct vnode *vp; rw_exit(&sdvp->sdev_contents); (void) devname_lookup_func(sdvp, "dsk", &vp, cred, devzvol_create_dir, SDEV_VATTR); VN_RELE(vp); (void) devname_lookup_func(sdvp, "rdsk", &vp, cred, devzvol_create_dir, SDEV_VATTR); VN_RELE(vp); rw_enter(&sdvp->sdev_contents, RW_READER); return (devname_readdir_func(dvp, uiop, cred, eofp, 0)); } if (uiop->uio_offset == 0) devzvol_prunedir(sdvp); ptr = sdvp->sdev_path + strlen(ZVOL_DIR); if ((strcmp(ptr, "/dsk") == 0) || (strcmp(ptr, "/rdsk") == 0)) { rw_exit(&sdvp->sdev_contents); devzvol_create_pool_dirs(dvp); rw_enter(&sdvp->sdev_contents, RW_READER); return (devname_readdir_func(dvp, uiop, cred, eofp, 0)); } ptr = strchr(ptr + 1, '/'); if (ptr == NULL) return (ENOENT); ptr++; rw_exit(&sdvp->sdev_contents); sdev_iter_datasets(dvp, ZFS_IOC_DATASET_LIST_NEXT, ptr); rw_enter(&sdvp->sdev_contents, RW_READER); return (devname_readdir_func(dvp, uiop, cred, eofp, 0)); } const fs_operation_def_t 
devzvol_vnodeops_tbl[] = { VOPNAME_READDIR, { .vop_readdir = devzvol_readdir }, VOPNAME_LOOKUP, { .vop_lookup = devzvol_lookup }, VOPNAME_CREATE, { .vop_create = devzvol_create }, VOPNAME_RENAME, { .error = fs_nosys }, VOPNAME_MKDIR, { .error = fs_nosys }, VOPNAME_RMDIR, { .error = fs_nosys }, VOPNAME_REMOVE, { .error = fs_nosys }, VOPNAME_SYMLINK, { .error = fs_nosys }, NULL, NULL };