1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 2298d1cbfeSGeorge Wilson * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23*97e81309SPrakash Surya * Copyright (c) 2011, 2015 by Delphix. All rights reserved. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #include <sys/zfs_context.h> 27fa9e4066Sahrens #include <sys/spa.h> 2831d7e8faSGeorge Wilson #include <sys/spa_impl.h> 29fa9e4066Sahrens #include <sys/vdev_file.h> 30fa9e4066Sahrens #include <sys/vdev_impl.h> 31fa9e4066Sahrens #include <sys/zio.h> 32fa9e4066Sahrens #include <sys/fs/zfs.h> 3351ece835Seschrock #include <sys/fm/fs/zfs.h> 34fa9e4066Sahrens 35fa9e4066Sahrens /* 36fa9e4066Sahrens * Virtual device vector for files. 37fa9e4066Sahrens */ 38fa9e4066Sahrens 39dcba9f3fSGeorge Wilson static void 40dcba9f3fSGeorge Wilson vdev_file_hold(vdev_t *vd) 41dcba9f3fSGeorge Wilson { 42dcba9f3fSGeorge Wilson ASSERT(vd->vdev_path != NULL); 43dcba9f3fSGeorge Wilson } 44dcba9f3fSGeorge Wilson 45dcba9f3fSGeorge Wilson static void 46dcba9f3fSGeorge Wilson vdev_file_rele(vdev_t *vd) 47dcba9f3fSGeorge Wilson { 48dcba9f3fSGeorge Wilson ASSERT(vd->vdev_path != NULL); 49dcba9f3fSGeorge Wilson } 50dcba9f3fSGeorge Wilson 51fa9e4066Sahrens static int 524263d13fSGeorge Wilson vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 534263d13fSGeorge Wilson uint64_t *ashift) 54fa9e4066Sahrens { 55fa9e4066Sahrens vdev_file_t *vf; 56fa9e4066Sahrens vnode_t *vp; 57e14bb325SJeff Bonwick vattr_t vattr; 58fa9e4066Sahrens int error; 59fa9e4066Sahrens 60fa9e4066Sahrens /* 61fa9e4066Sahrens * We must have a pathname, and it must be absolute. 62fa9e4066Sahrens */ 63fa9e4066Sahrens if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 64fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 65be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 66fa9e4066Sahrens } 67fa9e4066Sahrens 68095bcd66SGeorge Wilson /* 69095bcd66SGeorge Wilson * Reopen the device if it's not currently open. Otherwise, 70095bcd66SGeorge Wilson * just update the physical size of the device. 71095bcd66SGeorge Wilson */ 72095bcd66SGeorge Wilson if (vd->vdev_tsd != NULL) { 73095bcd66SGeorge Wilson ASSERT(vd->vdev_reopening); 74095bcd66SGeorge Wilson vf = vd->vdev_tsd; 75095bcd66SGeorge Wilson goto skip_open; 76095bcd66SGeorge Wilson } 77095bcd66SGeorge Wilson 78fa9e4066Sahrens vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP); 79fa9e4066Sahrens 80fa9e4066Sahrens /* 81fa9e4066Sahrens * We always open the files from the root of the global zone, even if 82fa9e4066Sahrens * we're in a local zone. If the user has gotten to this point, the 83fa9e4066Sahrens * administrator has already decided that the pool should be available 84fa9e4066Sahrens * to local zone users, so the underlying devices should be as well. 85fa9e4066Sahrens */ 86fa9e4066Sahrens ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); 870a4e9518Sgw25295 error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, 888ad4d6ddSJeff Bonwick spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1); 89fa9e4066Sahrens 90fa9e4066Sahrens if (error) { 91fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 92fa9e4066Sahrens return (error); 93fa9e4066Sahrens } 94fa9e4066Sahrens 95fa9e4066Sahrens vf->vf_vnode = vp; 96fa9e4066Sahrens 97fa9e4066Sahrens #ifdef _KERNEL 98fa9e4066Sahrens /* 99fa9e4066Sahrens * Make sure it's a regular file. 100fa9e4066Sahrens */ 101fa9e4066Sahrens if (vp->v_type != VREG) { 102fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 103be6fd75aSMatthew Ahrens return (SET_ERROR(ENODEV)); 104fa9e4066Sahrens } 105fa9e4066Sahrens #endif 106095bcd66SGeorge Wilson 107095bcd66SGeorge Wilson skip_open: 108fa9e4066Sahrens /* 109fa9e4066Sahrens * Determine the physical size of the file. 110fa9e4066Sahrens */ 111fa9e4066Sahrens vattr.va_mask = AT_SIZE; 112da6c28aaSamw error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL); 113fa9e4066Sahrens if (error) { 114fa9e4066Sahrens vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 115fa9e4066Sahrens return (error); 116fa9e4066Sahrens } 117fa9e4066Sahrens 1184263d13fSGeorge Wilson *max_psize = *psize = vattr.va_size; 119fa9e4066Sahrens *ashift = SPA_MINBLOCKSHIFT; 120fa9e4066Sahrens 121fa9e4066Sahrens return (0); 122fa9e4066Sahrens } 123fa9e4066Sahrens 124fa9e4066Sahrens static void 125fa9e4066Sahrens vdev_file_close(vdev_t *vd) 126fa9e4066Sahrens { 127fa9e4066Sahrens vdev_file_t *vf = vd->vdev_tsd; 128fa9e4066Sahrens 129095bcd66SGeorge Wilson if (vd->vdev_reopening || vf == NULL) 130fa9e4066Sahrens return; 131fa9e4066Sahrens 132fa9e4066Sahrens if (vf->vf_vnode != NULL) { 133da6c28aaSamw (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL); 1348ad4d6ddSJeff Bonwick (void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0, 1358ad4d6ddSJeff Bonwick kcred, NULL); 136fa9e4066Sahrens VN_RELE(vf->vf_vnode); 137fa9e4066Sahrens } 138fa9e4066Sahrens 13998d1cbfeSGeorge Wilson vd->vdev_delayed_close = B_FALSE; 140fa9e4066Sahrens kmem_free(vf, sizeof (vdev_file_t)); 141fa9e4066Sahrens vd->vdev_tsd = NULL; 142fa9e4066Sahrens } 143fa9e4066Sahrens 14431d7e8faSGeorge Wilson /* 14531d7e8faSGeorge Wilson * Implements the interrupt side for file vdev types. This routine will be 14631d7e8faSGeorge Wilson * called when the I/O completes allowing us to transfer the I/O to the 14731d7e8faSGeorge Wilson * interrupt taskqs. For consistency, the code structure mimics disk vdev 14831d7e8faSGeorge Wilson * types. 14931d7e8faSGeorge Wilson */ 15031d7e8faSGeorge Wilson static void 15131d7e8faSGeorge Wilson vdev_file_io_intr(buf_t *bp) 15231d7e8faSGeorge Wilson { 15331d7e8faSGeorge Wilson vdev_buf_t *vb = (vdev_buf_t *)bp; 15431d7e8faSGeorge Wilson zio_t *zio = vb->vb_io; 15531d7e8faSGeorge Wilson 15631d7e8faSGeorge Wilson zio->io_error = (geterror(bp) != 0 ? EIO : 0); 15731d7e8faSGeorge Wilson if (zio->io_error == 0 && bp->b_resid != 0) 158be6fd75aSMatthew Ahrens zio->io_error = SET_ERROR(ENOSPC); 15931d7e8faSGeorge Wilson 16031d7e8faSGeorge Wilson kmem_free(vb, sizeof (vdev_buf_t)); 161*97e81309SPrakash Surya zio_delay_interrupt(zio); 16231d7e8faSGeorge Wilson } 16331d7e8faSGeorge Wilson 16431d7e8faSGeorge Wilson static void 16531d7e8faSGeorge Wilson vdev_file_io_strategy(void *arg) 16631d7e8faSGeorge Wilson { 16731d7e8faSGeorge Wilson buf_t *bp = arg; 16831d7e8faSGeorge Wilson vnode_t *vp = bp->b_private; 16931d7e8faSGeorge Wilson ssize_t resid; 17031d7e8faSGeorge Wilson int error; 17131d7e8faSGeorge Wilson 17231d7e8faSGeorge Wilson error = vn_rdwr((bp->b_flags & B_READ) ? UIO_READ : UIO_WRITE, 17331d7e8faSGeorge Wilson vp, bp->b_un.b_addr, bp->b_bcount, ldbtob(bp->b_lblkno), 17431d7e8faSGeorge Wilson UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 17531d7e8faSGeorge Wilson 17631d7e8faSGeorge Wilson if (error == 0) { 17731d7e8faSGeorge Wilson bp->b_resid = resid; 17831d7e8faSGeorge Wilson biodone(bp); 17931d7e8faSGeorge Wilson } else { 18031d7e8faSGeorge Wilson bioerror(bp, error); 18131d7e8faSGeorge Wilson biodone(bp); 18231d7e8faSGeorge Wilson } 18331d7e8faSGeorge Wilson } 18431d7e8faSGeorge Wilson 185738f37bcSGeorge Wilson static void 186fa9e4066Sahrens vdev_file_io_start(zio_t *zio) 187fa9e4066Sahrens { 188fa9e4066Sahrens vdev_t *vd = zio->io_vd; 189fa9e4066Sahrens vdev_file_t *vf = vd->vdev_tsd; 19031d7e8faSGeorge Wilson vdev_buf_t *vb; 19131d7e8faSGeorge Wilson buf_t *bp; 192fa9e4066Sahrens 193fa9e4066Sahrens if (zio->io_type == ZIO_TYPE_IOCTL) { 194fa9e4066Sahrens /* XXPOLICY */ 1950a4e9518Sgw25295 if (!vdev_readable(vd)) { 196be6fd75aSMatthew Ahrens zio->io_error = SET_ERROR(ENXIO); 197738f37bcSGeorge Wilson zio_interrupt(zio); 198738f37bcSGeorge Wilson return; 199fa9e4066Sahrens } 200fa9e4066Sahrens 201fa9e4066Sahrens switch (zio->io_cmd) { 202fa9e4066Sahrens case DKIOCFLUSHWRITECACHE: 203fa9e4066Sahrens zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, 204da6c28aaSamw kcred, NULL); 205fa9e4066Sahrens break; 206fa9e4066Sahrens default: 207be6fd75aSMatthew Ahrens zio->io_error = SET_ERROR(ENOTSUP); 208fa9e4066Sahrens } 209fa9e4066Sahrens 210738f37bcSGeorge Wilson zio_execute(zio); 211738f37bcSGeorge Wilson return; 212fa9e4066Sahrens } 213fa9e4066Sahrens 214f693d300SSteven Hartland ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); 215*97e81309SPrakash Surya zio->io_target_timestamp = zio_handle_io_delay(zio); 216f693d300SSteven Hartland 21731d7e8faSGeorge Wilson vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP); 218fa9e4066Sahrens 21931d7e8faSGeorge Wilson vb->vb_io = zio; 22031d7e8faSGeorge Wilson bp = &vb->vb_buf; 221fa9e4066Sahrens 22231d7e8faSGeorge Wilson bioinit(bp); 22331d7e8faSGeorge Wilson bp->b_flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE); 22431d7e8faSGeorge Wilson bp->b_bcount = zio->io_size; 22531d7e8faSGeorge Wilson bp->b_un.b_addr = zio->io_data; 22631d7e8faSGeorge Wilson bp->b_lblkno = lbtodb(zio->io_offset); 22731d7e8faSGeorge Wilson bp->b_bufsize = zio->io_size; 22831d7e8faSGeorge Wilson bp->b_private = vf->vf_vnode; 22931d7e8faSGeorge Wilson bp->b_iodone = (int (*)())vdev_file_io_intr; 23031d7e8faSGeorge Wilson 2312c1e2b44SGeorge Wilson VERIFY3U(taskq_dispatch(system_taskq, vdev_file_io_strategy, bp, 2322c1e2b44SGeorge Wilson TQ_SLEEP), !=, 0); 233fa9e4066Sahrens } 234fa9e4066Sahrens 235e14bb325SJeff Bonwick /* ARGSUSED */ 236e14bb325SJeff Bonwick static void 237fa9e4066Sahrens vdev_file_io_done(zio_t *zio) 238fa9e4066Sahrens { 239fa9e4066Sahrens } 240fa9e4066Sahrens 241fa9e4066Sahrens vdev_ops_t vdev_file_ops = { 242fa9e4066Sahrens vdev_file_open, 243fa9e4066Sahrens vdev_file_close, 244fa9e4066Sahrens vdev_default_asize, 245fa9e4066Sahrens vdev_file_io_start, 246fa9e4066Sahrens vdev_file_io_done, 247fa9e4066Sahrens NULL, 248dcba9f3fSGeorge Wilson vdev_file_hold, 249dcba9f3fSGeorge Wilson vdev_file_rele, 250fa9e4066Sahrens VDEV_TYPE_FILE, /* name of this vdev type */ 251fa9e4066Sahrens B_TRUE /* leaf vdev */ 252fa9e4066Sahrens }; 253fa9e4066Sahrens 254fa9e4066Sahrens /* 255fa9e4066Sahrens * From userland we access disks just like files. 256fa9e4066Sahrens */ 257fa9e4066Sahrens #ifndef _KERNEL 258fa9e4066Sahrens 259fa9e4066Sahrens vdev_ops_t vdev_disk_ops = { 260fa9e4066Sahrens vdev_file_open, 261fa9e4066Sahrens vdev_file_close, 262fa9e4066Sahrens vdev_default_asize, 263fa9e4066Sahrens vdev_file_io_start, 264fa9e4066Sahrens vdev_file_io_done, 265fa9e4066Sahrens NULL, 266dcba9f3fSGeorge Wilson vdev_file_hold, 267dcba9f3fSGeorge Wilson vdev_file_rele, 268fa9e4066Sahrens VDEV_TYPE_DISK, /* name of this vdev type */ 269fa9e4066Sahrens B_TRUE /* leaf vdev */ 270fa9e4066Sahrens }; 271fa9e4066Sahrens 272fa9e4066Sahrens #endif 273