1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/zfs_context.h> 29 #include <sys/spa.h> 30 #include <sys/vdev_file.h> 31 #include <sys/vdev_impl.h> 32 #include <sys/zio.h> 33 #include <sys/fs/zfs.h> 34 35 /* 36 * Virtual device vector for files. 37 */ 38 39 static int 40 vdev_file_open_common(vdev_t *vd) 41 { 42 vdev_file_t *vf; 43 vnode_t *vp; 44 int error; 45 46 /* 47 * We must have a pathname, and it must be absolute. 48 */ 49 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 50 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 51 return (EINVAL); 52 } 53 54 vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP); 55 56 /* 57 * We always open the files from the root of the global zone, even if 58 * we're in a local zone. If the user has gotten to this point, the 59 * administrator has already decided that the pool should be available 60 * to local zone users, so the underlying devices should be as well. 61 */ 62 ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); 63 error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, 64 spa_mode | FOFFMAX, 0, &vp, 0, 0, rootdir, -1); 65 66 if (error) { 67 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 68 return (error); 69 } 70 71 vf->vf_vnode = vp; 72 73 #ifdef _KERNEL 74 /* 75 * Make sure it's a regular file. 76 */ 77 if (vp->v_type != VREG) { 78 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 79 return (ENODEV); 80 } 81 #endif 82 83 return (0); 84 } 85 86 static int 87 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) 88 { 89 vdev_file_t *vf; 90 vattr_t vattr; 91 int error; 92 93 if ((error = vdev_file_open_common(vd)) != 0) 94 return (error); 95 96 vf = vd->vdev_tsd; 97 98 /* 99 * Determine the physical size of the file. 100 */ 101 vattr.va_mask = AT_SIZE; 102 error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL); 103 if (error) { 104 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 105 return (error); 106 } 107 108 *psize = vattr.va_size; 109 *ashift = SPA_MINBLOCKSHIFT; 110 111 return (0); 112 } 113 114 static void 115 vdev_file_close(vdev_t *vd) 116 { 117 vdev_file_t *vf = vd->vdev_tsd; 118 119 if (vf == NULL) 120 return; 121 122 if (vf->vf_vnode != NULL) { 123 (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL); 124 (void) VOP_CLOSE(vf->vf_vnode, spa_mode, 1, 0, kcred, NULL); 125 VN_RELE(vf->vf_vnode); 126 } 127 128 kmem_free(vf, sizeof (vdev_file_t)); 129 vd->vdev_tsd = NULL; 130 } 131 132 static int 133 vdev_file_probe_io(vdev_t *vd, caddr_t data, size_t size, uint64_t offset, 134 enum uio_rw rw) 135 { 136 vdev_file_t *vf = vd->vdev_tsd; 137 ssize_t resid; 138 int error = 0; 139 140 if (vd == NULL || vf == NULL || vf->vf_vnode == NULL) 141 return (EINVAL); 142 143 ASSERT(rw == UIO_READ || rw == UIO_WRITE); 144 145 error = vn_rdwr(rw, vf->vf_vnode, data, size, offset, UIO_SYSSPACE, 146 0, RLIM64_INFINITY, kcred, &resid); 147 if (error || resid != 0) 148 return (EIO); 149 return (0); 150 } 151 152 static int 153 vdev_file_probe(vdev_t *vd) 154 { 155 vdev_t *nvd; 156 char *vl_boot; 157 uint64_t offset; 158 int l, error = 0, retries = 0; 159 160 if (vd == NULL) 161 return (EINVAL); 162 163 /* Hijack the current vdev */ 164 nvd = vd; 165 166 /* 167 * Pick a random label to rewrite. 168 */ 169 l = spa_get_random(VDEV_LABELS); 170 ASSERT(l < VDEV_LABELS); 171 172 offset = vdev_label_offset(vd->vdev_psize, l, 173 offsetof(vdev_label_t, vl_boot_header)); 174 175 vl_boot = kmem_alloc(VDEV_BOOT_HEADER_SIZE, KM_SLEEP); 176 177 while ((error = vdev_file_probe_io(nvd, vl_boot, VDEV_BOOT_HEADER_SIZE, 178 offset, UIO_READ)) != 0 && retries == 0) { 179 180 /* 181 * If we failed with the vdev that was passed in then 182 * try allocating a new one and try again. 183 */ 184 nvd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); 185 if (vd->vdev_path) 186 nvd->vdev_path = spa_strdup(vd->vdev_path); 187 error = vdev_file_open_common(nvd); 188 if (error) { 189 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, 190 nvd->vdev_stat.vs_aux); 191 break; 192 } 193 retries++; 194 } 195 196 if ((spa_mode & FWRITE) && !error) { 197 error = vdev_file_probe_io(nvd, vl_boot, VDEV_BOOT_HEADER_SIZE, 198 offset, UIO_WRITE); 199 } 200 201 if (retries) { 202 vdev_file_close(nvd); 203 if (nvd->vdev_path) 204 spa_strfree(nvd->vdev_path); 205 kmem_free(nvd, sizeof (vdev_t)); 206 } 207 kmem_free(vl_boot, VDEV_BOOT_HEADER_SIZE); 208 209 if (!error) 210 vd->vdev_is_failing = B_FALSE; 211 212 return (error); 213 } 214 215 static void 216 vdev_file_io_start(zio_t *zio) 217 { 218 vdev_t *vd = zio->io_vd; 219 vdev_file_t *vf = vd->vdev_tsd; 220 ssize_t resid; 221 int error; 222 223 if (zio->io_type == ZIO_TYPE_IOCTL) { 224 zio_vdev_io_bypass(zio); 225 226 /* XXPOLICY */ 227 if (!vdev_readable(vd)) { 228 zio->io_error = ENXIO; 229 zio_next_stage_async(zio); 230 return; 231 } 232 233 switch (zio->io_cmd) { 234 case DKIOCFLUSHWRITECACHE: 235 zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, 236 kcred, NULL); 237 dprintf("fsync(%s) = %d\n", vdev_description(vd), 238 zio->io_error); 239 break; 240 default: 241 zio->io_error = ENOTSUP; 242 } 243 244 zio_next_stage_async(zio); 245 return; 246 } 247 248 /* 249 * In the kernel, don't bother double-caching, but in userland, 250 * we want to test the vdev_cache code. 251 */ 252 #ifndef _KERNEL 253 if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0) 254 return; 255 #endif 256 257 if ((zio = vdev_queue_io(zio)) == NULL) 258 return; 259 260 /* XXPOLICY */ 261 if (zio->io_type == ZIO_TYPE_WRITE) 262 error = vdev_writeable(vd) ? vdev_error_inject(vd, zio) : ENXIO; 263 else 264 error = vdev_readable(vd) ? vdev_error_inject(vd, zio) : ENXIO; 265 error = (vd->vdev_remove_wanted || vd->vdev_is_failing) ? ENXIO : error; 266 if (error) { 267 zio->io_error = error; 268 zio_next_stage_async(zio); 269 return; 270 } 271 272 zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? 273 UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data, 274 zio->io_size, zio->io_offset, UIO_SYSSPACE, 275 0, RLIM64_INFINITY, kcred, &resid); 276 277 if (resid != 0 && zio->io_error == 0) 278 zio->io_error = ENOSPC; 279 280 zio_next_stage_async(zio); 281 } 282 283 static void 284 vdev_file_io_done(zio_t *zio) 285 { 286 287 if (zio_injection_enabled && zio->io_error == 0) 288 zio->io_error = zio_handle_device_injection(zio->io_vd, EIO); 289 290 /* 291 * If this device is truely gone, then attempt to remove it 292 * from the configuration. 293 */ 294 if (zio->io_error == EIO) { 295 vdev_t *vd = zio->io_vd; 296 297 if (vdev_probe(vd) != 0) 298 vd->vdev_is_failing = B_TRUE; 299 } 300 301 vdev_queue_io_done(zio); 302 303 #ifndef _KERNEL 304 if (zio->io_type == ZIO_TYPE_WRITE) 305 vdev_cache_write(zio); 306 #endif 307 308 zio_next_stage(zio); 309 } 310 311 vdev_ops_t vdev_file_ops = { 312 vdev_file_open, 313 vdev_file_close, 314 vdev_file_probe, 315 vdev_default_asize, 316 vdev_file_io_start, 317 vdev_file_io_done, 318 NULL, 319 VDEV_TYPE_FILE, /* name of this vdev type */ 320 B_TRUE /* leaf vdev */ 321 }; 322 323 /* 324 * From userland we access disks just like files. 325 */ 326 #ifndef _KERNEL 327 328 vdev_ops_t vdev_disk_ops = { 329 vdev_file_open, 330 vdev_file_close, 331 vdev_file_probe, 332 vdev_default_asize, 333 vdev_file_io_start, 334 vdev_file_io_done, 335 NULL, 336 VDEV_TYPE_DISK, /* name of this vdev type */ 337 B_TRUE /* leaf vdev */ 338 }; 339 340 #endif 341