1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/zfs_context.h> 30 #include <sys/spa.h> 31 #include <sys/vdev_disk.h> 32 #include <sys/vdev_impl.h> 33 #include <sys/fs/zfs.h> 34 #include <sys/zio.h> 35 #include <sys/sunddi.h> 36 37 /* 38 * Virtual device vector for disks. 39 */ 40 41 extern ldi_ident_t zfs_li; 42 43 typedef struct vdev_disk_buf { 44 buf_t vdb_buf; 45 zio_t *vdb_io; 46 } vdev_disk_buf_t; 47 48 static int 49 vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) 50 { 51 vdev_disk_t *dvd; 52 int error; 53 54 /* 55 * We must have a pathname, and it must be absolute. 56 */ 57 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 58 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 59 return (EINVAL); 60 } 61 62 dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); 63 64 /* 65 * When opening a disk device, we want to preserve the user's original 66 * intent. We always want to open the device by the path the user gave 67 * us, even if it is one of multiple paths to the save device. But we 68 * also want to be able to survive disks being removed/recabled. 69 * Therefore the sequence of opening devices is: 70 * 71 * 1. Try opening the device by path. 72 * 73 * a. First append "s0" to see if this is a whole disk 74 * b. Fall back to path otherwise 75 * 76 * 2. If the devid of the device matches the stored value, return 77 * success. 78 * 79 * 3. Otherwise, the device may have moved. Try opening the device 80 * by the devid instead. 81 * 82 */ 83 if (vd->vdev_devid != NULL) { 84 if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, 85 &dvd->vd_minor) != 0) { 86 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 87 return (EINVAL); 88 } 89 } 90 91 error = EINVAL; /* presume failure */ 92 93 if (vd->vdev_path != NULL) { 94 size_t len = strlen(vd->vdev_path) + 3; 95 char *buf = kmem_alloc(len, KM_SLEEP); 96 ddi_devid_t devid; 97 98 (void) snprintf(buf, len, "%ss0", vd->vdev_path); 99 100 /* 101 * Try whole disk first, then slice name. 102 */ 103 if ((error = ldi_open_by_name(buf, spa_mode, kcred, 104 &dvd->vd_lh, zfs_li)) != 0) 105 error = ldi_open_by_name(vd->vdev_path, 106 spa_mode, kcred, &dvd->vd_lh, zfs_li); 107 108 kmem_free(buf, len); 109 110 /* 111 * Compare the devid to the stored value. 112 */ 113 if (error == 0 && vd->vdev_devid != NULL && 114 ldi_get_devid(dvd->vd_lh, &devid) == 0) { 115 if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { 116 error = EINVAL; 117 (void) ldi_close(dvd->vd_lh, spa_mode, kcred); 118 dvd->vd_lh = NULL; 119 } 120 ddi_devid_free(devid); 121 } 122 } 123 124 /* 125 * If we were unable to open by path, or the devid check fails, open by 126 * devid instead. 127 */ 128 if (error != 0 && vd->vdev_devid != NULL) 129 error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor, 130 spa_mode, kcred, &dvd->vd_lh, zfs_li); 131 132 if (error) { 133 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 134 return (error); 135 } 136 137 /* 138 * Determine the actual size of the device. 139 */ 140 if (ldi_get_size(dvd->vd_lh, psize) != 0) { 141 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 142 return (EINVAL); 143 } 144 145 *ashift = SPA_MINBLOCKSHIFT; 146 147 return (0); 148 } 149 150 static void 151 vdev_disk_close(vdev_t *vd) 152 { 153 vdev_disk_t *dvd = vd->vdev_tsd; 154 155 if (dvd == NULL) 156 return; 157 158 dprintf("removing disk %s, devid %s\n", 159 vd->vdev_path ? vd->vdev_path : "<none>", 160 vd->vdev_devid ? vd->vdev_devid : "<none>"); 161 162 if (dvd->vd_minor != NULL) 163 ddi_devid_str_free(dvd->vd_minor); 164 165 if (dvd->vd_devid != NULL) 166 ddi_devid_free(dvd->vd_devid); 167 168 if (dvd->vd_lh != NULL) 169 (void) ldi_close(dvd->vd_lh, spa_mode, kcred); 170 171 kmem_free(dvd, sizeof (vdev_disk_t)); 172 vd->vdev_tsd = NULL; 173 } 174 175 static void 176 vdev_disk_io_intr(buf_t *bp) 177 { 178 vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp; 179 zio_t *zio = vdb->vdb_io; 180 181 if ((zio->io_error = geterror(bp)) == 0 && bp->b_resid != 0) 182 zio->io_error = EIO; 183 184 kmem_free(vdb, sizeof (vdev_disk_buf_t)); 185 186 zio_next_stage_async(zio); 187 } 188 189 static void 190 vdev_disk_ioctl_done(void *zio_arg, int error) 191 { 192 zio_t *zio = zio_arg; 193 194 zio->io_error = error; 195 196 zio_next_stage_async(zio); 197 } 198 199 static void 200 vdev_disk_io_start(zio_t *zio) 201 { 202 vdev_t *vd = zio->io_vd; 203 vdev_disk_t *dvd = vd->vdev_tsd; 204 vdev_disk_buf_t *vdb; 205 buf_t *bp; 206 int flags, error; 207 208 if (zio->io_type == ZIO_TYPE_IOCTL) { 209 zio_vdev_io_bypass(zio); 210 211 /* XXPOLICY */ 212 if (vdev_is_dead(vd)) { 213 zio->io_error = ENXIO; 214 zio_next_stage_async(zio); 215 return; 216 } 217 218 switch (zio->io_cmd) { 219 220 case DKIOCFLUSHWRITECACHE: 221 222 zio->io_dk_callback.dkc_callback = vdev_disk_ioctl_done; 223 zio->io_dk_callback.dkc_cookie = zio; 224 225 error = ldi_ioctl(dvd->vd_lh, zio->io_cmd, 226 (uintptr_t)&zio->io_dk_callback, 227 FKIOCTL, kcred, NULL); 228 229 if (error == 0) { 230 /* 231 * The ioctl will be done asychronously, 232 * and will call vdev_disk_ioctl_done() 233 * upon completion. 234 */ 235 return; 236 } 237 zio->io_error = error; 238 break; 239 240 default: 241 zio->io_error = ENOTSUP; 242 } 243 244 zio_next_stage_async(zio); 245 return; 246 } 247 248 if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0) 249 return; 250 251 if ((zio = vdev_queue_io(zio)) == NULL) 252 return; 253 254 flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE); 255 flags |= B_BUSY | B_NOCACHE; 256 if (zio->io_flags & ZIO_FLAG_FAILFAST) 257 flags |= B_FAILFAST; 258 259 vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP); 260 261 vdb->vdb_io = zio; 262 bp = &vdb->vdb_buf; 263 264 bioinit(bp); 265 bp->b_flags = flags; 266 bp->b_bcount = zio->io_size; 267 bp->b_un.b_addr = zio->io_data; 268 bp->b_lblkno = lbtodb(zio->io_offset); 269 bp->b_bufsize = zio->io_size; 270 bp->b_iodone = (int (*)())vdev_disk_io_intr; 271 272 /* XXPOLICY */ 273 error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio); 274 if (error) { 275 zio->io_error = error; 276 bioerror(bp, error); 277 bp->b_resid = bp->b_bcount; 278 bp->b_iodone(bp); 279 return; 280 } 281 282 error = ldi_strategy(dvd->vd_lh, bp); 283 /* ldi_strategy() will return non-zero only on programming errors */ 284 ASSERT(error == 0); 285 } 286 287 static void 288 vdev_disk_io_done(zio_t *zio) 289 { 290 vdev_queue_io_done(zio); 291 292 if (zio->io_type == ZIO_TYPE_WRITE) 293 vdev_cache_write(zio); 294 295 zio_next_stage(zio); 296 } 297 298 vdev_ops_t vdev_disk_ops = { 299 vdev_disk_open, 300 vdev_disk_close, 301 vdev_default_asize, 302 vdev_disk_io_start, 303 vdev_disk_io_done, 304 NULL, 305 VDEV_TYPE_DISK, /* name of this vdev type */ 306 B_TRUE /* leaf vdev */ 307 }; 308