1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/linker_set.h> 34 #include <sys/stat.h> 35 #include <sys/uio.h> 36 #include <sys/ioctl.h> 37 #include <sys/disk.h> 38 39 #include <errno.h> 40 #include <fcntl.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <stdint.h> 44 #include <string.h> 45 #include <strings.h> 46 #include <unistd.h> 47 #include <assert.h> 48 #include <pthread.h> 49 #include <md5.h> 50 51 #include "bhyverun.h" 52 #include "pci_emul.h" 53 #include "virtio.h" 54 55 #ifndef min 56 #define min(a, b) ((a) < (b) ? (a) : (b)) 57 #endif 58 59 #define VTBLK_RINGSZ 64 60 61 #define VTBLK_MAXSEGS 32 62 63 #define VTBLK_S_OK 0 64 #define VTBLK_S_IOERR 1 65 #define VTBLK_S_UNSUPP 2 66 67 #define VTBLK_BLK_ID_BYTES 20 68 69 /* Capability bits */ 70 #define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */ 71 #define VTBLK_F_BLK_SIZE (1 << 6) /* cfg block size valid */ 72 73 /* 74 * Host capabilities 75 */ 76 #define VTBLK_S_HOSTCAPS \ 77 ( VTBLK_F_SEG_MAX | \ 78 VTBLK_F_BLK_SIZE | \ 79 VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */ 80 81 /* 82 * Config space "registers" 83 */ 84 struct vtblk_config { 85 uint64_t vbc_capacity; 86 uint32_t vbc_size_max; 87 uint32_t vbc_seg_max; 88 uint16_t vbc_geom_c; 89 uint8_t vbc_geom_h; 90 uint8_t vbc_geom_s; 91 uint32_t vbc_blk_size; 92 uint32_t vbc_sectors_max; 93 } __packed; 94 95 /* 96 * Fixed-size block header 97 */ 98 struct virtio_blk_hdr { 99 #define VBH_OP_READ 0 100 #define VBH_OP_WRITE 1 101 #define VBH_OP_IDENT 8 102 #define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */ 103 uint32_t vbh_type; 104 uint32_t vbh_ioprio; 105 uint64_t vbh_sector; 106 } __packed; 107 108 /* 109 * Debug printf 110 */ 111 static int pci_vtblk_debug; 112 #define DPRINTF(params) if (pci_vtblk_debug) printf params 113 #define WPRINTF(params) printf params 114 115 /* 116 * Per-device softc 117 */ 118 struct pci_vtblk_softc { 119 struct virtio_softc vbsc_vs; 120 pthread_mutex_t vsc_mtx; 121 struct vqueue_info vbsc_vq; 122 int vbsc_fd; 123 struct vtblk_config vbsc_cfg; 124 char vbsc_ident[VTBLK_BLK_ID_BYTES]; 125 }; 126 127 static void pci_vtblk_reset(void *); 128 static void pci_vtblk_notify(void *, struct vqueue_info *); 129 static int pci_vtblk_cfgread(void *, int, int, uint32_t *); 130 static int pci_vtblk_cfgwrite(void *, int, int, uint32_t); 131 132 static struct virtio_consts vtblk_vi_consts = { 133 "vtblk", /* our name */ 134 1, /* we support 1 virtqueue */ 135 sizeof(struct vtblk_config), /* config reg size */ 136 pci_vtblk_reset, /* reset */ 137 pci_vtblk_notify, /* device-wide qnotify */ 138 pci_vtblk_cfgread, /* read PCI config */ 139 pci_vtblk_cfgwrite, /* write PCI config */ 140 VTBLK_S_HOSTCAPS, /* our capabilities */ 141 }; 142 143 static void 144 pci_vtblk_reset(void *vsc) 145 { 146 struct pci_vtblk_softc *sc = vsc; 147 148 DPRINTF(("vtblk: device reset requested !\n")); 149 vi_reset_dev(&sc->vbsc_vs); 150 } 151 152 static void 153 pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) 154 { 155 struct virtio_blk_hdr *vbh; 156 uint8_t *status; 157 int i, n; 158 int err; 159 int iolen; 160 int writeop, type; 161 off_t offset; 162 struct iovec iov[VTBLK_MAXSEGS + 2]; 163 uint16_t flags[VTBLK_MAXSEGS + 2]; 164 165 n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags); 166 167 /* 168 * The first descriptor will be the read-only fixed header, 169 * and the last is for status (hence +2 above and below). 170 * The remaining iov's are the actual data I/O vectors. 171 * 172 * XXX - note - this fails on crash dump, which does a 173 * VIRTIO_BLK_T_FLUSH with a zero transfer length 174 */ 175 assert(n >= 2 && n <= VTBLK_MAXSEGS + 2); 176 177 assert((flags[0] & VRING_DESC_F_WRITE) == 0); 178 assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); 179 vbh = iov[0].iov_base; 180 181 status = iov[--n].iov_base; 182 assert(iov[n].iov_len == 1); 183 assert(flags[n] & VRING_DESC_F_WRITE); 184 185 /* 186 * XXX 187 * The guest should not be setting the BARRIER flag because 188 * we don't advertise the capability. 189 */ 190 type = vbh->vbh_type & ~VBH_FLAG_BARRIER; 191 writeop = (type == VBH_OP_WRITE); 192 193 offset = vbh->vbh_sector * DEV_BSIZE; 194 195 iolen = 0; 196 for (i = 1; i < n; i++) { 197 /* 198 * - write op implies read-only descriptor, 199 * - read/ident op implies write-only descriptor, 200 * therefore test the inverse of the descriptor bit 201 * to the op. 202 */ 203 assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); 204 iolen += iov[i].iov_len; 205 } 206 207 DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", 208 writeop ? "write" : "read/ident", iolen, i - 1, offset)); 209 210 switch (type) { 211 case VBH_OP_WRITE: 212 err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset); 213 break; 214 case VBH_OP_READ: 215 err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset); 216 break; 217 case VBH_OP_IDENT: 218 /* Assume a single buffer */ 219 strlcpy(iov[1].iov_base, sc->vbsc_ident, 220 min(iov[1].iov_len, sizeof(sc->vbsc_ident))); 221 err = 0; 222 break; 223 default: 224 err = -ENOSYS; 225 break; 226 } 227 228 /* convert errno into a virtio block error return */ 229 if (err < 0) { 230 if (err == -ENOSYS) 231 *status = VTBLK_S_UNSUPP; 232 else 233 *status = VTBLK_S_IOERR; 234 } else 235 *status = VTBLK_S_OK; 236 237 /* 238 * Return the descriptor back to the host. 239 * We wrote 1 byte (our status) to host. 240 */ 241 vq_relchain(vq, 1); 242 } 243 244 static void 245 pci_vtblk_notify(void *vsc, struct vqueue_info *vq) 246 { 247 struct pci_vtblk_softc *sc = vsc; 248 249 vq_startchains(vq); 250 while (vq_has_descs(vq)) 251 pci_vtblk_proc(sc, vq); 252 vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ 253 } 254 255 static int 256 pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 257 { 258 struct stat sbuf; 259 MD5_CTX mdctx; 260 u_char digest[16]; 261 struct pci_vtblk_softc *sc; 262 off_t size; 263 int fd; 264 int sectsz; 265 266 if (opts == NULL) { 267 printf("virtio-block: backing device required\n"); 268 return (1); 269 } 270 271 /* 272 * The supplied backing file has to exist 273 */ 274 fd = open(opts, O_RDWR); 275 if (fd < 0) { 276 perror("Could not open backing file"); 277 return (1); 278 } 279 280 if (fstat(fd, &sbuf) < 0) { 281 perror("Could not stat backing file"); 282 close(fd); 283 return (1); 284 } 285 286 /* 287 * Deal with raw devices 288 */ 289 size = sbuf.st_size; 290 sectsz = DEV_BSIZE; 291 if (S_ISCHR(sbuf.st_mode)) { 292 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 293 ioctl(fd, DIOCGSECTORSIZE, §sz)) { 294 perror("Could not fetch dev blk/sector size"); 295 close(fd); 296 return (1); 297 } 298 assert(size != 0); 299 assert(sectsz != 0); 300 } 301 302 sc = calloc(1, sizeof(struct pci_vtblk_softc)); 303 304 /* record fd of storage device/file */ 305 sc->vbsc_fd = fd; 306 307 pthread_mutex_init(&sc->vsc_mtx, NULL); 308 309 /* init virtio softc and virtqueues */ 310 vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq); 311 sc->vbsc_vs.vs_mtx = &sc->vsc_mtx; 312 313 sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ; 314 /* sc->vbsc_vq.vq_notify = we have no per-queue notify */ 315 316 /* 317 * Create an identifier for the backing file. Use parts of the 318 * md5 sum of the filename 319 */ 320 MD5Init(&mdctx); 321 MD5Update(&mdctx, opts, strlen(opts)); 322 MD5Final(digest, &mdctx); 323 sprintf(sc->vbsc_ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X", 324 digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); 325 326 /* setup virtio block config space */ 327 sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */ 328 sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS; 329 sc->vbsc_cfg.vbc_blk_size = sectsz; 330 sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ 331 sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */ 332 sc->vbsc_cfg.vbc_geom_h = 0; 333 sc->vbsc_cfg.vbc_geom_s = 0; 334 sc->vbsc_cfg.vbc_sectors_max = 0; 335 336 /* 337 * Should we move some of this into virtio.c? Could 338 * have the device, class, and subdev_0 as fields in 339 * the virtio constants structure. 340 */ 341 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); 342 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 343 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); 344 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); 345 346 pci_lintr_request(pi); 347 348 if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) 349 return (1); 350 vi_set_io_bar(&sc->vbsc_vs, 0); 351 return (0); 352 } 353 354 static int 355 pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) 356 { 357 358 DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); 359 return (1); 360 } 361 362 static int 363 pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) 364 { 365 struct pci_vtblk_softc *sc = vsc; 366 void *ptr; 367 368 /* our caller has already verified offset and size */ 369 ptr = (uint8_t *)&sc->vbsc_cfg + offset; 370 memcpy(retval, ptr, size); 371 return (0); 372 } 373 374 struct pci_devemu pci_de_vblk = { 375 .pe_emu = "virtio-blk", 376 .pe_init = pci_vtblk_init, 377 .pe_barwrite = vi_pci_write, 378 .pe_barread = vi_pci_read 379 }; 380 PCI_EMUL_SET(pci_de_vblk); 381