1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/linker_set.h> 34 #include <sys/stat.h> 35 #include <sys/uio.h> 36 #include <sys/ioctl.h> 37 #include <sys/disk.h> 38 39 #include <errno.h> 40 #include <fcntl.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <stdint.h> 44 #include <string.h> 45 #include <strings.h> 46 #include <unistd.h> 47 #include <assert.h> 48 #include <pthread.h> 49 50 #include "bhyverun.h" 51 #include "pci_emul.h" 52 #include "virtio.h" 53 54 #define VTBLK_RINGSZ 64 55 56 #define VTBLK_MAXSEGS 32 57 58 #define VTBLK_S_OK 0 59 #define VTBLK_S_IOERR 1 60 61 /* 62 * Host capabilities 63 */ 64 #define VTBLK_S_HOSTCAPS \ 65 ( 0x00000004 | /* host maximum request segments */ \ 66 VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */ 67 68 /* 69 * Config space "registers" 70 */ 71 struct vtblk_config { 72 uint64_t vbc_capacity; 73 uint32_t vbc_size_max; 74 uint32_t vbc_seg_max; 75 uint16_t vbc_geom_c; 76 uint8_t vbc_geom_h; 77 uint8_t vbc_geom_s; 78 uint32_t vbc_blk_size; 79 uint32_t vbc_sectors_max; 80 } __packed; 81 82 /* 83 * Fixed-size block header 84 */ 85 struct virtio_blk_hdr { 86 #define VBH_OP_READ 0 87 #define VBH_OP_WRITE 1 88 #define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */ 89 uint32_t vbh_type; 90 uint32_t vbh_ioprio; 91 uint64_t vbh_sector; 92 } __packed; 93 94 /* 95 * Debug printf 96 */ 97 static int pci_vtblk_debug; 98 #define DPRINTF(params) if (pci_vtblk_debug) printf params 99 #define WPRINTF(params) printf params 100 101 /* 102 * Per-device softc 103 */ 104 struct pci_vtblk_softc { 105 struct virtio_softc vbsc_vs; 106 struct vqueue_info vbsc_vq; 107 int vbsc_fd; 108 struct vtblk_config vbsc_cfg; 109 }; 110 111 static void pci_vtblk_reset(void *); 112 static void pci_vtblk_notify(void *, struct vqueue_info *); 113 static int pci_vtblk_cfgread(void *, int, int, uint32_t *); 114 static int pci_vtblk_cfgwrite(void *, int, int, uint32_t); 115 116 static struct virtio_consts vtblk_vi_consts = { 117 "vtblk", /* our name */ 118 1, /* we support 1 virtqueue */ 119 sizeof(struct vtblk_config), /* config reg size */ 120 pci_vtblk_reset, /* reset */ 121 pci_vtblk_notify, /* device-wide qnotify */ 122 pci_vtblk_cfgread, /* read PCI config */ 123 pci_vtblk_cfgwrite, /* write PCI config */ 124 VTBLK_S_HOSTCAPS, /* our capabilities */ 125 }; 126 127 static void 128 pci_vtblk_reset(void *vsc) 129 { 130 struct pci_vtblk_softc *sc = vsc; 131 132 DPRINTF(("vtblk: device reset requested !\n")); 133 vi_reset_dev(&sc->vbsc_vs); 134 } 135 136 static void 137 pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) 138 { 139 struct virtio_blk_hdr *vbh; 140 uint8_t *status; 141 int i, n; 142 int err; 143 int iolen; 144 int writeop, type; 145 off_t offset; 146 struct iovec iov[VTBLK_MAXSEGS + 2]; 147 uint16_t flags[VTBLK_MAXSEGS + 2]; 148 149 n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags); 150 151 /* 152 * The first descriptor will be the read-only fixed header, 153 * and the last is for status (hence +2 above and below). 154 * The remaining iov's are the actual data I/O vectors. 155 * 156 * XXX - note - this fails on crash dump, which does a 157 * VIRTIO_BLK_T_FLUSH with a zero transfer length 158 */ 159 assert (n >= 3 && n < VTBLK_MAXSEGS + 2); 160 161 assert((flags[0] & VRING_DESC_F_WRITE) == 0); 162 assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); 163 vbh = iov[0].iov_base; 164 165 status = iov[--n].iov_base; 166 assert(iov[n].iov_len == 1); 167 assert(flags[n] & VRING_DESC_F_WRITE); 168 169 /* 170 * XXX 171 * The guest should not be setting the BARRIER flag because 172 * we don't advertise the capability. 173 */ 174 type = vbh->vbh_type & ~VBH_FLAG_BARRIER; 175 writeop = (type == VBH_OP_WRITE); 176 177 offset = vbh->vbh_sector * DEV_BSIZE; 178 179 iolen = 0; 180 for (i = 1; i < n; i++) { 181 /* 182 * - write op implies read-only descriptor, 183 * - read op implies write-only descriptor, 184 * therefore test the inverse of the descriptor bit 185 * to the op. 186 */ 187 assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); 188 iolen += iov[i].iov_len; 189 } 190 191 DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", 192 writeop ? "write" : "read", iolen, i - 1, offset)); 193 194 if (writeop) 195 err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset); 196 else 197 err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset); 198 199 *status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK; 200 201 /* 202 * Return the descriptor back to the host. 203 * We wrote 1 byte (our status) to host. 204 */ 205 vq_relchain(vq, 1); 206 } 207 208 static void 209 pci_vtblk_notify(void *vsc, struct vqueue_info *vq) 210 { 211 struct pci_vtblk_softc *sc = vsc; 212 213 vq_startchains(vq); 214 while (vq_has_descs(vq)) 215 pci_vtblk_proc(sc, vq); 216 vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ 217 } 218 219 static int 220 pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 221 { 222 struct stat sbuf; 223 struct pci_vtblk_softc *sc; 224 off_t size; 225 int fd; 226 int sectsz; 227 int use_msix; 228 const char *env_msi; 229 230 if (opts == NULL) { 231 printf("virtio-block: backing device required\n"); 232 return (1); 233 } 234 235 /* 236 * The supplied backing file has to exist 237 */ 238 fd = open(opts, O_RDWR); 239 if (fd < 0) { 240 perror("Could not open backing file"); 241 return (1); 242 } 243 244 if (fstat(fd, &sbuf) < 0) { 245 perror("Could not stat backing file"); 246 close(fd); 247 return (1); 248 } 249 250 /* 251 * Deal with raw devices 252 */ 253 size = sbuf.st_size; 254 sectsz = DEV_BSIZE; 255 if (S_ISCHR(sbuf.st_mode)) { 256 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 257 ioctl(fd, DIOCGSECTORSIZE, §sz)) { 258 perror("Could not fetch dev blk/sector size"); 259 close(fd); 260 return (1); 261 } 262 assert(size != 0); 263 assert(sectsz != 0); 264 } 265 266 sc = malloc(sizeof(struct pci_vtblk_softc)); 267 memset(sc, 0, sizeof(struct pci_vtblk_softc)); 268 269 /* record fd of storage device/file */ 270 sc->vbsc_fd = fd; 271 272 /* init virtio softc and virtqueues */ 273 vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq); 274 sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ; 275 /* sc->vbsc_vq.vq_notify = we have no per-queue notify */ 276 277 /* setup virtio block config space */ 278 sc->vbsc_cfg.vbc_capacity = size / sectsz; 279 sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS; 280 sc->vbsc_cfg.vbc_blk_size = sectsz; 281 sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ 282 sc->vbsc_cfg.vbc_geom_c = 0; /* no geometry */ 283 sc->vbsc_cfg.vbc_geom_h = 0; 284 sc->vbsc_cfg.vbc_geom_s = 0; 285 sc->vbsc_cfg.vbc_sectors_max = 0; 286 287 /* 288 * Should we move some of this into virtio.c? Could 289 * have the device, class, and subdev_0 as fields in 290 * the virtio constants structure. 291 */ 292 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); 293 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 294 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); 295 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); 296 297 use_msix = 1; 298 if ((env_msi = getenv("BHYVE_USE_MSI"))) { 299 if (strcasecmp(env_msi, "yes") == 0) 300 use_msix = 0; 301 } 302 if (vi_intr_init(&sc->vbsc_vs, 1, use_msix)) 303 return (1); 304 vi_set_io_bar(&sc->vbsc_vs, 0); 305 return (0); 306 } 307 308 static int 309 pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) 310 { 311 312 DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); 313 return (1); 314 } 315 316 static int 317 pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) 318 { 319 struct pci_vtblk_softc *sc = vsc; 320 void *ptr; 321 322 /* our caller has already verified offset and size */ 323 ptr = (uint8_t *)&sc->vbsc_cfg + offset; 324 memcpy(retval, ptr, size); 325 return (0); 326 } 327 328 struct pci_devemu pci_de_vblk = { 329 .pe_emu = "virtio-blk", 330 .pe_init = pci_vtblk_init, 331 .pe_barwrite = vi_pci_write, 332 .pe_barread = vi_pci_read 333 }; 334 PCI_EMUL_SET(pci_de_vblk); 335