/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <geom/geom_disk.h>
#include <vm/uma.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

struct vtblk_request {
        struct virtio_blk_outhdr    vbr_hdr;
        struct bio                 *vbr_bp;
        uint8_t                     vbr_ack;

        TAILQ_ENTRY(vtblk_request)  vbr_link;
};
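
/*
 * Each request is placed on the virtqueue as a chain of three regions:
 *
 *      vbr_hdr - device-readable: request type, priority, start sector
 *      data    - the bio's buffer; device-readable for writes and
 *                device-writable for reads (absent for flushes)
 *      vbr_ack - device-writable: the VIRTIO_BLK_S_* completion status
 */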

struct vtblk_softc {
        device_t                 vtblk_dev;
        struct mtx               vtblk_mtx;
        uint64_t                 vtblk_features;
        uint32_t                 vtblk_flags;
#define VTBLK_FLAG_INDIRECT     0x0001
#define VTBLK_FLAG_READONLY     0x0002
#define VTBLK_FLAG_DETACHING    0x0004
#define VTBLK_FLAG_SUSPENDED    0x0008
#define VTBLK_FLAG_DUMPING      0x0010

        struct virtqueue        *vtblk_vq;
        struct sglist           *vtblk_sglist;
        struct disk             *vtblk_disk;

        struct bio_queue_head    vtblk_bioq;
        TAILQ_HEAD(, vtblk_request)
                                 vtblk_req_free;
        TAILQ_HEAD(, vtblk_request)
                                 vtblk_req_ready;

        struct taskqueue        *vtblk_tq;
        struct task              vtblk_intr_task;

        int                      vtblk_max_nsegs;
        int                      vtblk_request_count;

        struct vtblk_request     vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
        { VIRTIO_BLK_F_BARRIER,         "HostBarrier"   },
        { VIRTIO_BLK_F_SIZE_MAX,        "MaxSegSize"    },
        { VIRTIO_BLK_F_SEG_MAX,         "MaxNumSegs"    },
        { VIRTIO_BLK_F_GEOMETRY,        "DiskGeometry"  },
        { VIRTIO_BLK_F_RO,              "ReadOnly"      },
        { VIRTIO_BLK_F_BLK_SIZE,        "BlockSize"     },
        { VIRTIO_BLK_F_SCSI,            "SCSICmds"      },
        { VIRTIO_BLK_F_FLUSH,           "FlushCmd"      },
        { VIRTIO_BLK_F_TOPOLOGY,        "Topology"      },

        { 0, NULL }
};

static int      vtblk_modevent(module_t, int, void *);

static int      vtblk_probe(device_t);
static int      vtblk_attach(device_t);
static int      vtblk_detach(device_t);
static int      vtblk_suspend(device_t);
static int      vtblk_resume(device_t);
static int      vtblk_shutdown(device_t);

static void     vtblk_negotiate_features(struct vtblk_softc *);
static int      vtblk_maximum_segments(struct vtblk_softc *,
                    struct virtio_blk_config *);
static int      vtblk_alloc_virtqueue(struct vtblk_softc *);
static void     vtblk_alloc_disk(struct vtblk_softc *,
                    struct virtio_blk_config *);
static void     vtblk_create_disk(struct vtblk_softc *);

static int      vtblk_open(struct disk *);
static int      vtblk_close(struct disk *);
static int      vtblk_ioctl(struct disk *, u_long, void *, int,
                    struct thread *);
static int      vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void     vtblk_strategy(struct bio *);

static void     vtblk_startio(struct vtblk_softc *);
static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
static int      vtblk_execute_request(struct vtblk_softc *,
                    struct vtblk_request *);

static int      vtblk_vq_intr(void *);
static void     vtblk_intr_task(void *, int);

static void     vtblk_stop(struct vtblk_softc *);

static void     vtblk_get_ident(struct vtblk_softc *);
static void     vtblk_prepare_dump(struct vtblk_softc *);
static int      vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int      vtblk_flush_dump(struct vtblk_softc *);
static int      vtblk_poll_request(struct vtblk_softc *,
                    struct vtblk_request *);

static void     vtblk_drain_vq(struct vtblk_softc *, int);
static void     vtblk_drain(struct vtblk_softc *);

static int      vtblk_alloc_requests(struct vtblk_softc *);
static void     vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request * vtblk_dequeue_request(struct vtblk_softc *);
static void     vtblk_enqueue_request(struct vtblk_softc *,
                    struct vtblk_request *);

static struct vtblk_request * vtblk_dequeue_ready(struct vtblk_softc *);
static void     vtblk_enqueue_ready(struct vtblk_softc *,
                    struct vtblk_request *);

static void     vtblk_bio_error(struct bio *, int);

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER               | \
     VIRTIO_BLK_F_SIZE_MAX              | \
     VIRTIO_BLK_F_SEG_MAX               | \
     VIRTIO_BLK_F_GEOMETRY              | \
     VIRTIO_BLK_F_RO                    | \
     VIRTIO_BLK_F_BLK_SIZE              | \
     VIRTIO_BLK_F_FLUSH                 | \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MTX(_sc)          &(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
                                mtx_init(VTBLK_MTX((_sc)), (_name), \
                                    "VTBLK Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)         mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)       mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc) mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)  mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
                                mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_BIO_SEGMENTS(_bp) sglist_count((_bp)->bio_data, (_bp)->bio_bcount)

#define VTBLK_DISK_NAME         "vtbd"

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS      2

static uma_zone_t vtblk_req_zone;

static device_method_t vtblk_methods[] = {
        /* Device methods. */
        DEVMETHOD(device_probe,         vtblk_probe),
        DEVMETHOD(device_attach,        vtblk_attach),
        DEVMETHOD(device_detach,        vtblk_detach),
        DEVMETHOD(device_suspend,       vtblk_suspend),
        DEVMETHOD(device_resume,        vtblk_resume),
        DEVMETHOD(device_shutdown,      vtblk_shutdown),

        { 0, 0 }
};

static driver_t vtblk_driver = {
        "vtblk",
        vtblk_methods,
        sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
        int error;

        error = 0;

        switch (type) {
        case MOD_LOAD:
                vtblk_req_zone = uma_zcreate("vtblk_request",
                    sizeof(struct vtblk_request),
                    NULL, NULL, NULL, NULL, 0, 0);
                break;
        case MOD_QUIESCE:
        case MOD_UNLOAD:
                if (uma_zone_get_cur(vtblk_req_zone) > 0)
                        error = EBUSY;
                else if (type == MOD_UNLOAD) {
                        uma_zdestroy(vtblk_req_zone);
                        vtblk_req_zone = NULL;
                }
                break;
        case MOD_SHUTDOWN:
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }

        return (error);
}

static int
vtblk_probe(device_t dev)
{

        if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
                return (ENXIO);

        device_set_desc(dev, "VirtIO Block Adapter");

        return (BUS_PROBE_DEFAULT);
}

static int
vtblk_attach(device_t dev)
{
        struct vtblk_softc *sc;
        struct virtio_blk_config blkcfg;
        int error;

        sc = device_get_softc(dev);
        sc->vtblk_dev = dev;

        VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));

        bioq_init(&sc->vtblk_bioq);
        TAILQ_INIT(&sc->vtblk_req_free);
        TAILQ_INIT(&sc->vtblk_req_ready);

        virtio_set_feature_desc(dev, vtblk_feature_desc);
        vtblk_negotiate_features(sc);

        if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
                sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
                sc->vtblk_flags |= VTBLK_FLAG_READONLY;

        /* Get local copy of config. */
        virtio_read_device_config(dev, 0, &blkcfg,
            sizeof(struct virtio_blk_config));

        /*
         * With the current sglist(9) implementation, it is not easy
         * for us to support a maximum segment size as adjacent
         * segments are coalesced. For now, just make sure it's larger
         * than the maximum supported transfer size.
         */
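        /*
         * For example, sglist_append() merges buffers that sit on
         * physically contiguous pages into a single segment, so a
         * segment can grow past the host's size_max even when every
         * individual buffer is below it.
         */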
        if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
                if (blkcfg.size_max < MAXPHYS) {
                        error = ENOTSUP;
                        device_printf(dev, "host requires unsupported "
                            "maximum segment size feature\n");
                        goto fail;
                }
        }

        sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);

        /*
         * Allocate working sglist. The number of segments may be too
         * large to safely store on the stack.
         */
        sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
        if (sc->vtblk_sglist == NULL) {
                error = ENOMEM;
                device_printf(dev, "cannot allocate sglist\n");
                goto fail;
        }

        error = vtblk_alloc_virtqueue(sc);
        if (error) {
                device_printf(dev, "cannot allocate virtqueue\n");
                goto fail;
        }

        error = vtblk_alloc_requests(sc);
        if (error) {
                device_printf(dev, "cannot preallocate requests\n");
                goto fail;
        }

        vtblk_alloc_disk(sc, &blkcfg);

        TASK_INIT(&sc->vtblk_intr_task, 0, vtblk_intr_task, sc);
        sc->vtblk_tq = taskqueue_create_fast("vtblk_taskq", M_NOWAIT,
            taskqueue_thread_enqueue, &sc->vtblk_tq);
        if (sc->vtblk_tq == NULL) {
                error = ENOMEM;
                device_printf(dev, "cannot allocate taskqueue\n");
                goto fail;
        }
        taskqueue_start_threads(&sc->vtblk_tq, 1, PI_DISK, "%s taskq",
            device_get_nameunit(dev));

        error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
        if (error) {
                device_printf(dev, "cannot setup virtqueue interrupt\n");
                goto fail;
        }

        vtblk_create_disk(sc);

        virtqueue_enable_intr(sc->vtblk_vq);

fail:
        if (error)
                vtblk_detach(dev);

        return (error);
}

static int
vtblk_detach(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_DETACHING;
        if (device_is_attached(dev))
                vtblk_stop(sc);
        VTBLK_UNLOCK(sc);

        if (sc->vtblk_tq != NULL) {
                taskqueue_drain(sc->vtblk_tq, &sc->vtblk_intr_task);
                taskqueue_free(sc->vtblk_tq);
                sc->vtblk_tq = NULL;
        }

        vtblk_drain(sc);

        if (sc->vtblk_disk != NULL) {
                disk_destroy(sc->vtblk_disk);
                sc->vtblk_disk = NULL;
        }

        if (sc->vtblk_sglist != NULL) {
                sglist_free(sc->vtblk_sglist);
                sc->vtblk_sglist = NULL;
        }

        VTBLK_LOCK_DESTROY(sc);

        return (0);
}

static int
vtblk_suspend(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_SUSPENDED;
        /* TODO Wait for any inflight IO to complete? */
        VTBLK_UNLOCK(sc);

        return (0);
}

static int
vtblk_resume(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags &= ~VTBLK_FLAG_SUSPENDED;
        /* TODO Resume IO? */
        VTBLK_UNLOCK(sc);

        return (0);
}

static int
vtblk_shutdown(device_t dev)
{

        return (0);
}

static int
vtblk_open(struct disk *dp)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (sc->vtblk_flags & VTBLK_FLAG_DETACHING ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (ENOTTY);
}
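
/*
 * The disk dump routine is called repeatedly with length > 0 to write
 * out chunks of the dump, then once more with virtual == NULL and
 * length == 0 to signal completion, at which point the cache is
 * flushed.
 */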
static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
        struct disk *dp;
        struct vtblk_softc *sc;
        int error;

        dp = arg;
        error = 0;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        VTBLK_LOCK(sc);

        if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
                vtblk_prepare_dump(sc);
                sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
        }

        if (length > 0)
                error = vtblk_write_dump(sc, virtual, offset, length);
        else if (virtual == NULL && offset == 0)
                error = vtblk_flush_dump(sc);

        VTBLK_UNLOCK(sc);

        return (error);
}

static void
vtblk_strategy(struct bio *bp)
{
        struct vtblk_softc *sc;

        if ((sc = bp->bio_disk->d_drv1) == NULL) {
                vtblk_bio_error(bp, EINVAL);
                return;
        }

        /*
         * Fail any write if RO. Unfortunately, there does not seem to
         * be a better way to report our readonly'ness to GEOM above.
         */
        if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
            (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
                vtblk_bio_error(bp, EROFS);
                return;
        }

        /*
         * Prevent read/write buffers spanning too many segments from
         * getting into the queue. This should only trip if d_maxsize
         * was incorrectly set.
         */
        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
                KASSERT(VTBLK_BIO_SEGMENTS(bp) <= sc->vtblk_max_nsegs -
                    VTBLK_MIN_SEGMENTS,
                    ("bio spanned too many segments: %d, max: %d",
                    VTBLK_BIO_SEGMENTS(bp),
                    sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS));
        }

        VTBLK_LOCK(sc);
        if ((sc->vtblk_flags & VTBLK_FLAG_DETACHING) == 0) {
                bioq_disksort(&sc->vtblk_bioq, bp);
                vtblk_startio(sc);
        } else
                vtblk_bio_error(bp, ENXIO);
        VTBLK_UNLOCK(sc);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
        device_t dev;
        uint64_t features;

        dev = sc->vtblk_dev;
        features = VTBLK_FEATURES;

        sc->vtblk_features = virtio_negotiate_features(dev, features);
}

static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
        device_t dev;
        int nsegs;

        dev = sc->vtblk_dev;
        nsegs = VTBLK_MIN_SEGMENTS;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
                nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
                if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
                        nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
        } else
                nsegs += 1;

        return (nsegs);
}
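
/*
 * Worked example: on a typical amd64 configuration (4KB pages, 128KB
 * MAXPHYS), a maximum-sized transfer needs at most MAXPHYS / PAGE_SIZE
 * + 1 = 33 data segments, so even if the host advertises a much larger
 * seg_max, the result above is capped at VTBLK_MIN_SEGMENTS + 33 = 35.
 */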

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
        device_t dev;
        struct vq_alloc_info vq_info;

        dev = sc->vtblk_dev;

        VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
            vtblk_vq_intr, sc, &sc->vtblk_vq,
            "%s request", device_get_nameunit(dev));

        return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
        device_t dev;
        struct disk *dp;

        dev = sc->vtblk_dev;

        sc->vtblk_disk = dp = disk_alloc();
        dp->d_open = vtblk_open;
        dp->d_close = vtblk_close;
        dp->d_ioctl = vtblk_ioctl;
        dp->d_strategy = vtblk_strategy;
        dp->d_name = VTBLK_DISK_NAME;
        dp->d_unit = device_get_unit(dev);
        dp->d_drv1 = sc;

        if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0)
                dp->d_dump = vtblk_dump;

        /* Capacity is always in 512-byte units. */
        dp->d_mediasize = blkcfg->capacity * 512;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
                dp->d_sectorsize = blkcfg->blk_size;
        else
                dp->d_sectorsize = 512;

        /*
         * The VirtIO maximum I/O size is given in terms of segments.
         * However, FreeBSD limits I/O size by logical buffer size, not
         * by physically contiguous pages. Therefore, we have to assume
         * no pages are contiguous. This may impose an artificially low
         * maximum I/O size. But in practice, since QEMU advertises 128
         * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
         * which is typically greater than MAXPHYS. Eventually we should
         * just advertise MAXPHYS and split buffers that are too big.
         *
         * Note we must subtract one additional segment in case of non
         * page aligned buffers.
         */
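        /*
         * For example, with 4KB pages, a two-page buffer starting at
         * offset 0x800 into a page touches three pages, so an unaligned
         * transfer needs one more segment than its length alone would
         * suggest.
         */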
        dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
            PAGE_SIZE;
        if (dp->d_maxsize < PAGE_SIZE)
                dp->d_maxsize = PAGE_SIZE; /* XXX */

        if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
                dp->d_fwsectors = blkcfg->geometry.sectors;
                dp->d_fwheads = blkcfg->geometry.heads;
        }

        if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
                dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
        struct disk *dp;

        dp = sc->vtblk_disk;

        /*
         * Retrieving the identification string must be done after
         * the virtqueue interrupt is setup otherwise it will hang.
         */
        vtblk_get_ident(sc);

        device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
            (uintmax_t) dp->d_mediasize >> 20,
            (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
            dp->d_sectorsize);

        disk_create(dp, DISK_VERSION);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int enq;

        vq = sc->vtblk_vq;
        enq = 0;

        VTBLK_LOCK_ASSERT(sc);

        if (sc->vtblk_flags & VTBLK_FLAG_SUSPENDED)
                return;

        while (!virtqueue_full(vq)) {
                if ((req = vtblk_dequeue_ready(sc)) == NULL)
                        req = vtblk_bio_request(sc);
                if (req == NULL)
                        break;

                if (vtblk_execute_request(sc, req) != 0) {
                        vtblk_enqueue_ready(sc, req);
                        break;
                }

                enq++;
        }

        if (enq > 0)
                virtqueue_notify(vq);
}

static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bp;

        bioq = &sc->vtblk_bioq;

        if (bioq_first(bioq) == NULL)
                return (NULL);

        req = vtblk_dequeue_request(sc);
        if (req == NULL)
                return (NULL);

        bp = bioq_takefirst(bioq);
        req->vbr_bp = bp;
        req->vbr_ack = -1;
        req->vbr_hdr.ioprio = 1;

        switch (bp->bio_cmd) {
        case BIO_FLUSH:
                req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
                break;
        case BIO_READ:
                req->vbr_hdr.type = VIRTIO_BLK_T_IN;
                req->vbr_hdr.sector = bp->bio_offset / 512;
                break;
        case BIO_WRITE:
                req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
                req->vbr_hdr.sector = bp->bio_offset / 512;
                break;
        default:
                KASSERT(0, ("bio with unhandled cmd: %d", bp->bio_cmd));
                req->vbr_hdr.type = -1;
                break;
        }

        if (bp->bio_flags & BIO_ORDERED)
                req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

        return (req);
}
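
/*
 * The virtqueue splits each chain into device-readable segments
 * followed by device-writable ones: the header is always readable, the
 * ack byte is always writable, and the data buffer is writable only
 * for BIO_READ. For example, a one-page read enqueues 1 readable and
 * 2 writable segments; a one-page write enqueues 2 readable and 1
 * writable.
 */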
static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        struct sglist *sg;
        struct bio *bp;
        int writable, error;

        sg = sc->vtblk_sglist;
        bp = req->vbr_bp;
        writable = 0;

        VTBLK_LOCK_ASSERT(sc);

        sglist_reset(sg);
        error = sglist_append(sg, &req->vbr_hdr,
            sizeof(struct virtio_blk_outhdr));
        KASSERT(error == 0, ("error adding header to sglist"));
        KASSERT(sg->sg_nseg == 1,
            ("header spanned multiple segments: %d", sg->sg_nseg));

        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
                error = sglist_append(sg, bp->bio_data, bp->bio_bcount);
                KASSERT(error == 0, ("error adding buffer to sglist"));

                /* BIO_READ means the host writes into our buffer. */
                if (bp->bio_cmd == BIO_READ)
                        writable += sg->sg_nseg - 1;
        }

        error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
        KASSERT(error == 0, ("error adding ack to sglist"));
        writable++;

        KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
            ("fewer than min segments: %d", sg->sg_nseg));

        error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
            sg->sg_nseg - writable, writable);

        return (error);
}

static int
vtblk_vq_intr(void *xsc)
{
        struct vtblk_softc *sc;

        sc = xsc;

        virtqueue_disable_intr(sc->vtblk_vq);
        taskqueue_enqueue_fast(sc->vtblk_tq, &sc->vtblk_intr_task);

        return (1);
}

static void
vtblk_intr_task(void *arg, int pending)
{
        struct vtblk_softc *sc;
        struct vtblk_request *req;
        struct virtqueue *vq;
        struct bio *bp;

        sc = arg;
        vq = sc->vtblk_vq;

        VTBLK_LOCK(sc);
        if (sc->vtblk_flags & VTBLK_FLAG_DETACHING) {
                VTBLK_UNLOCK(sc);
                return;
        }

        while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
                bp = req->vbr_bp;

                if (req->vbr_ack == VIRTIO_BLK_S_OK)
                        bp->bio_resid = 0;
                else {
                        bp->bio_flags |= BIO_ERROR;
                        if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
                                bp->bio_error = ENOTSUP;
                        else
                                bp->bio_error = EIO;
                }

                biodone(bp);
                vtblk_enqueue_request(sc, req);
        }

        vtblk_startio(sc);

        if (virtqueue_enable_intr(vq) != 0) {
                virtqueue_disable_intr(vq);
                VTBLK_UNLOCK(sc);
                taskqueue_enqueue_fast(sc->vtblk_tq,
                    &sc->vtblk_intr_task);
                return;
        }

        VTBLK_UNLOCK(sc);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

        virtqueue_disable_intr(sc->vtblk_vq);
        virtio_stop(sc->vtblk_dev);
}

static void
vtblk_get_ident(struct vtblk_softc *sc)
{
        struct bio buf;
        struct disk *dp;
        struct vtblk_request *req;
        int len, error;

        dp = sc->vtblk_disk;
        len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

        if (vtblk_no_ident != 0)
                return;

        req = vtblk_dequeue_request(sc);
        if (req == NULL)
                return;

        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID;
        req->vbr_hdr.ioprio = 1;
        req->vbr_hdr.sector = 0;

        req->vbr_bp = &buf;
        bzero(&buf, sizeof(struct bio));

        buf.bio_cmd = BIO_READ;
        buf.bio_data = dp->d_ident;
        buf.bio_bcount = len;

        VTBLK_LOCK(sc);
        error = vtblk_poll_request(sc, req);
        VTBLK_UNLOCK(sc);

        vtblk_enqueue_request(sc, req);

        if (error) {
                device_printf(sc->vtblk_dev,
                    "error getting device identifier: %d\n", error);
        }
}
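
/*
 * While the kernel is dumping, interrupts may no longer be serviced,
 * so the device is reset and reinitialized with the virtqueue
 * interrupt left disabled, and each dump request is then issued
 * synchronously through vtblk_poll_request().
 */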
static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
        device_t dev;
        struct virtqueue *vq;

        dev = sc->vtblk_dev;
        vq = sc->vtblk_vq;

        vtblk_stop(sc);

        /*
         * Drain all requests caught in-flight in the virtqueue,
         * skipping biodone(). When dumping, only one request is
         * outstanding at a time, and we just poll the virtqueue
         * for the response.
         */
        vtblk_drain_vq(sc, 1);

        if (virtio_reinit(dev, sc->vtblk_features) != 0)
                panic("cannot reinit VirtIO block device during dump");

        virtqueue_disable_intr(vq);
        virtio_reinit_complete(dev);
}

static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
        struct bio buf;
        struct vtblk_request *req;

        req = &sc->vtblk_dump_request;
        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
        req->vbr_hdr.ioprio = 1;
        req->vbr_hdr.sector = offset / 512;

        req->vbr_bp = &buf;
        bzero(&buf, sizeof(struct bio));

        buf.bio_cmd = BIO_WRITE;
        buf.bio_data = virtual;
        buf.bio_bcount = length;

        return (vtblk_poll_request(sc, req));
}

static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
        struct bio buf;
        struct vtblk_request *req;

        req = &sc->vtblk_dump_request;
        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
        req->vbr_hdr.ioprio = 1;
        req->vbr_hdr.sector = 0;

        req->vbr_bp = &buf;
        bzero(&buf, sizeof(struct bio));

        buf.bio_cmd = BIO_FLUSH;

        return (vtblk_poll_request(sc, req));
}

static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        device_t dev;
        struct virtqueue *vq;
        struct vtblk_request *r;
        int error;

        dev = sc->vtblk_dev;
        vq = sc->vtblk_vq;

        if (!virtqueue_empty(vq))
                return (EBUSY);

        error = vtblk_execute_request(sc, req);
        if (error)
                return (error);

        virtqueue_notify(vq);

        r = virtqueue_poll(vq, NULL);
        KASSERT(r == req, ("unexpected request response"));

        if (req->vbr_ack != VIRTIO_BLK_S_OK) {
                error = req->vbr_ack == VIRTIO_BLK_S_UNSUPP ? ENOTSUP : EIO;
                if (bootverbose)
                        device_printf(dev,
                            "vtblk_poll_request: IO error: %d\n", error);
        }

        return (error);
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int last;

        vq = sc->vtblk_vq;
        last = 0;

        while ((req = virtqueue_drain(vq, &last)) != NULL) {
                if (!skip_done)
                        vtblk_bio_error(req->vbr_bp, ENXIO);

                vtblk_enqueue_request(sc, req);
        }

        KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bp;

        bioq = &sc->vtblk_bioq;

        if (sc->vtblk_vq != NULL)
                vtblk_drain_vq(sc, 0);

        while ((req = vtblk_dequeue_ready(sc)) != NULL) {
                vtblk_bio_error(req->vbr_bp, ENXIO);
                vtblk_enqueue_request(sc, req);
        }

        while (bioq_first(bioq) != NULL) {
                bp = bioq_takefirst(bioq);
                vtblk_bio_error(bp, ENXIO);
        }

        vtblk_free_requests(sc);
}
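
/*
 * For example, with a 128-entry virtqueue and indirect descriptors,
 * each request occupies a single ring slot, so 128 requests are
 * preallocated below; without indirect descriptors, each request
 * consumes at least VTBLK_MIN_SEGMENTS slots, so half as many requests
 * are enough to fill the ring.
 */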
static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
        struct vtblk_request *req;
        int i, size;

        size = virtqueue_size(sc->vtblk_vq);

        /*
         * Preallocate sufficient requests to keep the virtqueue full. Each
         * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
         * the number allocated when indirect descriptors are not available.
         */
        if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
                size /= VTBLK_MIN_SEGMENTS;

        for (i = 0; i < size; i++) {
                req = uma_zalloc(vtblk_req_zone, M_NOWAIT);
                if (req == NULL)
                        return (ENOMEM);

                sc->vtblk_request_count++;
                vtblk_enqueue_request(sc, req);
        }

        return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        while ((req = vtblk_dequeue_request(sc)) != NULL) {
                sc->vtblk_request_count--;
                uma_zfree(vtblk_req_zone, req);
        }

        KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_free);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);

        return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{

        bzero(req, sizeof(struct vtblk_request));
        TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_dequeue_ready(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_ready);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

        return (req);
}

static void
vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

        TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static void
vtblk_bio_error(struct bio *bp, int error)
{

        biofinish(bp, NULL, error);
}