/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <geom/geom_disk.h>
#include <vm/uma.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr;
	struct bio			*vbr_bp;
	uint8_t				 vbr_ack;

	TAILQ_ENTRY(vtblk_request)	 vbr_link;
};

struct vtblk_softc {
	device_t		 vtblk_dev;
	struct mtx		 vtblk_mtx;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_READONLY	0x0002
#define VTBLK_FLAG_DETACH	0x0004
#define VTBLK_FLAG_SUSPEND	0x0008
#define VTBLK_FLAG_DUMPING	0x0010

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		*vtblk_disk;

	struct bio_queue_head	 vtblk_bioq;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;

	struct taskqueue	*vtblk_tq;
	struct task		 vtblk_intr_task;

	int			 vtblk_max_nsegs;
	int			 vtblk_request_count;

	struct vtblk_request	 vtblk_dump_request;
};
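
/*
 * Locking summary (derived from the usage below): vtblk_mtx protects the
 * virtqueue, the bio queue, and the free/ready request lists. After
 * attach, the flag bits above are only set or cleared with the mutex
 * held.
 */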

static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},

	{ 0, NULL }
};

static int	vtblk_modevent(module_t, int, void *);

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

static int	vtblk_open(struct disk *);
static int	vtblk_close(struct disk *);
static int	vtblk_ioctl(struct disk *, u_long, void *, int,
		    struct thread *);
static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void	vtblk_strategy(struct bio *);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_create_disk(struct vtblk_softc *);

static int	vtblk_quiesce(struct vtblk_softc *);
static void	vtblk_startio(struct vtblk_softc *);
static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
static int	vtblk_execute_request(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_vq_intr(void *);
static void	vtblk_intr_task(void *, int);

static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_get_ident(struct vtblk_softc *);
static void	vtblk_prepare_dump(struct vtblk_softc *);
static int	vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_flush_dump(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);

static void	vtblk_finish_completed(struct vtblk_softc *);
static void	vtblk_drain_vq(struct vtblk_softc *, int);
static void	vtblk_drain(struct vtblk_softc *);

static int	vtblk_alloc_requests(struct vtblk_softc *);
static void	vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request * vtblk_dequeue_request(struct vtblk_softc *);
static void	vtblk_enqueue_request(struct vtblk_softc *,
		    struct vtblk_request *);

static struct vtblk_request * vtblk_dequeue_ready(struct vtblk_softc *);
static void	vtblk_enqueue_ready(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_request_error(struct vtblk_request *);
static void	vtblk_finish_bio(struct bio *, int);

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER	| \
     VIRTIO_BLK_F_SIZE_MAX	| \
     VIRTIO_BLK_F_SEG_MAX	| \
     VIRTIO_BLK_F_GEOMETRY	| \
     VIRTIO_BLK_F_RO		| \
     VIRTIO_BLK_F_BLK_SIZE	| \
     VIRTIO_BLK_F_FLUSH		| \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VTBLK Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_DISK_NAME		"vtbd"
#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2
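
/*
 * Illustration of the two-segment floor: a single-sector read occupies
 * three descriptors in the virtqueue:
 *
 *	[0] vbr_hdr	(device-readable)	struct virtio_blk_outhdr
 *	[1] data	(device-writable)	512-byte buffer
 *	[2] vbr_ack	(device-writable)	1-byte status
 *
 * A BIO_FLUSH carries no data segment and uses only the header and
 * status; see vtblk_execute_request() for how the sglist is built.
 */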

static uma_zone_t vtblk_req_zone;

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
		vtblk_req_zone = uma_zcreate("vtblk_request",
		    sizeof(struct vtblk_request),
		    NULL, NULL, NULL, NULL, 0, 0);
		break;
	case MOD_QUIESCE:
	case MOD_UNLOAD:
		if (uma_zone_get_cur(vtblk_req_zone) > 0)
			error = EBUSY;
		else if (type == MOD_UNLOAD) {
			uma_zdestroy(vtblk_req_zone);
			vtblk_req_zone = NULL;
		}
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}
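
/*
 * Overview of vtblk_attach() below: negotiate features, read the device
 * configuration, size and allocate the sglist and virtqueue, preallocate
 * requests, allocate the disk and taskqueue, set up the virtqueue
 * interrupt, and only then announce the disk to GEOM via
 * vtblk_create_disk(). Any failure funnels through vtblk_detach() to
 * unwind.
 */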

static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;

	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));

	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	virtio_set_feature_desc(dev, vtblk_feature_desc);
	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;

	/* Get local copy of config. */
	virtio_read_device_config(dev, 0, &blkcfg,
	    sizeof(struct virtio_blk_config));

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size, since adjacent
	 * segments are coalesced. For now, just make sure the host's
	 * limit is no smaller than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_alloc_requests(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	TASK_INIT(&sc->vtblk_intr_task, 0, vtblk_intr_task, sc);
	sc->vtblk_tq = taskqueue_create_fast("vtblk_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->vtblk_tq);
	if (sc->vtblk_tq == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate taskqueue\n");
		goto fail;
	}
	taskqueue_start_threads(&sc->vtblk_tq, 1, PI_DISK, "%s taskq",
	    device_get_nameunit(dev));

	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	vtblk_create_disk(sc);

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	if (sc->vtblk_tq != NULL) {
		taskqueue_drain(sc->vtblk_tq, &sc->vtblk_intr_task);
		taskqueue_free(sc->vtblk_tq);
		sc->vtblk_tq = NULL;
	}

	vtblk_drain(sc);

	if (sc->vtblk_disk != NULL) {
		disk_destroy(sc->vtblk_disk);
		sc->vtblk_disk = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;
	int error;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	error = vtblk_quiesce(sc);
	if (error)
		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	VTBLK_UNLOCK(sc);

	return (error);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	vtblk_startio(sc);
	VTBLK_UNLOCK(sc);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

static int
vtblk_open(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (ENOTTY);
}
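
/*
 * Kernel dump entry point, installed as dp->d_dump. A positive length is
 * a write of length bytes at the given byte offset; the final call with
 * virtual == NULL and offset == 0 signals the end of the dump and is
 * translated into a cache flush below.
 */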

static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk *dp;
	struct vtblk_softc *sc;
	int error;

	dp = arg;
	error = 0;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	VTBLK_LOCK(sc);

	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
		vtblk_prepare_dump(sc);
		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
	}

	if (length > 0)
		error = vtblk_write_dump(sc, virtual, offset, length);
	else if (virtual == NULL && offset == 0)
		error = vtblk_flush_dump(sc);
	else {
		error = EINVAL;
		sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
	}

	VTBLK_UNLOCK(sc);

	return (error);
}

static void
vtblk_strategy(struct bio *bp)
{
	struct vtblk_softc *sc;

	if ((sc = bp->bio_disk->d_drv1) == NULL) {
		vtblk_finish_bio(bp, EINVAL);
		return;
	}

	/*
	 * Fail any write if RO. Unfortunately, there does not seem to
	 * be a better way to report our read-only status to GEOM above.
	 */
	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
		vtblk_finish_bio(bp, EROFS);
		return;
	}

#ifdef INVARIANTS
	/*
	 * Prevent read/write buffers spanning too many segments from
	 * getting into the queue. This should only trip if d_maxsize
	 * was incorrectly set.
	 */
	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		int nsegs, max_nsegs;

		nsegs = sglist_count(bp->bio_data, bp->bio_bcount);
		max_nsegs = sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS;

		KASSERT(nsegs <= max_nsegs,
		    ("bio spanned too many segments: %d, max: %d",
		    nsegs, max_nsegs));
	}
#endif

	VTBLK_LOCK(sc);
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		vtblk_finish_bio(bp, ENXIO);
	else {
		bioq_disksort(&sc->vtblk_bioq, bp);

		if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
			vtblk_startio(sc);
	}
	VTBLK_UNLOCK(sc);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = VTBLK_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
}

static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
	} else
		nsegs += 1;

	return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;
	struct disk *dp;

	dev = sc->vtblk_dev;

	sc->vtblk_disk = dp = disk_alloc();
	dp->d_open = vtblk_open;
	dp->d_close = vtblk_close;
	dp->d_ioctl = vtblk_ioctl;
	dp->d_strategy = vtblk_strategy;
	dp->d_name = VTBLK_DISK_NAME;
	dp->d_unit = device_get_unit(dev);
	dp->d_drv1 = sc;

	if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0)
		dp->d_dump = vtblk_dump;

	/* Capacity is always in 512-byte units. */
	dp->d_mediasize = blkcfg->capacity * 512;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
		dp->d_sectorsize = blkcfg->blk_size;
	else
		dp->d_sectorsize = 512;

	/*
	 * The VirtIO maximum I/O size is given in terms of segments.
	 * However, FreeBSD limits I/O size by logical buffer size, not
	 * by physically contiguous pages. Therefore, we have to assume
	 * no pages are contiguous. This may impose an artificially low
	 * maximum I/O size. But in practice, since QEMU advertises 128
	 * segments, this gives us a maximum I/O size of 125 * PAGE_SIZE,
	 * which is typically greater than MAXPHYS. Eventually we should
	 * just advertise MAXPHYS and split buffers that are too big.
	 *
	 * Note we must subtract one additional segment in case of
	 * non-page-aligned buffers.
	 */
	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
	    PAGE_SIZE;
	if (dp->d_maxsize < PAGE_SIZE)
		dp->d_maxsize = PAGE_SIZE; /* XXX */
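
	/*
	 * Worked example of how the clamp plays out, assuming 4KB pages
	 * and a 128KB MAXPHYS: the MIN() in vtblk_maximum_segments()
	 * caps the data segments at MAXPHYS / PAGE_SIZE + 1 = 33, so
	 * vtblk_max_nsegs is at most 35 and d_maxsize comes out to
	 * (35 - 2 - 1) * PAGE_SIZE = 128KB, exactly MAXPHYS once the
	 * alignment spare segment is subtracted.
	 */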

	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
		dp->d_fwsectors = blkcfg->geometry.sectors;
		dp->d_fwheads = blkcfg->geometry.heads;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
		dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
	struct disk *dp;

	dp = sc->vtblk_disk;

	/*
	 * Retrieving the identification string must be done after
	 * the virtqueue interrupt is set up; otherwise it will hang.
	 */
	vtblk_get_ident(sc);

	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
	    (uintmax_t) dp->d_mediasize >> 20,
	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
	    dp->d_sectorsize);

	disk_create(dp, DISK_VERSION);
}

static int
vtblk_quiesce(struct vtblk_softc *sc)
{
	int error;

	error = 0;

	VTBLK_LOCK_ASSERT(sc);

	while (!virtqueue_empty(sc->vtblk_vq)) {
		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
			error = EBUSY;
			break;
		}
	}

	return (error);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	vq = sc->vtblk_vq;
	enq = 0;

	VTBLK_LOCK_ASSERT(sc);

	while (!virtqueue_full(vq)) {
		if ((req = vtblk_dequeue_ready(sc)) == NULL)
			req = vtblk_bio_request(sc);
		if (req == NULL)
			break;

		if (vtblk_execute_request(sc, req) != 0) {
			vtblk_enqueue_ready(sc, req);
			break;
		}

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq);
}

static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return (NULL);

	bp = bioq_takefirst(bioq);
	req->vbr_bp = bp;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = 1;

	switch (bp->bio_cmd) {
	case BIO_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BIO_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	case BIO_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	default:
		panic("%s: bio with unhandled cmd: %d", __FUNCTION__,
		    bp->bio_cmd);
	}

	if (bp->bio_flags & BIO_ORDERED)
		req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

	return (req);
}

static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct sglist *sg;
	struct bio *bp;
	int readable, writable, error;

	sg = sc->vtblk_sglist;
	bp = req->vbr_bp;
	writable = 0;

	VTBLK_LOCK_ASSERT(sc);

	sglist_reset(sg);
	error = sglist_append(sg, &req->vbr_hdr,
	    sizeof(struct virtio_blk_outhdr));
	KASSERT(error == 0, ("error adding header to sglist"));
	KASSERT(sg->sg_nseg == 1,
	    ("header spanned multiple segments: %d", sg->sg_nseg));

	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		error = sglist_append(sg, bp->bio_data, bp->bio_bcount);
		KASSERT(error == 0, ("error adding buffer to sglist"));

		/* BIO_READ means the host writes into our buffer. */
		if (bp->bio_cmd == BIO_READ)
			writable += sg->sg_nseg - 1;
	}

	error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	KASSERT(error == 0, ("error adding ack to sglist"));
	writable++;

	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
	    ("fewer than min segments: %d", sg->sg_nseg));

	readable = sg->sg_nseg - writable;
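
	/*
	 * Per the virtio ring convention, the device-readable segments
	 * (the header, plus the data of a write) must precede the
	 * device-writable ones (the data of a read, plus the ack byte),
	 * which is why writable was counted from the tail of the sglist.
	 */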

	return (virtqueue_enqueue(sc->vtblk_vq, req, sg, readable, writable));
}

static int
vtblk_vq_intr(void *xsc)
{
	struct vtblk_softc *sc;

	sc = xsc;

	virtqueue_disable_intr(sc->vtblk_vq);
	taskqueue_enqueue_fast(sc->vtblk_tq, &sc->vtblk_intr_task);

	return (1);
}

static void
vtblk_intr_task(void *arg, int pending)
{
	struct vtblk_softc *sc;
	struct virtqueue *vq;

	sc = arg;
	vq = sc->vtblk_vq;

	VTBLK_LOCK(sc);
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
		VTBLK_UNLOCK(sc);
		return;
	}

	vtblk_finish_completed(sc);

	if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
		vtblk_startio(sc);
	else
		wakeup(&sc->vtblk_vq);

	if (virtqueue_enable_intr(vq) != 0) {
		virtqueue_disable_intr(vq);
		VTBLK_UNLOCK(sc);
		taskqueue_enqueue_fast(sc->vtblk_tq,
		    &sc->vtblk_intr_task);
		return;
	}

	VTBLK_UNLOCK(sc);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

static void
vtblk_get_ident(struct vtblk_softc *sc)
{
	struct bio buf;
	struct disk *dp;
	struct vtblk_request *req;
	int len, error;

	dp = sc->vtblk_disk;
	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

	if (vtblk_no_ident != 0)
		return;

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return;

	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_READ;
	buf.bio_data = dp->d_ident;
	buf.bio_bcount = len;

	VTBLK_LOCK(sc);
	error = vtblk_poll_request(sc, req);
	VTBLK_UNLOCK(sc);

	vtblk_enqueue_request(sc, req);

	if (error) {
		device_printf(sc->vtblk_dev,
		    "error getting device identifier: %d\n", error);
	}
}

static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
	device_t dev;
	struct virtqueue *vq;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	vtblk_stop(sc);

	/*
	 * Drain all requests caught in-flight in the virtqueue,
	 * skipping biodone(). When dumping, only one request is
	 * outstanding at a time, and we just poll the virtqueue
	 * for the response.
	 */
	vtblk_drain_vq(sc, 1);

	if (virtio_reinit(dev, sc->vtblk_features) != 0)
		panic("cannot reinit VirtIO block device during dump");

	virtqueue_disable_intr(vq);
	virtio_reinit_complete(dev);
}
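
/*
 * The dump helpers below run in the post-panic kernel dump context where
 * interrupts cannot be relied upon, so each request is submitted and
 * completed synchronously through vtblk_poll_request().
 */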

static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = offset / 512;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_WRITE;
	buf.bio_data = virtual;
	buf.bio_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_FLUSH;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	device_t dev;
	struct virtqueue *vq;
	struct vtblk_request *r;
	int error;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_execute_request(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq);

	r = virtqueue_poll(vq, NULL);
	KASSERT(r == req, ("unexpected request response"));

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(dev, "vtblk_poll_request: IO error: %d\n",
		    error);
	}

	return (error);
}

static void
vtblk_finish_completed(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	struct bio *bp;
	int error;

	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
		bp = req->vbr_bp;

		error = vtblk_request_error(req);
		if (error)
			disk_err(bp, "hard error", -1, 1);

		vtblk_finish_bio(bp, error);
		vtblk_enqueue_request(sc, req);
	}
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		if (!skip_done)
			vtblk_finish_bio(req->vbr_bp, ENXIO);

		vtblk_enqueue_request(sc, req);
	}

	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL) {
		vtblk_finish_completed(sc);
		vtblk_drain_vq(sc, 0);
	}

	while ((req = vtblk_dequeue_ready(sc)) != NULL) {
		vtblk_finish_bio(req->vbr_bp, ENXIO);
		vtblk_enqueue_request(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_finish_bio(bp, ENXIO);
	}

	vtblk_free_requests(sc);
}

static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors, so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = uma_zalloc(vtblk_req_zone, M_NOWAIT);
		if (req == NULL)
			return (ENOMEM);

		sc->vtblk_request_count++;
		vtblk_enqueue_request(sc, req);
	}

	return (0);
}
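
/*
 * Example of the scaling in vtblk_alloc_requests() above: with a 128-entry
 * virtqueue, indirect descriptors let each request occupy a single ring
 * slot, so all 128 requests are preallocated; without them each request
 * needs at least VTBLK_MIN_SEGMENTS descriptors, so only 64 are.
 */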

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	KASSERT(TAILQ_EMPTY(&sc->vtblk_req_ready),
	    ("ready requests left on queue"));

	while ((req = vtblk_dequeue_request(sc)) != NULL) {
		sc->vtblk_request_count--;
		uma_zfree(vtblk_req_zone, req);
	}

	KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{

	bzero(req, sizeof(struct vtblk_request));
	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_dequeue_ready(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_ready);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static int
vtblk_request_error(struct vtblk_request *req)
{
	int error;

	switch (req->vbr_ack) {
	case VIRTIO_BLK_S_OK:
		error = 0;
		break;
	case VIRTIO_BLK_S_UNSUPP:
		error = ENOTSUP;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}

static void
vtblk_finish_bio(struct bio *bp, int error)
{

	if (error) {
		bp->bio_resid = bp->bio_bcount;
		bp->bio_error = error;
		bp->bio_flags |= BIO_ERROR;
	}

	biodone(bp);
}