/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <geom/geom_disk.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"
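
/*
 * Each in-flight bio is tracked by a preallocated request structure.
 * The out header is read by the host; the single ack byte is written
 * back by the host once the request has completed.
 */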

struct vtblk_request {
        struct virtio_blk_outhdr   vbr_hdr;
        struct bio                *vbr_bp;
        uint8_t                    vbr_ack;

        TAILQ_ENTRY(vtblk_request) vbr_link;
};

struct vtblk_softc {
        device_t                 vtblk_dev;
        struct mtx               vtblk_mtx;
        uint64_t                 vtblk_features;
        uint32_t                 vtblk_flags;
#define VTBLK_FLAG_INDIRECT     0x0001
#define VTBLK_FLAG_READONLY     0x0002
#define VTBLK_FLAG_DETACH       0x0004
#define VTBLK_FLAG_SUSPEND      0x0008
#define VTBLK_FLAG_DUMPING      0x0010

        struct virtqueue        *vtblk_vq;
        struct sglist           *vtblk_sglist;
        struct disk             *vtblk_disk;

        struct bio_queue_head    vtblk_bioq;
        TAILQ_HEAD(, vtblk_request)
                                 vtblk_req_free;
        TAILQ_HEAD(, vtblk_request)
                                 vtblk_req_ready;

        struct taskqueue        *vtblk_tq;
        struct task              vtblk_intr_task;

        int                      vtblk_max_nsegs;
        int                      vtblk_request_count;

        struct vtblk_request     vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
        { VIRTIO_BLK_F_BARRIER,         "HostBarrier"   },
        { VIRTIO_BLK_F_SIZE_MAX,        "MaxSegSize"    },
        { VIRTIO_BLK_F_SEG_MAX,         "MaxNumSegs"    },
        { VIRTIO_BLK_F_GEOMETRY,        "DiskGeometry"  },
        { VIRTIO_BLK_F_RO,              "ReadOnly"      },
        { VIRTIO_BLK_F_BLK_SIZE,        "BlockSize"     },
        { VIRTIO_BLK_F_SCSI,            "SCSICmds"      },
        { VIRTIO_BLK_F_FLUSH,           "FlushCmd"      },
        { VIRTIO_BLK_F_TOPOLOGY,        "Topology"      },

        { 0, NULL }
};

static int      vtblk_modevent(module_t, int, void *);

static int      vtblk_probe(device_t);
static int      vtblk_attach(device_t);
static int      vtblk_detach(device_t);
static int      vtblk_suspend(device_t);
static int      vtblk_resume(device_t);
static int      vtblk_shutdown(device_t);

static int      vtblk_open(struct disk *);
static int      vtblk_close(struct disk *);
static int      vtblk_ioctl(struct disk *, u_long, void *, int,
                    struct thread *);
static int      vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void     vtblk_strategy(struct bio *);

static void     vtblk_negotiate_features(struct vtblk_softc *);
static int      vtblk_maximum_segments(struct vtblk_softc *,
                    struct virtio_blk_config *);
static int      vtblk_alloc_virtqueue(struct vtblk_softc *);
static void     vtblk_alloc_disk(struct vtblk_softc *,
                    struct virtio_blk_config *);
static void     vtblk_create_disk(struct vtblk_softc *);

static int      vtblk_quiesce(struct vtblk_softc *);
static void     vtblk_startio(struct vtblk_softc *);
static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
static int      vtblk_execute_request(struct vtblk_softc *,
                    struct vtblk_request *);

static int      vtblk_vq_intr(void *);
static void     vtblk_intr_task(void *, int);

static void     vtblk_stop(struct vtblk_softc *);

static void     vtblk_get_ident(struct vtblk_softc *);
static void     vtblk_prepare_dump(struct vtblk_softc *);
static int      vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int      vtblk_flush_dump(struct vtblk_softc *);
static int      vtblk_poll_request(struct vtblk_softc *,
                    struct vtblk_request *);

static void     vtblk_finish_completed(struct vtblk_softc *);
static void     vtblk_drain_vq(struct vtblk_softc *, int);
static void     vtblk_drain(struct vtblk_softc *);

static int      vtblk_alloc_requests(struct vtblk_softc *);
static void     vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request * vtblk_dequeue_request(struct vtblk_softc *);
static void     vtblk_enqueue_request(struct vtblk_softc *,
                    struct vtblk_request *);

static struct vtblk_request * vtblk_dequeue_ready(struct vtblk_softc *);
static void     vtblk_enqueue_ready(struct vtblk_softc *,
                    struct vtblk_request *);

static int      vtblk_request_error(struct vtblk_request *);
static void     vtblk_finish_bio(struct bio *, int);

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER       | \
     VIRTIO_BLK_F_SIZE_MAX      | \
     VIRTIO_BLK_F_SEG_MAX       | \
     VIRTIO_BLK_F_GEOMETRY      | \
     VIRTIO_BLK_F_RO            | \
     VIRTIO_BLK_F_BLK_SIZE      | \
     VIRTIO_BLK_F_FLUSH         | \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MTX(_sc)          &(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
    mtx_init(VTBLK_MTX((_sc)), (_name), \
        "VTBLK Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)         mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)       mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc) mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)  mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
    mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_DISK_NAME         "vtbd"
#define VTBLK_QUIESCE_TIMEOUT   (30 * hz)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
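
/*
 * On the ring, a request is laid out as:
 *
 *      [virtio_blk_outhdr][data segment(s), if any][1-byte status]
 *
 * A flush uses exactly the two required segments, while reads and
 * writes add one segment per physically discontiguous page of the
 * data buffer.
 */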

#define VTBLK_MIN_SEGMENTS      2

static device_method_t vtblk_methods[] = {
        /* Device methods. */
        DEVMETHOD(device_probe,         vtblk_probe),
        DEVMETHOD(device_attach,        vtblk_attach),
        DEVMETHOD(device_detach,        vtblk_detach),
        DEVMETHOD(device_suspend,       vtblk_suspend),
        DEVMETHOD(device_resume,        vtblk_resume),
        DEVMETHOD(device_shutdown,      vtblk_shutdown),

        DEVMETHOD_END
};

static driver_t vtblk_driver = {
        "vtblk",
        vtblk_methods,
        sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
        int error;

        error = 0;

        switch (type) {
        case MOD_LOAD:
        case MOD_QUIESCE:
        case MOD_UNLOAD:
        case MOD_SHUTDOWN:
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }

        return (error);
}

static int
vtblk_probe(device_t dev)
{

        if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
                return (ENXIO);

        device_set_desc(dev, "VirtIO Block Adapter");

        return (BUS_PROBE_DEFAULT);
}
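
/*
 * Attach negotiates features, sizes the scatter/gather list from the
 * host's segment limits, allocates the virtqueue and a pool of
 * requests, and only then creates the disk and enables interrupts.
 */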

static int
vtblk_attach(device_t dev)
{
        struct vtblk_softc *sc;
        struct virtio_blk_config blkcfg;
        int error;

        sc = device_get_softc(dev);
        sc->vtblk_dev = dev;

        VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));

        bioq_init(&sc->vtblk_bioq);
        TAILQ_INIT(&sc->vtblk_req_free);
        TAILQ_INIT(&sc->vtblk_req_ready);

        virtio_set_feature_desc(dev, vtblk_feature_desc);
        vtblk_negotiate_features(sc);

        if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
                sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
                sc->vtblk_flags |= VTBLK_FLAG_READONLY;

        /* Get local copy of config. */
        virtio_read_device_config(dev, 0, &blkcfg,
            sizeof(struct virtio_blk_config));

        /*
         * With the current sglist(9) implementation, it is not easy
         * for us to support a maximum segment size as adjacent
         * segments are coalesced. For now, just make sure it's larger
         * than the maximum supported transfer size.
         */
        if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
                if (blkcfg.size_max < MAXPHYS) {
                        error = ENOTSUP;
                        device_printf(dev, "host requires unsupported "
                            "maximum segment size feature\n");
                        goto fail;
                }
        }

        sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
        if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
                error = EINVAL;
                device_printf(dev, "fewer than minimum number of segments "
                    "allowed: %d\n", sc->vtblk_max_nsegs);
                goto fail;
        }

        sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
        if (sc->vtblk_sglist == NULL) {
                error = ENOMEM;
                device_printf(dev, "cannot allocate sglist\n");
                goto fail;
        }

        error = vtblk_alloc_virtqueue(sc);
        if (error) {
                device_printf(dev, "cannot allocate virtqueue\n");
                goto fail;
        }

        error = vtblk_alloc_requests(sc);
        if (error) {
                device_printf(dev, "cannot preallocate requests\n");
                goto fail;
        }

        vtblk_alloc_disk(sc, &blkcfg);

        TASK_INIT(&sc->vtblk_intr_task, 0, vtblk_intr_task, sc);
        sc->vtblk_tq = taskqueue_create_fast("vtblk_taskq", M_NOWAIT,
            taskqueue_thread_enqueue, &sc->vtblk_tq);
        if (sc->vtblk_tq == NULL) {
                error = ENOMEM;
                device_printf(dev, "cannot allocate taskqueue\n");
                goto fail;
        }

        error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
        if (error) {
                device_printf(dev, "cannot setup virtqueue interrupt\n");
                goto fail;
        }

        taskqueue_start_threads(&sc->vtblk_tq, 1, PI_DISK, "%s taskq",
            device_get_nameunit(dev));

        vtblk_create_disk(sc);

        virtqueue_enable_intr(sc->vtblk_vq);

fail:
        if (error)
                vtblk_detach(dev);

        return (error);
}

static int
vtblk_detach(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_DETACH;
        if (device_is_attached(dev))
                vtblk_stop(sc);
        VTBLK_UNLOCK(sc);

        if (sc->vtblk_tq != NULL) {
                taskqueue_drain(sc->vtblk_tq, &sc->vtblk_intr_task);
                taskqueue_free(sc->vtblk_tq);
                sc->vtblk_tq = NULL;
        }

        vtblk_drain(sc);

        if (sc->vtblk_disk != NULL) {
                disk_destroy(sc->vtblk_disk);
                sc->vtblk_disk = NULL;
        }

        if (sc->vtblk_sglist != NULL) {
                sglist_free(sc->vtblk_sglist);
                sc->vtblk_sglist = NULL;
        }

        VTBLK_LOCK_DESTROY(sc);

        return (0);
}

static int
vtblk_suspend(device_t dev)
{
        struct vtblk_softc *sc;
        int error;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
        /* XXX BMV: virtio_stop(), etc needed here? */
        error = vtblk_quiesce(sc);
        if (error)
                sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
        VTBLK_UNLOCK(sc);

        return (error);
}

static int
vtblk_resume(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        /* XXX BMV: virtio_reinit(), etc needed here? */
        sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
        vtblk_startio(sc);
        VTBLK_UNLOCK(sc);

        return (0);
}

static int
vtblk_shutdown(device_t dev)
{

        return (0);
}

static int
vtblk_open(struct disk *dp)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (ENOTTY);
}
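
/*
 * Called by the kernel dump framework: with length > 0 to write a
 * chunk of the dump and, at the end, with virtual == NULL and
 * offset == 0 to flush the device's cache.  The first call switches
 * the device into polled operation.
 */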

static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
        struct disk *dp;
        struct vtblk_softc *sc;
        int error;

        dp = arg;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        VTBLK_LOCK(sc);

        if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
                vtblk_prepare_dump(sc);
                sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
        }

        if (length > 0)
                error = vtblk_write_dump(sc, virtual, offset, length);
        else if (virtual == NULL && offset == 0)
                error = vtblk_flush_dump(sc);
        else {
                error = EINVAL;
                sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
        }

        VTBLK_UNLOCK(sc);

        return (error);
}

static void
vtblk_strategy(struct bio *bp)
{
        struct vtblk_softc *sc;

        if ((sc = bp->bio_disk->d_drv1) == NULL) {
                vtblk_finish_bio(bp, EINVAL);
                return;
        }

        /*
         * Fail any write if RO. Unfortunately, there does not seem to
         * be a better way to report our read-only status to GEOM above.
         */
        if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
            (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
                vtblk_finish_bio(bp, EROFS);
                return;
        }

#ifdef INVARIANTS
        /*
         * Prevent read/write buffers spanning too many segments from
         * getting into the queue. This should only trip if d_maxsize
         * was incorrectly set.
         */
        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
                int nsegs, max_nsegs;

                nsegs = sglist_count(bp->bio_data, bp->bio_bcount);
                max_nsegs = sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS;

                KASSERT(nsegs <= max_nsegs,
                    ("bio %p spanned too many segments: %d, max: %d",
                    bp, nsegs, max_nsegs));
        }
#endif

        VTBLK_LOCK(sc);
        if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
                vtblk_finish_bio(bp, ENXIO);
        else {
                bioq_disksort(&sc->vtblk_bioq, bp);

                if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
                        vtblk_startio(sc);
        }
        VTBLK_UNLOCK(sc);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
        device_t dev;
        uint64_t features;

        dev = sc->vtblk_dev;
        features = VTBLK_FEATURES;

        sc->vtblk_features = virtio_negotiate_features(dev, features);
}
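
/*
 * The data segments are capped at MAXPHYS / PAGE_SIZE + 1, the most
 * pages a MAXPHYS-sized transfer can span when the buffer is not page
 * aligned, plus the header and status segments.
 */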

static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
        device_t dev;
        int nsegs;

        dev = sc->vtblk_dev;
        nsegs = VTBLK_MIN_SEGMENTS;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
                nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
                if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
                        nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
        } else
                nsegs += 1;

        return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
        device_t dev;
        struct vq_alloc_info vq_info;

        dev = sc->vtblk_dev;

        VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
            vtblk_vq_intr, sc, &sc->vtblk_vq,
            "%s request", device_get_nameunit(dev));

        return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
        device_t dev;
        struct disk *dp;

        dev = sc->vtblk_dev;

        sc->vtblk_disk = dp = disk_alloc();
        dp->d_open = vtblk_open;
        dp->d_close = vtblk_close;
        dp->d_ioctl = vtblk_ioctl;
        dp->d_strategy = vtblk_strategy;
        dp->d_name = VTBLK_DISK_NAME;
        dp->d_unit = device_get_unit(dev);
        dp->d_drv1 = sc;

        if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0)
                dp->d_dump = vtblk_dump;

        /* Capacity is always in 512-byte units. */
        dp->d_mediasize = blkcfg->capacity * 512;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
                dp->d_sectorsize = blkcfg->blk_size;
        else
                dp->d_sectorsize = 512;

        /*
         * The VirtIO maximum I/O size is given in terms of segments.
         * However, FreeBSD limits I/O size by logical buffer size, not
         * by physically contiguous pages. Therefore, we have to assume
         * no pages are contiguous. This may impose an artificially low
         * maximum I/O size. But in practice, since QEMU advertises 128
         * segments, this gives us a maximum I/O size of 125 * PAGE_SIZE,
         * which is typically greater than MAXPHYS. Eventually we should
         * just advertise MAXPHYS and split buffers that are too big.
         *
         * Note we must subtract one additional segment in case of
         * non-page-aligned buffers.
         */
        dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
            PAGE_SIZE;
        if (dp->d_maxsize < PAGE_SIZE)
                dp->d_maxsize = PAGE_SIZE; /* XXX */

        if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
                dp->d_fwsectors = blkcfg->geometry.sectors;
                dp->d_fwheads = blkcfg->geometry.heads;
        }

        if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
                dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
        struct disk *dp;

        dp = sc->vtblk_disk;

        /*
         * Retrieving the identification string must be done after
         * the virtqueue interrupt is set up, otherwise it will hang.
         */
        vtblk_get_ident(sc);

        device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
            (uintmax_t) dp->d_mediasize >> 20,
            (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
            dp->d_sectorsize);

        disk_create(dp, DISK_VERSION);
}
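
/*
 * Wait for the virtqueue to drain; the interrupt task wakes us up as
 * completions arrive while VTBLK_FLAG_SUSPEND is set.  Give up with
 * EBUSY after VTBLK_QUIESCE_TIMEOUT.
 */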

static int
vtblk_quiesce(struct vtblk_softc *sc)
{
        int error;

        error = 0;

        VTBLK_LOCK_ASSERT(sc);

        while (!virtqueue_empty(sc->vtblk_vq)) {
                if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
                    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
                        error = EBUSY;
                        break;
                }
        }

        return (error);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int enq;

        vq = sc->vtblk_vq;
        enq = 0;

        VTBLK_LOCK_ASSERT(sc);

        while (!virtqueue_full(vq)) {
                if ((req = vtblk_dequeue_ready(sc)) == NULL)
                        req = vtblk_bio_request(sc);
                if (req == NULL)
                        break;

                if (vtblk_execute_request(sc, req) != 0) {
                        vtblk_enqueue_ready(sc, req);
                        break;
                }

                enq++;
        }

        if (enq > 0)
                virtqueue_notify(vq);
}

static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bp;

        bioq = &sc->vtblk_bioq;

        if (bioq_first(bioq) == NULL)
                return (NULL);

        req = vtblk_dequeue_request(sc);
        if (req == NULL)
                return (NULL);

        bp = bioq_takefirst(bioq);
        req->vbr_bp = bp;
        req->vbr_ack = -1;
        req->vbr_hdr.ioprio = 1;

        switch (bp->bio_cmd) {
        case BIO_FLUSH:
                req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
                break;
        case BIO_READ:
                req->vbr_hdr.type = VIRTIO_BLK_T_IN;
                req->vbr_hdr.sector = bp->bio_offset / 512;
                break;
        case BIO_WRITE:
                req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
                req->vbr_hdr.sector = bp->bio_offset / 512;
                break;
        default:
                panic("%s: bio with unhandled cmd: %d", __FUNCTION__,
                    bp->bio_cmd);
        }

        if (bp->bio_flags & BIO_ORDERED)
                req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

        return (req);
}

static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        struct sglist *sg;
        struct bio *bp;
        int readable, writable, error;

        sg = sc->vtblk_sglist;
        bp = req->vbr_bp;
        writable = 0;

        VTBLK_LOCK_ASSERT(sc);

        sglist_reset(sg);

        sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));

        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
                error = sglist_append(sg, bp->bio_data, bp->bio_bcount);
                if (error || sg->sg_nseg == sg->sg_maxseg)
                        panic("%s: data buffer too big bio:%p error:%d",
                            __FUNCTION__, bp, error);

                /* BIO_READ means the host writes into our buffer. */
                if (bp->bio_cmd == BIO_READ)
                        writable = sg->sg_nseg - 1;
        }

        writable++;
        sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));

        readable = sg->sg_nseg - writable;

        return (virtqueue_enqueue(sc->vtblk_vq, req, sg, readable, writable));
}
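
/*
 * Interrupt filter: defer completion processing to the taskqueue with
 * the virtqueue interrupt disabled; vtblk_intr_task() re-enables it
 * once the queue has been serviced.
 */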

static int
vtblk_vq_intr(void *xsc)
{
        struct vtblk_softc *sc;

        sc = xsc;

        virtqueue_disable_intr(sc->vtblk_vq);
        taskqueue_enqueue_fast(sc->vtblk_tq, &sc->vtblk_intr_task);

        return (1);
}

static void
vtblk_intr_task(void *arg, int pending)
{
        struct vtblk_softc *sc;
        struct virtqueue *vq;

        sc = arg;
        vq = sc->vtblk_vq;

        VTBLK_LOCK(sc);
        if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
                VTBLK_UNLOCK(sc);
                return;
        }

        vtblk_finish_completed(sc);

        if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
                vtblk_startio(sc);
        else
                wakeup(&sc->vtblk_vq);

        if (virtqueue_enable_intr(vq) != 0) {
                virtqueue_disable_intr(vq);
                VTBLK_UNLOCK(sc);
                taskqueue_enqueue_fast(sc->vtblk_tq,
                    &sc->vtblk_intr_task);
                return;
        }

        VTBLK_UNLOCK(sc);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

        virtqueue_disable_intr(sc->vtblk_vq);
        virtio_stop(sc->vtblk_dev);
}

static void
vtblk_get_ident(struct vtblk_softc *sc)
{
        struct bio buf;
        struct disk *dp;
        struct vtblk_request *req;
        int len, error;

        dp = sc->vtblk_disk;
        len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

        if (vtblk_no_ident != 0)
                return;

        req = vtblk_dequeue_request(sc);
        if (req == NULL)
                return;

        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID;
        req->vbr_hdr.ioprio = 1;
        req->vbr_hdr.sector = 0;

        req->vbr_bp = &buf;
        bzero(&buf, sizeof(struct bio));

        buf.bio_cmd = BIO_READ;
        buf.bio_data = dp->d_ident;
        buf.bio_bcount = len;

        VTBLK_LOCK(sc);
        error = vtblk_poll_request(sc, req);
        VTBLK_UNLOCK(sc);

        vtblk_enqueue_request(sc, req);

        if (error) {
                device_printf(sc->vtblk_dev,
                    "error getting device identifier: %d\n", error);
        }
}

static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
        device_t dev;
        struct virtqueue *vq;

        dev = sc->vtblk_dev;
        vq = sc->vtblk_vq;

        vtblk_stop(sc);

        /*
         * Drain all requests caught in-flight in the virtqueue,
         * skipping biodone(). When dumping, only one request is
         * outstanding at a time, and we just poll the virtqueue
         * for the response.
         */
        vtblk_drain_vq(sc, 1);

        if (virtio_reinit(dev, sc->vtblk_features) != 0)
                panic("cannot reinit VirtIO block device during dump");

        virtqueue_disable_intr(vq);
        virtio_reinit_complete(dev);
}
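
/*
 * The dump helpers cannot allocate memory, so they use the request
 * embedded in the softc and a bio on the stack, polling the virtqueue
 * for completion.
 */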

static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
        struct bio buf;
        struct vtblk_request *req;

        req = &sc->vtblk_dump_request;
        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
        req->vbr_hdr.ioprio = 1;
        req->vbr_hdr.sector = offset / 512;

        req->vbr_bp = &buf;
        bzero(&buf, sizeof(struct bio));

        buf.bio_cmd = BIO_WRITE;
        buf.bio_data = virtual;
        buf.bio_bcount = length;

        return (vtblk_poll_request(sc, req));
}

static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
        struct bio buf;
        struct vtblk_request *req;

        req = &sc->vtblk_dump_request;
        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
        req->vbr_hdr.ioprio = 1;
        req->vbr_hdr.sector = 0;

        req->vbr_bp = &buf;
        bzero(&buf, sizeof(struct bio));

        buf.bio_cmd = BIO_FLUSH;

        return (vtblk_poll_request(sc, req));
}

static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        struct virtqueue *vq;
        struct vtblk_request *r;
        int error;

        vq = sc->vtblk_vq;

        if (!virtqueue_empty(vq))
                return (EBUSY);

        error = vtblk_execute_request(sc, req);
        if (error)
                return (error);

        virtqueue_notify(vq);

        r = virtqueue_poll(vq, NULL);
        KASSERT(r == req, ("unexpected request response: %p/%p", r, req));

        error = vtblk_request_error(req);
        if (error && bootverbose) {
                device_printf(sc->vtblk_dev,
                    "%s: IO error: %d\n", __FUNCTION__, error);
        }

        return (error);
}

static void
vtblk_finish_completed(struct vtblk_softc *sc)
{
        struct vtblk_request *req;
        struct bio *bp;
        int error;

        while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
                bp = req->vbr_bp;

                error = vtblk_request_error(req);
                if (error)
                        disk_err(bp, "hard error", -1, 1);

                vtblk_finish_bio(bp, error);
                vtblk_enqueue_request(sc, req);
        }
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int last;

        vq = sc->vtblk_vq;
        last = 0;

        while ((req = virtqueue_drain(vq, &last)) != NULL) {
                if (!skip_done)
                        vtblk_finish_bio(req->vbr_bp, ENXIO);

                vtblk_enqueue_request(sc, req);
        }

        KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bp;

        bioq = &sc->vtblk_bioq;

        if (sc->vtblk_vq != NULL) {
                vtblk_finish_completed(sc);
                vtblk_drain_vq(sc, 0);
        }

        while ((req = vtblk_dequeue_ready(sc)) != NULL) {
                vtblk_finish_bio(req->vbr_bp, ENXIO);
                vtblk_enqueue_request(sc, req);
        }

        while (bioq_first(bioq) != NULL) {
                bp = bioq_takefirst(bioq);
                vtblk_finish_bio(bp, ENXIO);
        }

        vtblk_free_requests(sc);
}
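
/*
 * vtblk_execute_request() appends the header and ack as single
 * segments, so each must fit within one page.
 */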
#ifdef INVARIANTS
static void
vtblk_request_invariants(struct vtblk_request *req)
{
        int hdr_nsegs, ack_nsegs;

        hdr_nsegs = sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr));
        ack_nsegs = sglist_count(&req->vbr_ack, sizeof(req->vbr_ack));

        KASSERT(hdr_nsegs == 1, ("request header crossed page boundary"));
        KASSERT(ack_nsegs == 1, ("request ack crossed page boundary"));
}
#endif

static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
        struct vtblk_request *req;
        int i, nreqs;

        nreqs = virtqueue_size(sc->vtblk_vq);

        /*
         * Preallocate sufficient requests to keep the virtqueue full. Each
         * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
         * the number allocated when indirect descriptors are not available.
         */
        if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
                nreqs /= VTBLK_MIN_SEGMENTS;

        for (i = 0; i < nreqs; i++) {
                req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
                if (req == NULL)
                        return (ENOMEM);

#ifdef INVARIANTS
                vtblk_request_invariants(req);
#endif

                sc->vtblk_request_count++;
                vtblk_enqueue_request(sc, req);
        }

        return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        KASSERT(TAILQ_EMPTY(&sc->vtblk_req_ready),
            ("ready requests left on queue"));

        while ((req = vtblk_dequeue_request(sc)) != NULL) {
                sc->vtblk_request_count--;
                free(req, M_DEVBUF);
        }

        KASSERT(sc->vtblk_request_count == 0,
            ("leaked requests: %d", sc->vtblk_request_count));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_free);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);

        return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{

        bzero(req, sizeof(struct vtblk_request));
        TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_dequeue_ready(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_ready);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

        return (req);
}

static void
vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

        TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static int
vtblk_request_error(struct vtblk_request *req)
{
        int error;

        switch (req->vbr_ack) {
        case VIRTIO_BLK_S_OK:
                error = 0;
                break;
        case VIRTIO_BLK_S_UNSUPP:
                error = ENOTSUP;
                break;
        default:
                error = EIO;
                break;
        }

        return (error);
}

static void
vtblk_finish_bio(struct bio *bp, int error)
{

        if (error) {
                bp->bio_resid = bp->bio_bcount;
                bp->bio_error = error;
                bp->bio_flags |= BIO_ERROR;
        }

        biodone(bp);
}