/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <geom/geom_disk.h>
#include <vm/uma.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

struct vtblk_request {
        struct virtio_blk_outhdr    vbr_hdr;
        struct bio                 *vbr_bp;
        uint8_t                     vbr_ack;

        TAILQ_ENTRY(vtblk_request)  vbr_link;
};

struct vtblk_softc {
        device_t                vtblk_dev;
        struct mtx              vtblk_mtx;
        uint64_t                vtblk_features;
        uint32_t                vtblk_flags;
#define VTBLK_FLAG_INDIRECT     0x0001
#define VTBLK_FLAG_READONLY     0x0002
#define VTBLK_FLAG_DETACHING    0x0004
#define VTBLK_FLAG_SUSPENDED    0x0008
#define VTBLK_FLAG_DUMPING      0x0010

        struct virtqueue       *vtblk_vq;
        struct sglist          *vtblk_sglist;
        struct disk            *vtblk_disk;

        struct bio_queue_head   vtblk_bioq;
        TAILQ_HEAD(, vtblk_request)
                                vtblk_req_free;
        TAILQ_HEAD(, vtblk_request)
                                vtblk_req_ready;

        struct taskqueue       *vtblk_tq;
        struct task             vtblk_intr_task;

        int                     vtblk_sector_size;
        int                     vtblk_max_nsegs;
        int                     vtblk_unit;
        int                     vtblk_request_count;

        struct vtblk_request    vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
        { VIRTIO_BLK_F_BARRIER,   "HostBarrier"  },
        { VIRTIO_BLK_F_SIZE_MAX,  "MaxSegSize"   },
        { VIRTIO_BLK_F_SEG_MAX,   "MaxNumSegs"   },
        { VIRTIO_BLK_F_GEOMETRY,  "DiskGeometry" },
        { VIRTIO_BLK_F_RO,        "ReadOnly"     },
        { VIRTIO_BLK_F_BLK_SIZE,  "BlockSize"    },
        { VIRTIO_BLK_F_SCSI,      "SCSICmds"     },
        { VIRTIO_BLK_F_FLUSH,     "FlushCmd"     },
        { VIRTIO_BLK_F_TOPOLOGY,  "Topology"     },

        { 0, NULL }
};

static int vtblk_modevent(module_t, int, void *);

static int vtblk_probe(device_t);
static int vtblk_attach(device_t);
static int vtblk_detach(device_t);
static int vtblk_suspend(device_t);
static int vtblk_resume(device_t);
static int vtblk_shutdown(device_t);

static void vtblk_negotiate_features(struct vtblk_softc *);
static int vtblk_maximum_segments(struct vtblk_softc *,
    struct virtio_blk_config *);
static int vtblk_alloc_virtqueue(struct vtblk_softc *);
static void vtblk_alloc_disk(struct vtblk_softc *,
    struct virtio_blk_config *);
static void vtblk_create_disk(struct vtblk_softc *);

static int vtblk_open(struct disk *);
static int vtblk_close(struct disk *);
static int vtblk_ioctl(struct disk *, u_long, void *, int,
    struct thread *);
static int vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void vtblk_strategy(struct bio *);

static void vtblk_startio(struct vtblk_softc *);
static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
static int vtblk_execute_request(struct vtblk_softc *,
    struct vtblk_request *);

static int vtblk_vq_intr(void *);
static void vtblk_intr_task(void *, int);

static void vtblk_stop(struct vtblk_softc *);

static void vtblk_get_ident(struct vtblk_softc *);
static void vtblk_prepare_dump(struct vtblk_softc *);
static int vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int vtblk_flush_dump(struct vtblk_softc *);
static int vtblk_poll_request(struct vtblk_softc *,
    struct vtblk_request *);

static void vtblk_drain_vq(struct vtblk_softc *, int);
static void vtblk_drain(struct vtblk_softc *);

static int vtblk_alloc_requests(struct vtblk_softc *);
static void vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request * vtblk_dequeue_request(struct vtblk_softc *);
static void vtblk_enqueue_request(struct vtblk_softc *,
    struct vtblk_request *);

static struct vtblk_request * vtblk_dequeue_ready(struct vtblk_softc *);
static void vtblk_enqueue_ready(struct vtblk_softc *,
    struct vtblk_request *);

static void vtblk_bio_error(struct bio *, int);

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER               | \
     VIRTIO_BLK_F_SIZE_MAX              | \
     VIRTIO_BLK_F_SEG_MAX               | \
     VIRTIO_BLK_F_GEOMETRY              | \
     VIRTIO_BLK_F_RO                    | \
     VIRTIO_BLK_F_BLK_SIZE              | \
     VIRTIO_BLK_F_FLUSH                 | \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MTX(_sc)          &(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
                                mtx_init(VTBLK_MTX((_sc)), (_name), \
                                    "VTBLK Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)         mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_TRYLOCK(_sc)      mtx_trylock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)       mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc) mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)  mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
                                mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_BIO_SEGMENTS(_bp) sglist_count((_bp)->bio_data, (_bp)->bio_bcount)

#define VTBLK_DISK_NAME         "vtbd"

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS      2
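
/*
 * Requests are allocated from a module-wide UMA zone shared by every
 * vtblk instance; the zone is created at MOD_LOAD and destroyed at
 * MOD_UNLOAD in vtblk_modevent() below.
 */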
static uma_zone_t vtblk_req_zone;

static device_method_t vtblk_methods[] = {
        /* Device methods. */
        DEVMETHOD(device_probe,         vtblk_probe),
        DEVMETHOD(device_attach,        vtblk_attach),
        DEVMETHOD(device_detach,        vtblk_detach),
        DEVMETHOD(device_suspend,       vtblk_suspend),
        DEVMETHOD(device_resume,        vtblk_resume),
        DEVMETHOD(device_shutdown,      vtblk_shutdown),

        { 0, 0 }
};

static driver_t vtblk_driver = {
        "vtblk",
        vtblk_methods,
        sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
        int error;

        error = 0;

        switch (type) {
        case MOD_LOAD:
                vtblk_req_zone = uma_zcreate("vtblk_request",
                    sizeof(struct vtblk_request),
                    NULL, NULL, NULL, NULL, 0, 0);
                break;
        case MOD_QUIESCE:
        case MOD_UNLOAD:
                if (uma_zone_get_cur(vtblk_req_zone) > 0)
                        error = EBUSY;
                else if (type == MOD_UNLOAD) {
                        uma_zdestroy(vtblk_req_zone);
                        vtblk_req_zone = NULL;
                }
                break;
        case MOD_SHUTDOWN:
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }

        return (error);
}

static int
vtblk_probe(device_t dev)
{

        if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
                return (ENXIO);

        device_set_desc(dev, "VirtIO Block Adapter");

        return (BUS_PROBE_DEFAULT);
}

static int
vtblk_attach(device_t dev)
{
        struct vtblk_softc *sc;
        struct virtio_blk_config blkcfg;
        int error;

        sc = device_get_softc(dev);
        sc->vtblk_dev = dev;
        sc->vtblk_unit = device_get_unit(dev);

        VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));

        bioq_init(&sc->vtblk_bioq);
        TAILQ_INIT(&sc->vtblk_req_free);
        TAILQ_INIT(&sc->vtblk_req_ready);

        virtio_set_feature_desc(dev, vtblk_feature_desc);
        vtblk_negotiate_features(sc);

        if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
                sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
                sc->vtblk_flags |= VTBLK_FLAG_READONLY;

        /* Get local copy of config. */
        if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) == 0) {
                bzero(&blkcfg, sizeof(struct virtio_blk_config));
                virtio_read_device_config(dev, 0, &blkcfg,
                    offsetof(struct virtio_blk_config, physical_block_exp));
        } else
                virtio_read_device_config(dev, 0, &blkcfg,
                    sizeof(struct virtio_blk_config));

        /*
         * With the current sglist(9) implementation, it is not easy
         * for us to support a maximum segment size as adjacent
         * segments are coalesced. For now, just make sure it's larger
         * than the maximum supported transfer size.
         */
        if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
                if (blkcfg.size_max < MAXPHYS) {
                        error = ENOTSUP;
                        device_printf(dev, "host requires unsupported "
                            "maximum segment size feature\n");
                        goto fail;
                }
        }

        sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);

        /*
         * Allocate working sglist. The number of segments may be too
         * large to safely store on the stack.
         */
        sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
        if (sc->vtblk_sglist == NULL) {
                error = ENOMEM;
                device_printf(dev, "cannot allocate sglist\n");
                goto fail;
        }

        error = vtblk_alloc_virtqueue(sc);
        if (error) {
                device_printf(dev, "cannot allocate virtqueue\n");
                goto fail;
        }

        error = vtblk_alloc_requests(sc);
        if (error) {
                device_printf(dev, "cannot preallocate requests\n");
                goto fail;
        }

        vtblk_alloc_disk(sc, &blkcfg);

        TASK_INIT(&sc->vtblk_intr_task, 0, vtblk_intr_task, sc);
        sc->vtblk_tq = taskqueue_create_fast("vtblk_taskq", M_NOWAIT,
            taskqueue_thread_enqueue, &sc->vtblk_tq);
        if (sc->vtblk_tq == NULL) {
                error = ENOMEM;
                device_printf(dev, "cannot allocate taskqueue\n");
                goto fail;
        }
        taskqueue_start_threads(&sc->vtblk_tq, 1, PI_DISK, "%s taskq",
            device_get_nameunit(dev));

        error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
        if (error) {
                device_printf(dev, "cannot setup virtqueue interrupt\n");
                goto fail;
        }

        vtblk_create_disk(sc);

        virtqueue_enable_intr(sc->vtblk_vq);

fail:
        if (error)
                vtblk_detach(dev);

        return (error);
}

static int
vtblk_detach(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_DETACHING;
        if (device_is_attached(dev))
                vtblk_stop(sc);
        VTBLK_UNLOCK(sc);

        if (sc->vtblk_tq != NULL) {
                taskqueue_drain(sc->vtblk_tq, &sc->vtblk_intr_task);
                taskqueue_free(sc->vtblk_tq);
                sc->vtblk_tq = NULL;
        }

        vtblk_drain(sc);

        if (sc->vtblk_disk != NULL) {
                disk_destroy(sc->vtblk_disk);
                sc->vtblk_disk = NULL;
        }

        if (sc->vtblk_sglist != NULL) {
                sglist_free(sc->vtblk_sglist);
                sc->vtblk_sglist = NULL;
        }

        VTBLK_LOCK_DESTROY(sc);

        return (0);
}

static int
vtblk_suspend(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_SUSPENDED;
        /* TODO Wait for any inflight IO to complete? */
        VTBLK_UNLOCK(sc);

        return (0);
}

static int
vtblk_resume(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags &= ~VTBLK_FLAG_SUSPENDED;
        /* TODO Resume IO? */
        VTBLK_UNLOCK(sc);

        return (0);
}

static int
vtblk_shutdown(device_t dev)
{

        return (0);
}

static int
vtblk_open(struct disk *dp)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (sc->vtblk_flags & VTBLK_FLAG_DETACHING ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
        struct vtblk_softc *sc;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        return (ENOTTY);
}

static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
        struct disk *dp;
        struct vtblk_softc *sc;
        int error;

        dp = arg;
        error = 0;

        if ((sc = dp->d_drv1) == NULL)
                return (ENXIO);

        if (VTBLK_TRYLOCK(sc) == 0) {
                device_printf(sc->vtblk_dev,
                    "softc already locked, cannot dump...\n");
                return (EBUSY);
        }

        if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
                vtblk_prepare_dump(sc);
                sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
        }

        if (length > 0)
                error = vtblk_write_dump(sc, virtual, offset, length);
        else if (virtual == NULL && offset == 0)
                error = vtblk_flush_dump(sc);

        VTBLK_UNLOCK(sc);

        return (error);
}
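
/*
 * GEOM entry point for incoming I/O. A bio is completed immediately
 * with an error when the device is read-only or detaching; otherwise
 * it is placed on the sorted bio queue, from which vtblk_startio()
 * moves it into the virtqueue.
 */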
static void
vtblk_strategy(struct bio *bp)
{
        struct vtblk_softc *sc;

        if ((sc = bp->bio_disk->d_drv1) == NULL) {
                vtblk_bio_error(bp, EINVAL);
                return;
        }

        /*
         * Fail any write if RO. Unfortunately, there does not seem to
         * be a better way to report our readonly'ness to GEOM above.
         */
        if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
            (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
                vtblk_bio_error(bp, EROFS);
                return;
        }

        /*
         * Prevent read/write buffers spanning too many segments from
         * getting into the queue. This should only trip if d_maxsize
         * was incorrectly set.
         */
        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
                KASSERT(VTBLK_BIO_SEGMENTS(bp) <= sc->vtblk_max_nsegs -
                    VTBLK_MIN_SEGMENTS,
                    ("bio spanned too many segments: %d, max: %d",
                    VTBLK_BIO_SEGMENTS(bp),
                    sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS));
        }

        VTBLK_LOCK(sc);
        if ((sc->vtblk_flags & VTBLK_FLAG_DETACHING) == 0) {
                bioq_disksort(&sc->vtblk_bioq, bp);
                vtblk_startio(sc);
        } else
                vtblk_bio_error(bp, ENXIO);
        VTBLK_UNLOCK(sc);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
        device_t dev;
        uint64_t features;

        dev = sc->vtblk_dev;
        features = VTBLK_FEATURES;

        sc->vtblk_features = virtio_negotiate_features(dev, features);
}

static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
        device_t dev;
        int nsegs;

        dev = sc->vtblk_dev;
        nsegs = VTBLK_MIN_SEGMENTS;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
                nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
                if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
                        nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
        } else
                nsegs += 1;

        return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
        device_t dev;
        struct vq_alloc_info vq_info;

        dev = sc->vtblk_dev;

        VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
            vtblk_vq_intr, sc, &sc->vtblk_vq,
            "%s request", device_get_nameunit(dev));

        return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}
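
/*
 * As a worked example of the d_maxsize computation below (assuming
 * 4KB pages and a 128KB MAXPHYS): vtblk_maximum_segments() caps the
 * data segments at MAXPHYS / PAGE_SIZE + 1 = 33, so vtblk_max_nsegs
 * is at most 35 and d_maxsize is (35 - 2 - 1) * PAGE_SIZE = 128KB,
 * which is MAXPHYS itself.
 */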
static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
        device_t dev;
        struct disk *dp;

        dev = sc->vtblk_dev;

        sc->vtblk_disk = dp = disk_alloc();
        dp->d_open = vtblk_open;
        dp->d_close = vtblk_close;
        dp->d_ioctl = vtblk_ioctl;
        dp->d_strategy = vtblk_strategy;
        dp->d_name = VTBLK_DISK_NAME;
        dp->d_unit = sc->vtblk_unit;
        dp->d_drv1 = sc;

        if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0)
                dp->d_dump = vtblk_dump;

        /* Capacity is always in 512-byte units. */
        dp->d_mediasize = blkcfg->capacity * 512;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
                sc->vtblk_sector_size = blkcfg->blk_size;
        else
                sc->vtblk_sector_size = 512;
        dp->d_sectorsize = sc->vtblk_sector_size;

        /*
         * The VirtIO maximum I/O size is given in terms of segments.
         * However, FreeBSD limits I/O size by logical buffer size, not
         * by physically contiguous pages. Therefore, we have to assume
         * no pages are contiguous. This may impose an artificially low
         * maximum I/O size. But in practice, since QEMU advertises 128
         * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
         * which is typically greater than MAXPHYS. Eventually we should
         * just advertise MAXPHYS and split buffers that are too big.
         *
         * Note we must subtract one additional segment in case of non
         * page aligned buffers.
         */
        dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
            PAGE_SIZE;
        if (dp->d_maxsize < PAGE_SIZE)
                dp->d_maxsize = PAGE_SIZE; /* XXX */

        if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
                dp->d_fwsectors = blkcfg->geometry.sectors;
                dp->d_fwheads = blkcfg->geometry.heads;
        }

        if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
                dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
        struct disk *dp;

        dp = sc->vtblk_disk;

        /*
         * Retrieving the identification string must be done after
         * the virtqueue interrupt is setup otherwise it will hang.
         */
        vtblk_get_ident(sc);

        device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
            (uintmax_t) dp->d_mediasize >> 20,
            (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
            dp->d_sectorsize);

        disk_create(dp, DISK_VERSION);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int enq;

        vq = sc->vtblk_vq;
        enq = 0;

        VTBLK_LOCK_ASSERT(sc);

        if (sc->vtblk_flags & VTBLK_FLAG_SUSPENDED)
                return;

        while (!virtqueue_full(vq)) {
                if ((req = vtblk_dequeue_ready(sc)) == NULL)
                        req = vtblk_bio_request(sc);
                if (req == NULL)
                        break;

                if (vtblk_execute_request(sc, req) != 0) {
                        vtblk_enqueue_ready(sc, req);
                        break;
                }

                enq++;
        }

        if (enq > 0)
                virtqueue_notify(vq);
}

static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bp;

        bioq = &sc->vtblk_bioq;

        if (bioq_first(bioq) == NULL)
                return (NULL);

        req = vtblk_dequeue_request(sc);
        if (req == NULL)
                return (NULL);

        bp = bioq_takefirst(bioq);
        req->vbr_bp = bp;
        req->vbr_ack = -1;
        req->vbr_hdr.ioprio = 1;

        switch (bp->bio_cmd) {
        case BIO_FLUSH:
                req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
                break;
        case BIO_READ:
                req->vbr_hdr.type = VIRTIO_BLK_T_IN;
                req->vbr_hdr.sector = bp->bio_offset / 512;
                break;
        case BIO_WRITE:
                req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
                req->vbr_hdr.sector = bp->bio_offset / 512;
                break;
        default:
                KASSERT(0, ("bio with unhandled cmd: %d", bp->bio_cmd));
                req->vbr_hdr.type = -1;
                break;
        }

        if (bp->bio_flags & BIO_ORDERED)
                req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

        return (req);
}
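
/*
 * Lay out a request in the virtqueue: the outhdr (device read-only)
 * comes first and the one-byte ack (device-writable) last, with any
 * data buffer segments in between. The data segments are device-
 * writable only for BIO_READ, since the host fills our buffer. The
 * header/ack pair is what VTBLK_MIN_SEGMENTS accounts for.
 */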
static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        struct sglist *sg;
        struct bio *bp;
        int writable, error;

        sg = sc->vtblk_sglist;
        bp = req->vbr_bp;
        writable = 0;

        VTBLK_LOCK_ASSERT(sc);

        sglist_reset(sg);
        error = sglist_append(sg, &req->vbr_hdr,
            sizeof(struct virtio_blk_outhdr));
        KASSERT(error == 0, ("error adding header to sglist"));
        KASSERT(sg->sg_nseg == 1,
            ("header spanned multiple segments: %d", sg->sg_nseg));

        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
                error = sglist_append(sg, bp->bio_data, bp->bio_bcount);
                KASSERT(error == 0, ("error adding buffer to sglist"));

                /* BIO_READ means the host writes into our buffer. */
                if (bp->bio_cmd == BIO_READ)
                        writable += sg->sg_nseg - 1;
        }

        error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
        KASSERT(error == 0, ("error adding ack to sglist"));
        writable++;

        KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
            ("fewer than min segments: %d", sg->sg_nseg));

        error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
            sg->sg_nseg - writable, writable);

        return (error);
}

static int
vtblk_vq_intr(void *xsc)
{
        struct vtblk_softc *sc;

        sc = xsc;

        virtqueue_disable_intr(sc->vtblk_vq);
        taskqueue_enqueue_fast(sc->vtblk_tq, &sc->vtblk_intr_task);

        return (1);
}

static void
vtblk_intr_task(void *arg, int pending)
{
        struct vtblk_softc *sc;
        struct vtblk_request *req;
        struct virtqueue *vq;
        struct bio *bp;

        sc = arg;
        vq = sc->vtblk_vq;

        VTBLK_LOCK(sc);
        if (sc->vtblk_flags & VTBLK_FLAG_DETACHING) {
                VTBLK_UNLOCK(sc);
                return;
        }

        while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
                bp = req->vbr_bp;

                if (req->vbr_ack == VIRTIO_BLK_S_OK)
                        bp->bio_resid = 0;
                else {
                        bp->bio_flags |= BIO_ERROR;
                        if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
                                bp->bio_error = ENOTSUP;
                        else
                                bp->bio_error = EIO;
                }

                biodone(bp);
                vtblk_enqueue_request(sc, req);
        }

        vtblk_startio(sc);

        if (virtqueue_enable_intr(vq) != 0) {
                virtqueue_disable_intr(vq);
                VTBLK_UNLOCK(sc);
                taskqueue_enqueue_fast(sc->vtblk_tq,
                    &sc->vtblk_intr_task);
                return;
        }

        VTBLK_UNLOCK(sc);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

        virtqueue_disable_intr(sc->vtblk_vq);
        virtio_stop(sc->vtblk_dev);
}

static void
vtblk_get_ident(struct vtblk_softc *sc)
{
        struct bio buf;
        struct disk *dp;
        struct vtblk_request *req;
        int len, error;

        dp = sc->vtblk_disk;
        len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

        if (vtblk_no_ident != 0)
                return;

        req = vtblk_dequeue_request(sc);
        if (req == NULL)
                return;

        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID;
        req->vbr_hdr.ioprio = 1;
        req->vbr_hdr.sector = 0;

        req->vbr_bp = &buf;
        bzero(&buf, sizeof(struct bio));

        buf.bio_cmd = BIO_READ;
        buf.bio_data = dp->d_ident;
        buf.bio_bcount = len;

        VTBLK_LOCK(sc);
        error = vtblk_poll_request(sc, req);
        vtblk_enqueue_request(sc, req);
        VTBLK_UNLOCK(sc);

        if (error) {
                device_printf(sc->vtblk_dev,
                    "error getting device identifier: %d\n", error);
        }
}
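
/*
 * Kernel dumps run in a panic context where interrupts cannot be
 * relied upon, so the device is stopped, drained, and reinitialized
 * with the virtqueue interrupt disabled; every dump request is then
 * polled to completion, one at a time.
 */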
static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
        device_t dev;
        struct virtqueue *vq;

        dev = sc->vtblk_dev;
        vq = sc->vtblk_vq;

        vtblk_stop(sc);

        /*
         * Drain all requests caught in-flight in the virtqueue,
         * skipping biodone(). When dumping, only one request is
         * outstanding at a time, and we just poll the virtqueue
         * for the response.
         */
        vtblk_drain_vq(sc, 1);

        if (virtio_reinit(dev, sc->vtblk_features) != 0)
                panic("cannot reinit VirtIO block device during dump");

        virtqueue_disable_intr(vq);
        virtio_reinit_complete(dev);
}

static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
        struct bio buf;
        struct vtblk_request *req;

        req = &sc->vtblk_dump_request;
        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
        req->vbr_hdr.ioprio = 1;
        req->vbr_hdr.sector = offset / 512;

        req->vbr_bp = &buf;
        bzero(&buf, sizeof(struct bio));

        buf.bio_cmd = BIO_WRITE;
        buf.bio_data = virtual;
        buf.bio_bcount = length;

        return (vtblk_poll_request(sc, req));
}

static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
        struct bio buf;
        struct vtblk_request *req;

        req = &sc->vtblk_dump_request;
        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
        req->vbr_hdr.ioprio = 1;
        req->vbr_hdr.sector = 0;

        req->vbr_bp = &buf;
        bzero(&buf, sizeof(struct bio));

        buf.bio_cmd = BIO_FLUSH;

        return (vtblk_poll_request(sc, req));
}

static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        device_t dev;
        struct virtqueue *vq;
        struct vtblk_request *r;
        int error;

        dev = sc->vtblk_dev;
        vq = sc->vtblk_vq;

        if (!virtqueue_empty(vq))
                return (EBUSY);

        error = vtblk_execute_request(sc, req);
        if (error)
                return (error);

        virtqueue_notify(vq);

        r = virtqueue_poll(vq, NULL);
        KASSERT(r == req, ("unexpected request response"));

        if (req->vbr_ack != VIRTIO_BLK_S_OK) {
                error = req->vbr_ack == VIRTIO_BLK_S_UNSUPP ? ENOTSUP : EIO;
                if (bootverbose)
                        device_printf(dev,
                            "vtblk_poll_request: IO error: %d\n", error);
        }

        return (error);
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int last;

        vq = sc->vtblk_vq;
        last = 0;

        while ((req = virtqueue_drain(vq, &last)) != NULL) {
                if (!skip_done)
                        vtblk_bio_error(req->vbr_bp, ENXIO);

                vtblk_enqueue_request(sc, req);
        }

        KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bp;

        bioq = &sc->vtblk_bioq;

        if (sc->vtblk_vq != NULL)
                vtblk_drain_vq(sc, 0);

        while ((req = vtblk_dequeue_ready(sc)) != NULL) {
                vtblk_bio_error(req->vbr_bp, ENXIO);
                vtblk_enqueue_request(sc, req);
        }

        while (bioq_first(bioq) != NULL) {
                bp = bioq_takefirst(bioq);
                vtblk_bio_error(bp, ENXIO);
        }

        vtblk_free_requests(sc);
}
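
/*
 * Sizing example for the preallocation below: with indirect descriptors,
 * each enqueued request consumes a single ring entry, so a 128-entry
 * virtqueue gets 128 requests. Without them, a request consumes at
 * least VTBLK_MIN_SEGMENTS (2) entries, so only half as many are
 * preallocated.
 */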
static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
        struct vtblk_request *req;
        int i, size;

        size = virtqueue_size(sc->vtblk_vq);

        /*
         * Preallocate sufficient requests to keep the virtqueue full. Each
         * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
         * the number allocated when indirect descriptors are not available.
         */
        if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
                size /= VTBLK_MIN_SEGMENTS;

        for (i = 0; i < size; i++) {
                req = uma_zalloc(vtblk_req_zone, M_NOWAIT);
                if (req == NULL)
                        return (ENOMEM);

                sc->vtblk_request_count++;
                vtblk_enqueue_request(sc, req);
        }

        return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        while ((req = vtblk_dequeue_request(sc)) != NULL) {
                sc->vtblk_request_count--;
                uma_zfree(vtblk_req_zone, req);
        }

        KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_free);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);

        return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{

        bzero(req, sizeof(struct vtblk_request));
        TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_dequeue_ready(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_ready);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

        return (req);
}

static void
vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

        TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static void
vtblk_bio_error(struct bio *bp, int error)
{

        biofinish(bp, NULL, error);
}