/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012, Alexey Zaytsev <alexey.zaytsev@gmail.com>
 * Copyright 2019 Joyent Inc.
 * Copyright 2019 Western Digital Corporation.
 */

/*
 * VIRTIO BLOCK DRIVER
 *
 * This driver provides support for Virtio Block devices.  Each driver instance
 * attaches to a single underlying block device.
 *
 * REQUEST CHAIN LAYOUT
 *
 * Every request chain sent to the I/O queue has the following structure.  Each
 * box in the diagram represents a descriptor entry (i.e., a DMA cookie) within
 * the chain:
 *
 *	+-0-----------------------------------------+
 *	| struct virtio_blk_hdr                     |-----------------------\
 *	|   (written by driver, read by device)     |                       |
 *	+-1-----------------------------------------+                       |
 *	| optional data payload                     |--\                    |
 *	|   (written by driver for write requests,  |  |                    |
 *	|    or by device for read requests)        |  |                    |
 *	+-2-----------------------------------------+  |                    |
 *	| ,~`                  :                    |-cookies loaned        |
 *	|/                     :                 ,~`|  | from blkdev        |
 *	:                                       /   |  |                    |
 *	+-(N - 1)-----------------------------------+  |                    |
 *	| ... end of data payload.                  |  |                    |
 *	|                                           |  |                    |
 *	|                                           |--/                    |
 *	+-N-----------------------------------------+                       |
 *	| status byte                               |                       |
 *	|   (written by device, read by driver)     |--------------------\  |
 *	+-------------------------------------------+                    |  |
 *	                                                                 |  |
 *	The memory for the header and status bytes (i.e., 0 and N above) |  |
 *	is allocated as a single chunk by vioblk_alloc_reqs():           |  |
 *	                                                                 |  |
 *	+-------------------------------------------+                    |  |
 *	| struct virtio_blk_hdr                     |<----------------------/
 *	+-------------------------------------------+                    |
 *	| status byte                               |<-------------------/
 *	+-------------------------------------------+
 */

#include <sys/modctl.h>
#include <sys/blkdev.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/debug.h>
#include <sys/pci.h>
#include <sys/containerof.h>
#include <sys/ctype.h>
#include <sys/sysmacros.h>

#include "virtio.h"
#include "vioblk.h"


static void vioblk_get_id(vioblk_t *);
uint_t vioblk_int_handler(caddr_t, caddr_t);
static uint_t vioblk_poll(vioblk_t *);
static int vioblk_quiesce(dev_info_t *);
static int vioblk_attach(dev_info_t *, ddi_attach_cmd_t);
static int vioblk_detach(dev_info_t *, ddi_detach_cmd_t);


static struct dev_ops vioblk_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,

	.devo_attach = vioblk_attach,
	.devo_detach = vioblk_detach,
	.devo_quiesce = vioblk_quiesce,

	.devo_getinfo = ddi_no_info,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_reset = nodev,
	.devo_cb_ops = NULL,
	.devo_bus_ops = NULL,
	.devo_power = NULL,
};

static struct modldrv vioblk_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "VIRTIO block driver",
	.drv_dev_ops = &vioblk_dev_ops
};

static struct modlinkage vioblk_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &vioblk_modldrv, NULL }
};

/*
 * DMA attribute template for header and status blocks.  We also make a
 * per-instance copy of this template with negotiated sizes from the device for
 * blkdev.
 */
static const ddi_dma_attr_t vioblk_dma_attr = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	.dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = 1,
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};

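/*
 * Request structures are allocated up front by vioblk_alloc_reqs() and kept
 * on the "vib_reqs" free list.  Allocation here merely removes an entry from
 * that list; the caller must hold "vib_mutex".
 */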
static vioblk_req_t *
vioblk_req_alloc(vioblk_t *vib)
{
	vioblk_req_t *vbr;

	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	if ((vbr = list_remove_head(&vib->vib_reqs)) == NULL) {
		return (NULL);
	}
	vib->vib_nreqs_alloc++;

	VERIFY0(vbr->vbr_status);
	vbr->vbr_status |= VIOBLK_REQSTAT_ALLOCATED;

	VERIFY3P(vbr->vbr_xfer, ==, NULL);
	VERIFY3S(vbr->vbr_error, ==, 0);

	return (vbr);
}

static void
vioblk_req_free(vioblk_t *vib, vioblk_req_t *vbr)
{
	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	/*
	 * Check that this request was allocated, then zero the status field to
	 * clear all status bits.
	 */
	VERIFY(vbr->vbr_status & VIOBLK_REQSTAT_ALLOCATED);
	vbr->vbr_status = 0;

	vbr->vbr_xfer = NULL;
	vbr->vbr_error = 0;
	vbr->vbr_type = 0;

	list_insert_head(&vib->vib_reqs, vbr);

	VERIFY3U(vib->vib_nreqs_alloc, >, 0);
	vib->vib_nreqs_alloc--;
}

static void
vioblk_complete(vioblk_t *vib, vioblk_req_t *vbr)
{
	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	VERIFY(!(vbr->vbr_status & VIOBLK_REQSTAT_COMPLETE));
	vbr->vbr_status |= VIOBLK_REQSTAT_COMPLETE;

	if (vbr->vbr_type == VIRTIO_BLK_T_FLUSH) {
		vib->vib_stats->vbs_rw_cacheflush.value.ui64++;
	}

	if (vbr->vbr_xfer != NULL) {
		/*
		 * This is a blkdev framework request.
		 */
		mutex_exit(&vib->vib_mutex);
		bd_xfer_done(vbr->vbr_xfer, vbr->vbr_error);
		mutex_enter(&vib->vib_mutex);
		vbr->vbr_xfer = NULL;
	}
}

static virtio_chain_t *
vioblk_common_start(vioblk_t *vib, int type, uint64_t sector,
    boolean_t polled)
{
	vioblk_req_t *vbr = NULL;
	virtio_chain_t *vic = NULL;

	if ((vbr = vioblk_req_alloc(vib)) == NULL) {
		vib->vib_stats->vbs_rw_outofmemory.value.ui64++;
		return (NULL);
	}
	vbr->vbr_type = type;

	if (polled) {
		/*
		 * Mark this command as polled so that we can wait on it
		 * ourselves.
		 */
		vbr->vbr_status |= VIOBLK_REQSTAT_POLLED;
	}

	if ((vic = virtio_chain_alloc(vib->vib_vq, KM_NOSLEEP)) == NULL) {
		vib->vib_stats->vbs_rw_outofmemory.value.ui64++;
		goto fail;
	}

	struct vioblk_req_hdr vbh;
	vbh.vbh_type = type;
	vbh.vbh_ioprio = 0;
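	/*
	 * The device protocol expresses the starting sector in 512-byte
	 * units, regardless of the logical block size we report to blkdev,
	 * so convert the native block number here.
	 */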
	vbh.vbh_sector = (sector * vib->vib_blk_size) / DEV_BSIZE;
	bcopy(&vbh, virtio_dma_va(vbr->vbr_dma, 0), sizeof (vbh));

	virtio_chain_data_set(vic, vbr);

	/*
	 * Put the header in the first descriptor.  See the block comment at
	 * the top of the file for more details on the chain layout.
	 */
	if (virtio_chain_append(vic, virtio_dma_cookie_pa(vbr->vbr_dma, 0),
	    sizeof (struct vioblk_req_hdr), VIRTIO_DIR_DEVICE_READS) !=
	    DDI_SUCCESS) {
		goto fail;
	}

	return (vic);

fail:
	vbr->vbr_xfer = NULL;
	vioblk_req_free(vib, vbr);
	if (vic != NULL) {
		virtio_chain_free(vic);
	}
	return (NULL);
}

static int
vioblk_common_submit(vioblk_t *vib, virtio_chain_t *vic)
{
	int r;
	vioblk_req_t *vbr = virtio_chain_data(vic);

	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	/*
	 * The device will write the status byte into this last descriptor.
	 * See the block comment at the top of the file for more details on the
	 * chain layout.
	 */
	if (virtio_chain_append(vic, virtio_dma_cookie_pa(vbr->vbr_dma, 0) +
	    sizeof (struct vioblk_req_hdr), sizeof (uint8_t),
	    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
		r = ENOMEM;
		goto out;
	}

	virtio_dma_sync(vbr->vbr_dma, DDI_DMA_SYNC_FORDEV);
	virtio_chain_submit(vic, B_TRUE);

	if (!(vbr->vbr_status & VIOBLK_REQSTAT_POLLED)) {
		/*
		 * This is not a polled request.  Our request will be freed and
		 * the caller notified later in vioblk_poll().
		 */
		return (0);
	}

	/*
	 * This is a polled request.  We need to block here and wait for the
	 * device to complete request processing.
	 */
	while (!(vbr->vbr_status & VIOBLK_REQSTAT_POLL_COMPLETE)) {
		if (ddi_in_panic()) {
			/*
			 * When panicking, interrupts are disabled.  We must
			 * poll the queue manually.
			 */
			drv_usecwait(10);
			(void) vioblk_poll(vib);
			continue;
		}

		/*
		 * When not panicking, the device will interrupt on command
		 * completion and vioblk_poll() will be called to wake us up.
		 */
		cv_wait(&vib->vib_cv, &vib->vib_mutex);
	}

	vioblk_complete(vib, vbr);
	r = vbr->vbr_error;

out:
	vioblk_req_free(vib, vbr);
	virtio_chain_free(vic);
	return (r);
}

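/*
 * Build and submit a polled request that does not originate from the blkdev
 * framework, waiting for completion before returning; e.g., device ID
 * retrieval (VIRTIO_BLK_T_GET_ID).  The caller must hold "vib_mutex".
 */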
static int
vioblk_internal(vioblk_t *vib, int type, virtio_dma_t *dma,
    uint64_t sector, virtio_direction_t dir)
{
	virtio_chain_t *vic;
	vioblk_req_t *vbr;
	int r;

	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	/*
	 * Allocate a polled request.
	 */
	if ((vic = vioblk_common_start(vib, type, sector, B_TRUE)) == NULL) {
		return (ENOMEM);
	}
	vbr = virtio_chain_data(vic);

	/*
	 * If there is a request payload, it goes between the header and the
	 * status byte.  See the block comment at the top of the file for more
	 * detail on the chain layout.
	 */
	if (dma != NULL) {
		for (uint_t n = 0; n < virtio_dma_ncookies(dma); n++) {
			if (virtio_chain_append(vic,
			    virtio_dma_cookie_pa(dma, n),
			    virtio_dma_cookie_size(dma, n), dir) !=
			    DDI_SUCCESS) {
				r = ENOMEM;
				goto out;
			}
		}
	}

	return (vioblk_common_submit(vib, vic));

out:
	vioblk_req_free(vib, vbr);
	virtio_chain_free(vic);
	return (r);
}

static int
vioblk_request(vioblk_t *vib, bd_xfer_t *xfer, int type)
{
	virtio_chain_t *vic = NULL;
	vioblk_req_t *vbr = NULL;
	uint_t total_cookies = 2;
	boolean_t polled = (xfer->x_flags & BD_XFER_POLL) != 0;
	int r;

	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	/*
	 * Ensure that this request falls within the advertised size of the
	 * block device.  Be careful to avoid overflow.
	 */
	if (xfer->x_nblks > SIZE_MAX - xfer->x_blkno ||
	    (xfer->x_blkno + xfer->x_nblks) > vib->vib_nblks) {
		vib->vib_stats->vbs_rw_badoffset.value.ui64++;
		return (EINVAL);
	}

	if ((vic = vioblk_common_start(vib, type, xfer->x_blkno, polled)) ==
	    NULL) {
		return (ENOMEM);
	}
	vbr = virtio_chain_data(vic);
	vbr->vbr_xfer = xfer;

	/*
	 * If there is a request payload, it goes between the header and the
	 * status byte.  See the block comment at the top of the file for more
	 * detail on the chain layout.
	 */
	if ((type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_OUT) &&
	    xfer->x_nblks > 0) {
		virtio_direction_t dir = (type == VIRTIO_BLK_T_OUT) ?
		    VIRTIO_DIR_DEVICE_READS : VIRTIO_DIR_DEVICE_WRITES;

		for (uint_t n = 0; n < xfer->x_ndmac; n++) {
			ddi_dma_cookie_t dmac;

			if (n == 0) {
				/*
				 * The first cookie is in the blkdev request.
				 */
				dmac = xfer->x_dmac;
			} else {
				ddi_dma_nextcookie(xfer->x_dmah, &dmac);
			}

			if (virtio_chain_append(vic, dmac.dmac_laddress,
			    dmac.dmac_size, dir) != DDI_SUCCESS) {
				r = ENOMEM;
				goto fail;
			}
		}

		total_cookies += xfer->x_ndmac;

	} else if (xfer->x_nblks > 0) {
		dev_err(vib->vib_dip, CE_PANIC,
		    "request of type %d had payload length of %lu blocks",
		    type, xfer->x_nblks);
	}

	if (vib->vib_stats->vbs_rw_cookiesmax.value.ui32 < total_cookies) {
		vib->vib_stats->vbs_rw_cookiesmax.value.ui32 = total_cookies;
	}

	return (vioblk_common_submit(vib, vic));

fail:
	vbr->vbr_xfer = NULL;
	vioblk_req_free(vib, vbr);
	virtio_chain_free(vic);
	return (r);
}

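/*
 * The vioblk_bd_* routines below are the entry points installed in the blkdev
 * ops vector (see "vioblk_bd_ops" in vioblk_attach()).
 */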
static int
vioblk_bd_read(void *arg, bd_xfer_t *xfer)
{
	vioblk_t *vib = arg;
	int r;

	mutex_enter(&vib->vib_mutex);
	r = vioblk_request(vib, xfer, VIRTIO_BLK_T_IN);
	mutex_exit(&vib->vib_mutex);

	return (r);
}

static int
vioblk_bd_write(void *arg, bd_xfer_t *xfer)
{
	vioblk_t *vib = arg;
	int r;

	mutex_enter(&vib->vib_mutex);
	r = vioblk_request(vib, xfer, VIRTIO_BLK_T_OUT);
	mutex_exit(&vib->vib_mutex);

	return (r);
}

static int
vioblk_bd_flush(void *arg, bd_xfer_t *xfer)
{
	vioblk_t *vib = arg;
	int r;

	mutex_enter(&vib->vib_mutex);
	if (!virtio_feature_present(vib->vib_virtio, VIRTIO_BLK_F_FLUSH)) {
		/*
		 * We don't really expect to get here, because if we did not
		 * negotiate the flush feature we would not have installed this
		 * function in the blkdev ops vector.
		 */
		mutex_exit(&vib->vib_mutex);
		return (ENOTSUP);
	}

	r = vioblk_request(vib, xfer, VIRTIO_BLK_T_FLUSH);
	mutex_exit(&vib->vib_mutex);

	return (r);
}

static void
vioblk_bd_driveinfo(void *arg, bd_drive_t *drive)
{
	vioblk_t *vib = arg;

	drive->d_qsize = vib->vib_reqs_capacity;
	drive->d_removable = B_FALSE;
	drive->d_hotpluggable = B_TRUE;
	drive->d_target = 0;
	drive->d_lun = 0;

	drive->d_vendor = "Virtio";
	drive->d_vendor_len = strlen(drive->d_vendor);

	drive->d_product = "Block Device";
	drive->d_product_len = strlen(drive->d_product);

	drive->d_serial = vib->vib_devid;
	drive->d_serial_len = strlen(drive->d_serial);

	drive->d_revision = "0000";
	drive->d_revision_len = strlen(drive->d_revision);
}

static int
vioblk_bd_mediainfo(void *arg, bd_media_t *media)
{
	vioblk_t *vib = (void *)arg;

	/*
	 * The device protocol is specified in terms of 512 byte logical
	 * blocks, regardless of the recommended I/O size which might be
	 * larger.
	 */
	media->m_nblks = vib->vib_nblks;
	media->m_blksize = vib->vib_blk_size;

	media->m_readonly = vib->vib_readonly;
	media->m_pblksize = vib->vib_pblk_size;
	return (0);
}

static void
vioblk_get_id(vioblk_t *vib)
{
	virtio_dma_t *dma;
	int r;

	if ((dma = virtio_dma_alloc(vib->vib_virtio, VIRTIO_BLK_ID_BYTES,
	    &vioblk_dma_attr, DDI_DMA_CONSISTENT | DDI_DMA_READ,
	    KM_SLEEP)) == NULL) {
		return;
	}

	mutex_enter(&vib->vib_mutex);
	if ((r = vioblk_internal(vib, VIRTIO_BLK_T_GET_ID, dma, 0,
	    VIRTIO_DIR_DEVICE_WRITES)) == 0) {
		const char *b = virtio_dma_va(dma, 0);
		uint_t pos = 0;

		/*
		 * Save the entire response for debugging purposes.
		 */
		bcopy(virtio_dma_va(dma, 0), vib->vib_rawid,
		    VIRTIO_BLK_ID_BYTES);

		/*
		 * Process the returned ID.
		 */
		bzero(vib->vib_devid, sizeof (vib->vib_devid));
		for (uint_t n = 0; n < VIRTIO_BLK_ID_BYTES; n++) {
			if (isalnum(b[n]) || b[n] == '-' || b[n] == '_') {
				/*
				 * Accept a subset of printable ASCII
				 * characters.
				 */
				vib->vib_devid[pos++] = b[n];
			} else {
				/*
				 * Stop processing at the first sign of
				 * trouble.
				 */
				break;
			}
		}

		vib->vib_devid_fetched = B_TRUE;
	}
	mutex_exit(&vib->vib_mutex);

	virtio_dma_free(dma);
}

static int
vioblk_bd_devid(void *arg, dev_info_t *dip, ddi_devid_t *devid)
{
	vioblk_t *vib = arg;
	size_t len;

	if ((len = strlen(vib->vib_devid)) == 0) {
		/*
		 * The device has no ID.
		 */
		return (DDI_FAILURE);
	}

	return (ddi_devid_init(dip, DEVID_ATA_SERIAL, len, vib->vib_devid,
	    devid));
}

/*
 * As the device completes processing of a request, it returns the chain for
 * that request to our I/O queue.  This routine is called in two contexts:
 * - from the interrupt handler, in response to notification from the device
 * - synchronously in line with request processing when panicking
 */
static uint_t
vioblk_poll(vioblk_t *vib)
{
	virtio_chain_t *vic;
	uint_t count = 0;
	boolean_t wakeup = B_FALSE;

	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	while ((vic = virtio_queue_poll(vib->vib_vq)) != NULL) {
		vioblk_req_t *vbr = virtio_chain_data(vic);
		uint8_t status;

		virtio_dma_sync(vbr->vbr_dma, DDI_DMA_SYNC_FORCPU);

		bcopy(virtio_dma_va(vbr->vbr_dma,
		    sizeof (struct vioblk_req_hdr)), &status, sizeof (status));

		switch (status) {
		case VIRTIO_BLK_S_OK:
			vbr->vbr_error = 0;
			break;
		case VIRTIO_BLK_S_IOERR:
			vbr->vbr_error = EIO;
			vib->vib_stats->vbs_io_errors.value.ui64++;
			break;
		case VIRTIO_BLK_S_UNSUPP:
			vbr->vbr_error = ENOTTY;
			vib->vib_stats->vbs_unsupp_errors.value.ui64++;
			break;
		default:
			vbr->vbr_error = ENXIO;
			vib->vib_stats->vbs_nxio_errors.value.ui64++;
			break;
		}

		count++;

		if (vbr->vbr_status & VIOBLK_REQSTAT_POLLED) {
			/*
			 * This request must not be freed as it is being held
			 * by a call to vioblk_common_submit().
			 */
			VERIFY(!(vbr->vbr_status &
			    VIOBLK_REQSTAT_POLL_COMPLETE));
			vbr->vbr_status |= VIOBLK_REQSTAT_POLL_COMPLETE;
			wakeup = B_TRUE;
			continue;
		}

		vioblk_complete(vib, vbr);

		vioblk_req_free(vib, vbr);
		virtio_chain_free(vic);
	}

	if (wakeup) {
		/*
		 * Signal anybody waiting for polled command completion.
		 */
		cv_broadcast(&vib->vib_cv);
	}

	return (count);
}

uint_t
vioblk_int_handler(caddr_t arg0, caddr_t arg1)
{
	vioblk_t *vib = (vioblk_t *)arg0;
	uint_t count;

	mutex_enter(&vib->vib_mutex);
	if ((count = vioblk_poll(vib)) >
	    vib->vib_stats->vbs_intr_queuemax.value.ui32) {
		vib->vib_stats->vbs_intr_queuemax.value.ui32 = count;
	}

	vib->vib_stats->vbs_intr_total.value.ui64++;
	mutex_exit(&vib->vib_mutex);

	return (DDI_INTR_CLAIMED);
}

static void
vioblk_free_reqs(vioblk_t *vib)
{
	VERIFY3U(vib->vib_nreqs_alloc, ==, 0);

	for (uint_t i = 0; i < vib->vib_reqs_capacity; i++) {
		struct vioblk_req *vbr = &vib->vib_reqs_mem[i];

		VERIFY(list_link_active(&vbr->vbr_link));
		list_remove(&vib->vib_reqs, vbr);

		VERIFY0(vbr->vbr_status);

		if (vbr->vbr_dma != NULL) {
			virtio_dma_free(vbr->vbr_dma);
			vbr->vbr_dma = NULL;
		}
	}
	VERIFY(list_is_empty(&vib->vib_reqs));

	if (vib->vib_reqs_mem != NULL) {
		kmem_free(vib->vib_reqs_mem,
		    sizeof (struct vioblk_req) * vib->vib_reqs_capacity);
		vib->vib_reqs_mem = NULL;
		vib->vib_reqs_capacity = 0;
	}
}

static int
vioblk_alloc_reqs(vioblk_t *vib)
{
	vib->vib_reqs_capacity = MIN(virtio_queue_size(vib->vib_vq),
	    VIRTIO_BLK_REQ_BUFS);
	vib->vib_reqs_mem = kmem_zalloc(
	    sizeof (struct vioblk_req) * vib->vib_reqs_capacity, KM_SLEEP);
	vib->vib_nreqs_alloc = 0;

	for (uint_t i = 0; i < vib->vib_reqs_capacity; i++) {
		list_insert_tail(&vib->vib_reqs, &vib->vib_reqs_mem[i]);
	}

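	/*
	 * Allocate a small DMA buffer for each request to hold the request
	 * header and the trailing status byte; this is the single chunk
	 * described in the chain layout comment at the top of the file.
	 */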
	for (vioblk_req_t *vbr = list_head(&vib->vib_reqs); vbr != NULL;
	    vbr = list_next(&vib->vib_reqs, vbr)) {
		if ((vbr->vbr_dma = virtio_dma_alloc(vib->vib_virtio,
		    sizeof (struct vioblk_req_hdr) + sizeof (uint8_t),
		    &vioblk_dma_attr, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
		    KM_SLEEP)) == NULL) {
			goto fail;
		}
	}

	return (0);

fail:
	vioblk_free_reqs(vib);
	return (ENOMEM);
}

static int
vioblk_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int instance = ddi_get_instance(dip);
	vioblk_t *vib;
	virtio_t *vio;
	boolean_t did_mutex = B_FALSE;

	if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	if ((vio = virtio_init(dip, VIRTIO_BLK_WANTED_FEATURES, B_TRUE)) ==
	    NULL) {
		dev_err(dip, CE_WARN, "failed to start Virtio init");
		return (DDI_FAILURE);
	}

	vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
	vib->vib_dip = dip;
	vib->vib_virtio = vio;
	ddi_set_driver_private(dip, vib);
	list_create(&vib->vib_reqs, sizeof (vioblk_req_t),
	    offsetof(vioblk_req_t, vbr_link));

	/*
	 * Determine how many scatter-gather entries we can use in a single
	 * request.
	 */
	vib->vib_seg_max = VIRTIO_BLK_DEFAULT_MAX_SEG;
	if (virtio_feature_present(vio, VIRTIO_BLK_F_SEG_MAX)) {
		vib->vib_seg_max = virtio_dev_get32(vio,
		    VIRTIO_BLK_CONFIG_SEG_MAX);

		if (vib->vib_seg_max == 0 || vib->vib_seg_max == PCI_EINVAL32) {
			/*
			 * We need to be able to use at least one data segment,
			 * so we'll assume that this device is just poorly
			 * implemented and try for one.
			 */
			vib->vib_seg_max = 1;
		}
	}

	/*
	 * When allocating the request queue, we include two additional
	 * descriptors (beyond those required for request data) to account for
	 * the header and the status byte.
	 */
	if ((vib->vib_vq = virtio_queue_alloc(vio, VIRTIO_BLK_VIRTQ_IO, "io",
	    vioblk_int_handler, vib, B_FALSE, vib->vib_seg_max + 2)) == NULL) {
		goto fail;
	}

	if (virtio_init_complete(vio, 0) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to complete Virtio init");
		goto fail;
	}

	cv_init(&vib->vib_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&vib->vib_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
	did_mutex = B_TRUE;

	if ((vib->vib_kstat = kstat_create("vioblk", instance,
	    "statistics", "controller", KSTAT_TYPE_NAMED,
	    sizeof (struct vioblk_stats) / sizeof (kstat_named_t),
	    KSTAT_FLAG_PERSISTENT)) == NULL) {
		dev_err(dip, CE_WARN, "kstat_create failed");
		goto fail;
	}
	vib->vib_stats = (vioblk_stats_t *)vib->vib_kstat->ks_data;
	kstat_named_init(&vib->vib_stats->vbs_rw_outofmemory,
	    "total_rw_outofmemory", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_rw_badoffset,
	    "total_rw_badoffset", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_intr_total,
	    "total_intr", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_io_errors,
	    "total_io_errors", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_unsupp_errors,
	    "total_unsupp_errors", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_nxio_errors,
	    "total_nxio_errors", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_rw_cacheflush,
	    "total_rw_cacheflush", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_rw_cookiesmax,
	    "max_rw_cookies", KSTAT_DATA_UINT32);
	kstat_named_init(&vib->vib_stats->vbs_intr_queuemax,
	    "max_intr_queue", KSTAT_DATA_UINT32);
	kstat_install(vib->vib_kstat);

	vib->vib_readonly = virtio_feature_present(vio, VIRTIO_BLK_F_RO);
	if ((vib->vib_nblks = virtio_dev_get64(vio,
	    VIRTIO_BLK_CONFIG_CAPACITY)) == UINT64_MAX) {
		dev_err(dip, CE_WARN, "invalid capacity");
		goto fail;
	}

	/*
	 * Determine the optimal logical block size recommended by the device.
	 * This size is advisory; the protocol always deals in 512 byte blocks.
	 */
	vib->vib_blk_size = DEV_BSIZE;
	if (virtio_feature_present(vio, VIRTIO_BLK_F_BLK_SIZE)) {
		uint32_t v = virtio_dev_get32(vio, VIRTIO_BLK_CONFIG_BLK_SIZE);

		if (v != 0 && v != PCI_EINVAL32) {
			vib->vib_blk_size = v;
		}
	}

	/*
	 * Device capacity is always in 512-byte units, convert to
	 * native blocks.
	 */
	vib->vib_nblks = (vib->vib_nblks * DEV_BSIZE) / vib->vib_blk_size;

	/*
	 * The device may also provide an advisory physical block size.
	 */
	vib->vib_pblk_size = vib->vib_blk_size;
	if (virtio_feature_present(vio, VIRTIO_BLK_F_TOPOLOGY)) {
		uint8_t v = virtio_dev_get8(vio, VIRTIO_BLK_CONFIG_TOPO_PBEXP);

		if (v != PCI_EINVAL8) {
			vib->vib_pblk_size <<= v;
		}
	}

	/*
	 * The maximum size for a cookie in a request.
	 */
	vib->vib_seg_size_max = VIRTIO_BLK_DEFAULT_MAX_SIZE;
	if (virtio_feature_present(vio, VIRTIO_BLK_F_SIZE_MAX)) {
		uint32_t v = virtio_dev_get32(vio, VIRTIO_BLK_CONFIG_SIZE_MAX);

		if (v != 0 && v != PCI_EINVAL32) {
			vib->vib_seg_size_max = v;
		}
	}

	/*
	 * Set up the DMA attributes for blkdev to use for request data.  The
	 * specification is not extremely clear about whether DMA-related
	 * parameters include or exclude the header and status descriptors.
	 * For now, we assume they cover only the request data and not the
	 * headers.
	 */
	vib->vib_bd_dma_attr = vioblk_dma_attr;
	vib->vib_bd_dma_attr.dma_attr_sgllen = vib->vib_seg_max;
	vib->vib_bd_dma_attr.dma_attr_count_max = vib->vib_seg_size_max;
	vib->vib_bd_dma_attr.dma_attr_maxxfer = vib->vib_seg_max *
	    vib->vib_seg_size_max;

	if (vioblk_alloc_reqs(vib) != 0) {
		goto fail;
	}

	/*
	 * The blkdev framework does not provide a way to specify that the
	 * device does not support write cache flushing, except by omitting the
	 * "o_sync_cache" member from the ops vector.  As "bd_alloc_handle()"
	 * makes a copy of the ops vector, we can safely assemble one on the
	 * stack based on negotiated features.
	 */
	bd_ops_t vioblk_bd_ops = {
		.o_version = BD_OPS_CURRENT_VERSION,
		.o_drive_info = vioblk_bd_driveinfo,
		.o_media_info = vioblk_bd_mediainfo,
		.o_devid_init = vioblk_bd_devid,
		.o_sync_cache = vioblk_bd_flush,
		.o_read = vioblk_bd_read,
		.o_write = vioblk_bd_write,
	};
	if (!virtio_feature_present(vio, VIRTIO_BLK_F_FLUSH)) {
		vioblk_bd_ops.o_sync_cache = NULL;
	}

	vib->vib_bd_h = bd_alloc_handle(vib, &vioblk_bd_ops,
	    &vib->vib_bd_dma_attr, KM_SLEEP);

	/*
	 * Enable interrupts now so that we can request the device identity.
	 */
	if (virtio_interrupts_enable(vio) != DDI_SUCCESS) {
		goto fail;
	}

	vioblk_get_id(vib);

	if (bd_attach_handle(dip, vib->vib_bd_h) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "Failed to attach blkdev");
		goto fail;
	}

	return (DDI_SUCCESS);

fail:
	if (vib->vib_bd_h != NULL) {
		(void) bd_detach_handle(vib->vib_bd_h);
		bd_free_handle(vib->vib_bd_h);
	}
	if (vio != NULL) {
		(void) virtio_fini(vio, B_TRUE);
	}
	if (did_mutex) {
		mutex_destroy(&vib->vib_mutex);
		cv_destroy(&vib->vib_cv);
	}
	if (vib->vib_kstat != NULL) {
		kstat_delete(vib->vib_kstat);
	}
	vioblk_free_reqs(vib);
	kmem_free(vib, sizeof (*vib));
	return (DDI_FAILURE);
}

static int
vioblk_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vioblk_t *vib = ddi_get_driver_private(dip);

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}

	mutex_enter(&vib->vib_mutex);
	if (vib->vib_nreqs_alloc > 0) {
		/*
		 * Cannot detach while there are still outstanding requests.
		 */
		mutex_exit(&vib->vib_mutex);
		return (DDI_FAILURE);
	}

	if (bd_detach_handle(vib->vib_bd_h) != DDI_SUCCESS) {
		mutex_exit(&vib->vib_mutex);
		return (DDI_FAILURE);
	}

	/*
	 * Tear down the Virtio framework before freeing the rest of the
	 * resources.  This will ensure the interrupt handlers are no longer
	 * running.
	 */
	virtio_fini(vib->vib_virtio, B_FALSE);

	vioblk_free_reqs(vib);
	kstat_delete(vib->vib_kstat);

	mutex_exit(&vib->vib_mutex);
	mutex_destroy(&vib->vib_mutex);

	kmem_free(vib, sizeof (*vib));

	return (DDI_SUCCESS);
}

static int
vioblk_quiesce(dev_info_t *dip)
{
	vioblk_t *vib;

	if ((vib = ddi_get_driver_private(dip)) == NULL) {
		return (DDI_FAILURE);
	}

	return (virtio_quiesce(vib->vib_virtio));
}

int
_init(void)
{
	int rv;

	bd_mod_init(&vioblk_dev_ops);

	if ((rv = mod_install(&vioblk_modlinkage)) != 0) {
		bd_mod_fini(&vioblk_dev_ops);
	}

	return (rv);
}

int
_fini(void)
{
	int rv;

	if ((rv = mod_remove(&vioblk_modlinkage)) == 0) {
		bd_mod_fini(&vioblk_dev_ops);
	}

	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&vioblk_modlinkage, modinfop));
}