/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

/*
 * VIRTIO FRAMEWORK
 *
 * For design and usage documentation, see the comments in "virtio.h".
 */

#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/modctl.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/avintr.h>
#include <sys/spl.h>
#include <sys/promif.h>
#include <sys/list.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/sysmacros.h>
#include <sys/pci.h>

#include "virtio.h"
#include "virtio_impl.h"


/*
 * Linkage structures
 */
static struct modlmisc virtio_modlmisc = {
	.misc_modops = &mod_miscops,
	.misc_linkinfo = "VIRTIO common routines",
};

static struct modlinkage virtio_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &virtio_modlmisc, NULL }
};

int
_init(void)
{
	return (mod_install(&virtio_modlinkage));
}

int
_fini(void)
{
	return (mod_remove(&virtio_modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&virtio_modlinkage, modinfop));
}


static void virtio_set_status(virtio_t *, uint8_t);
static void virtio_set_status_locked(virtio_t *, uint8_t);
static int virtio_chain_append_impl(virtio_chain_t *, uint64_t, size_t,
    uint16_t);
static int virtio_interrupts_setup(virtio_t *, int);
static void virtio_interrupts_teardown(virtio_t *);
static void virtio_interrupts_disable_locked(virtio_t *);
static void virtio_queue_free(virtio_queue_t *);
static void virtio_device_reset_locked(virtio_t *);

/*
 * We use the same device access attributes for BAR mapping and access to the
 * virtqueue memory.
 */
ddi_device_acc_attr_t virtio_acc_attr = {
	.devacc_attr_version = DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC,
	.devacc_attr_dataorder = DDI_STORECACHING_OK_ACC,
	.devacc_attr_access = DDI_DEFAULT_ACC
};


/*
 * DMA attributes for the memory given to the device for queue management.
 */
ddi_dma_attr_t virtio_dma_attr_queue = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	/*
	 * Queue memory is aligned on VIRTIO_PAGE_SIZE with the address shifted
	 * down by VIRTIO_PAGE_SHIFT before being passed to the device in a
	 * 32-bit register.
	 */
	.dma_attr_addr_hi = 0x00000FFFFFFFF000,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = VIRTIO_PAGE_SIZE,
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};
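
/*
 * As a worked example of the constraint described above: queue memory
 * allocated at physical address 0x123400000 satisfies both dma_attr_addr_hi
 * and dma_attr_align, and is programmed into the 32-bit queue address
 * register as 0x123400000 >> VIRTIO_PAGE_SHIFT == 0x00123400 (assuming the
 * usual 4096-byte VIRTIO_PAGE_SIZE).
 */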

/*
 * DMA attributes for the allocation of indirect descriptor lists. The
 * indirect list is referenced by a regular descriptor entry: the physical
 * address field is 64 bits wide, but the length field is only 32 bits. Each
 * descriptor is 16 bytes long.
 */
ddi_dma_attr_t virtio_dma_attr_indirect = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	.dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = sizeof (struct virtio_vq_desc),
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};


uint8_t
virtio_get8(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset)));
}

uint16_t
virtio_get16(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset)));
}

uint32_t
virtio_get32(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset)));
}

void
virtio_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	ddi_put8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset), value);
}

void
virtio_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	ddi_put16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset), value);
}

void
virtio_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	ddi_put32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset), value);
}

void
virtio_fini(virtio_t *vio, boolean_t failed)
{
	mutex_enter(&vio->vio_mutex);

	virtio_interrupts_teardown(vio);

	virtio_queue_t *viq;
	while ((viq = list_remove_head(&vio->vio_queues)) != NULL) {
		virtio_queue_free(viq);
	}
	list_destroy(&vio->vio_queues);

	if (failed) {
		/*
		 * Signal to the host that device setup failed.
		 */
		virtio_set_status_locked(vio, VIRTIO_STATUS_FAILED);
	} else {
		virtio_device_reset_locked(vio);
	}

	/*
	 * We don't need to do anything for the provider initlevel, as it
	 * merely records the fact that virtio_init_complete() was called.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_PROVIDER;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_REGS) {
		/*
		 * Unmap PCI BAR0.
		 */
		ddi_regs_map_free(&vio->vio_barh);

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_REGS;
	}

	/*
	 * Ensure we have torn down everything we set up.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_SHUTDOWN;
	VERIFY0(vio->vio_initlevel);

	mutex_exit(&vio->vio_mutex);
	mutex_destroy(&vio->vio_mutex);

	kmem_free(vio, sizeof (*vio));
}
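
/*
 * An illustrative sketch (not part of this module) of how a client driver
 * might tear everything down in its detach(9E) entry point. The names
 * "xx_detach", "xx_softc_t", "xs_vio" and "xs_rxq" are hypothetical; only the
 * virtio_* calls are real:
 *
 *	static int
 *	xx_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 *	{
 *		xx_softc_t *xs = ddi_get_driver_private(dip);
 *		virtio_chain_t *vic;
 *
 *		// Reset the device and halt queue processing.
 *		virtio_shutdown(xs->xs_vio);
 *
 *		// Reclaim and free any chains still held by the device.
 *		while ((vic = virtio_queue_evacuate(xs->xs_rxq)) != NULL) {
 *			virtio_chain_free(vic);
 *		}
 *
 *		// Release interrupts, queues, and the BAR mapping.
 *		virtio_fini(xs->xs_vio, B_FALSE);
 *		return (DDI_SUCCESS);
 *	}
 */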

/*
 * Early device initialisation for legacy (pre-1.0 specification) virtio
 * devices.
 */
virtio_t *
virtio_init(dev_info_t *dip, uint64_t driver_features, boolean_t allow_indirect)
{
	int r;

	/*
	 * First, confirm that this is a legacy device.
	 */
	ddi_acc_handle_t pci;
	if (pci_config_setup(dip, &pci) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "pci_config_setup failed");
		return (NULL);
	}

	uint8_t revid;
	if ((revid = pci_config_get8(pci, PCI_CONF_REVID)) == PCI_EINVAL8) {
		dev_err(dip, CE_WARN, "could not read config space");
		pci_config_teardown(&pci);
		return (NULL);
	}

	pci_config_teardown(&pci);

	/*
	 * The legacy specification requires that the device advertise as PCI
	 * Revision 0.
	 */
	if (revid != 0) {
		dev_err(dip, CE_WARN, "PCI Revision %u incorrect for "
		    "legacy virtio device", (uint_t)revid);
		return (NULL);
	}

	virtio_t *vio = kmem_zalloc(sizeof (*vio), KM_SLEEP);
	vio->vio_dip = dip;

	/*
	 * Map PCI BAR0 for legacy device access.
	 */
	if ((r = ddi_regs_map_setup(dip, VIRTIO_LEGACY_PCI_BAR0,
	    (caddr_t *)&vio->vio_bar, 0, 0, &virtio_acc_attr,
	    &vio->vio_barh)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "ddi_regs_map_setup failure (%d)", r);
		kmem_free(vio, sizeof (*vio));
		return (NULL);
	}
	vio->vio_initlevel |= VIRTIO_INITLEVEL_REGS;

	/*
	 * We initialise the mutex without an interrupt priority to ease the
	 * implementation of some of the configuration space access routines.
	 * Drivers using the virtio framework MUST make a call to
	 * "virtio_init_complete()" prior to spawning other threads or enabling
	 * interrupt handlers, at which time we will destroy and reinitialise
	 * the mutex for use in our interrupt handlers.
	 */
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, NULL);

	list_create(&vio->vio_queues, sizeof (virtio_queue_t),
	    offsetof(virtio_queue_t, viq_link));

	/*
	 * Legacy virtio devices require a few common steps before we can
	 * negotiate device features.
	 */
	virtio_device_reset(vio);
	virtio_set_status(vio, VIRTIO_STATUS_ACKNOWLEDGE);
	virtio_set_status(vio, VIRTIO_STATUS_DRIVER);

	/*
	 * Negotiate features with the device. Record the original supported
	 * feature set for debugging purposes.
	 */
	vio->vio_features_device = virtio_get32(vio,
	    VIRTIO_LEGACY_FEATURES_DEVICE);
	if (allow_indirect) {
		driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
	}
	vio->vio_features = vio->vio_features_device & driver_features;
	virtio_put32(vio, VIRTIO_LEGACY_FEATURES_DRIVER, vio->vio_features);

	/*
	 * The device-specific configuration begins at an offset into the BAR
	 * that depends on whether we have enabled MSI-X interrupts or not.
	 * Start out with the offset for pre-MSI-X operation so that we can
	 * read device configuration space prior to configuring interrupts.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;

	return (vio);
}

/*
 * Some virtio devices can change their device configuration state at any
 * time. This function may be called by the driver during the initialisation
 * phase - before calling virtio_init_complete() - in order to register a
 * handler function which will be called when the device configuration space
 * is updated.
 */
void
virtio_register_cfgchange_handler(virtio_t *vio, ddi_intr_handler_t *func,
    void *funcarg)
{
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));
	VERIFY(!vio->vio_cfgchange_handler_added);

	mutex_enter(&vio->vio_mutex);
	vio->vio_cfgchange_handler = func;
	vio->vio_cfgchange_handlerarg = funcarg;
	mutex_exit(&vio->vio_mutex);
}

/*
 * This function must be called by the driver once it has completed early setup
 * calls. The value of "allowed_interrupt_types" is a mask of interrupt types
 * (DDI_INTR_TYPE_MSIX, etc) that we'll try to use when installing handlers, or
 * the special value 0 to allow the system to use any available type.
 */
int
virtio_init_complete(virtio_t *vio, int allowed_interrupt_types)
{
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER));
	vio->vio_initlevel |= VIRTIO_INITLEVEL_PROVIDER;

	if (!list_is_empty(&vio->vio_queues) ||
	    vio->vio_cfgchange_handler != NULL) {
		/*
		 * Set up interrupts for the queues that have been registered.
		 */
		if (virtio_interrupts_setup(vio, allowed_interrupt_types) !=
		    DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/*
	 * Now that we know the interrupt priority, we can re-initialise the
	 * mutexes with that priority.
	 */
	mutex_destroy(&vio->vio_mutex);
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_destroy(&viq->viq_mutex);
		mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER,
		    virtio_intr_pri(vio));
	}

	virtio_set_status(vio, VIRTIO_STATUS_DRIVER_OK);

	return (DDI_SUCCESS);
}
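
/*
 * An illustrative sketch (not part of this module) of the early setup calls a
 * client driver might make from attach(9E), ending with the call to
 * virtio_init_complete() described above. The names "XX_WANTED_FEATURES",
 * "xx_rx_isr" and "xs" are hypothetical; only the virtio_* calls are real:
 *
 *	virtio_t *vio;
 *	virtio_queue_t *rxq;
 *
 *	if ((vio = virtio_init(dip, XX_WANTED_FEATURES, B_TRUE)) == NULL) {
 *		return (DDI_FAILURE);
 *	}
 *
 *	if ((rxq = virtio_queue_alloc(vio, 0, "rx", xx_rx_isr, xs,
 *	    B_FALSE, 8)) == NULL ||
 *	    virtio_init_complete(vio, VIRTIO_ANY_INTR_TYPE) != DDI_SUCCESS) {
 *		virtio_fini(vio, B_TRUE);
 *		return (DDI_FAILURE);
 *	}
 *
 *	(void) virtio_interrupts_enable(vio);
 */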

boolean_t
virtio_feature_present(virtio_t *vio, uint64_t feature_mask)
{
	return ((vio->vio_features & feature_mask) != 0);
}

void *
virtio_intr_pri(virtio_t *vio)
{
	VERIFY(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED);

	return (DDI_INTR_PRI(vio->vio_interrupt_priority));
}

/*
 * Enable a bit in the device status register. Each bit signals a level of
 * guest readiness to the host. Use the VIRTIO_STATUS_* constants for
 * "status". To zero the status field use virtio_device_reset().
 */
static void
virtio_set_status_locked(virtio_t *vio, uint8_t status)
{
	VERIFY3U(status, !=, 0);
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	uint8_t old = virtio_get8(vio, VIRTIO_LEGACY_DEVICE_STATUS);
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, status | old);
}

static void
virtio_set_status(virtio_t *vio, uint8_t status)
{
	mutex_enter(&vio->vio_mutex);
	virtio_set_status_locked(vio, status);
	mutex_exit(&vio->vio_mutex);
}

static void
virtio_device_reset_locked(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, VIRTIO_STATUS_RESET);
}

void
virtio_device_reset(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_device_reset_locked(vio);
	mutex_exit(&vio->vio_mutex);
}

/*
 * Some queues are effectively long-polled; the driver submits a series of
 * buffers and the device only returns them when there is data available.
 * During detach, we need to coordinate the return of these buffers. Calling
 * "virtio_shutdown()" will reset the device, then allow the removal of all
 * buffers that were in flight at the time of shutdown via
 * "virtio_queue_evacuate()".
 */
void
virtio_shutdown(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Shutdown has been performed already.
		 */
		mutex_exit(&vio->vio_mutex);
		return;
	}

	/*
	 * First, mark all of the queues as shutdown. This will prevent any
	 * further activity.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_enter(&viq->viq_mutex);
		viq->viq_shutdown = B_TRUE;
		mutex_exit(&viq->viq_mutex);
	}

	/*
	 * Now, reset the device. This removes any queue configuration on the
	 * device side.
	 */
	virtio_device_reset_locked(vio);
	vio->vio_initlevel |= VIRTIO_INITLEVEL_SHUTDOWN;
	mutex_exit(&vio->vio_mutex);
}

/*
 * Common implementation of quiesce(9E) for simple Virtio-based devices.
 */
int
virtio_quiesce(virtio_t *vio)
{
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Device has already been reset.
		 */
		return (DDI_SUCCESS);
	}

	/*
	 * When we reset the device, it should immediately stop using any DMA
	 * memory we've previously passed to it. All queue configuration is
	 * discarded. This is good enough for quiesce(9E).
	 */
	virtio_device_reset_locked(vio);

	return (DDI_SUCCESS);
}
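
/*
 * An illustrative sketch (not part of this module) of a client driver's
 * quiesce(9E) entry point built on the common implementation above. The
 * names "xx_quiesce", "xx_softc_t" and "xs_vio" are hypothetical:
 *
 *	static int
 *	xx_quiesce(dev_info_t *dip)
 *	{
 *		xx_softc_t *xs;
 *
 *		if ((xs = ddi_get_driver_private(dip)) == NULL) {
 *			return (DDI_FAILURE);
 *		}
 *
 *		return (virtio_quiesce(xs->xs_vio));
 *	}
 */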

/*
 * DEVICE-SPECIFIC REGISTER ACCESS
 *
 * Note that these functions take the mutex to avoid racing with interrupt
 * enable/disable, when the device-specific offset can potentially change.
 */

uint8_t
virtio_dev_get8(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint8_t r = virtio_get8(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint16_t
virtio_dev_get16(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint16_t r = virtio_get16(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint32_t
virtio_dev_get32(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint32_t r = virtio_get32(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint64_t
virtio_dev_get64(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	/*
	 * On at least some systems, a 64-bit read or write to this BAR is not
	 * possible. For legacy devices, there is no generation number to use
	 * to determine if configuration may have changed half-way through a
	 * read. We need to continue to read both halves of the value until we
	 * read the same value at least twice.
	 */
	uintptr_t o_lo = vio->vio_config_offset + offset;
	uintptr_t o_hi = o_lo + 4;

	uint64_t val = virtio_get32(vio, o_lo) |
	    ((uint64_t)virtio_get32(vio, o_hi) << 32);

	for (;;) {
		uint64_t tval = virtio_get32(vio, o_lo) |
		    ((uint64_t)virtio_get32(vio, o_hi) << 32);

		if (tval == val) {
			break;
		}

		val = tval;
	}

	mutex_exit(&vio->vio_mutex);
	return (val);
}

void
virtio_dev_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put8(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put16(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put32(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}
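
/*
 * An illustrative sketch (not part of this module) of reading a
 * device-specific configuration field with the accessors above. A virtio
 * block device, for example, exposes its capacity as a 64-bit field at the
 * start of the device-specific region; the offset name used here is
 * hypothetical:
 *
 *	#define	XX_CONFIG_CAPACITY	0x00
 *
 *	uint64_t nsectors = virtio_dev_get64(vio, XX_CONFIG_CAPACITY);
 *
 * The offsets are relative to the device-specific region, so they remain
 * valid whether or not MSI-X has shifted the start of that region within
 * BAR0.
 */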

/*
 * VIRTQUEUE MANAGEMENT
 */

static int
virtio_inflight_compar(const void *lp, const void *rp)
{
	const virtio_chain_t *l = lp;
	const virtio_chain_t *r = rp;

	if (l->vic_head < r->vic_head) {
		return (-1);
	} else if (l->vic_head > r->vic_head) {
		return (1);
	} else {
		return (0);
	}
}

virtio_queue_t *
virtio_queue_alloc(virtio_t *vio, uint16_t qidx, const char *name,
    ddi_intr_handler_t *func, void *funcarg, boolean_t force_direct,
    uint_t max_segs)
{
	uint16_t qsz;
	char space_name[256];

	if (max_segs < 1) {
		/*
		 * Every descriptor, direct or indirect, needs to refer to at
		 * least one buffer.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "segment count must be at least 1", name, (uint_t)qidx);
		return (NULL);
	}

	mutex_enter(&vio->vio_mutex);

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER) {
		/*
		 * Cannot configure any more queues once initial setup is
		 * complete and interrupts have been allocated.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "alloc after init complete", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	/*
	 * There is no way to negotiate a different queue size for legacy
	 * devices. We must read and use the native queue size of the device.
	 */
	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	if ((qsz = virtio_get16(vio, VIRTIO_LEGACY_QUEUE_SIZE)) == 0) {
		/*
		 * A size of zero means the device does not have a queue with
		 * this index.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "does not exist on device", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	mutex_exit(&vio->vio_mutex);

	virtio_queue_t *viq = kmem_zalloc(sizeof (*viq), KM_SLEEP);
	viq->viq_virtio = vio;
	viq->viq_name = name;
	viq->viq_index = qidx;
	viq->viq_size = qsz;
	viq->viq_func = func;
	viq->viq_funcarg = funcarg;
	viq->viq_max_segs = max_segs;
	avl_create(&viq->viq_inflight, virtio_inflight_compar,
	    sizeof (virtio_chain_t), offsetof(virtio_chain_t, vic_node));

	/*
	 * Allocate the mutex without an interrupt priority for now, as we do
	 * with "vio_mutex". We'll reinitialise it in
	 * "virtio_init_complete()".
	 */
	mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER, NULL);

	if (virtio_feature_present(vio, VIRTIO_F_RING_INDIRECT_DESC) &&
	    !force_direct) {
		/*
		 * If we were able to negotiate the indirect descriptor
		 * feature, and the caller has not explicitly forced the use of
		 * direct descriptors, we'll allocate indirect descriptor lists
		 * for each chain.
		 */
		viq->viq_indirect = B_TRUE;
	}

	/*
	 * Track descriptor usage in an identifier space.
	 */
	(void) snprintf(space_name, sizeof (space_name), "%s%d_vq_%s",
	    ddi_get_name(vio->vio_dip), ddi_get_instance(vio->vio_dip), name);
	if ((viq->viq_descmap = id_space_create(space_name, 0, qsz)) == NULL) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate descriptor "
		    "ID space");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * For legacy devices, memory for the queue has a strict layout
	 * determined by the queue size.
	 */
	size_t sz_descs = sizeof (virtio_vq_desc_t) * qsz;
	size_t sz_driver = P2ROUNDUP_TYPED(sz_descs +
	    sizeof (virtio_vq_driver_t) +
	    sizeof (uint16_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);
	size_t sz_device = P2ROUNDUP_TYPED(sizeof (virtio_vq_device_t) +
	    sizeof (virtio_vq_elem_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);

	if (virtio_dma_init(vio, &viq->viq_dma, sz_driver + sz_device,
	    &virtio_dma_attr_queue, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
	    KM_SLEEP) != DDI_SUCCESS) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate queue "
		    "DMA memory");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * NOTE: The viq_dma_* members below are used by
	 * VIRTQ_DMA_SYNC_FORDEV() and VIRTQ_DMA_SYNC_FORKERNEL() to calculate
	 * offsets into the DMA allocation for partial synchronisation. If the
	 * ordering of, or relationship between, these pointers changes, the
	 * macros must be kept in sync.
	 */
	viq->viq_dma_descs = virtio_dma_va(&viq->viq_dma, 0);
	viq->viq_dma_driver = virtio_dma_va(&viq->viq_dma, sz_descs);
	viq->viq_dma_device = virtio_dma_va(&viq->viq_dma, sz_driver);

	/*
	 * Install in the per-device list of queues.
	 */
	mutex_enter(&vio->vio_mutex);
	for (virtio_queue_t *chkvq = list_head(&vio->vio_queues); chkvq != NULL;
	    chkvq = list_next(&vio->vio_queues, chkvq)) {
		if (chkvq->viq_index == qidx) {
			dev_err(vio->vio_dip, CE_WARN, "attempt to register "
			    "queue \"%s\" with same index (%d) as queue \"%s\"",
			    name, qidx, chkvq->viq_name);
			mutex_exit(&vio->vio_mutex);
			virtio_queue_free(viq);
			return (NULL);
		}
	}
	list_insert_tail(&vio->vio_queues, viq);

	/*
	 * Ensure the zeroing of the queue memory is visible to the host before
	 * we inform the device of the queue address.
	 */
	membar_producer();
	VIRTQ_DMA_SYNC_FORDEV(viq);

	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS,
	    virtio_dma_cookie_pa(&viq->viq_dma, 0) >> VIRTIO_PAGE_SHIFT);

	mutex_exit(&vio->vio_mutex);
	return (viq);
}
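
/*
 * As a worked example of the layout computed above, consider a queue size of
 * 256 and the usual legacy structure sizes (16-byte descriptors, 4-byte ring
 * headers, 8-byte used-ring elements):
 *
 *	sz_descs = 16 * 256 = 4096 bytes
 *	sz_driver = P2ROUNDUP(4096 + 4 + 2 * 256, 4096) = 8192 bytes
 *	sz_device = P2ROUNDUP(4 + 8 * 256, 4096) = 4096 bytes
 *
 * so the descriptor table and the driver (available) ring share the first two
 * pages and the device (used) ring starts on its own page, 12288 bytes in
 * total. The exact sizes depend on the structure definitions in
 * "virtio_impl.h".
 */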

static void
virtio_queue_free(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	/*
	 * We are going to destroy the queue mutex. Make sure we've already
	 * removed the interrupt handlers.
	 */
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));

	mutex_enter(&viq->viq_mutex);

	/*
	 * If the device has not already been reset as part of a shutdown,
	 * detach the queue from the device now.
	 */
	if (!viq->viq_shutdown) {
		virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, viq->viq_index);
		virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS, 0);
	}

	virtio_dma_fini(&viq->viq_dma);

	VERIFY(avl_is_empty(&viq->viq_inflight));
	avl_destroy(&viq->viq_inflight);
	if (viq->viq_descmap != NULL) {
		id_space_destroy(viq->viq_descmap);
	}

	mutex_exit(&viq->viq_mutex);
	mutex_destroy(&viq->viq_mutex);

	kmem_free(viq, sizeof (*viq));
}

void
virtio_queue_no_interrupt(virtio_queue_t *viq, boolean_t stop_interrupts)
{
	mutex_enter(&viq->viq_mutex);

	if (stop_interrupts) {
		viq->viq_dma_driver->vqdr_flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
	} else {
		viq->viq_dma_driver->vqdr_flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
	}
	VIRTQ_DMA_SYNC_FORDEV(viq);

	mutex_exit(&viq->viq_mutex);
}

static virtio_chain_t *
virtio_queue_complete(virtio_queue_t *viq, uint_t index)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	virtio_chain_t *vic;

	virtio_chain_t search;
	bzero(&search, sizeof (search));
	search.vic_head = index;

	if ((vic = avl_find(&viq->viq_inflight, &search, NULL)) == NULL) {
		return (NULL);
	}
	avl_remove(&viq->viq_inflight, vic);

	return (vic);
}

uint_t
virtio_queue_size(virtio_queue_t *viq)
{
	return (viq->viq_size);
}

uint_t
virtio_queue_nactive(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	uint_t r = avl_numnodes(&viq->viq_inflight);
	mutex_exit(&viq->viq_mutex);

	return (r);
}

virtio_chain_t *
virtio_queue_poll(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	if (viq->viq_shutdown) {
		/*
		 * The device has been reset by virtio_shutdown(), and queue
		 * processing has been halted. Any previously submitted chains
		 * will be evacuated using virtio_queue_evacuate().
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (viq->viq_device_index == viq->viq_dma_device->vqde_index) {
		/*
		 * If the device index has not changed since the last poll,
		 * there are no new chains to process.
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	/*
	 * We need to ensure that all reads from the descriptor (vqde_ring[])
	 * and any memory referenced by the descriptor occur after we have read
	 * the descriptor index value above (vqde_index).
	 */
	membar_consumer();

	uint16_t index = (viq->viq_device_index++) % viq->viq_size;
	uint16_t start = viq->viq_dma_device->vqde_ring[index].vqe_start;
	uint32_t len = viq->viq_dma_device->vqde_ring[index].vqe_len;

	virtio_chain_t *vic;
	if ((vic = virtio_queue_complete(viq, start)) == NULL) {
		/*
		 * We could not locate a chain for this descriptor index, which
		 * suggests that something has gone horribly wrong.
		 */
		dev_err(viq->viq_virtio->vio_dip, CE_PANIC,
		    "queue \"%s\" ring entry %u (descriptor %u) has no chain",
		    viq->viq_name, (uint16_t)index, (uint16_t)start);
	}

	vic->vic_received_length = len;

	mutex_exit(&viq->viq_mutex);

	return (vic);
}

/*
 * After a call to "virtio_shutdown()", the driver must retrieve any previously
 * submitted chains and free any associated resources.
 */
virtio_chain_t *
virtio_queue_evacuate(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	mutex_enter(&vio->vio_mutex);
	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN)) {
		dev_err(vio->vio_dip, CE_PANIC,
		    "virtio_queue_evacuate() without virtio_shutdown()");
	}
	mutex_exit(&vio->vio_mutex);

	mutex_enter(&viq->viq_mutex);
	VERIFY(viq->viq_shutdown);

	virtio_chain_t *vic = avl_first(&viq->viq_inflight);
	if (vic != NULL) {
		avl_remove(&viq->viq_inflight, vic);
	}

	mutex_exit(&viq->viq_mutex);

	return (vic);
}

/*
 * VIRTQUEUE DESCRIPTOR CHAIN MANAGEMENT
 */

/*
 * When the device returns a descriptor chain to the driver, it may provide the
 * length in bytes of data written into the chain. Client drivers should use
 * this value with care; the specification suggests some device implementations
 * have not always provided a useful or correct value.
 */
size_t
virtio_chain_received_length(virtio_chain_t *vic)
{
	return (vic->vic_received_length);
}
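
/*
 * An illustrative sketch (not part of this module) of the completion path a
 * client driver might use in its per-queue interrupt handler, draining the
 * queue with virtio_queue_poll() and using the received length reported
 * above. Here arg0 is the funcarg registered with virtio_queue_alloc(); the
 * names "xx_rx_isr", "xx_softc_t", "xx_buf_t" and "xx_process" are
 * hypothetical:
 *
 *	static uint_t
 *	xx_rx_isr(caddr_t arg0, caddr_t arg1)
 *	{
 *		xx_softc_t *xs = (xx_softc_t *)arg0;
 *		virtio_chain_t *vic;
 *
 *		while ((vic = virtio_queue_poll(xs->xs_rxq)) != NULL) {
 *			xx_buf_t *xb = virtio_chain_data(vic);
 *			size_t len = virtio_chain_received_length(vic);
 *
 *			xx_process(xs, xb, len);
 *
 *			// Return the buffer to the device for reuse.
 *			virtio_chain_clear(vic);
 *			(void) virtio_chain_append(vic, xb->xb_dma_pa,
 *			    xb->xb_dma_len, VIRTIO_DIR_DEVICE_WRITES);
 *			virtio_chain_submit(vic, B_FALSE);
 *		}
 *		virtio_queue_flush(xs->xs_rxq);
 *
 *		return (DDI_INTR_CLAIMED);
 *	}
 */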

/*
 * Allocate a descriptor chain for use with this queue. The "kmflags" value
 * may be KM_SLEEP or KM_NOSLEEP as per kmem_alloc(9F).
 */
virtio_chain_t *
virtio_chain_alloc(virtio_queue_t *viq, int kmflags)
{
	virtio_t *vio = viq->viq_virtio;
	virtio_chain_t *vic;
	uint_t cap;

	/*
	 * Direct descriptors are known by their index in the descriptor table
	 * for the queue. We use the variable-length array member at the end
	 * of the chain tracking object to hold the list of direct descriptors
	 * assigned to this chain.
	 */
	if (viq->viq_indirect) {
		/*
		 * When using indirect descriptors we still need one direct
		 * descriptor entry to hold the physical address and length of
		 * the indirect descriptor table.
		 */
		cap = 1;
	} else {
		/*
		 * For direct descriptors we need to be able to track a
		 * descriptor for each possible segment in a single chain.
		 */
		cap = viq->viq_max_segs;
	}

	size_t vicsz = sizeof (*vic) + sizeof (uint16_t) * cap;
	if ((vic = kmem_zalloc(vicsz, kmflags)) == NULL) {
		return (NULL);
	}
	vic->vic_vq = viq;
	vic->vic_direct_capacity = cap;

	if (viq->viq_indirect) {
		/*
		 * Allocate an indirect descriptor list with the appropriate
		 * number of entries.
		 */
		if (virtio_dma_init(vio, &vic->vic_indirect_dma,
		    sizeof (virtio_vq_desc_t) * viq->viq_max_segs,
		    &virtio_dma_attr_indirect,
		    DDI_DMA_CONSISTENT | DDI_DMA_WRITE,
		    kmflags) != DDI_SUCCESS) {
			goto fail;
		}

		/*
		 * Allocate a single descriptor to hold the indirect list.
		 * Leave the length as zero for now; it will be set to include
		 * any occupied entries at push time.
		 */
		mutex_enter(&viq->viq_mutex);
		if (virtio_chain_append_impl(vic,
		    virtio_dma_cookie_pa(&vic->vic_indirect_dma, 0), 0,
		    VIRTQ_DESC_F_INDIRECT) != DDI_SUCCESS) {
			mutex_exit(&viq->viq_mutex);
			goto fail;
		}
		mutex_exit(&viq->viq_mutex);
		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * Don't set the indirect capacity until after we've installed
		 * the direct descriptor which points at the indirect list, or
		 * virtio_chain_append_impl() will be confused.
		 */
		vic->vic_indirect_capacity = viq->viq_max_segs;
	}

	return (vic);

fail:
	virtio_dma_fini(&vic->vic_indirect_dma);
	kmem_free(vic, vicsz);
	return (NULL);
}

void *
virtio_chain_data(virtio_chain_t *vic)
{
	return (vic->vic_data);
}

void
virtio_chain_data_set(virtio_chain_t *vic, void *data)
{
	vic->vic_data = data;
}

void
virtio_chain_clear(virtio_chain_t *vic)
{
	if (vic->vic_indirect_capacity != 0) {
		/*
		 * There should only be one direct descriptor, which points at
		 * our indirect descriptor list. We don't want to clear it
		 * here.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);

		if (vic->vic_indirect_used > 0) {
			/*
			 * Clear out the indirect descriptor table.
			 */
			vic->vic_indirect_used = 0;
			bzero(virtio_dma_va(&vic->vic_indirect_dma, 0),
			    virtio_dma_size(&vic->vic_indirect_dma));
		}

	} else if (vic->vic_direct_capacity > 0) {
		/*
		 * Release any descriptors that were assigned to us previously.
		 */
		for (uint_t i = 0; i < vic->vic_direct_used; i++) {
			id_free(vic->vic_vq->viq_descmap, vic->vic_direct[i]);
			vic->vic_direct[i] = 0;
		}
		vic->vic_direct_used = 0;
	}
}

void
virtio_chain_free(virtio_chain_t *vic)
{
	/*
	 * First ensure that we have released any descriptors used by this
	 * chain.
	 */
	virtio_chain_clear(vic);

	if (vic->vic_indirect_capacity > 0) {
		/*
		 * Release the direct descriptor that points to our indirect
		 * descriptor list.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);
		id_free(vic->vic_vq->viq_descmap, vic->vic_direct[0]);

		virtio_dma_fini(&vic->vic_indirect_dma);
	}

	size_t vicsz = sizeof (*vic) +
	    vic->vic_direct_capacity * sizeof (uint16_t);

	kmem_free(vic, vicsz);
}
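
/*
 * An illustrative sketch (not part of this module) of the submission path:
 * allocating a chain, attaching a private data pointer, describing the DMA
 * buffer, and handing the chain to the device. The names "xs", "xb" and
 * their members are hypothetical; only the virtio_* calls are real:
 *
 *	virtio_chain_t *vic;
 *
 *	if ((vic = virtio_chain_alloc(xs->xs_rxq, KM_SLEEP)) == NULL) {
 *		return (DDI_FAILURE);
 *	}
 *	virtio_chain_data_set(vic, xb);
 *
 *	if (virtio_chain_append(vic, xb->xb_dma_pa, xb->xb_dma_len,
 *	    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
 *		virtio_chain_free(vic);
 *		return (DDI_FAILURE);
 *	}
 *
 *	virtio_chain_submit(vic, B_TRUE);
 */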

static inline int
virtio_queue_descmap_alloc(virtio_queue_t *viq, uint_t *indexp)
{
	id_t index;

	if ((index = id_alloc_nosleep(viq->viq_descmap)) == -1) {
		return (ENOMEM);
	}

	VERIFY3S(index, >=, 0);
	VERIFY3S(index, <, viq->viq_size);

	*indexp = (uint_t)index;
	return (0);
}

static int
virtio_chain_append_impl(virtio_chain_t *vic, uint64_t pa, size_t len,
    uint16_t flags)
{
	virtio_queue_t *viq = vic->vic_vq;
	virtio_vq_desc_t *vqd;
	uint_t index;

	/*
	 * We're modifying the queue-wide descriptor list so make sure we have
	 * the appropriate lock.
	 */
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	if (vic->vic_indirect_capacity != 0) {
		/*
		 * Use indirect descriptors.
		 */
		if (vic->vic_indirect_used >= vic->vic_indirect_capacity) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&vic->vic_indirect_dma, 0);

		if ((index = vic->vic_indirect_used++) > 0) {
			/*
			 * Chain the current last indirect descriptor to the
			 * new one.
			 */
			vqd[index - 1].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[index - 1].vqd_next = index;
		}

	} else {
		/*
		 * Use direct descriptors.
		 */
		if (vic->vic_direct_used >= vic->vic_direct_capacity) {
			return (DDI_FAILURE);
		}

		if (virtio_queue_descmap_alloc(viq, &index) != 0) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&viq->viq_dma, 0);

		if (vic->vic_direct_used > 0) {
			/*
			 * This is not the first entry. Chain the current
			 * descriptor to the next one.
			 */
			uint16_t p = vic->vic_direct[vic->vic_direct_used - 1];

			vqd[p].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[p].vqd_next = index;
		}
		vic->vic_direct[vic->vic_direct_used++] = index;
	}

	vqd[index].vqd_addr = pa;
	vqd[index].vqd_len = len;
	vqd[index].vqd_flags = flags;
	vqd[index].vqd_next = 0;

	return (DDI_SUCCESS);
}

int
virtio_chain_append(virtio_chain_t *vic, uint64_t pa, size_t len,
    virtio_direction_t dir)
{
	virtio_queue_t *viq = vic->vic_vq;
	uint16_t flags = 0;

	switch (dir) {
	case VIRTIO_DIR_DEVICE_WRITES:
		flags |= VIRTQ_DESC_F_WRITE;
		break;

	case VIRTIO_DIR_DEVICE_READS:
		break;

	default:
		panic("unknown direction value %u", dir);
	}

	mutex_enter(&viq->viq_mutex);
	int r = virtio_chain_append_impl(vic, pa, len, flags);
	mutex_exit(&viq->viq_mutex);

	return (r);
}

static void
virtio_queue_flush_locked(virtio_queue_t *viq)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	/*
	 * Make sure any writes we have just made to the descriptors
	 * (vqdr_ring[]) are visible to the device before we update the ring
	 * pointer (vqdr_index).
	 */
	membar_producer();
	viq->viq_dma_driver->vqdr_index = viq->viq_driver_index;
	VIRTQ_DMA_SYNC_FORDEV(viq);

	/*
	 * Determine whether the device expects us to notify it of new
	 * descriptors.
	 */
	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (!(viq->viq_dma_device->vqde_flags & VIRTQ_USED_F_NO_NOTIFY)) {
		virtio_put16(viq->viq_virtio, VIRTIO_LEGACY_QUEUE_NOTIFY,
		    viq->viq_index);
	}
}

void
virtio_queue_flush(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	virtio_queue_flush_locked(viq);
	mutex_exit(&viq->viq_mutex);
}

void
virtio_chain_submit(virtio_chain_t *vic, boolean_t flush)
{
	virtio_queue_t *viq = vic->vic_vq;

	mutex_enter(&viq->viq_mutex);

	if (vic->vic_indirect_capacity != 0) {
		virtio_vq_desc_t *vqd = virtio_dma_va(&viq->viq_dma, 0);

		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * This is an indirect descriptor queue. The length in bytes
		 * of the descriptor must extend to cover the populated
		 * indirect descriptor entries.
		 */
		vqd[vic->vic_direct[0]].vqd_len =
		    sizeof (virtio_vq_desc_t) * vic->vic_indirect_used;

		virtio_dma_sync(&vic->vic_indirect_dma, DDI_DMA_SYNC_FORDEV);
	}

	/*
	 * Populate the next available slot in the driver-owned ring for this
	 * chain. The updated value of viq_driver_index will not be visible to
	 * the device until a subsequent queue flush.
	 */
	uint16_t index = (viq->viq_driver_index++) % viq->viq_size;
	viq->viq_dma_driver->vqdr_ring[index] = vic->vic_direct[0];

	vic->vic_head = vic->vic_direct[0];
	avl_add(&viq->viq_inflight, vic);

	if (flush) {
		virtio_queue_flush_locked(vic->vic_vq);
	}

	mutex_exit(&viq->viq_mutex);
}

/*
 * INTERRUPTS MANAGEMENT
 */

static const char *
virtio_interrupt_type_name(int type)
{
	switch (type) {
	case DDI_INTR_TYPE_MSIX:
		return ("MSI-X");
	case DDI_INTR_TYPE_MSI:
		return ("MSI");
	case DDI_INTR_TYPE_FIXED:
		return ("fixed");
	default:
		return ("?");
	}
}

static int
virtio_interrupts_alloc(virtio_t *vio, int type, int nrequired)
{
	dev_info_t *dip = vio->vio_dip;
	int nintrs = 0;
	int navail = 0;

	VERIFY(MUTEX_HELD(&vio->vio_mutex));
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC));

	if (ddi_intr_get_nintrs(dip, type, &nintrs) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (nintrs < 1) {
		dev_err(dip, CE_WARN, "no %s interrupts supported",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}

	if (ddi_intr_get_navail(dip, type, &navail) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count available %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (navail < nrequired) {
		dev_err(dip, CE_WARN, "need %d %s interrupts, but only %d "
		    "available", nrequired, virtio_interrupt_type_name(type),
		    navail);
		return (DDI_FAILURE);
	}

	VERIFY3P(vio->vio_interrupts, ==, NULL);
	vio->vio_interrupts = kmem_zalloc(
	    sizeof (ddi_intr_handle_t) * nrequired, KM_SLEEP);

	int r;
	if ((r = ddi_intr_alloc(dip, vio->vio_interrupts, type, 0, nrequired,
	    &vio->vio_ninterrupts, DDI_INTR_ALLOC_STRICT)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "%s interrupt allocation failure (%d)",
		    virtio_interrupt_type_name(type), r);
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * nrequired);
		vio->vio_interrupts = NULL;
		return (DDI_FAILURE);
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ALLOC;
	vio->vio_interrupt_type = type;
	return (DDI_SUCCESS);
}

static uint_t
virtio_shared_isr(caddr_t arg0, caddr_t arg1)
{
	virtio_t *vio = (virtio_t *)arg0;
	uint_t r = DDI_INTR_UNCLAIMED;
	uint8_t isr;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Check the ISR status to see if the interrupt applies to us. Reading
	 * this field resets it to zero.
	 */
	isr = virtio_get8(vio, VIRTIO_LEGACY_ISR_STATUS);

	if ((isr & VIRTIO_ISR_CHECK_QUEUES) != 0) {
		r = DDI_INTR_CLAIMED;

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (viq->viq_func != NULL) {
				mutex_exit(&vio->vio_mutex);
				(void) viq->viq_func(viq->viq_funcarg, arg0);
				mutex_enter(&vio->vio_mutex);

				if (vio->vio_initlevel &
				    VIRTIO_INITLEVEL_SHUTDOWN) {
					/*
					 * The device was shut down while in a
					 * queue handler routine.
					 */
					break;
				}
			}
		}
	}

	mutex_exit(&vio->vio_mutex);

	/*
	 * vio_cfgchange_{handler,handlerarg} cannot change while interrupts
	 * are configured so it is safe to access them outside of the lock.
	 */

	if ((isr & VIRTIO_ISR_CHECK_CONFIG) != 0) {
		r = DDI_INTR_CLAIMED;
		if (vio->vio_cfgchange_handler != NULL) {
			(void) vio->vio_cfgchange_handler(
			    (caddr_t)vio->vio_cfgchange_handlerarg,
			    (caddr_t)vio);
		}
	}

	return (r);
}

static int
virtio_interrupts_setup(virtio_t *vio, int allow_types)
{
	dev_info_t *dip = vio->vio_dip;
	int types;
	int count = 0;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Determine the number of interrupts we'd like based on the number of
	 * virtqueues.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func != NULL) {
			count++;
		}
	}

	/*
	 * If there is a configuration change handler, one extra interrupt
	 * is needed for that.
	 */
	if (vio->vio_cfgchange_handler != NULL)
		count++;

	if (ddi_intr_get_supported_types(dip, &types) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not get supported interrupts");
		mutex_exit(&vio->vio_mutex);
		return (DDI_FAILURE);
	}

	if (allow_types != VIRTIO_ANY_INTR_TYPE) {
		/*
		 * Restrict the possible interrupt types at the request of the
		 * driver.
		 */
		types &= allow_types;
	}

	/*
	 * Try each potential interrupt type in descending order of preference.
	 * Note that the specification does not appear to allow for the use of
	 * classical MSI, so we are limited to either MSI-X or fixed
	 * interrupts.
	 */
	if (types & DDI_INTR_TYPE_MSIX) {
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_MSIX,
		    count) == DDI_SUCCESS) {
			goto add_handlers;
		}
	}
	if (types & DDI_INTR_TYPE_FIXED) {
		/*
		 * If fixed interrupts are all that are available, we'll just
		 * ask for one.
		 */
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_FIXED, 1) ==
		    DDI_SUCCESS) {
			goto add_handlers;
		}
	}

	dev_err(dip, CE_WARN, "interrupt allocation failed");
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);

add_handlers:
	/*
	 * Ensure that we have not been given any high-level interrupts as our
	 * interrupt handlers do not support them.
	 */
	for (int i = 0; i < vio->vio_ninterrupts; i++) {
		uint_t ipri;

		if (ddi_intr_get_pri(vio->vio_interrupts[i], &ipri) !=
		    DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "could not determine interrupt "
			    "priority");
			goto fail;
		}

		if (ipri >= ddi_intr_get_hilevel_pri()) {
			dev_err(dip, CE_WARN, "high level interrupts not "
			    "supported");
			goto fail;
		}

		/*
		 * Record the highest priority we've been allocated to use for
		 * mutex initialisation.
		 */
		if (i == 0 || ipri > vio->vio_interrupt_priority) {
			vio->vio_interrupt_priority = ipri;
		}
	}

	/*
	 * Get the interrupt capabilities from the first handle to determine
	 * whether we need to use ddi_intr_block_enable(9F).
	 */
	if (ddi_intr_get_cap(vio->vio_interrupts[0],
	    &vio->vio_interrupt_cap) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get interrupt capabilities");
		goto fail;
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		VERIFY3S(vio->vio_ninterrupts, ==, 1);
		/*
		 * For fixed interrupts, we need to use our shared handler to
		 * multiplex the per-queue handlers provided by the driver.
		 */
		if (ddi_intr_add_handler(vio->vio_interrupts[0],
		    virtio_shared_isr, (caddr_t)vio, NULL) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding shared %s interrupt "
			    "handler failed", virtio_interrupt_type_name(
			    vio->vio_interrupt_type));
			goto fail;
		}

		goto done;
	}

	VERIFY3S(vio->vio_ninterrupts, ==, count);

	uint_t n = 0;

	/* Bind the configuration vector interrupt */
	if (vio->vio_cfgchange_handler != NULL) {
		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    vio->vio_cfgchange_handler,
		    (caddr_t)vio->vio_cfgchange_handlerarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN,
			    "adding configuration change interrupt failed");
			goto fail;
		}
		vio->vio_cfgchange_handler_added = B_TRUE;
		vio->vio_cfgchange_handler_index = n;
		n++;
	}

	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func == NULL) {
			continue;
		}

		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    viq->viq_func, (caddr_t)viq->viq_funcarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding interrupt %u (%s) failed",
			    n, viq->viq_name);
			goto fail;
		}

		viq->viq_handler_index = n;
		viq->viq_handler_added = B_TRUE;
		n++;
	}

done:
	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ADDED;
	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);

fail:
	virtio_interrupts_teardown(vio);
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);
}

static void
virtio_interrupts_teardown(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	virtio_interrupts_disable_locked(vio);

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		/*
		 * Remove the multiplexing interrupt handler.
		 */
		if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED) {
			int r;

			VERIFY3S(vio->vio_ninterrupts, ==, 1);

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "shared interrupt handler failed (%d)", r);
			}
		}
	} else {
		/*
		 * Remove the configuration vector interrupt handler.
		 */
		if (vio->vio_cfgchange_handler_added) {
			int r;

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN,
				    "removing configuration change interrupt "
				    "handler failed (%d)", r);
			}
			vio->vio_cfgchange_handler_added = B_FALSE;
		}

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			int r;

			if (!viq->viq_handler_added) {
				continue;
			}

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[viq->viq_handler_index])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "interrupt handler (%s) failed (%d)",
				    viq->viq_name, r);
			}

			viq->viq_handler_added = B_FALSE;
		}
	}
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ADDED;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC) {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			int r;

			if ((r = ddi_intr_free(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "freeing "
				    "interrupt %u failed (%d)", i, r);
			}
		}
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * vio->vio_ninterrupts);
		vio->vio_interrupts = NULL;
		vio->vio_ninterrupts = 0;
		vio->vio_interrupt_type = 0;
		vio->vio_interrupt_cap = 0;
		vio->vio_interrupt_priority = 0;

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ALLOC;
	}
}

static void
virtio_interrupts_unwind(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT,
			    viq->viq_index);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}

		if (vio->vio_cfgchange_handler_added) {
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_CONFIG,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}
	}

	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		(void) ddi_intr_block_disable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			(void) ddi_intr_disable(vio->vio_interrupts[i]);
		}
	}

	/*
	 * Disabling the interrupts makes the MSI-X fields disappear from the
	 * BAR once more.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;
}

int
virtio_interrupts_enable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED) {
		mutex_exit(&vio->vio_mutex);
		return (DDI_SUCCESS);
	}

	int r = DDI_SUCCESS;
	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		r = ddi_intr_block_enable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			if ((r = ddi_intr_enable(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				/*
				 * Disable the interrupts we have enabled so
				 * far.
				 */
				for (i--; i >= 0; i--) {
					(void) ddi_intr_disable(
					    vio->vio_interrupts[i]);
				}
				break;
			}
		}
	}

	if (r != DDI_SUCCESS) {
		mutex_exit(&vio->vio_mutex);
		return (r);
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		/*
		 * When asked to enable the interrupts, the system enables
		 * MSI-X in the PCI configuration for the device. While
		 * enabled, the extra MSI-X configuration table fields appear
		 * between the general and the device-specific regions of the
		 * BAR.
		 */
		vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET_MSIX;

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			uint16_t qi = viq->viq_index;
			uint16_t msi = viq->viq_handler_index;

			/*
			 * Route interrupts for this queue to the assigned
			 * MSI-X vector number.
			 */
			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qi);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE, msi);

			/*
			 * The device may not actually accept the vector number
			 * we're attempting to program. We need to confirm
			 * that configuration was successful by re-reading the
			 * configuration we just wrote.
			 */
			if (virtio_get16(vio, VIRTIO_LEGACY_MSIX_QUEUE) !=
			    msi) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector %u for "
				    "queue \"%s\" (#%u)", (uint_t)msi,
				    viq->viq_name, (uint_t)qi);

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}

		if (vio->vio_cfgchange_handler_added) {
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_CONFIG,
			    vio->vio_cfgchange_handler_index);

			/* Verify the value was accepted. */
			if (virtio_get16(vio, VIRTIO_LEGACY_MSIX_CONFIG) !=
			    vio->vio_cfgchange_handler_index) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector for "
				    "configuration");

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ENABLED;

	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);
}

static void
virtio_interrupts_disable_locked(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED)) {
		return;
	}

	virtio_interrupts_unwind(vio);

	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ENABLED;
}

void
virtio_interrupts_disable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_interrupts_disable_locked(vio);
	mutex_exit(&vio->vio_mutex);
}