/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

/*
 * VIRTIO FRAMEWORK
 *
 * For design and usage documentation, see the comments in "virtio.h".
 */

#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/modctl.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/avintr.h>
#include <sys/spl.h>
#include <sys/promif.h>
#include <sys/list.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/sysmacros.h>
#include <sys/pci.h>

#include "virtio.h"
#include "virtio_impl.h"


/*
 * Linkage structures
 */
static struct modlmisc virtio_modlmisc = {
	.misc_modops = &mod_miscops,
	.misc_linkinfo = "VIRTIO common routines",
};

static struct modlinkage virtio_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &virtio_modlmisc, NULL }
};

int
_init(void)
{
	return (mod_install(&virtio_modlinkage));
}

int
_fini(void)
{
	return (mod_remove(&virtio_modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&virtio_modlinkage, modinfop));
}


static void virtio_set_status(virtio_t *, uint8_t);
static int virtio_chain_append_impl(virtio_chain_t *, uint64_t, size_t,
    uint16_t);
static int virtio_interrupts_setup(virtio_t *, int);
static void virtio_interrupts_teardown(virtio_t *);
static void virtio_interrupts_disable_locked(virtio_t *);
static void virtio_queue_free(virtio_queue_t *);
static void virtio_device_reset_locked(virtio_t *);

/*
 * We use the same device access attributes for BAR mapping and access to the
 * virtqueue memory.
 */
ddi_device_acc_attr_t virtio_acc_attr = {
	.devacc_attr_version = DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC,
	.devacc_attr_dataorder = DDI_STORECACHING_OK_ACC,
	.devacc_attr_access = DDI_DEFAULT_ACC
};


/*
 * DMA attributes for the memory given to the device for queue management.
 */
ddi_dma_attr_t virtio_dma_attr_queue = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	/*
	 * Queue memory is aligned on VIRTIO_PAGE_SIZE with the address shifted
	 * down by VIRTIO_PAGE_SHIFT before being passed to the device in a
	 * 32-bit register.
	 */
	.dma_attr_addr_hi = 0x00000FFFFFFFF000,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = VIRTIO_PAGE_SIZE,
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};

/*
 * DMA attributes for the allocation of indirect descriptor lists.  The
 * indirect list is referenced by a regular descriptor entry: the physical
 * address field is 64 bits wide, but the length field is only 32 bits.
 * Each descriptor is 16 bytes long.
 */
ddi_dma_attr_t virtio_dma_attr_indirect = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	.dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = sizeof (struct virtio_vq_desc),
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};


uint8_t
virtio_get8(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset)));
}

uint16_t
virtio_get16(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset)));
}

uint32_t
virtio_get32(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset)));
}

void
virtio_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	ddi_put8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset), value);
}

void
virtio_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	ddi_put16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset), value);
}

void
virtio_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	ddi_put32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset), value);
}

void
virtio_fini(virtio_t *vio, boolean_t failed)
{
	mutex_enter(&vio->vio_mutex);

	virtio_interrupts_teardown(vio);

	virtio_queue_t *viq;
	while ((viq = list_remove_head(&vio->vio_queues)) != NULL) {
		virtio_queue_free(viq);
	}
	list_destroy(&vio->vio_queues);

	if (failed) {
		/*
		 * Signal to the host that device setup failed.
		 */
		virtio_set_status(vio, VIRTIO_STATUS_FAILED);
	} else {
		virtio_device_reset_locked(vio);
	}

	/*
	 * We don't need to do anything for the provider initlevel, as it
	 * merely records the fact that virtio_init_complete() was called.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_PROVIDER;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_REGS) {
		/*
		 * Unmap PCI BAR0.
		 */
		ddi_regs_map_free(&vio->vio_barh);

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_REGS;
	}

	/*
	 * Ensure we have torn down everything we set up.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_SHUTDOWN;
	VERIFY0(vio->vio_initlevel);

	mutex_exit(&vio->vio_mutex);
	mutex_destroy(&vio->vio_mutex);

	kmem_free(vio, sizeof (*vio));
}

/*
 * Early device initialisation for legacy (pre-1.0 specification) virtio
 * devices.
 */
virtio_t *
virtio_init(dev_info_t *dip, uint64_t driver_features, boolean_t allow_indirect)
{
	int r;

	/*
	 * First, confirm that this is a legacy device.
	 */
	ddi_acc_handle_t pci;
	if (pci_config_setup(dip, &pci) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "pci_config_setup failed");
		return (NULL);
	}

	uint8_t revid;
	if ((revid = pci_config_get8(pci, PCI_CONF_REVID)) == PCI_EINVAL8) {
		dev_err(dip, CE_WARN, "could not read config space");
		pci_config_teardown(&pci);
		return (NULL);
	}

	pci_config_teardown(&pci);

	/*
	 * The legacy specification requires that the device advertise as PCI
	 * Revision 0.
	 */
	if (revid != 0) {
		dev_err(dip, CE_WARN, "PCI Revision %u incorrect for "
		    "legacy virtio device", (uint_t)revid);
		return (NULL);
	}

	virtio_t *vio = kmem_zalloc(sizeof (*vio), KM_SLEEP);
	vio->vio_dip = dip;

	/*
	 * Map PCI BAR0 for legacy device access.
	 */
	if ((r = ddi_regs_map_setup(dip, VIRTIO_LEGACY_PCI_BAR0,
	    (caddr_t *)&vio->vio_bar, 0, 0, &virtio_acc_attr,
	    &vio->vio_barh)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "ddi_regs_map_setup failure (%d)", r);
		kmem_free(vio, sizeof (*vio));
		return (NULL);
	}
	vio->vio_initlevel |= VIRTIO_INITLEVEL_REGS;

	/*
	 * We initialise the mutex without an interrupt priority to ease the
	 * implementation of some of the configuration space access routines.
	 * Drivers using the virtio framework MUST make a call to
	 * "virtio_init_complete()" prior to spawning other threads or enabling
	 * interrupt handlers, at which time we will destroy and reinitialise
	 * the mutex for use in our interrupt handlers.
	 */
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, NULL);

	list_create(&vio->vio_queues, sizeof (virtio_queue_t),
	    offsetof(virtio_queue_t, viq_link));

	/*
	 * Legacy virtio devices require a few common steps before we can
	 * negotiate device features.
	 */
	virtio_device_reset(vio);
	virtio_set_status(vio, VIRTIO_STATUS_ACKNOWLEDGE);
	virtio_set_status(vio, VIRTIO_STATUS_DRIVER);

	/*
	 * Negotiate features with the device.  Record the original supported
	 * feature set for debugging purposes.
	 */
	vio->vio_features_device = virtio_get32(vio,
	    VIRTIO_LEGACY_FEATURES_DEVICE);
	if (allow_indirect) {
		driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
	}
	vio->vio_features = vio->vio_features_device & driver_features;
	virtio_put32(vio, VIRTIO_LEGACY_FEATURES_DRIVER, vio->vio_features);

	/*
	 * The device-specific configuration begins at an offset into the BAR
	 * that depends on whether we have enabled MSI-X interrupts or not.
	 * Start out with the offset for pre-MSI-X operation so that we can
	 * read device configuration space prior to configuring interrupts.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;

	return (vio);
}

/*
 * Some virtio devices can change their device configuration state at any
 * time.  This function may be called by the driver during the initialisation
 * phase - before calling virtio_init_complete() - in order to register a
 * handler function which will be called when the device configuration space
 * is updated.
 */
void
virtio_register_cfgchange_handler(virtio_t *vio, ddi_intr_handler_t *func,
    void *funcarg)
{
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));
	VERIFY(!vio->vio_cfgchange_handler_added);

	mutex_enter(&vio->vio_mutex);
	vio->vio_cfgchange_handler = func;
	vio->vio_cfgchange_handlerarg = funcarg;
	mutex_exit(&vio->vio_mutex);
}

/*
 * This function must be called by the driver once it has completed early setup
 * calls.  The value of "allowed_interrupt_types" is a mask of interrupt types
 * (DDI_INTR_TYPE_MSIX, etc) that we'll try to use when installing handlers, or
 * the special value 0 to allow the system to use any available type.
 */
int
virtio_init_complete(virtio_t *vio, int allowed_interrupt_types)
{
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER));
	vio->vio_initlevel |= VIRTIO_INITLEVEL_PROVIDER;

	if (!list_is_empty(&vio->vio_queues) ||
	    vio->vio_cfgchange_handler != NULL) {
		/*
		 * Set up interrupts for the queues that have been registered.
		 */
		if (virtio_interrupts_setup(vio, allowed_interrupt_types) !=
		    DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/*
	 * We can allocate the mutex once we know the priority.
	 */
	mutex_destroy(&vio->vio_mutex);
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_destroy(&viq->viq_mutex);
		mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER,
		    virtio_intr_pri(vio));
	}

	virtio_set_status(vio, VIRTIO_STATUS_DRIVER_OK);

	return (DDI_SUCCESS);
}

boolean_t
virtio_feature_present(virtio_t *vio, uint64_t feature_mask)
{
	return ((vio->vio_features & feature_mask) != 0);
}

void *
virtio_intr_pri(virtio_t *vio)
{
	VERIFY(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED);

	return (DDI_INTR_PRI(vio->vio_interrupt_priority));
}

/*
 * Enable a bit in the device status register.  Each bit signals a level of
 * guest readiness to the host.  Use the VIRTIO_CONFIG_DEVICE_STATUS_*
 * constants for "status".  To zero the status field use virtio_device_reset().
 */
static void
virtio_set_status(virtio_t *vio, uint8_t status)
{
	VERIFY3U(status, !=, 0);

	mutex_enter(&vio->vio_mutex);

	uint8_t old = virtio_get8(vio, VIRTIO_LEGACY_DEVICE_STATUS);
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, status | old);

	mutex_exit(&vio->vio_mutex);
}

static void
virtio_device_reset_locked(virtio_t *vio)
{
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, VIRTIO_STATUS_RESET);
}

void
virtio_device_reset(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_device_reset_locked(vio);
	mutex_exit(&vio->vio_mutex);
}

/*
 * Some queues are effectively long-polled; the driver submits a series of
 * buffers and the device only returns them when there is data available.
 * During detach, we need to coordinate the return of these buffers.  Calling
 * "virtio_shutdown()" will reset the device, then allow the removal of all
 * buffers that were in flight at the time of shutdown via
 * "virtio_queue_evacuate()".
 */
void
virtio_shutdown(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Shutdown has been performed already.
		 */
		mutex_exit(&vio->vio_mutex);
		return;
	}

	/*
	 * First, mark all of the queues as shutdown.  This will prevent any
	 * further activity.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_enter(&viq->viq_mutex);
		viq->viq_shutdown = B_TRUE;
		mutex_exit(&viq->viq_mutex);
	}

	/*
	 * Now, reset the device.  This removes any queue configuration on the
	 * device side.
	 */
	virtio_device_reset_locked(vio);
	vio->vio_initlevel |= VIRTIO_INITLEVEL_SHUTDOWN;
	mutex_exit(&vio->vio_mutex);
}

/*
 * Common implementation of quiesce(9E) for simple Virtio-based devices.
 */
int
virtio_quiesce(virtio_t *vio)
{
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Device has already been reset.
		 */
		return (DDI_SUCCESS);
	}

	/*
	 * When we reset the device, it should immediately stop using any DMA
	 * memory we've previously passed to it.  All queue configuration is
	 * discarded.  This is good enough for quiesce(9E).
	 */
	virtio_device_reset_locked(vio);

	return (DDI_SUCCESS);
}

/*
 * DEVICE-SPECIFIC REGISTER ACCESS
 *
 * Note that these functions take the mutex to avoid racing with interrupt
 * enable/disable, when the device-specific offset can potentially change.
 */

uint8_t
virtio_dev_get8(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint8_t r = virtio_get8(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint16_t
virtio_dev_get16(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint16_t r = virtio_get16(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint32_t
virtio_dev_get32(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint32_t r = virtio_get32(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint64_t
virtio_dev_get64(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	/*
	 * On at least some systems, a 64-bit read or write to this BAR is not
	 * possible.  For legacy devices, there is no generation number to use
	 * to determine if configuration may have changed half-way through a
	 * read.  We need to continue to read both halves of the value until we
	 * read the same value at least twice.
	 */
	uintptr_t o_lo = vio->vio_config_offset + offset;
	uintptr_t o_hi = o_lo + 4;

	uint64_t val = virtio_get32(vio, o_lo) |
	    ((uint64_t)virtio_get32(vio, o_hi) << 32);

	for (;;) {
		uint64_t tval = virtio_get32(vio, o_lo) |
		    ((uint64_t)virtio_get32(vio, o_hi) << 32);

		if (tval == val) {
			break;
		}

		val = tval;
	}

	mutex_exit(&vio->vio_mutex);
	return (val);
}

void
virtio_dev_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put8(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put16(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put32(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

/*
 * VIRTQUEUE MANAGEMENT
 */

static int
virtio_inflight_compar(const void *lp, const void *rp)
{
	const virtio_chain_t *l = lp;
	const virtio_chain_t *r = rp;

	if (l->vic_head < r->vic_head) {
		return (-1);
	} else if (l->vic_head > r->vic_head) {
		return (1);
	} else {
		return (0);
	}
}

virtio_queue_t *
virtio_queue_alloc(virtio_t *vio, uint16_t qidx, const char *name,
    ddi_intr_handler_t *func, void *funcarg, boolean_t force_direct,
    uint_t max_segs)
{
	uint16_t qsz;
	char space_name[256];

	if (max_segs < 1) {
		/*
		 * Every descriptor, direct or indirect, needs to refer to at
		 * least one buffer.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "segment count must be at least 1", name, (uint_t)qidx);
		return (NULL);
	}

	mutex_enter(&vio->vio_mutex);

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER) {
		/*
		 * Cannot configure any more queues once initial setup is
		 * complete and interrupts have been allocated.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "alloc after init complete", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	/*
	 * There is no way to negotiate a different queue size for legacy
	 * devices.  We must read and use the native queue size of the device.
	 */
	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	if ((qsz = virtio_get16(vio, VIRTIO_LEGACY_QUEUE_SIZE)) == 0) {
		/*
		 * A size of zero means the device does not have a queue with
		 * this index.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "does not exist on device", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	mutex_exit(&vio->vio_mutex);

	virtio_queue_t *viq = kmem_zalloc(sizeof (*viq), KM_SLEEP);
	viq->viq_virtio = vio;
	viq->viq_name = name;
	viq->viq_index = qidx;
	viq->viq_size = qsz;
	viq->viq_func = func;
	viq->viq_funcarg = funcarg;
	viq->viq_max_segs = max_segs;
	avl_create(&viq->viq_inflight, virtio_inflight_compar,
	    sizeof (virtio_chain_t), offsetof(virtio_chain_t, vic_node));

	/*
	 * Allocate the mutex without an interrupt priority for now, as we do
	 * with "vio_mutex".  We'll reinitialise it in
	 * "virtio_init_complete()".
	 */
	mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER, NULL);

	if (virtio_feature_present(vio, VIRTIO_F_RING_INDIRECT_DESC) &&
	    !force_direct) {
		/*
		 * If we were able to negotiate the indirect descriptor
		 * feature, and the caller has not explicitly forced the use of
		 * direct descriptors, we'll allocate indirect descriptor lists
		 * for each chain.
		 */
		viq->viq_indirect = B_TRUE;
	}

	/*
	 * Track descriptor usage in an identifier space.
	 */
	(void) snprintf(space_name, sizeof (space_name), "%s%d_vq_%s",
	    ddi_get_name(vio->vio_dip), ddi_get_instance(vio->vio_dip), name);
	if ((viq->viq_descmap = id_space_create(space_name, 0, qsz)) == NULL) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate descriptor "
		    "ID space");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * For legacy devices, memory for the queue has a strict layout
	 * determined by the queue size.
	 */
	size_t sz_descs = sizeof (virtio_vq_desc_t) * qsz;
	size_t sz_driver = P2ROUNDUP_TYPED(sz_descs +
	    sizeof (virtio_vq_driver_t) +
	    sizeof (uint16_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);
	size_t sz_device = P2ROUNDUP_TYPED(sizeof (virtio_vq_device_t) +
	    sizeof (virtio_vq_elem_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);

	if (virtio_dma_init(vio, &viq->viq_dma, sz_driver + sz_device,
	    &virtio_dma_attr_queue, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
	    KM_SLEEP) != DDI_SUCCESS) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate queue "
		    "DMA memory");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * NOTE: The viq_dma_* members below are used by
	 * VIRTQ_DMA_SYNC_FORDEV() and VIRTQ_DMA_SYNC_FORKERNEL() to calculate
	 * offsets into the DMA allocation for partial synchronisation.  If the
	 * ordering of, or relationship between, these pointers changes, the
	 * macros must be kept in sync.
	 */
	viq->viq_dma_descs = virtio_dma_va(&viq->viq_dma, 0);
	viq->viq_dma_driver = virtio_dma_va(&viq->viq_dma, sz_descs);
	viq->viq_dma_device = virtio_dma_va(&viq->viq_dma, sz_driver);

	/*
	 * Install in the per-device list of queues.
	 */
	mutex_enter(&vio->vio_mutex);
	for (virtio_queue_t *chkvq = list_head(&vio->vio_queues); chkvq != NULL;
	    chkvq = list_next(&vio->vio_queues, chkvq)) {
		if (chkvq->viq_index == qidx) {
			dev_err(vio->vio_dip, CE_WARN, "attempt to register "
			    "queue \"%s\" with same index (%d) as queue \"%s\"",
			    name, qidx, chkvq->viq_name);
			mutex_exit(&vio->vio_mutex);
			virtio_queue_free(viq);
			return (NULL);
		}
	}
	list_insert_tail(&vio->vio_queues, viq);

	/*
	 * Ensure the zeroing of the queue memory is visible to the host before
	 * we inform the device of the queue address.
	 */
	membar_producer();
	VIRTQ_DMA_SYNC_FORDEV(viq);

	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS,
	    virtio_dma_cookie_pa(&viq->viq_dma, 0) >> VIRTIO_PAGE_SHIFT);

	mutex_exit(&vio->vio_mutex);
	return (viq);
}

static void
virtio_queue_free(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	/*
	 * We are going to destroy the queue mutex.  Make sure we've already
	 * removed the interrupt handlers.
	 */
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));

	mutex_enter(&viq->viq_mutex);

	/*
	 * If the device has not already been reset as part of a shutdown,
	 * detach the queue from the device now.
	 */
	if (!viq->viq_shutdown) {
		virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, viq->viq_index);
		virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS, 0);
	}

	virtio_dma_fini(&viq->viq_dma);

	VERIFY(avl_is_empty(&viq->viq_inflight));
	avl_destroy(&viq->viq_inflight);
	if (viq->viq_descmap != NULL) {
		id_space_destroy(viq->viq_descmap);
	}

	mutex_exit(&viq->viq_mutex);
	mutex_destroy(&viq->viq_mutex);

	kmem_free(viq, sizeof (*viq));
}

void
virtio_queue_no_interrupt(virtio_queue_t *viq, boolean_t stop_interrupts)
{
	mutex_enter(&viq->viq_mutex);

	if (stop_interrupts) {
		viq->viq_dma_driver->vqdr_flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
	} else {
		viq->viq_dma_driver->vqdr_flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
	}
	VIRTQ_DMA_SYNC_FORDEV(viq);

	mutex_exit(&viq->viq_mutex);
}

static virtio_chain_t *
virtio_queue_complete(virtio_queue_t *viq, uint_t index)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	virtio_chain_t *vic;

	virtio_chain_t search;
	bzero(&search, sizeof (search));
	search.vic_head = index;

	if ((vic = avl_find(&viq->viq_inflight, &search, NULL)) == NULL) {
		return (NULL);
	}
	avl_remove(&viq->viq_inflight, vic);

	return (vic);
}

uint_t
virtio_queue_size(virtio_queue_t *viq)
{
	return (viq->viq_size);
}

uint_t
virtio_queue_nactive(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	uint_t r = avl_numnodes(&viq->viq_inflight);
	mutex_exit(&viq->viq_mutex);

	return (r);
}

virtio_chain_t *
virtio_queue_poll(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	if (viq->viq_shutdown) {
		/*
		 * The device has been reset by virtio_shutdown(), and queue
		 * processing has been halted.  Any previously submitted chains
		 * will be evacuated using virtio_queue_evacuate().
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (viq->viq_device_index == viq->viq_dma_device->vqde_index) {
		/*
		 * If the device index has not changed since the last poll,
		 * there are no new chains to process.
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	/*
	 * We need to ensure that all reads from the descriptor (vqde_ring[])
	 * and any referenced memory by the descriptor occur after we have read
	 * the descriptor index value above (vqde_index).
	 */
	membar_consumer();

	uint16_t index = (viq->viq_device_index++) % viq->viq_size;
	uint16_t start = viq->viq_dma_device->vqde_ring[index].vqe_start;
	uint32_t len = viq->viq_dma_device->vqde_ring[index].vqe_len;

	virtio_chain_t *vic;
	if ((vic = virtio_queue_complete(viq, start)) == NULL) {
		/*
		 * We could not locate a chain for this descriptor index, which
		 * suggests that something has gone horribly wrong.
		 */
		dev_err(viq->viq_virtio->vio_dip, CE_PANIC,
		    "queue \"%s\" ring entry %u (descriptor %u) has no chain",
		    viq->viq_name, (uint16_t)index, (uint16_t)start);
	}

	vic->vic_received_length = len;

	mutex_exit(&viq->viq_mutex);

	return (vic);
}

/*
 * After a call to "virtio_shutdown()", the driver must retrieve any previously
 * submitted chains and free any associated resources.
 */
virtio_chain_t *
virtio_queue_evacuate(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	mutex_enter(&vio->vio_mutex);
	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN)) {
		dev_err(vio->vio_dip, CE_PANIC,
		    "virtio_queue_evacuate() without virtio_shutdown()");
	}
	mutex_exit(&vio->vio_mutex);

	mutex_enter(&viq->viq_mutex);
	VERIFY(viq->viq_shutdown);

	virtio_chain_t *vic = avl_first(&viq->viq_inflight);
	if (vic != NULL) {
		avl_remove(&viq->viq_inflight, vic);
	}

	mutex_exit(&viq->viq_mutex);

	return (vic);
}

/*
 * VIRTQUEUE DESCRIPTOR CHAIN MANAGEMENT
 */

/*
 * When the device returns a descriptor chain to the driver, it may provide the
 * length in bytes of data written into the chain.  Client drivers should use
 * this value with care; the specification suggests some device implementations
 * have not always provided a useful or correct value.
 */
size_t
virtio_chain_received_length(virtio_chain_t *vic)
{
	return (vic->vic_received_length);
}

/*
 * Allocate a descriptor chain for use with this queue.  The "kmflags" value
 * may be KM_SLEEP or KM_NOSLEEP as per kmem_alloc(9F).
 */
virtio_chain_t *
virtio_chain_alloc(virtio_queue_t *viq, int kmflags)
{
	virtio_t *vio = viq->viq_virtio;
	virtio_chain_t *vic;
	uint_t cap;

	/*
	 * Direct descriptors are known by their index in the descriptor table
	 * for the queue.  We use the variable-length array member at the end
	 * of the chain tracking object to hold the list of direct descriptors
	 * assigned to this chain.
	 */
	if (viq->viq_indirect) {
		/*
		 * When using indirect descriptors we still need one direct
		 * descriptor entry to hold the physical address and length of
		 * the indirect descriptor table.
		 */
		cap = 1;
	} else {
		/*
		 * For direct descriptors we need to be able to track a
		 * descriptor for each possible segment in a single chain.
		 */
		cap = viq->viq_max_segs;
	}

	size_t vicsz = sizeof (*vic) + sizeof (uint16_t) * cap;
	if ((vic = kmem_zalloc(vicsz, kmflags)) == NULL) {
		return (NULL);
	}
	vic->vic_vq = viq;
	vic->vic_direct_capacity = cap;

	if (viq->viq_indirect) {
		/*
		 * Allocate an indirect descriptor list with the appropriate
		 * number of entries.
		 */
		if (virtio_dma_init(vio, &vic->vic_indirect_dma,
		    sizeof (virtio_vq_desc_t) * viq->viq_max_segs,
		    &virtio_dma_attr_indirect,
		    DDI_DMA_CONSISTENT | DDI_DMA_WRITE,
		    kmflags) != DDI_SUCCESS) {
			goto fail;
		}

		/*
		 * Allocate a single descriptor to hold the indirect list.
		 * Leave the length as zero for now; it will be set to include
		 * any occupied entries at push time.
		 */
		mutex_enter(&viq->viq_mutex);
		if (virtio_chain_append_impl(vic,
		    virtio_dma_cookie_pa(&vic->vic_indirect_dma, 0), 0,
		    VIRTQ_DESC_F_INDIRECT) != DDI_SUCCESS) {
			mutex_exit(&viq->viq_mutex);
			goto fail;
		}
		mutex_exit(&viq->viq_mutex);
		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * Don't set the indirect capacity until after we've installed
		 * the direct descriptor which points at the indirect list, or
		 * virtio_chain_append_impl() will be confused.
		 */
		vic->vic_indirect_capacity = viq->viq_max_segs;
	}

	return (vic);

fail:
	virtio_dma_fini(&vic->vic_indirect_dma);
	kmem_free(vic, vicsz);
	return (NULL);
}

void *
virtio_chain_data(virtio_chain_t *vic)
{
	return (vic->vic_data);
}

void
virtio_chain_data_set(virtio_chain_t *vic, void *data)
{
	vic->vic_data = data;
}

void
virtio_chain_clear(virtio_chain_t *vic)
{
	if (vic->vic_indirect_capacity != 0) {
		/*
		 * There should only be one direct descriptor, which points at
		 * our indirect descriptor list.  We don't want to clear it
		 * here.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);

		if (vic->vic_indirect_used > 0) {
			/*
			 * Clear out the indirect descriptor table.
			 */
			vic->vic_indirect_used = 0;
			bzero(virtio_dma_va(&vic->vic_indirect_dma, 0),
			    virtio_dma_size(&vic->vic_indirect_dma));
		}

	} else if (vic->vic_direct_capacity > 0) {
		/*
		 * Release any descriptors that were assigned to us previously.
		 */
		for (uint_t i = 0; i < vic->vic_direct_used; i++) {
			id_free(vic->vic_vq->viq_descmap, vic->vic_direct[i]);
			vic->vic_direct[i] = 0;
		}
		vic->vic_direct_used = 0;
	}
}

void
virtio_chain_free(virtio_chain_t *vic)
{
	/*
	 * First ensure that we have released any descriptors used by this
	 * chain.
	 */
	virtio_chain_clear(vic);

	if (vic->vic_indirect_capacity > 0) {
		/*
		 * Release the direct descriptor that points to our indirect
		 * descriptor list.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);
		id_free(vic->vic_vq->viq_descmap, vic->vic_direct[0]);

		virtio_dma_fini(&vic->vic_indirect_dma);
	}

	size_t vicsz = sizeof (*vic) +
	    vic->vic_direct_capacity * sizeof (uint16_t);

	kmem_free(vic, vicsz);
}

static inline int
virtio_queue_descmap_alloc(virtio_queue_t *viq, uint_t *indexp)
{
	id_t index;

	if ((index = id_alloc_nosleep(viq->viq_descmap)) == -1) {
		return (ENOMEM);
	}

	VERIFY3S(index, >=, 0);
	VERIFY3S(index, <=, viq->viq_size);

	*indexp = (uint_t)index;
	return (0);
}

static int
virtio_chain_append_impl(virtio_chain_t *vic, uint64_t pa, size_t len,
    uint16_t flags)
{
	virtio_queue_t *viq = vic->vic_vq;
	virtio_vq_desc_t *vqd;
	uint_t index;

	/*
	 * We're modifying the queue-wide descriptor list so make sure we have
	 * the appropriate lock.
	 */
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	if (vic->vic_indirect_capacity != 0) {
		/*
		 * Use indirect descriptors.
		 */
		if (vic->vic_indirect_used >= vic->vic_indirect_capacity) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&vic->vic_indirect_dma, 0);

		if ((index = vic->vic_indirect_used++) > 0) {
			/*
			 * Chain the current last indirect descriptor to the
			 * new one.
			 */
			vqd[index - 1].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[index - 1].vqd_next = index;
		}

	} else {
		/*
		 * Use direct descriptors.
		 */
		if (vic->vic_direct_used >= vic->vic_direct_capacity) {
			return (DDI_FAILURE);
		}

		if (virtio_queue_descmap_alloc(viq, &index) != 0) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&viq->viq_dma, 0);

		if (vic->vic_direct_used > 0) {
			/*
			 * This is not the first entry.  Chain the current
			 * descriptor to the next one.
			 */
			uint16_t p = vic->vic_direct[vic->vic_direct_used - 1];

			vqd[p].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[p].vqd_next = index;
		}
		vic->vic_direct[vic->vic_direct_used++] = index;
	}

	vqd[index].vqd_addr = pa;
	vqd[index].vqd_len = len;
	vqd[index].vqd_flags = flags;
	vqd[index].vqd_next = 0;

	return (DDI_SUCCESS);
}

int
virtio_chain_append(virtio_chain_t *vic, uint64_t pa, size_t len,
    virtio_direction_t dir)
{
	virtio_queue_t *viq = vic->vic_vq;
	uint16_t flags = 0;

	switch (dir) {
	case VIRTIO_DIR_DEVICE_WRITES:
		flags |= VIRTQ_DESC_F_WRITE;
		break;

	case VIRTIO_DIR_DEVICE_READS:
		break;

	default:
		panic("unknown direction value %u", dir);
	}

	mutex_enter(&viq->viq_mutex);
	int r = virtio_chain_append_impl(vic, pa, len, flags);
	mutex_exit(&viq->viq_mutex);

	return (r);
}

static void
virtio_queue_flush_locked(virtio_queue_t *viq)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	/*
	 * Make sure any writes we have just made to the descriptors
	 * (vqdr_ring[]) are visible to the device before we update the ring
	 * pointer (vqdr_index).
	 */
	membar_producer();
	viq->viq_dma_driver->vqdr_index = viq->viq_driver_index;
	VIRTQ_DMA_SYNC_FORDEV(viq);

	/*
	 * Determine whether the device expects us to notify it of new
	 * descriptors.
	 */
	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (!(viq->viq_dma_device->vqde_flags & VIRTQ_USED_F_NO_NOTIFY)) {
		virtio_put16(viq->viq_virtio, VIRTIO_LEGACY_QUEUE_NOTIFY,
		    viq->viq_index);
	}
}

void
virtio_queue_flush(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	virtio_queue_flush_locked(viq);
	mutex_exit(&viq->viq_mutex);
}

void
virtio_chain_submit(virtio_chain_t *vic, boolean_t flush)
{
	virtio_queue_t *viq = vic->vic_vq;

	mutex_enter(&viq->viq_mutex);

	if (vic->vic_indirect_capacity != 0) {
		virtio_vq_desc_t *vqd = virtio_dma_va(&viq->viq_dma, 0);

		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * This is an indirect descriptor queue.  The length in bytes
		 * of the descriptor must extend to cover the populated
		 * indirect descriptor entries.
		 */
		vqd[vic->vic_direct[0]].vqd_len =
		    sizeof (virtio_vq_desc_t) * vic->vic_indirect_used;

		virtio_dma_sync(&vic->vic_indirect_dma, DDI_DMA_SYNC_FORDEV);
	}

	/*
	 * Populate the next available slot in the driver-owned ring for this
	 * chain.
	 * The updated value of viq_driver_index is not yet visible to
	 * the device until a subsequent queue flush.
	 */
	uint16_t index = (viq->viq_driver_index++) % viq->viq_size;
	viq->viq_dma_driver->vqdr_ring[index] = vic->vic_direct[0];

	vic->vic_head = vic->vic_direct[0];
	avl_add(&viq->viq_inflight, vic);

	if (flush) {
		virtio_queue_flush_locked(vic->vic_vq);
	}

	mutex_exit(&viq->viq_mutex);
}

/*
 * INTERRUPTS MANAGEMENT
 */

static const char *
virtio_interrupt_type_name(int type)
{
	switch (type) {
	case DDI_INTR_TYPE_MSIX:
		return ("MSI-X");
	case DDI_INTR_TYPE_MSI:
		return ("MSI");
	case DDI_INTR_TYPE_FIXED:
		return ("fixed");
	default:
		return ("?");
	}
}

static int
virtio_interrupts_alloc(virtio_t *vio, int type, int nrequired)
{
	dev_info_t *dip = vio->vio_dip;
	int nintrs = 0;
	int navail = 0;

	VERIFY(MUTEX_HELD(&vio->vio_mutex));
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC));

	if (ddi_intr_get_nintrs(dip, type, &nintrs) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (nintrs < 1) {
		dev_err(dip, CE_WARN, "no %s interrupts supported",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}

	if (ddi_intr_get_navail(dip, type, &navail) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count available %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (navail < nrequired) {
		dev_err(dip, CE_WARN, "need %d %s interrupts, but only %d "
		    "available", nrequired, virtio_interrupt_type_name(type),
		    navail);
		return (DDI_FAILURE);
	}

	VERIFY3P(vio->vio_interrupts, ==, NULL);
	vio->vio_interrupts = kmem_zalloc(
	    sizeof (ddi_intr_handle_t) * nrequired, KM_SLEEP);

	int r;
	if ((r = ddi_intr_alloc(dip, vio->vio_interrupts, type, 0, nrequired,
	    &vio->vio_ninterrupts, DDI_INTR_ALLOC_STRICT)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "%s interrupt allocation failure (%d)",
		    virtio_interrupt_type_name(type), r);
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * nrequired);
		vio->vio_interrupts = NULL;
		return (DDI_FAILURE);
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ALLOC;
	vio->vio_interrupt_type = type;
	return (DDI_SUCCESS);
}

static uint_t
virtio_shared_isr(caddr_t arg0, caddr_t arg1)
{
	virtio_t *vio = (virtio_t *)arg0;
	uint_t r = DDI_INTR_UNCLAIMED;
	uint8_t isr;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Check the ISR status to see if the interrupt applies to us.  Reading
	 * this field resets it to zero.
	 */
	isr = virtio_get8(vio, VIRTIO_LEGACY_ISR_STATUS);

	if ((isr & VIRTIO_ISR_CHECK_QUEUES) != 0) {
		r = DDI_INTR_CLAIMED;

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (viq->viq_func != NULL) {
				mutex_exit(&vio->vio_mutex);
				(void) viq->viq_func(viq->viq_funcarg, arg0);
				mutex_enter(&vio->vio_mutex);

				if (vio->vio_initlevel &
				    VIRTIO_INITLEVEL_SHUTDOWN) {
					/*
					 * The device was shut down while in a
					 * queue handler routine.
					 */
					break;
				}
			}
		}
	}

	mutex_exit(&vio->vio_mutex);

	/*
	 * vio_cfgchange_{handler,handlerarg} cannot change while interrupts
	 * are configured so it is safe to access them outside of the lock.
	 */

	if ((isr & VIRTIO_ISR_CHECK_CONFIG) != 0) {
		r = DDI_INTR_CLAIMED;
		if (vio->vio_cfgchange_handler != NULL) {
			(void) vio->vio_cfgchange_handler(
			    (caddr_t)vio->vio_cfgchange_handlerarg,
			    (caddr_t)vio);
		}
	}

	return (r);
}

static int
virtio_interrupts_setup(virtio_t *vio, int allow_types)
{
	dev_info_t *dip = vio->vio_dip;
	int types;
	int count = 0;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Determine the number of interrupts we'd like based on the number of
	 * virtqueues.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func != NULL) {
			count++;
		}
	}

	/*
	 * If there is a configuration change handler, one extra interrupt
	 * is needed for that.
	 */
	if (vio->vio_cfgchange_handler != NULL)
		count++;

	if (ddi_intr_get_supported_types(dip, &types) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not get supported interrupts");
		mutex_exit(&vio->vio_mutex);
		return (DDI_FAILURE);
	}

	if (allow_types != VIRTIO_ANY_INTR_TYPE) {
		/*
		 * Restrict the possible interrupt types at the request of the
		 * driver.
		 */
		types &= allow_types;
	}

	/*
	 * Try each potential interrupt type in descending order of preference.
	 * Note that the specification does not appear to allow for the use of
	 * classical MSI, so we are limited to either MSI-X or fixed
	 * interrupts.
	 */
	if (types & DDI_INTR_TYPE_MSIX) {
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_MSIX,
		    count) == DDI_SUCCESS) {
			goto add_handlers;
		}
	}
	if (types & DDI_INTR_TYPE_FIXED) {
		/*
		 * If fixed interrupts are all that are available, we'll just
		 * ask for one.
		 */
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_FIXED, 1) ==
		    DDI_SUCCESS) {
			goto add_handlers;
		}
	}

	dev_err(dip, CE_WARN, "interrupt allocation failed");
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);

add_handlers:
	/*
	 * Ensure that we have not been given any high-level interrupts as our
	 * interrupt handlers do not support them.
	 */
	for (int i = 0; i < vio->vio_ninterrupts; i++) {
		uint_t ipri;

		if (ddi_intr_get_pri(vio->vio_interrupts[i], &ipri) !=
		    DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "could not determine interrupt "
			    "priority");
			goto fail;
		}

		if (ipri >= ddi_intr_get_hilevel_pri()) {
			dev_err(dip, CE_WARN, "high level interrupts not "
			    "supported");
			goto fail;
		}

		/*
		 * Record the highest priority we've been allocated to use for
		 * mutex initialisation.
		 */
		if (i == 0 || ipri > vio->vio_interrupt_priority) {
			vio->vio_interrupt_priority = ipri;
		}
	}

	/*
	 * Get the interrupt capabilities from the first handle to determine
	 * whether we need to use ddi_intr_block_enable(9F).
	 */
	if (ddi_intr_get_cap(vio->vio_interrupts[0],
	    &vio->vio_interrupt_cap) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get interrupt capabilities");
		goto fail;
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		VERIFY3S(vio->vio_ninterrupts, ==, 1);
		/*
		 * For fixed interrupts, we need to use our shared handler to
		 * multiplex the per-queue handlers provided by the driver.
		 */
		if (ddi_intr_add_handler(vio->vio_interrupts[0],
		    virtio_shared_isr, (caddr_t)vio, NULL) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding shared %s interrupt "
			    "handler failed", virtio_interrupt_type_name(
			    vio->vio_interrupt_type));
			goto fail;
		}

		goto done;
	}

	VERIFY3S(vio->vio_ninterrupts, ==, count);

	uint_t n = 0;

	/* Bind the configuration vector interrupt */
	if (vio->vio_cfgchange_handler != NULL) {
		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    vio->vio_cfgchange_handler,
		    (caddr_t)vio->vio_cfgchange_handlerarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN,
			    "adding configuration change interrupt failed");
			goto fail;
		}
		vio->vio_cfgchange_handler_added = B_TRUE;
		vio->vio_cfgchange_handler_index = n;
		n++;
	}

	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func == NULL) {
			continue;
		}

		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    viq->viq_func, (caddr_t)viq->viq_funcarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding interrupt %u (%s) failed",
			    n, viq->viq_name);
			goto fail;
		}

		viq->viq_handler_index = n;
		viq->viq_handler_added = B_TRUE;
		n++;
	}

done:
	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ADDED;
	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);

fail:
	virtio_interrupts_teardown(vio);
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);
}

static void
virtio_interrupts_teardown(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	virtio_interrupts_disable_locked(vio);

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		/*
		 * Remove the multiplexing interrupt handler.
		 */
		if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED) {
			int r;

			VERIFY3S(vio->vio_ninterrupts, ==, 1);

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "shared interrupt handler failed (%d)", r);
			}
		}
	} else {
		/*
		 * Remove the configuration vector interrupt handler.
		 */
		if (vio->vio_cfgchange_handler_added) {
			int r;

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN,
				    "removing configuration change interrupt "
				    "handler failed (%d)", r);
			}
			vio->vio_cfgchange_handler_added = B_FALSE;
		}

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			int r;

			if (!viq->viq_handler_added) {
				continue;
			}

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[viq->viq_handler_index])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "interrupt handler (%s) failed (%d)",
				    viq->viq_name, r);
			}

			viq->viq_handler_added = B_FALSE;
		}
	}
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ADDED;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC) {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			int r;

			if ((r = ddi_intr_free(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "freeing "
				    "interrupt %u failed (%d)", i, r);
			}
		}
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * vio->vio_ninterrupts);
		vio->vio_interrupts = NULL;
		vio->vio_ninterrupts = 0;
		vio->vio_interrupt_type = 0;
		vio->vio_interrupt_cap = 0;
		vio->vio_interrupt_priority = 0;

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ALLOC;
	}
}

static void
virtio_interrupts_unwind(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT,
			    viq->viq_index);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}

		if (vio->vio_cfgchange_handler_added) {
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_CONFIG,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}
	}

	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		(void) ddi_intr_block_disable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			(void) ddi_intr_disable(vio->vio_interrupts[i]);
		}
	}

	/*
	 * Disabling the interrupts makes the MSI-X fields disappear from the
	 * BAR once more.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;
}

int
virtio_interrupts_enable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED) {
		mutex_exit(&vio->vio_mutex);
		return (DDI_SUCCESS);
	}

	int r = DDI_SUCCESS;
	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		r = ddi_intr_block_enable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			if ((r = ddi_intr_enable(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				/*
				 * Disable the interrupts we have enabled so
				 * far.
				 */
				for (i--; i >= 0; i--) {
					(void) ddi_intr_disable(
					    vio->vio_interrupts[i]);
				}
				break;
			}
		}
	}

	if (r != DDI_SUCCESS) {
		mutex_exit(&vio->vio_mutex);
		return (r);
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		/*
		 * When asked to enable the interrupts, the system enables
		 * MSI-X in the PCI configuration for the device.  While
		 * enabled, the extra MSI-X configuration table fields appear
		 * between the general and the device-specific regions of the
		 * BAR.
		 */
		vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET_MSIX;

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			uint16_t qi = viq->viq_index;
			uint16_t msi = viq->viq_handler_index;

			/*
			 * Route interrupts for this queue to the assigned
			 * MSI-X vector number.
			 */
			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qi);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE, msi);

			/*
			 * The device may not actually accept the vector number
			 * we're attempting to program.  We need to confirm
			 * that configuration was successful by re-reading the
			 * configuration we just wrote.
			 */
			if (virtio_get16(vio, VIRTIO_LEGACY_MSIX_QUEUE) !=
			    msi) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector %u for "
				    "queue \"%s\" (#%u)", (uint_t)msi,
				    viq->viq_name, (uint_t)qi);

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}

		if (vio->vio_cfgchange_handler_added) {
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_CONFIG,
			    vio->vio_cfgchange_handler_index);

			/* Verify the value was accepted. */
			if (virtio_get16(vio, VIRTIO_LEGACY_MSIX_CONFIG) !=
			    vio->vio_cfgchange_handler_index) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector for "
				    "configuration");

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ENABLED;

	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);
}

static void
virtio_interrupts_disable_locked(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED)) {
		return;
	}

	virtio_interrupts_unwind(vio);

	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ENABLED;
}

void
virtio_interrupts_disable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_interrupts_disable_locked(vio);
	mutex_exit(&vio->vio_mutex);
}
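
/*
 * EXAMPLE (illustrative only, not part of the framework)
 *
 * A minimal sketch of how a client driver might string together the routines
 * in this file for a single request queue.  The "mydrv_*" and "MYDRV_*" names,
 * the queue index, and the buffer addresses are hypothetical; the definitive
 * usage documentation is in "virtio.h".
 *
 *	virtio_t *vio = virtio_init(dip, MYDRV_WANTED_FEATURES, B_TRUE);
 *	virtio_queue_t *vq = virtio_queue_alloc(vio, 0, "requests",
 *	    mydrv_isr, mydrv, B_FALSE, MYDRV_MAX_SEGS);
 *
 *	if (vio == NULL || vq == NULL ||
 *	    virtio_init_complete(vio, VIRTIO_ANY_INTR_TYPE) != DDI_SUCCESS ||
 *	    virtio_interrupts_enable(vio) != DDI_SUCCESS) {
 *		virtio_fini(vio, B_TRUE);
 *		return (DDI_FAILURE);
 *	}
 *
 *	// Submit one chain describing a device-readable buffer:
 *	virtio_chain_t *vic = virtio_chain_alloc(vq, KM_SLEEP);
 *	virtio_chain_data_set(vic, mydrv_cmd);
 *	(void) virtio_chain_append(vic, cmd_pa, cmd_len,
 *	    VIRTIO_DIR_DEVICE_READS);
 *	virtio_chain_submit(vic, B_TRUE);
 *
 *	// Later, typically from the queue interrupt handler:
 *	while ((vic = virtio_queue_poll(vq)) != NULL) {
 *		mydrv_cmd_done(virtio_chain_data(vic),
 *		    virtio_chain_received_length(vic));
 *		virtio_chain_free(vic);
 *	}
 */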