/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * VIRTIO FRAMEWORK
 *
 * For design and usage documentation, see the comments in "virtio.h".
 */

#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/modctl.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/avintr.h>
#include <sys/spl.h>
#include <sys/promif.h>
#include <sys/list.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/sysmacros.h>
#include <sys/pci.h>

#include "virtio.h"
#include "virtio_impl.h"


/*
 * Linkage structures
 */
static struct modlmisc virtio_modlmisc = {
	.misc_modops = &mod_miscops,
	.misc_linkinfo = "VIRTIO common routines",
};

static struct modlinkage virtio_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &virtio_modlmisc, NULL }
};

int
_init(void)
{
	return (mod_install(&virtio_modlinkage));
}

int
_fini(void)
{
	return (mod_remove(&virtio_modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&virtio_modlinkage, modinfop));
}


static void virtio_set_status(virtio_t *, uint8_t);
static int virtio_chain_append_impl(virtio_chain_t *, uint64_t, size_t,
    uint16_t);
static int virtio_interrupts_setup(virtio_t *, int);
static void virtio_interrupts_teardown(virtio_t *);
static void virtio_interrupts_disable_locked(virtio_t *);
static void virtio_queue_free(virtio_queue_t *);

/*
 * We use the same device access attributes for BAR mapping and access to the
 * virtqueue memory.
 */
ddi_device_acc_attr_t virtio_acc_attr = {
	.devacc_attr_version = DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC,
	.devacc_attr_dataorder = DDI_STORECACHING_OK_ACC,
	.devacc_attr_access = DDI_DEFAULT_ACC
};


/*
 * DMA attributes for the memory given to the device for queue management.
 */
ddi_dma_attr_t virtio_dma_attr_queue = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	/*
	 * Queue memory is aligned on VIRTIO_PAGE_SIZE with the address shifted
	 * down by VIRTIO_PAGE_SHIFT before being passed to the device in a
	 * 32-bit register.
	 */
	.dma_attr_addr_hi = 0x00000FFFFFFFF000,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = VIRTIO_PAGE_SIZE,
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};

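/*
 * A quick check of the queue address constraint above: the device is handed
 * the queue address as a 32-bit page frame number, so (assuming the legacy
 * 4096-byte virtio page, i.e. a VIRTIO_PAGE_SHIFT of 12) the highest usable
 * byte address is the largest frame number shifted back up:
 *
 *	0xFFFFFFFFULL << 12 == 0x00000FFFFFFFF000
 *
 * which matches the "dma_attr_addr_hi" value used above.
 */
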
/*
 * DMA attributes for the allocation of indirect descriptor lists.  The
 * indirect list is referenced by a regular descriptor entry: the physical
 * address field is 64 bits wide, but the length field is only 32 bits.  Each
 * descriptor is 16 bytes long.
 */
ddi_dma_attr_t virtio_dma_attr_indirect = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	.dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = sizeof (struct virtio_vq_desc),
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};


uint8_t
virtio_get8(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset)));
}

uint16_t
virtio_get16(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset)));
}

uint32_t
virtio_get32(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset)));
}

void
virtio_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	ddi_put8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset), value);
}

void
virtio_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	ddi_put16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset), value);
}

void
virtio_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	ddi_put32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset), value);
}

void
virtio_fini(virtio_t *vio, boolean_t failed)
{
	mutex_enter(&vio->vio_mutex);

	virtio_interrupts_teardown(vio);

	virtio_queue_t *viq;
	while ((viq = list_remove_head(&vio->vio_queues)) != NULL) {
		virtio_queue_free(viq);
	}
	list_destroy(&vio->vio_queues);

	if (failed) {
		/*
		 * Signal to the host that device setup failed.
		 */
		virtio_set_status(vio, VIRTIO_STATUS_FAILED);
	} else {
		virtio_device_reset(vio);
	}

	/*
	 * We don't need to do anything for the provider initlevel, as it
	 * merely records the fact that virtio_init_complete() was called.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_PROVIDER;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_REGS) {
		/*
		 * Unmap PCI BAR0.
		 */
		ddi_regs_map_free(&vio->vio_barh);

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_REGS;
	}

	/*
	 * Ensure we have torn down everything we set up.
	 */
	VERIFY0(vio->vio_initlevel);

	mutex_exit(&vio->vio_mutex);
	mutex_destroy(&vio->vio_mutex);

	kmem_free(vio, sizeof (*vio));
}

/*
 * Early device initialisation for legacy (pre-1.0 specification) virtio
 * devices.
 */
virtio_t *
virtio_init(dev_info_t *dip, uint64_t driver_features, boolean_t allow_indirect)
{
	int r;

	/*
	 * First, confirm that this is a legacy device.
	 */
	ddi_acc_handle_t pci;
	if (pci_config_setup(dip, &pci) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "pci_config_setup failed");
		return (NULL);
	}

	uint8_t revid;
	if ((revid = pci_config_get8(pci, PCI_CONF_REVID)) == PCI_EINVAL8) {
		dev_err(dip, CE_WARN, "could not read config space");
		pci_config_teardown(&pci);
		return (NULL);
	}

	pci_config_teardown(&pci);

	/*
	 * The legacy specification requires that the device advertise as PCI
	 * Revision 0.
	 */
	if (revid != 0) {
		dev_err(dip, CE_WARN, "PCI Revision %u incorrect for "
		    "legacy virtio device", (uint_t)revid);
		return (NULL);
	}

	virtio_t *vio = kmem_zalloc(sizeof (*vio), KM_SLEEP);
	vio->vio_dip = dip;

	/*
	 * Map PCI BAR0 for legacy device access.
	 */
	if ((r = ddi_regs_map_setup(dip, VIRTIO_LEGACY_PCI_BAR0,
	    (caddr_t *)&vio->vio_bar, 0, 0, &virtio_acc_attr,
	    &vio->vio_barh)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "ddi_regs_map_setup failure (%d)", r);
		kmem_free(vio, sizeof (*vio));
		return (NULL);
	}
	vio->vio_initlevel |= VIRTIO_INITLEVEL_REGS;

	/*
	 * We initialise the mutex without an interrupt priority to ease the
	 * implementation of some of the configuration space access routines.
	 * Drivers using the virtio framework MUST make a call to
	 * "virtio_init_complete()" prior to spawning other threads or enabling
	 * interrupt handlers, at which time we will destroy and reinitialise
	 * the mutex for use in our interrupt handlers.
	 */
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, NULL);

	list_create(&vio->vio_queues, sizeof (virtio_queue_t),
	    offsetof(virtio_queue_t, viq_link));

	/*
	 * Legacy virtio devices require a few common steps before we can
	 * negotiate device features.
	 */
	virtio_device_reset(vio);
	virtio_set_status(vio, VIRTIO_STATUS_ACKNOWLEDGE);
	virtio_set_status(vio, VIRTIO_STATUS_DRIVER);

	/*
	 * Negotiate features with the device.  Record the original supported
	 * feature set for debugging purposes.
	 */
	vio->vio_features_device = virtio_get32(vio,
	    VIRTIO_LEGACY_FEATURES_DEVICE);
	if (allow_indirect) {
		driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
	}
	vio->vio_features = vio->vio_features_device & driver_features;
	virtio_put32(vio, VIRTIO_LEGACY_FEATURES_DRIVER, vio->vio_features);

	/*
	 * The device-specific configuration begins at an offset into the BAR
	 * that depends on whether we have enabled MSI-X interrupts or not.
	 * Start out with the offset for pre-MSI-X operation so that we can
	 * read device configuration space prior to configuring interrupts.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;

	return (vio);
}

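/*
 * A minimal sketch of the early setup sequence above as it might appear in a
 * hypothetical client driver attach(9E) routine; the feature mask name is
 * illustrative only:
 *
 *	virtio_t *vio;
 *
 *	if ((vio = virtio_init(dip, MYDEV_WANTED_FEATURES, B_TRUE)) == NULL) {
 *		return (DDI_FAILURE);
 *	}
 *
 * At this point the driver may read device-specific configuration with
 * virtio_dev_get8() and friends, and register queues with
 * virtio_queue_alloc(), before calling virtio_init_complete().
 */
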
/*
 * This function must be called by the driver once it has completed early setup
 * calls.
 */
int
virtio_init_complete(virtio_t *vio, int allowed_interrupt_types)
{
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER));
	vio->vio_initlevel |= VIRTIO_INITLEVEL_PROVIDER;

	if (!list_is_empty(&vio->vio_queues)) {
		/*
		 * Set up interrupts for the queues that have been registered.
		 */
		if (virtio_interrupts_setup(vio, allowed_interrupt_types) !=
		    DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/*
	 * Now that the interrupt priority is known, we can reinitialise the
	 * mutexes with it.
	 */
	mutex_destroy(&vio->vio_mutex);
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_destroy(&viq->viq_mutex);
		mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER,
		    virtio_intr_pri(vio));
	}

	virtio_set_status(vio, VIRTIO_STATUS_DRIVER_OK);

	return (DDI_SUCCESS);
}

boolean_t
virtio_feature_present(virtio_t *vio, uint64_t feature_mask)
{
	return ((vio->vio_features & feature_mask) != 0);
}

void *
virtio_intr_pri(virtio_t *vio)
{
	VERIFY(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED);

	return (DDI_INTR_PRI(vio->vio_interrupt_priority));
}

/*
 * Enable a bit in the device status register.  Each bit signals a level of
 * guest readiness to the host.  Use the VIRTIO_CONFIG_DEVICE_STATUS_*
 * constants for "status".  To zero the status field use virtio_device_reset().
 */
static void
virtio_set_status(virtio_t *vio, uint8_t status)
{
	VERIFY3U(status, !=, 0);

	mutex_enter(&vio->vio_mutex);

	uint8_t old = virtio_get8(vio, VIRTIO_LEGACY_DEVICE_STATUS);
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, status | old);

	mutex_exit(&vio->vio_mutex);
}

static void
virtio_device_reset_locked(virtio_t *vio)
{
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, VIRTIO_STATUS_RESET);
}

void
virtio_device_reset(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_device_reset_locked(vio);
	mutex_exit(&vio->vio_mutex);
}

/*
 * Some queues are effectively long-polled; the driver submits a series of
 * buffers and the device only returns them when there is data available.
 * During detach, we need to coordinate the return of these buffers.  Calling
 * "virtio_shutdown()" will reset the device, then allow the removal of all
 * buffers that were in flight at the time of shutdown via
 * "virtio_queue_evacuate()".
 */
void
virtio_shutdown(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Shutdown has been performed already.
		 */
		mutex_exit(&vio->vio_mutex);
		return;
	}

	/*
	 * First, mark all of the queues as shutdown.  This will prevent any
	 * further activity.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_enter(&viq->viq_mutex);
		viq->viq_shutdown = B_TRUE;
		mutex_exit(&viq->viq_mutex);
	}

	/*
	 * Now, reset the device.  This removes any queue configuration on the
	 * device side.
	 */
	virtio_device_reset_locked(vio);
	vio->vio_initlevel |= VIRTIO_INITLEVEL_SHUTDOWN;
	mutex_exit(&vio->vio_mutex);
}

/*
 * Common implementation of quiesce(9E) for simple Virtio-based devices.
 */
int
virtio_quiesce(virtio_t *vio)
{
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Device has already been reset.
		 */
		return (DDI_SUCCESS);
	}

	/*
	 * When we reset the device, it should immediately stop using any DMA
	 * memory we've previously passed to it.  All queue configuration is
	 * discarded.  This is good enough for quiesce(9E).
	 */
	virtio_device_reset_locked(vio);

	return (DDI_SUCCESS);
}

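/*
 * A hedged sketch of how the routines above fit together over the life of a
 * hypothetical client driver:
 *
 *	attach(9E):	vio = virtio_init(dip, features, B_TRUE);
 *			... virtio_queue_alloc() for each required queue ...
 *			if (virtio_init_complete(vio, 0) != DDI_SUCCESS ||
 *			    virtio_interrupts_enable(vio) != DDI_SUCCESS) {
 *				virtio_fini(vio, B_TRUE);
 *				return (DDI_FAILURE);
 *			}
 *
 *	detach(9E):	virtio_shutdown(vio);
 *			... drain each queue with virtio_queue_evacuate() ...
 *			virtio_fini(vio, B_FALSE);
 *
 *	quiesce(9E):	return (virtio_quiesce(vio));
 */
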
/*
 * DEVICE-SPECIFIC REGISTER ACCESS
 *
 * Note that these functions take the mutex to avoid racing with interrupt
 * enable/disable, when the device-specific offset can potentially change.
 */

uint8_t
virtio_dev_get8(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint8_t r = virtio_get8(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint16_t
virtio_dev_get16(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint16_t r = virtio_get16(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint32_t
virtio_dev_get32(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint32_t r = virtio_get32(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint64_t
virtio_dev_get64(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	/*
	 * On at least some systems, a 64-bit read or write to this BAR is not
	 * possible.  For legacy devices, there is no generation number to use
	 * to determine if configuration may have changed half-way through a
	 * read.  We need to continue to read both halves of the value until we
	 * read the same value at least twice.
	 */
	uintptr_t o_lo = vio->vio_config_offset + offset;
	uintptr_t o_hi = o_lo + 4;

	uint64_t val = virtio_get32(vio, o_lo) |
	    ((uint64_t)virtio_get32(vio, o_hi) << 32);

	for (;;) {
		uint64_t tval = virtio_get32(vio, o_lo) |
		    ((uint64_t)virtio_get32(vio, o_hi) << 32);

		if (tval == val) {
			break;
		}

		val = tval;
	}

	mutex_exit(&vio->vio_mutex);
	return (val);
}

void
virtio_dev_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put8(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put16(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put32(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

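/*
 * An illustrative read of a device-specific configuration field; the offset
 * name is hypothetical and would normally come from the relevant device
 * header:
 *
 *	uint16_t val = virtio_dev_get16(vio, MYDEV_CONFIG_SOME_FIELD);
 *
 * Note that 64-bit fields must be read with virtio_dev_get64(), which
 * performs the repeated double-read described above rather than a single
 * 64-bit access.
 */
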
/*
 * VIRTQUEUE MANAGEMENT
 */

static int
virtio_inflight_compar(const void *lp, const void *rp)
{
	const virtio_chain_t *l = lp;
	const virtio_chain_t *r = rp;

	if (l->vic_head < r->vic_head) {
		return (-1);
	} else if (l->vic_head > r->vic_head) {
		return (1);
	} else {
		return (0);
	}
}

virtio_queue_t *
virtio_queue_alloc(virtio_t *vio, uint16_t qidx, const char *name,
    ddi_intr_handler_t *func, void *funcarg, boolean_t force_direct,
    uint_t max_segs)
{
	uint16_t qsz;
	char space_name[256];

	if (max_segs < 1) {
		/*
		 * Every descriptor, direct or indirect, needs to refer to at
		 * least one buffer.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "segment count must be at least 1", name, (uint_t)qidx);
		return (NULL);
	}

	mutex_enter(&vio->vio_mutex);

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER) {
		/*
		 * Cannot configure any more queues once initial setup is
		 * complete and interrupts have been allocated.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "alloc after init complete", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	/*
	 * There is no way to negotiate a different queue size for legacy
	 * devices.  We must read and use the native queue size of the device.
	 */
	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	if ((qsz = virtio_get16(vio, VIRTIO_LEGACY_QUEUE_SIZE)) == 0) {
		/*
		 * A size of zero means the device does not have a queue with
		 * this index.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "does not exist on device", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	mutex_exit(&vio->vio_mutex);

	virtio_queue_t *viq = kmem_zalloc(sizeof (*viq), KM_SLEEP);
	viq->viq_virtio = vio;
	viq->viq_name = name;
	viq->viq_index = qidx;
	viq->viq_size = qsz;
	viq->viq_func = func;
	viq->viq_funcarg = funcarg;
	viq->viq_max_segs = max_segs;
	avl_create(&viq->viq_inflight, virtio_inflight_compar,
	    sizeof (virtio_chain_t), offsetof(virtio_chain_t, vic_node));

	/*
	 * Allocate the mutex without an interrupt priority for now, as we do
	 * with "vio_mutex".  We'll reinitialise it in
	 * "virtio_init_complete()".
	 */
	mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER, NULL);

	if (virtio_feature_present(vio, VIRTIO_F_RING_INDIRECT_DESC) &&
	    !force_direct) {
		/*
		 * If we were able to negotiate the indirect descriptor
		 * feature, and the caller has not explicitly forced the use of
		 * direct descriptors, we'll allocate indirect descriptor lists
		 * for each chain.
		 */
		viq->viq_indirect = B_TRUE;
	}

	/*
	 * Track descriptor usage in an identifier space.
	 */
	(void) snprintf(space_name, sizeof (space_name), "%s%d_vq_%s",
	    ddi_get_name(vio->vio_dip), ddi_get_instance(vio->vio_dip), name);
	if ((viq->viq_descmap = id_space_create(space_name, 0, qsz)) == NULL) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate descriptor "
		    "ID space");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * For legacy devices, memory for the queue has a strict layout
	 * determined by the queue size.
	 */
	size_t sz_descs = sizeof (virtio_vq_desc_t) * qsz;
	size_t sz_driver = P2ROUNDUP_TYPED(sz_descs +
	    sizeof (virtio_vq_driver_t) +
	    sizeof (uint16_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);
	size_t sz_device = P2ROUNDUP_TYPED(sizeof (virtio_vq_device_t) +
	    sizeof (virtio_vq_elem_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);

	if (virtio_dma_init(vio, &viq->viq_dma, sz_driver + sz_device,
	    &virtio_dma_attr_queue, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
	    KM_SLEEP) != DDI_SUCCESS) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate queue "
		    "DMA memory");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * NOTE: The viq_dma_* members below are used by
	 * VIRTQ_DMA_SYNC_FORDEV() and VIRTQ_DMA_SYNC_FORKERNEL() to calculate
	 * offsets into the DMA allocation for partial synchronisation.
	 * If the ordering of, or relationship between, these pointers
	 * changes, the macros must be kept in sync.
	 */
	viq->viq_dma_descs = virtio_dma_va(&viq->viq_dma, 0);
	viq->viq_dma_driver = virtio_dma_va(&viq->viq_dma, sz_descs);
	viq->viq_dma_device = virtio_dma_va(&viq->viq_dma, sz_driver);

	/*
	 * Install in the per-device list of queues.
	 */
	mutex_enter(&vio->vio_mutex);
	for (virtio_queue_t *chkvq = list_head(&vio->vio_queues); chkvq != NULL;
	    chkvq = list_next(&vio->vio_queues, chkvq)) {
		if (chkvq->viq_index == qidx) {
			dev_err(vio->vio_dip, CE_WARN, "attempt to register "
			    "queue \"%s\" with same index (%d) as queue \"%s\"",
			    name, qidx, chkvq->viq_name);
			mutex_exit(&vio->vio_mutex);
			virtio_queue_free(viq);
			return (NULL);
		}
	}
	list_insert_tail(&vio->vio_queues, viq);

	/*
	 * Ensure the zeroing of the queue memory is visible to the host before
	 * we inform the device of the queue address.
	 */
	membar_producer();
	VIRTQ_DMA_SYNC_FORDEV(viq);

	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS,
	    virtio_dma_cookie_pa(&viq->viq_dma, 0) >> VIRTIO_PAGE_SHIFT);

	mutex_exit(&vio->vio_mutex);
	return (viq);
}

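/*
 * An illustrative queue registration, assuming a hypothetical driver soft
 * state "mydev" and per-queue interrupt handler "mydev_rx_handler":
 *
 *	mydev->md_rxq = virtio_queue_alloc(vio, 0, "rx",
 *	    mydev_rx_handler, mydev, B_FALSE, MYDEV_MAX_SEGS);
 *	if (mydev->md_rxq == NULL) {
 *		virtio_fini(vio, B_TRUE);
 *		return (DDI_FAILURE);
 *	}
 *
 * As noted above, all queues must be registered before the driver calls
 * virtio_init_complete().
 */
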
static void
virtio_queue_free(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	/*
	 * We are going to destroy the queue mutex.  Make sure we've already
	 * removed the interrupt handlers.
	 */
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));

	mutex_enter(&viq->viq_mutex);

	/*
	 * If the device has not already been reset as part of a shutdown,
	 * detach the queue from the device now.
	 */
	if (!viq->viq_shutdown) {
		virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, viq->viq_index);
		virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS, 0);
	}

	virtio_dma_fini(&viq->viq_dma);

	VERIFY(avl_is_empty(&viq->viq_inflight));
	avl_destroy(&viq->viq_inflight);
	if (viq->viq_descmap != NULL) {
		id_space_destroy(viq->viq_descmap);
	}

	mutex_exit(&viq->viq_mutex);
	mutex_destroy(&viq->viq_mutex);

	kmem_free(viq, sizeof (*viq));
}

void
virtio_queue_no_interrupt(virtio_queue_t *viq, boolean_t stop_interrupts)
{
	mutex_enter(&viq->viq_mutex);

	if (stop_interrupts) {
		viq->viq_dma_driver->vqdr_flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
	} else {
		viq->viq_dma_driver->vqdr_flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
	}
	VIRTQ_DMA_SYNC_FORDEV(viq);

	mutex_exit(&viq->viq_mutex);
}

static virtio_chain_t *
virtio_queue_complete(virtio_queue_t *viq, uint_t index)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	virtio_chain_t *vic;

	virtio_chain_t search;
	bzero(&search, sizeof (search));
	search.vic_head = index;

	if ((vic = avl_find(&viq->viq_inflight, &search, NULL)) == NULL) {
		return (NULL);
	}
	avl_remove(&viq->viq_inflight, vic);

	return (vic);
}

uint_t
virtio_queue_size(virtio_queue_t *viq)
{
	return (viq->viq_size);
}

uint_t
virtio_queue_nactive(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	uint_t r = avl_numnodes(&viq->viq_inflight);
	mutex_exit(&viq->viq_mutex);

	return (r);
}

virtio_chain_t *
virtio_queue_poll(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	if (viq->viq_shutdown) {
		/*
		 * The device has been reset by virtio_shutdown(), and queue
		 * processing has been halted.  Any previously submitted chains
		 * will be evacuated using virtio_queue_evacuate().
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (viq->viq_device_index == viq->viq_dma_device->vqde_index) {
		/*
		 * If the device index has not changed since the last poll,
		 * there are no new chains to process.
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	/*
	 * We need to ensure that all reads from the descriptor (vqde_ring[])
	 * and any referenced memory by the descriptor occur after we have read
	 * the descriptor index value above (vqde_index).
	 */
	membar_consumer();

	uint16_t index = (viq->viq_device_index++) % viq->viq_size;
	uint16_t start = viq->viq_dma_device->vqde_ring[index].vqe_start;
	uint32_t len = viq->viq_dma_device->vqde_ring[index].vqe_len;

	virtio_chain_t *vic;
	if ((vic = virtio_queue_complete(viq, start)) == NULL) {
		/*
		 * We could not locate a chain for this descriptor index, which
		 * suggests that something has gone horribly wrong.
		 */
		dev_err(viq->viq_virtio->vio_dip, CE_PANIC,
		    "queue \"%s\" ring entry %u (descriptor %u) has no chain",
		    viq->viq_name, (uint16_t)index, (uint16_t)start);
	}

	vic->vic_received_length = len;

	mutex_exit(&viq->viq_mutex);

	return (vic);
}

/*
 * After a call to "virtio_shutdown()", the driver must retrieve any previously
 * submitted chains and free any associated resources.
 */
virtio_chain_t *
virtio_queue_evacuate(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	mutex_enter(&vio->vio_mutex);
	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN)) {
		dev_err(vio->vio_dip, CE_PANIC,
		    "virtio_queue_evacuate() without virtio_shutdown()");
	}
	mutex_exit(&vio->vio_mutex);

	mutex_enter(&viq->viq_mutex);
	VERIFY(viq->viq_shutdown);

	virtio_chain_t *vic = avl_first(&viq->viq_inflight);
	if (vic != NULL) {
		avl_remove(&viq->viq_inflight, vic);
	}

	mutex_exit(&viq->viq_mutex);

	return (vic);
}

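/*
 * A sketch of the detach-time evacuation pattern described above, assuming a
 * hypothetical per-request structure retrieved via virtio_chain_data():
 *
 *	virtio_shutdown(vio);
 *
 *	virtio_chain_t *vic;
 *	while ((vic = virtio_queue_evacuate(mydev->md_rxq)) != NULL) {
 *		mydev_req_t *req = virtio_chain_data(vic);
 *		... release resources held by "req" ...
 *		virtio_chain_free(vic);
 *	}
 *
 *	virtio_fini(vio, B_FALSE);
 */
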
/*
 * VIRTQUEUE DESCRIPTOR CHAIN MANAGEMENT
 */

/*
 * When the device returns a descriptor chain to the driver, it may provide the
 * length in bytes of data written into the chain.  Client drivers should use
 * this value with care; the specification suggests some device implementations
 * have not always provided a useful or correct value.
 */
size_t
virtio_chain_received_length(virtio_chain_t *vic)
{
	return (vic->vic_received_length);
}

/*
 * Allocate a descriptor chain for use with this queue.  The "kmflags" value
 * may be KM_SLEEP or KM_NOSLEEP as per kmem_alloc(9F).
 */
virtio_chain_t *
virtio_chain_alloc(virtio_queue_t *viq, int kmflags)
{
	virtio_t *vio = viq->viq_virtio;
	virtio_chain_t *vic;
	uint_t cap;

	/*
	 * Direct descriptors are known by their index in the descriptor table
	 * for the queue.  We use the variable-length array member at the end
	 * of the chain tracking object to hold the list of direct descriptors
	 * assigned to this chain.
	 */
	if (viq->viq_indirect) {
		/*
		 * When using indirect descriptors we still need one direct
		 * descriptor entry to hold the physical address and length of
		 * the indirect descriptor table.
		 */
		cap = 1;
	} else {
		/*
		 * For direct descriptors we need to be able to track a
		 * descriptor for each possible segment in a single chain.
		 */
		cap = viq->viq_max_segs;
	}

	size_t vicsz = sizeof (*vic) + sizeof (uint16_t) * cap;
	if ((vic = kmem_zalloc(vicsz, kmflags)) == NULL) {
		return (NULL);
	}
	vic->vic_vq = viq;
	vic->vic_direct_capacity = cap;

	if (viq->viq_indirect) {
		/*
		 * Allocate an indirect descriptor list with the appropriate
		 * number of entries.
		 */
		if (virtio_dma_init(vio, &vic->vic_indirect_dma,
		    sizeof (virtio_vq_desc_t) * viq->viq_max_segs,
		    &virtio_dma_attr_indirect,
		    DDI_DMA_CONSISTENT | DDI_DMA_WRITE,
		    kmflags) != DDI_SUCCESS) {
			goto fail;
		}

		/*
		 * Allocate a single descriptor to hold the indirect list.
		 * Leave the length as zero for now; it will be set to include
		 * any occupied entries at push time.
		 */
		mutex_enter(&viq->viq_mutex);
		if (virtio_chain_append_impl(vic,
		    virtio_dma_cookie_pa(&vic->vic_indirect_dma, 0), 0,
		    VIRTQ_DESC_F_INDIRECT) != DDI_SUCCESS) {
			mutex_exit(&viq->viq_mutex);
			goto fail;
		}
		mutex_exit(&viq->viq_mutex);
		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * Don't set the indirect capacity until after we've installed
		 * the direct descriptor which points at the indirect list, or
		 * virtio_chain_append_impl() will be confused.
		 */
		vic->vic_indirect_capacity = viq->viq_max_segs;
	}

	return (vic);

fail:
	virtio_dma_fini(&vic->vic_indirect_dma);
	kmem_free(vic, vicsz);
	return (NULL);
}

void *
virtio_chain_data(virtio_chain_t *vic)
{
	return (vic->vic_data);
}

void
virtio_chain_data_set(virtio_chain_t *vic, void *data)
{
	vic->vic_data = data;
}

void
virtio_chain_clear(virtio_chain_t *vic)
{
	if (vic->vic_indirect_capacity != 0) {
		/*
		 * There should only be one direct descriptor, which points at
		 * our indirect descriptor list.  We don't want to clear it
		 * here.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);

		if (vic->vic_indirect_used > 0) {
			/*
			 * Clear out the indirect descriptor table.
			 */
			vic->vic_indirect_used = 0;
			bzero(virtio_dma_va(&vic->vic_indirect_dma, 0),
			    virtio_dma_size(&vic->vic_indirect_dma));
		}

	} else if (vic->vic_direct_capacity > 0) {
		/*
		 * Release any descriptors that were assigned to us previously.
		 */
		for (uint_t i = 0; i < vic->vic_direct_used; i++) {
			id_free(vic->vic_vq->viq_descmap, vic->vic_direct[i]);
			vic->vic_direct[i] = 0;
		}
		vic->vic_direct_used = 0;
	}
}

void
virtio_chain_free(virtio_chain_t *vic)
{
	/*
	 * First ensure that we have released any descriptors used by this
	 * chain.
	 */
	virtio_chain_clear(vic);

	if (vic->vic_indirect_capacity > 0) {
		/*
		 * Release the direct descriptor that points to our indirect
		 * descriptor list.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);
		id_free(vic->vic_vq->viq_descmap, vic->vic_direct[0]);

		virtio_dma_fini(&vic->vic_indirect_dma);
	}

	size_t vicsz = sizeof (*vic) +
	    vic->vic_direct_capacity * sizeof (uint16_t);

	kmem_free(vic, vicsz);
}

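/*
 * An illustrative pairing of the chain routines above, assuming a
 * hypothetical per-request structure "req":
 *
 *	virtio_chain_t *vic = virtio_chain_alloc(viq, KM_SLEEP);
 *	virtio_chain_data_set(vic, req);
 *	... append buffers and submit; then, on completion or teardown ...
 *	virtio_chain_free(vic);
 *
 * Alternatively, virtio_chain_clear() releases the descriptors held by a
 * chain so that the chain object itself can be reused for a later request.
 */
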
static inline int
virtio_queue_descmap_alloc(virtio_queue_t *viq, uint_t *indexp)
{
	id_t index;

	if ((index = id_alloc_nosleep(viq->viq_descmap)) == -1) {
		return (ENOMEM);
	}

	VERIFY3S(index, >=, 0);
	VERIFY3S(index, <=, viq->viq_size);

	*indexp = (uint_t)index;
	return (0);
}

static int
virtio_chain_append_impl(virtio_chain_t *vic, uint64_t pa, size_t len,
    uint16_t flags)
{
	virtio_queue_t *viq = vic->vic_vq;
	virtio_vq_desc_t *vqd;
	uint_t index;

	/*
	 * We're modifying the queue-wide descriptor list so make sure we have
	 * the appropriate lock.
	 */
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	if (vic->vic_indirect_capacity != 0) {
		/*
		 * Use indirect descriptors.
		 */
		if (vic->vic_indirect_used >= vic->vic_indirect_capacity) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&vic->vic_indirect_dma, 0);

		if ((index = vic->vic_indirect_used++) > 0) {
			/*
			 * Chain the current last indirect descriptor to the
			 * new one.
			 */
			vqd[index - 1].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[index - 1].vqd_next = index;
		}

	} else {
		/*
		 * Use direct descriptors.
		 */
		if (vic->vic_direct_used >= vic->vic_direct_capacity) {
			return (DDI_FAILURE);
		}

		if (virtio_queue_descmap_alloc(viq, &index) != 0) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&viq->viq_dma, 0);

		if (vic->vic_direct_used > 0) {
			/*
			 * This is not the first entry.  Chain the current
			 * descriptor to the next one.
			 */
			uint16_t p = vic->vic_direct[vic->vic_direct_used - 1];

			vqd[p].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[p].vqd_next = index;
		}
		vic->vic_direct[vic->vic_direct_used++] = index;
	}

	vqd[index].vqd_addr = pa;
	vqd[index].vqd_len = len;
	vqd[index].vqd_flags = flags;
	vqd[index].vqd_next = 0;

	return (DDI_SUCCESS);
}

int
virtio_chain_append(virtio_chain_t *vic, uint64_t pa, size_t len,
    virtio_direction_t dir)
{
	virtio_queue_t *viq = vic->vic_vq;
	uint16_t flags = 0;

	switch (dir) {
	case VIRTIO_DIR_DEVICE_WRITES:
		flags |= VIRTQ_DESC_F_WRITE;
		break;

	case VIRTIO_DIR_DEVICE_READS:
		break;

	default:
		panic("unknown direction value %u", dir);
	}

	mutex_enter(&viq->viq_mutex);
	int r = virtio_chain_append_impl(vic, pa, len, flags);
	mutex_exit(&viq->viq_mutex);

	return (r);
}

static void
virtio_queue_flush_locked(virtio_queue_t *viq)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	/*
	 * Make sure any writes we have just made to the descriptors
	 * (vqdr_ring[]) are visible to the device before we update the ring
	 * pointer (vqdr_index).
	 */
	membar_producer();
	viq->viq_dma_driver->vqdr_index = viq->viq_driver_index;
	VIRTQ_DMA_SYNC_FORDEV(viq);

	/*
	 * Determine whether the device expects us to notify it of new
	 * descriptors.
	 */
	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (!(viq->viq_dma_device->vqde_flags & VIRTQ_USED_F_NO_NOTIFY)) {
		virtio_put16(viq->viq_virtio, VIRTIO_LEGACY_QUEUE_NOTIFY,
		    viq->viq_index);
	}
}

void
virtio_queue_flush(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	virtio_queue_flush_locked(viq);
	mutex_exit(&viq->viq_mutex);
}

void
virtio_chain_submit(virtio_chain_t *vic, boolean_t flush)
{
	virtio_queue_t *viq = vic->vic_vq;

	mutex_enter(&viq->viq_mutex);

	if (vic->vic_indirect_capacity != 0) {
		virtio_vq_desc_t *vqd = virtio_dma_va(&viq->viq_dma, 0);

		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * This is an indirect descriptor queue.  The length in bytes
		 * of the descriptor must extend to cover the populated
		 * indirect descriptor entries.
		 */
		vqd[vic->vic_direct[0]].vqd_len =
		    sizeof (virtio_vq_desc_t) * vic->vic_indirect_used;

		virtio_dma_sync(&vic->vic_indirect_dma, DDI_DMA_SYNC_FORDEV);
	}

	/*
	 * Populate the next available slot in the driver-owned ring for this
	 * chain.  The updated value of viq_driver_index will not be visible
	 * to the device until a subsequent queue flush.
	 */
	uint16_t index = (viq->viq_driver_index++) % viq->viq_size;
	viq->viq_dma_driver->vqdr_ring[index] = vic->vic_direct[0];

	vic->vic_head = vic->vic_direct[0];
	avl_add(&viq->viq_inflight, vic);

	if (flush) {
		virtio_queue_flush_locked(vic->vic_vq);
	}

	mutex_exit(&viq->viq_mutex);
}

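/*
 * An illustrative submission sequence using the routines above; DMA binding
 * and cookie handling are elided, and the buffer is assumed to be read by
 * the device:
 *
 *	if (virtio_chain_append(vic, pa, len,
 *	    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
 *		... too many segments for this chain; unwind ...
 *	}
 *	virtio_chain_submit(vic, B_TRUE);
 *
 * Passing B_FALSE for "flush" allows several chains to be queued before a
 * single virtio_queue_flush() notifies the device.
 */
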
/*
 * INTERRUPTS MANAGEMENT
 */

static const char *
virtio_interrupt_type_name(int type)
{
	switch (type) {
	case DDI_INTR_TYPE_MSIX:
		return ("MSI-X");
	case DDI_INTR_TYPE_MSI:
		return ("MSI");
	case DDI_INTR_TYPE_FIXED:
		return ("fixed");
	default:
		return ("?");
	}
}

static int
virtio_interrupts_alloc(virtio_t *vio, int type, int nrequired)
{
	dev_info_t *dip = vio->vio_dip;
	int nintrs = 0;
	int navail = 0;

	VERIFY(MUTEX_HELD(&vio->vio_mutex));
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC));

	if (ddi_intr_get_nintrs(dip, type, &nintrs) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (nintrs < 1) {
		dev_err(dip, CE_WARN, "no %s interrupts supported",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}

	if (ddi_intr_get_navail(dip, type, &navail) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count available %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (navail < nrequired) {
		dev_err(dip, CE_WARN, "need %d %s interrupts, but only %d "
		    "available", nrequired, virtio_interrupt_type_name(type),
		    navail);
		return (DDI_FAILURE);
	}

	VERIFY3P(vio->vio_interrupts, ==, NULL);
	vio->vio_interrupts = kmem_zalloc(
	    sizeof (ddi_intr_handle_t) * nrequired, KM_SLEEP);

	int r;
	if ((r = ddi_intr_alloc(dip, vio->vio_interrupts, type, 0, nrequired,
	    &vio->vio_ninterrupts, DDI_INTR_ALLOC_STRICT)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "%s interrupt allocation failure (%d)",
		    virtio_interrupt_type_name(type), r);
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * nrequired);
		vio->vio_interrupts = NULL;
		return (DDI_FAILURE);
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ALLOC;
	vio->vio_interrupt_type = type;
	return (DDI_SUCCESS);
}

static uint_t
virtio_shared_isr(caddr_t arg0, caddr_t arg1)
{
	virtio_t *vio = (virtio_t *)arg0;
	uint_t r = DDI_INTR_UNCLAIMED;
	uint8_t isr;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Check the ISR status to see if the interrupt applies to us.  Reading
	 * this field resets it to zero.
	 */
	isr = virtio_get8(vio, VIRTIO_LEGACY_ISR_STATUS);
	if ((isr & VIRTIO_ISR_CHECK_QUEUES) == 0) {
		goto done;
	}

	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func != NULL) {
			mutex_exit(&vio->vio_mutex);
			if (viq->viq_func(viq->viq_funcarg, arg0) ==
			    DDI_INTR_CLAIMED) {
				r = DDI_INTR_CLAIMED;
			}
			mutex_enter(&vio->vio_mutex);

			if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
				/*
				 * The device was shut down while in a queue
				 * handler routine.
				 */
				goto done;
			}
		}
	}

done:
	mutex_exit(&vio->vio_mutex);
	return (r);
}

static int
virtio_interrupts_setup(virtio_t *vio, int allow_types)
{
	dev_info_t *dip = vio->vio_dip;
	int types;
	int count = 0;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Determine the number of interrupts we'd like based on the number of
	 * virtqueues.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func != NULL) {
			count++;
		}
	}

	if (ddi_intr_get_supported_types(dip, &types) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not get supported interrupts");
		mutex_exit(&vio->vio_mutex);
		return (DDI_FAILURE);
	}

	if (allow_types != 0) {
		/*
		 * Restrict the possible interrupt types at the request of the
		 * driver.
		 */
		types &= allow_types;
	}

	/*
	 * Try each potential interrupt type in descending order of preference.
	 * Note that the specification does not appear to allow for the use of
	 * classical MSI, so we are limited to either MSI-X or fixed
	 * interrupts.
	 */
	if (types & DDI_INTR_TYPE_MSIX) {
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_MSIX,
		    count) == DDI_SUCCESS) {
			goto add_handlers;
		}
	}
	if (types & DDI_INTR_TYPE_FIXED) {
		/*
		 * If fixed interrupts are all that are available, we'll just
		 * ask for one.
		 */
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_FIXED, 1) ==
		    DDI_SUCCESS) {
			goto add_handlers;
		}
	}

	dev_err(dip, CE_WARN, "interrupt allocation failed");
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);

add_handlers:
	/*
	 * Ensure that we have not been given any high-level interrupts as our
	 * interrupt handlers do not support them.
	 */
	for (int i = 0; i < vio->vio_ninterrupts; i++) {
		uint_t ipri;

		if (ddi_intr_get_pri(vio->vio_interrupts[i], &ipri) !=
		    DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "could not determine interrupt "
			    "priority");
			goto fail;
		}

		if (ipri >= ddi_intr_get_hilevel_pri()) {
			dev_err(dip, CE_WARN, "high level interrupts not "
			    "supported");
			goto fail;
		}

		/*
		 * Record the highest priority we've been allocated to use for
		 * mutex initialisation.
		 */
		if (i == 0 || ipri > vio->vio_interrupt_priority) {
			vio->vio_interrupt_priority = ipri;
		}
	}

	/*
	 * Get the interrupt capabilities from the first handle to determine
	 * whether we need to use ddi_intr_block_enable(9F).
	 */
	if (ddi_intr_get_cap(vio->vio_interrupts[0],
	    &vio->vio_interrupt_cap) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get interrupt capabilities");
		goto fail;
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		VERIFY3S(vio->vio_ninterrupts, ==, 1);
		/*
		 * For fixed interrupts, we need to use our shared handler to
		 * multiplex the per-queue handlers provided by the driver.
		 */
		if (ddi_intr_add_handler(vio->vio_interrupts[0],
		    virtio_shared_isr, (caddr_t)vio, NULL) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding shared %s interrupt "
			    "handler failed", virtio_interrupt_type_name(
			    vio->vio_interrupt_type));
			goto fail;
		}

		goto done;
	}

	VERIFY3S(vio->vio_ninterrupts, ==, count);

	uint_t n = 0;
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func == NULL) {
			continue;
		}

		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    viq->viq_func, (caddr_t)viq->viq_funcarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding interrupt %u (%s) failed",
			    n, viq->viq_name);
			goto fail;
		}

		viq->viq_handler_index = n;
		viq->viq_handler_added = B_TRUE;
		n++;
	}

done:
	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ADDED;
	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);

fail:
	virtio_interrupts_teardown(vio);
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);
}

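/*
 * For reference, a hypothetical per-queue handler of the shape registered
 * above: the framework passes the "funcarg" given to virtio_queue_alloc() as
 * the first argument and the virtio_t as the second, for both MSI-X and the
 * shared fixed-interrupt path:
 *
 *	static uint_t
 *	mydev_rx_handler(caddr_t arg0, caddr_t arg1)
 *	{
 *		mydev_t *mydev = (mydev_t *)arg0;
 *		virtio_chain_t *vic;
 *
 *		while ((vic = virtio_queue_poll(mydev->md_rxq)) != NULL) {
 *			... consume virtio_chain_received_length(vic) bytes ...
 *		}
 *
 *		return (DDI_INTR_CLAIMED);
 *	}
 */
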
static void
virtio_interrupts_teardown(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	virtio_interrupts_disable_locked(vio);

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		/*
		 * Remove the multiplexing interrupt handler.
		 */
		if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED) {
			int r;

			VERIFY3S(vio->vio_ninterrupts, ==, 1);

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "shared interrupt handler failed (%d)", r);
			}
		}
	} else {
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			int r;

			if (!viq->viq_handler_added) {
				continue;
			}

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[viq->viq_handler_index])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "interrupt handler (%s) failed (%d)",
				    viq->viq_name, r);
			}

			viq->viq_handler_added = B_FALSE;
		}
	}
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ADDED;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC) {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			int r;

			if ((r = ddi_intr_free(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "freeing "
				    "interrupt %u failed (%d)", i, r);
			}
		}
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * vio->vio_ninterrupts);
		vio->vio_interrupts = NULL;
		vio->vio_ninterrupts = 0;
		vio->vio_interrupt_type = 0;
		vio->vio_interrupt_cap = 0;
		vio->vio_interrupt_priority = 0;

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ALLOC;
	}
}

static void
virtio_interrupts_unwind(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT,
			    viq->viq_index);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}
	}

	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		(void) ddi_intr_block_disable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			(void) ddi_intr_disable(vio->vio_interrupts[i]);
		}
	}

	/*
	 * Disabling the interrupts makes the MSI-X fields disappear from the
	 * BAR once more.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;
}

int
virtio_interrupts_enable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED) {
		mutex_exit(&vio->vio_mutex);
		return (DDI_SUCCESS);
	}

	int r = DDI_SUCCESS;
	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		r = ddi_intr_block_enable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			if ((r = ddi_intr_enable(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				/*
				 * Disable the interrupts we have enabled so
				 * far.
				 */
				for (i--; i >= 0; i--) {
					(void) ddi_intr_disable(
					    vio->vio_interrupts[i]);
				}
				break;
			}
		}
	}

	if (r != DDI_SUCCESS) {
		mutex_exit(&vio->vio_mutex);
		return (r);
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		/*
		 * When asked to enable the interrupts, the system enables
		 * MSI-X in the PCI configuration for the device.  While
		 * enabled, the extra MSI-X configuration table fields appear
		 * between the general and the device-specific regions of the
		 * BAR.
		 */
		vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET_MSIX;

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			uint16_t qi = viq->viq_index;
			uint16_t msi = viq->viq_handler_index;

			/*
			 * Route interrupts for this queue to the assigned
			 * MSI-X vector number.
			 */
			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qi);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE, msi);

			/*
			 * The device may not actually accept the vector number
			 * we're attempting to program.  We need to confirm
			 * that configuration was successful by re-reading the
			 * configuration we just wrote.
			 */
			if (virtio_get16(vio, VIRTIO_LEGACY_MSIX_QUEUE) !=
			    msi) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector %u for "
				    "queue \"%s\" (#%u)", (uint_t)msi,
				    viq->viq_name, (uint_t)qi);

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ENABLED;

	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);
}

static void
virtio_interrupts_disable_locked(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED)) {
		return;
	}

	virtio_interrupts_unwind(vio);

	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ENABLED;
}

void
virtio_interrupts_disable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_interrupts_disable_locked(vio);
	mutex_exit(&vio->vio_mutex);
}