/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * VIRTIO FRAMEWORK
 *
 * For design and usage documentation, see the comments in "virtio.h".
 */

#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/modctl.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/avintr.h>
#include <sys/spl.h>
#include <sys/promif.h>
#include <sys/list.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/sysmacros.h>
#include <sys/pci.h>

#include "virtio.h"
#include "virtio_impl.h"


/*
 * Linkage structures
 */
static struct modlmisc virtio_modlmisc = {
	.misc_modops = &mod_miscops,
	.misc_linkinfo = "VIRTIO common routines",
};

static struct modlinkage virtio_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &virtio_modlmisc, NULL }
};

int
_init(void)
{
	return (mod_install(&virtio_modlinkage));
}

int
_fini(void)
{
	return (mod_remove(&virtio_modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&virtio_modlinkage, modinfop));
}



static void virtio_set_status(virtio_t *, uint8_t);
static int virtio_chain_append_impl(virtio_chain_t *, uint64_t, size_t,
    uint16_t);
static int virtio_interrupts_setup(virtio_t *, int);
static void virtio_interrupts_teardown(virtio_t *);
static void virtio_interrupts_disable_locked(virtio_t *);
static void virtio_queue_free(virtio_queue_t *);
static void virtio_device_reset_locked(virtio_t *);

/*
 * We use the same device access attributes for BAR mapping and access to the
 * virtqueue memory.
 */
ddi_device_acc_attr_t virtio_acc_attr = {
	.devacc_attr_version = DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC,
	.devacc_attr_dataorder = DDI_STORECACHING_OK_ACC,
	.devacc_attr_access = DDI_DEFAULT_ACC
};


/*
 * DMA attributes for the memory given to the device for queue management.
 */
ddi_dma_attr_t virtio_dma_attr_queue = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	/*
	 * Queue memory is aligned on VIRTIO_PAGE_SIZE with the address shifted
	 * down by VIRTIO_PAGE_SHIFT before being passed to the device in a
	 * 32-bit register.
	 */
	.dma_attr_addr_hi = 0x00000FFFFFFFF000,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = VIRTIO_PAGE_SIZE,
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};

/*
 * DMA attributes for the allocation of indirect descriptor lists.  The
 * indirect list is referenced by a regular descriptor entry: the physical
 * address field is 64 bits wide, but the length field is only 32 bits.  Each
 * descriptor is 16 bytes long.
 */
ddi_dma_attr_t virtio_dma_attr_indirect = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	.dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = sizeof (struct virtio_vq_desc),
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};


uint8_t
virtio_get8(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset)));
}

uint16_t
virtio_get16(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset)));
}

uint32_t
virtio_get32(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset)));
}

void
virtio_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	ddi_put8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset), value);
}

void
virtio_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	ddi_put16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset), value);
}

void
virtio_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	ddi_put32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset), value);
}

void
virtio_fini(virtio_t *vio, boolean_t failed)
{
	mutex_enter(&vio->vio_mutex);

	virtio_interrupts_teardown(vio);

	virtio_queue_t *viq;
	while ((viq = list_remove_head(&vio->vio_queues)) != NULL) {
		virtio_queue_free(viq);
	}
	list_destroy(&vio->vio_queues);

	if (failed) {
		/*
		 * Signal to the host that device setup failed.
		 */
		virtio_set_status(vio, VIRTIO_STATUS_FAILED);
	} else {
		virtio_device_reset_locked(vio);
	}

	/*
	 * We don't need to do anything for the provider initlevel, as it
	 * merely records the fact that virtio_init_complete() was called.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_PROVIDER;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_REGS) {
		/*
		 * Unmap PCI BAR0.
		 */
		ddi_regs_map_free(&vio->vio_barh);

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_REGS;
	}

	/*
	 * Ensure we have torn down everything we set up.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_SHUTDOWN;
	VERIFY0(vio->vio_initlevel);

	mutex_exit(&vio->vio_mutex);
	mutex_destroy(&vio->vio_mutex);

	kmem_free(vio, sizeof (*vio));
}

/*
 * Early device initialisation for legacy (pre-1.0 specification) virtio
 * devices.
 */
virtio_t *
virtio_init(dev_info_t *dip, uint64_t driver_features, boolean_t allow_indirect)
{
	int r;

	/*
	 * First, confirm that this is a legacy device.
	 */
	ddi_acc_handle_t pci;
	if (pci_config_setup(dip, &pci) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "pci_config_setup failed");
		return (NULL);
	}

	uint8_t revid;
	if ((revid = pci_config_get8(pci, PCI_CONF_REVID)) == PCI_EINVAL8) {
		dev_err(dip, CE_WARN, "could not read config space");
		pci_config_teardown(&pci);
		return (NULL);
	}

	pci_config_teardown(&pci);

	/*
	 * The legacy specification requires that the device advertise as PCI
	 * Revision 0.
	 */
	if (revid != 0) {
		dev_err(dip, CE_WARN, "PCI Revision %u incorrect for "
		    "legacy virtio device", (uint_t)revid);
		return (NULL);
	}

	virtio_t *vio = kmem_zalloc(sizeof (*vio), KM_SLEEP);
	vio->vio_dip = dip;

	/*
	 * Map PCI BAR0 for legacy device access.
	 */
	if ((r = ddi_regs_map_setup(dip, VIRTIO_LEGACY_PCI_BAR0,
	    (caddr_t *)&vio->vio_bar, 0, 0, &virtio_acc_attr,
	    &vio->vio_barh)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "ddi_regs_map_setup failure (%d)", r);
		kmem_free(vio, sizeof (*vio));
		return (NULL);
	}
	vio->vio_initlevel |= VIRTIO_INITLEVEL_REGS;

	/*
	 * We initialise the mutex without an interrupt priority to ease the
	 * implementation of some of the configuration space access routines.
	 * Drivers using the virtio framework MUST make a call to
	 * "virtio_init_complete()" prior to spawning other threads or enabling
	 * interrupt handlers, at which time we will destroy and reinitialise
	 * the mutex for use in our interrupt handlers.
	 */
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, NULL);

	list_create(&vio->vio_queues, sizeof (virtio_queue_t),
	    offsetof(virtio_queue_t, viq_link));

	/*
	 * Legacy virtio devices require a few common steps before we can
	 * negotiate device features.
	 */
	virtio_device_reset(vio);
	virtio_set_status(vio, VIRTIO_STATUS_ACKNOWLEDGE);
	virtio_set_status(vio, VIRTIO_STATUS_DRIVER);

	/*
	 * Negotiate features with the device.  Record the original supported
	 * feature set for debugging purposes.
	 */
	vio->vio_features_device = virtio_get32(vio,
	    VIRTIO_LEGACY_FEATURES_DEVICE);
	if (allow_indirect) {
		driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
	}
	vio->vio_features = vio->vio_features_device & driver_features;
	virtio_put32(vio, VIRTIO_LEGACY_FEATURES_DRIVER, vio->vio_features);

	/*
	 * The device-specific configuration begins at an offset into the BAR
	 * that depends on whether we have enabled MSI-X interrupts or not.
	 * Start out with the offset for pre-MSI-X operation so that we can
	 * read device configuration space prior to configuring interrupts.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;

	return (vio);
}
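
/*
 * Illustrative attach-time sequence (not part of the framework): a client
 * driver generally calls virtio_init() from attach(9E), registers each
 * virtqueue it requires, completes setup, and then enables interrupts.  The
 * "xx" soft state, queue name, feature mask, segment count, and handler
 * names below are hypothetical:
 *
 *	virtio_t *vio;
 *
 *	if ((vio = virtio_init(dip, XX_WANTED_FEATURES, B_TRUE)) == NULL) {
 *		return (DDI_FAILURE);
 *	}
 *
 *	if ((xx->xx_rx_queue = virtio_queue_alloc(vio, 0, "rx",
 *	    xx_rx_handler, xx, B_FALSE, XX_MAX_SEGS)) == NULL) {
 *		virtio_fini(vio, B_TRUE);
 *		return (DDI_FAILURE);
 *	}
 *
 *	if (virtio_init_complete(vio, 0) != DDI_SUCCESS ||
 *	    virtio_interrupts_enable(vio) != DDI_SUCCESS) {
 *		virtio_fini(vio, B_TRUE);
 *		return (DDI_FAILURE);
 *	}
 */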

/*
 * This function must be called by the driver once it has completed early setup
 * calls.  The value of "allowed_interrupt_types" is a mask of interrupt types
 * (DDI_INTR_TYPE_MSIX, etc) that we'll try to use when installing handlers, or
 * the special value 0 to allow the system to use any available type.
 */
int
virtio_init_complete(virtio_t *vio, int allowed_interrupt_types)
{
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER));
	vio->vio_initlevel |= VIRTIO_INITLEVEL_PROVIDER;

	if (!list_is_empty(&vio->vio_queues)) {
		/*
		 * Set up interrupts for the queues that have been registered.
		 */
		if (virtio_interrupts_setup(vio, allowed_interrupt_types) !=
		    DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/*
	 * Now that we know the interrupt priority, we can re-initialise the
	 * mutexes with that priority for use in our interrupt handlers.
	 */
	mutex_destroy(&vio->vio_mutex);
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_destroy(&viq->viq_mutex);
		mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER,
		    virtio_intr_pri(vio));
	}

	virtio_set_status(vio, VIRTIO_STATUS_DRIVER_OK);

	return (DDI_SUCCESS);
}

boolean_t
virtio_feature_present(virtio_t *vio, uint64_t feature_mask)
{
	return ((vio->vio_features & feature_mask) != 0);
}

void *
virtio_intr_pri(virtio_t *vio)
{
	VERIFY(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED);

	return (DDI_INTR_PRI(vio->vio_interrupt_priority));
}

/*
 * Enable a bit in the device status register.  Each bit signals a level of
 * guest readiness to the host.  Use the VIRTIO_CONFIG_DEVICE_STATUS_*
 * constants for "status".  To zero the status field use
 * virtio_device_reset().
 */
static void
virtio_set_status(virtio_t *vio, uint8_t status)
{
	VERIFY3U(status, !=, 0);

	mutex_enter(&vio->vio_mutex);

	uint8_t old = virtio_get8(vio, VIRTIO_LEGACY_DEVICE_STATUS);
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, status | old);

	mutex_exit(&vio->vio_mutex);
}

static void
virtio_device_reset_locked(virtio_t *vio)
{
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, VIRTIO_STATUS_RESET);
}

void
virtio_device_reset(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_device_reset_locked(vio);
	mutex_exit(&vio->vio_mutex);
}

/*
 * Some queues are effectively long-polled; the driver submits a series of
 * buffers and the device only returns them when there is data available.
 * During detach, we need to coordinate the return of these buffers.  Calling
 * "virtio_shutdown()" will reset the device, then allow the removal of all
 * buffers that were in flight at the time of shutdown via
 * "virtio_queue_evacuate()".
 */
void
virtio_shutdown(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Shutdown has been performed already.
		 */
		mutex_exit(&vio->vio_mutex);
		return;
	}

	/*
	 * First, mark all of the queues as shutdown.  This will prevent any
	 * further activity.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_enter(&viq->viq_mutex);
		viq->viq_shutdown = B_TRUE;
		mutex_exit(&viq->viq_mutex);
	}

	/*
	 * Now, reset the device.  This removes any queue configuration on the
	 * device side.
	 */
	virtio_device_reset_locked(vio);
	vio->vio_initlevel |= VIRTIO_INITLEVEL_SHUTDOWN;
	mutex_exit(&vio->vio_mutex);
}

/*
 * Common implementation of quiesce(9E) for simple Virtio-based devices.
 */
int
virtio_quiesce(virtio_t *vio)
{
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Device has already been reset.
		 */
		return (DDI_SUCCESS);
	}

	/*
	 * When we reset the device, it should immediately stop using any DMA
	 * memory we've previously passed to it.  All queue configuration is
	 * discarded.  This is good enough for quiesce(9E).
	 */
	virtio_device_reset_locked(vio);

	return (DDI_SUCCESS);
}
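
/*
 * Illustrative usage sketch (not part of the framework): a client driver's
 * quiesce(9E) entry point can typically delegate straight to
 * virtio_quiesce().  The "xx_state_t" soft state, "xx_statep", and
 * "xx_virtio" names below are hypothetical:
 *
 *	static int
 *	xx_quiesce(dev_info_t *dip)
 *	{
 *		xx_state_t *xx;
 *
 *		if ((xx = ddi_get_soft_state(xx_statep,
 *		    ddi_get_instance(dip))) == NULL) {
 *			return (DDI_FAILURE);
 *		}
 *
 *		return (virtio_quiesce(xx->xx_virtio));
 *	}
 */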

/*
 * DEVICE-SPECIFIC REGISTER ACCESS
 *
 * Note that these functions take the mutex to avoid racing with interrupt
 * enable/disable, when the device-specific offset can potentially change.
 */

uint8_t
virtio_dev_get8(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint8_t r = virtio_get8(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint16_t
virtio_dev_get16(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint16_t r = virtio_get16(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint32_t
virtio_dev_get32(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint32_t r = virtio_get32(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint64_t
virtio_dev_get64(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	/*
	 * On at least some systems, a 64-bit read or write to this BAR is not
	 * possible.  For legacy devices, there is no generation number to use
	 * to determine if configuration may have changed half-way through a
	 * read.  We need to continue to read both halves of the value until we
	 * read the same value at least twice.
	 */
	uintptr_t o_lo = vio->vio_config_offset + offset;
	uintptr_t o_hi = o_lo + 4;

	uint64_t val = virtio_get32(vio, o_lo) |
	    ((uint64_t)virtio_get32(vio, o_hi) << 32);

	for (;;) {
		uint64_t tval = virtio_get32(vio, o_lo) |
		    ((uint64_t)virtio_get32(vio, o_hi) << 32);

		if (tval == val) {
			break;
		}

		val = tval;
	}

	mutex_exit(&vio->vio_mutex);
	return (val);
}

void
virtio_dev_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put8(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put16(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put32(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}
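
/*
 * Illustrative usage sketch (not part of the framework): device-specific
 * configuration is read through these accessors using byte offsets relative
 * to the start of the device-specific region, as laid out by the relevant
 * device specification.  For example, a network driver whose specification
 * places a six-byte MAC address at offset 0 might read it one byte at a
 * time:
 *
 *	for (uint_t i = 0; i < ETHERADDRL; i++) {
 *		mac[i] = virtio_dev_get8(vio, i);
 *	}
 */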

/*
 * VIRTQUEUE MANAGEMENT
 */

static int
virtio_inflight_compar(const void *lp, const void *rp)
{
	const virtio_chain_t *l = lp;
	const virtio_chain_t *r = rp;

	if (l->vic_head < r->vic_head) {
		return (-1);
	} else if (l->vic_head > r->vic_head) {
		return (1);
	} else {
		return (0);
	}
}

virtio_queue_t *
virtio_queue_alloc(virtio_t *vio, uint16_t qidx, const char *name,
    ddi_intr_handler_t *func, void *funcarg, boolean_t force_direct,
    uint_t max_segs)
{
	uint16_t qsz;
	char space_name[256];

	if (max_segs < 1) {
		/*
		 * Every descriptor, direct or indirect, needs to refer to at
		 * least one buffer.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "segment count must be at least 1", name, (uint_t)qidx);
		return (NULL);
	}

	mutex_enter(&vio->vio_mutex);

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER) {
		/*
		 * Cannot configure any more queues once initial setup is
		 * complete and interrupts have been allocated.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "alloc after init complete", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	/*
	 * There is no way to negotiate a different queue size for legacy
	 * devices.  We must read and use the native queue size of the device.
	 */
	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	if ((qsz = virtio_get16(vio, VIRTIO_LEGACY_QUEUE_SIZE)) == 0) {
		/*
		 * A size of zero means the device does not have a queue with
		 * this index.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "does not exist on device", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	mutex_exit(&vio->vio_mutex);

	virtio_queue_t *viq = kmem_zalloc(sizeof (*viq), KM_SLEEP);
	viq->viq_virtio = vio;
	viq->viq_name = name;
	viq->viq_index = qidx;
	viq->viq_size = qsz;
	viq->viq_func = func;
	viq->viq_funcarg = funcarg;
	viq->viq_max_segs = max_segs;
	avl_create(&viq->viq_inflight, virtio_inflight_compar,
	    sizeof (virtio_chain_t), offsetof(virtio_chain_t, vic_node));

	/*
	 * Allocate the mutex without an interrupt priority for now, as we do
	 * with "vio_mutex".  We'll reinitialise it in
	 * "virtio_init_complete()".
	 */
	mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER, NULL);

	if (virtio_feature_present(vio, VIRTIO_F_RING_INDIRECT_DESC) &&
	    !force_direct) {
		/*
		 * If we were able to negotiate the indirect descriptor
		 * feature, and the caller has not explicitly forced the use of
		 * direct descriptors, we'll allocate indirect descriptor lists
		 * for each chain.
		 */
		viq->viq_indirect = B_TRUE;
	}

	/*
	 * Track descriptor usage in an identifier space.
	 */
	(void) snprintf(space_name, sizeof (space_name), "%s%d_vq_%s",
	    ddi_get_name(vio->vio_dip), ddi_get_instance(vio->vio_dip), name);
	if ((viq->viq_descmap = id_space_create(space_name, 0, qsz)) == NULL) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate descriptor "
		    "ID space");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * For legacy devices, memory for the queue has a strict layout
	 * determined by the queue size.
	 */
	size_t sz_descs = sizeof (virtio_vq_desc_t) * qsz;
	size_t sz_driver = P2ROUNDUP_TYPED(sz_descs +
	    sizeof (virtio_vq_driver_t) +
	    sizeof (uint16_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);
	size_t sz_device = P2ROUNDUP_TYPED(sizeof (virtio_vq_device_t) +
	    sizeof (virtio_vq_elem_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);

	if (virtio_dma_init(vio, &viq->viq_dma, sz_driver + sz_device,
	    &virtio_dma_attr_queue, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
	    KM_SLEEP) != DDI_SUCCESS) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate queue "
		    "DMA memory");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * NOTE: The viq_dma_* members below are used by
	 * VIRTQ_DMA_SYNC_FORDEV() and VIRTQ_DMA_SYNC_FORKERNEL() to calculate
	 * offsets into the DMA allocation for partial synchronisation.  If the
	 * ordering of, or relationship between, these pointers changes, the
	 * macros must be kept in sync.
	 */
	viq->viq_dma_descs = virtio_dma_va(&viq->viq_dma, 0);
	viq->viq_dma_driver = virtio_dma_va(&viq->viq_dma, sz_descs);
	viq->viq_dma_device = virtio_dma_va(&viq->viq_dma, sz_driver);

	/*
	 * Install in the per-device list of queues.
	 */
	mutex_enter(&vio->vio_mutex);
	for (virtio_queue_t *chkvq = list_head(&vio->vio_queues); chkvq != NULL;
	    chkvq = list_next(&vio->vio_queues, chkvq)) {
		if (chkvq->viq_index == qidx) {
			dev_err(vio->vio_dip, CE_WARN, "attempt to register "
			    "queue \"%s\" with same index (%d) as queue \"%s\"",
			    name, qidx, chkvq->viq_name);
			mutex_exit(&vio->vio_mutex);
			virtio_queue_free(viq);
			return (NULL);
		}
	}
	list_insert_tail(&vio->vio_queues, viq);

	/*
	 * Ensure the zeroing of the queue memory is visible to the host before
	 * we inform the device of the queue address.
	 */
	membar_producer();
	VIRTQ_DMA_SYNC_FORDEV(viq);

	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS,
	    virtio_dma_cookie_pa(&viq->viq_dma, 0) >> VIRTIO_PAGE_SHIFT);

	mutex_exit(&vio->vio_mutex);
	return (viq);
}

static void
virtio_queue_free(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	/*
	 * We are going to destroy the queue mutex.  Make sure we've already
	 * removed the interrupt handlers.
	 */
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));

	mutex_enter(&viq->viq_mutex);

	/*
	 * If the device has not already been reset as part of a shutdown,
	 * detach the queue from the device now.
	 */
	if (!viq->viq_shutdown) {
		virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, viq->viq_index);
		virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS, 0);
	}

	virtio_dma_fini(&viq->viq_dma);

	VERIFY(avl_is_empty(&viq->viq_inflight));
	avl_destroy(&viq->viq_inflight);
	if (viq->viq_descmap != NULL) {
		id_space_destroy(viq->viq_descmap);
	}

	mutex_exit(&viq->viq_mutex);
	mutex_destroy(&viq->viq_mutex);

	kmem_free(viq, sizeof (*viq));
}

void
virtio_queue_no_interrupt(virtio_queue_t *viq, boolean_t stop_interrupts)
{
	mutex_enter(&viq->viq_mutex);

	if (stop_interrupts) {
		viq->viq_dma_driver->vqdr_flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
	} else {
		viq->viq_dma_driver->vqdr_flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
	}
	VIRTQ_DMA_SYNC_FORDEV(viq);

	mutex_exit(&viq->viq_mutex);
}

static virtio_chain_t *
virtio_queue_complete(virtio_queue_t *viq, uint_t index)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	virtio_chain_t *vic;

	virtio_chain_t search;
	bzero(&search, sizeof (search));
	search.vic_head = index;

	if ((vic = avl_find(&viq->viq_inflight, &search, NULL)) == NULL) {
		return (NULL);
	}
	avl_remove(&viq->viq_inflight, vic);

	return (vic);
}

uint_t
virtio_queue_size(virtio_queue_t *viq)
{
	return (viq->viq_size);
}

uint_t
virtio_queue_nactive(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	uint_t r = avl_numnodes(&viq->viq_inflight);
	mutex_exit(&viq->viq_mutex);

	return (r);
}

virtio_chain_t *
virtio_queue_poll(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	if (viq->viq_shutdown) {
		/*
		 * The device has been reset by virtio_shutdown(), and queue
		 * processing has been halted.  Any previously submitted chains
		 * will be evacuated using virtio_queue_evacuate().
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (viq->viq_device_index == viq->viq_dma_device->vqde_index) {
		/*
		 * If the device index has not changed since the last poll,
		 * there are no new chains to process.
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	/*
	 * We need to ensure that all reads from the descriptor (vqde_ring[])
	 * and from any memory the descriptor references occur after we have
	 * read the descriptor index value above (vqde_index).
	 */
	membar_consumer();

	uint16_t index = (viq->viq_device_index++) % viq->viq_size;
	uint16_t start = viq->viq_dma_device->vqde_ring[index].vqe_start;
	uint32_t len = viq->viq_dma_device->vqde_ring[index].vqe_len;

	virtio_chain_t *vic;
	if ((vic = virtio_queue_complete(viq, start)) == NULL) {
		/*
		 * We could not locate a chain for this descriptor index, which
		 * suggests that something has gone horribly wrong.
		 */
		dev_err(viq->viq_virtio->vio_dip, CE_PANIC,
		    "queue \"%s\" ring entry %u (descriptor %u) has no chain",
		    viq->viq_name, (uint16_t)index, (uint16_t)start);
	}

	vic->vic_received_length = len;

	mutex_exit(&viq->viq_mutex);

	return (vic);
}

/*
 * After a call to "virtio_shutdown()", the driver must retrieve any previously
 * submitted chains and free any associated resources.
 */
virtio_chain_t *
virtio_queue_evacuate(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	mutex_enter(&vio->vio_mutex);
	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN)) {
		dev_err(vio->vio_dip, CE_PANIC,
		    "virtio_queue_evacuate() without virtio_shutdown()");
	}
	mutex_exit(&vio->vio_mutex);

	mutex_enter(&viq->viq_mutex);
	VERIFY(viq->viq_shutdown);

	virtio_chain_t *vic = avl_first(&viq->viq_inflight);
	if (vic != NULL) {
		avl_remove(&viq->viq_inflight, vic);
	}

	mutex_exit(&viq->viq_mutex);

	return (vic);
}
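
/*
 * Illustrative usage sketch (not part of the framework): during detach, after
 * calling virtio_shutdown(), a client driver would typically drain each queue
 * and release its per-chain resources before calling virtio_fini().  The "xx"
 * soft state and "xx_buf_free" routine are hypothetical:
 *
 *	virtio_chain_t *vic;
 *
 *	virtio_shutdown(xx->xx_virtio);
 *	while ((vic = virtio_queue_evacuate(xx->xx_rx_queue)) != NULL) {
 *		xx_buf_free(virtio_chain_data(vic));
 *		virtio_chain_free(vic);
 *	}
 *
 *	virtio_fini(xx->xx_virtio, B_FALSE);
 */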

/*
 * VIRTQUEUE DESCRIPTOR CHAIN MANAGEMENT
 */

/*
 * When the device returns a descriptor chain to the driver, it may provide the
 * length in bytes of data written into the chain.  Client drivers should use
 * this value with care; the specification suggests some device implementations
 * have not always provided a useful or correct value.
 */
size_t
virtio_chain_received_length(virtio_chain_t *vic)
{
	return (vic->vic_received_length);
}

/*
 * Allocate a descriptor chain for use with this queue.  The "kmflags" value
 * may be KM_SLEEP or KM_NOSLEEP as per kmem_alloc(9F).
 */
virtio_chain_t *
virtio_chain_alloc(virtio_queue_t *viq, int kmflags)
{
	virtio_t *vio = viq->viq_virtio;
	virtio_chain_t *vic;
	uint_t cap;

	/*
	 * Direct descriptors are known by their index in the descriptor table
	 * for the queue.  We use the variable-length array member at the end
	 * of the chain tracking object to hold the list of direct descriptors
	 * assigned to this chain.
	 */
	if (viq->viq_indirect) {
		/*
		 * When using indirect descriptors we still need one direct
		 * descriptor entry to hold the physical address and length of
		 * the indirect descriptor table.
		 */
		cap = 1;
	} else {
		/*
		 * For direct descriptors we need to be able to track a
		 * descriptor for each possible segment in a single chain.
		 */
		cap = viq->viq_max_segs;
	}

	size_t vicsz = sizeof (*vic) + sizeof (uint16_t) * cap;
	if ((vic = kmem_zalloc(vicsz, kmflags)) == NULL) {
		return (NULL);
	}
	vic->vic_vq = viq;
	vic->vic_direct_capacity = cap;

	if (viq->viq_indirect) {
		/*
		 * Allocate an indirect descriptor list with the appropriate
		 * number of entries.
		 */
		if (virtio_dma_init(vio, &vic->vic_indirect_dma,
		    sizeof (virtio_vq_desc_t) * viq->viq_max_segs,
		    &virtio_dma_attr_indirect,
		    DDI_DMA_CONSISTENT | DDI_DMA_WRITE,
		    kmflags) != DDI_SUCCESS) {
			goto fail;
		}

		/*
		 * Allocate a single descriptor to hold the indirect list.
		 * Leave the length as zero for now; it will be set to include
		 * any occupied entries at push time.
		 */
		mutex_enter(&viq->viq_mutex);
		if (virtio_chain_append_impl(vic,
		    virtio_dma_cookie_pa(&vic->vic_indirect_dma, 0), 0,
		    VIRTQ_DESC_F_INDIRECT) != DDI_SUCCESS) {
			mutex_exit(&viq->viq_mutex);
			goto fail;
		}
		mutex_exit(&viq->viq_mutex);
		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * Don't set the indirect capacity until after we've installed
		 * the direct descriptor which points at the indirect list, or
		 * virtio_chain_append_impl() will be confused.
		 */
		vic->vic_indirect_capacity = viq->viq_max_segs;
	}

	return (vic);

fail:
	virtio_dma_fini(&vic->vic_indirect_dma);
	kmem_free(vic, vicsz);
	return (NULL);
}

void *
virtio_chain_data(virtio_chain_t *vic)
{
	return (vic->vic_data);
}

void
virtio_chain_data_set(virtio_chain_t *vic, void *data)
{
	vic->vic_data = data;
}

void
virtio_chain_clear(virtio_chain_t *vic)
{
	if (vic->vic_indirect_capacity != 0) {
		/*
		 * There should only be one direct descriptor, which points at
		 * our indirect descriptor list.  We don't want to clear it
		 * here.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);

		if (vic->vic_indirect_used > 0) {
			/*
			 * Clear out the indirect descriptor table.
			 */
			vic->vic_indirect_used = 0;
			bzero(virtio_dma_va(&vic->vic_indirect_dma, 0),
			    virtio_dma_size(&vic->vic_indirect_dma));
		}

	} else if (vic->vic_direct_capacity > 0) {
		/*
		 * Release any descriptors that were assigned to us previously.
		 */
		for (uint_t i = 0; i < vic->vic_direct_used; i++) {
			id_free(vic->vic_vq->viq_descmap, vic->vic_direct[i]);
			vic->vic_direct[i] = 0;
		}
		vic->vic_direct_used = 0;
	}
}

void
virtio_chain_free(virtio_chain_t *vic)
{
	/*
	 * First ensure that we have released any descriptors used by this
	 * chain.
	 */
	virtio_chain_clear(vic);

	if (vic->vic_indirect_capacity > 0) {
		/*
		 * Release the direct descriptor that points to our indirect
		 * descriptor list.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);
		id_free(vic->vic_vq->viq_descmap, vic->vic_direct[0]);

		virtio_dma_fini(&vic->vic_indirect_dma);
	}

	size_t vicsz = sizeof (*vic) +
	    vic->vic_direct_capacity * sizeof (uint16_t);

	kmem_free(vic, vicsz);
}

static inline int
virtio_queue_descmap_alloc(virtio_queue_t *viq, uint_t *indexp)
{
	id_t index;

	if ((index = id_alloc_nosleep(viq->viq_descmap)) == -1) {
		return (ENOMEM);
	}

	VERIFY3S(index, >=, 0);
	VERIFY3S(index, <, viq->viq_size);

	*indexp = (uint_t)index;
	return (0);
}

static int
virtio_chain_append_impl(virtio_chain_t *vic, uint64_t pa, size_t len,
    uint16_t flags)
{
	virtio_queue_t *viq = vic->vic_vq;
	virtio_vq_desc_t *vqd;
	uint_t index;

	/*
	 * We're modifying the queue-wide descriptor list so make sure we have
	 * the appropriate lock.
	 */
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	if (vic->vic_indirect_capacity != 0) {
		/*
		 * Use indirect descriptors.
		 */
		if (vic->vic_indirect_used >= vic->vic_indirect_capacity) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&vic->vic_indirect_dma, 0);

		if ((index = vic->vic_indirect_used++) > 0) {
			/*
			 * Chain the current last indirect descriptor to the
			 * new one.
			 */
			vqd[index - 1].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[index - 1].vqd_next = index;
		}

	} else {
		/*
		 * Use direct descriptors.
		 */
		if (vic->vic_direct_used >= vic->vic_direct_capacity) {
			return (DDI_FAILURE);
		}

		if (virtio_queue_descmap_alloc(viq, &index) != 0) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&viq->viq_dma, 0);

		if (vic->vic_direct_used > 0) {
			/*
			 * This is not the first entry.  Chain the current
			 * descriptor to the next one.
			 */
			uint16_t p = vic->vic_direct[vic->vic_direct_used - 1];

			vqd[p].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[p].vqd_next = index;
		}
		vic->vic_direct[vic->vic_direct_used++] = index;
	}

	vqd[index].vqd_addr = pa;
	vqd[index].vqd_len = len;
	vqd[index].vqd_flags = flags;
	vqd[index].vqd_next = 0;

	return (DDI_SUCCESS);
}

int
virtio_chain_append(virtio_chain_t *vic, uint64_t pa, size_t len,
    virtio_direction_t dir)
{
	virtio_queue_t *viq = vic->vic_vq;
	uint16_t flags = 0;

	switch (dir) {
	case VIRTIO_DIR_DEVICE_WRITES:
		flags |= VIRTQ_DESC_F_WRITE;
		break;

	case VIRTIO_DIR_DEVICE_READS:
		break;

	default:
		panic("unknown direction value %u", dir);
	}

	mutex_enter(&viq->viq_mutex);
	int r = virtio_chain_append_impl(vic, pa, len, flags);
	mutex_exit(&viq->viq_mutex);

	return (r);
}
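
/*
 * Illustrative usage sketch (not part of the framework): a transmit path
 * would typically allocate a chain, attach its own tracking object, append
 * the physical address and length of a prepared buffer, and submit the chain
 * with an immediate flush.  The completed chain is later returned via
 * virtio_queue_poll() in the queue handler.  The "xxb" buffer object and its
 * members are hypothetical:
 *
 *	virtio_chain_t *vic;
 *
 *	if ((vic = virtio_chain_alloc(xx->xx_tx_queue, KM_NOSLEEP)) == NULL) {
 *		return (B_FALSE);
 *	}
 *	virtio_chain_data_set(vic, xxb);
 *
 *	if (virtio_chain_append(vic, xxb->xxb_dma_pa, xxb->xxb_dma_len,
 *	    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
 *		virtio_chain_free(vic);
 *		return (B_FALSE);
 *	}
 *
 *	virtio_chain_submit(vic, B_TRUE);
 */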

static void
virtio_queue_flush_locked(virtio_queue_t *viq)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	/*
	 * Make sure any writes we have just made to the descriptors
	 * (vqdr_ring[]) are visible to the device before we update the ring
	 * pointer (vqdr_index).
	 */
	membar_producer();
	viq->viq_dma_driver->vqdr_index = viq->viq_driver_index;
	VIRTQ_DMA_SYNC_FORDEV(viq);

	/*
	 * Determine whether the device expects us to notify it of new
	 * descriptors.
	 */
	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (!(viq->viq_dma_device->vqde_flags & VIRTQ_USED_F_NO_NOTIFY)) {
		virtio_put16(viq->viq_virtio, VIRTIO_LEGACY_QUEUE_NOTIFY,
		    viq->viq_index);
	}
}

void
virtio_queue_flush(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	virtio_queue_flush_locked(viq);
	mutex_exit(&viq->viq_mutex);
}

void
virtio_chain_submit(virtio_chain_t *vic, boolean_t flush)
{
	virtio_queue_t *viq = vic->vic_vq;

	mutex_enter(&viq->viq_mutex);

	if (vic->vic_indirect_capacity != 0) {
		virtio_vq_desc_t *vqd = virtio_dma_va(&viq->viq_dma, 0);

		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * This is an indirect descriptor queue.  The length in bytes
		 * of the descriptor must extend to cover the populated
		 * indirect descriptor entries.
		 */
		vqd[vic->vic_direct[0]].vqd_len =
		    sizeof (virtio_vq_desc_t) * vic->vic_indirect_used;

		virtio_dma_sync(&vic->vic_indirect_dma, DDI_DMA_SYNC_FORDEV);
	}

	/*
	 * Populate the next available slot in the driver-owned ring for this
	 * chain.  The updated value of viq_driver_index will not be visible
	 * to the device until a subsequent queue flush.
	 */
	uint16_t index = (viq->viq_driver_index++) % viq->viq_size;
	viq->viq_dma_driver->vqdr_ring[index] = vic->vic_direct[0];

	vic->vic_head = vic->vic_direct[0];
	avl_add(&viq->viq_inflight, vic);

	if (flush) {
		virtio_queue_flush_locked(vic->vic_vq);
	}

	mutex_exit(&viq->viq_mutex);
}

/*
 * INTERRUPTS MANAGEMENT
 */

static const char *
virtio_interrupt_type_name(int type)
{
	switch (type) {
	case DDI_INTR_TYPE_MSIX:
		return ("MSI-X");
	case DDI_INTR_TYPE_MSI:
		return ("MSI");
	case DDI_INTR_TYPE_FIXED:
		return ("fixed");
	default:
		return ("?");
	}
}

static int
virtio_interrupts_alloc(virtio_t *vio, int type, int nrequired)
{
	dev_info_t *dip = vio->vio_dip;
	int nintrs = 0;
	int navail = 0;

	VERIFY(MUTEX_HELD(&vio->vio_mutex));
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC));

	if (ddi_intr_get_nintrs(dip, type, &nintrs) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (nintrs < 1) {
		dev_err(dip, CE_WARN, "no %s interrupts supported",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}

	if (ddi_intr_get_navail(dip, type, &navail) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count available %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (navail < nrequired) {
		dev_err(dip, CE_WARN, "need %d %s interrupts, but only %d "
		    "available", nrequired, virtio_interrupt_type_name(type),
		    navail);
		return (DDI_FAILURE);
	}

	VERIFY3P(vio->vio_interrupts, ==, NULL);
	vio->vio_interrupts = kmem_zalloc(
	    sizeof (ddi_intr_handle_t) * nrequired, KM_SLEEP);

	int r;
	if ((r = ddi_intr_alloc(dip, vio->vio_interrupts, type, 0, nrequired,
	    &vio->vio_ninterrupts, DDI_INTR_ALLOC_STRICT)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "%s interrupt allocation failure (%d)",
		    virtio_interrupt_type_name(type), r);
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * nrequired);
		vio->vio_interrupts = NULL;
		return (DDI_FAILURE);
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ALLOC;
	vio->vio_interrupt_type = type;
	return (DDI_SUCCESS);
}

static uint_t
virtio_shared_isr(caddr_t arg0, caddr_t arg1)
{
	virtio_t *vio = (virtio_t *)arg0;
	uint_t r = DDI_INTR_UNCLAIMED;
	uint8_t isr;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Check the ISR status to see if the interrupt applies to us.  Reading
	 * this field resets it to zero.
	 */
	isr = virtio_get8(vio, VIRTIO_LEGACY_ISR_STATUS);
	if ((isr & VIRTIO_ISR_CHECK_QUEUES) == 0) {
		goto done;
	}

	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func != NULL) {
			mutex_exit(&vio->vio_mutex);
			if (viq->viq_func(viq->viq_funcarg, arg0) ==
			    DDI_INTR_CLAIMED) {
				r = DDI_INTR_CLAIMED;
			}
			mutex_enter(&vio->vio_mutex);

			if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
				/*
				 * The device was shut down while in a queue
				 * handler routine.
				 */
				goto done;
			}
		}
	}

done:
	mutex_exit(&vio->vio_mutex);
	return (r);
}

static int
virtio_interrupts_setup(virtio_t *vio, int allow_types)
{
	dev_info_t *dip = vio->vio_dip;
	int types;
	int count = 0;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Determine the number of interrupts we'd like based on the number of
	 * virtqueues.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func != NULL) {
			count++;
		}
	}

	if (ddi_intr_get_supported_types(dip, &types) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not get supported interrupts");
		mutex_exit(&vio->vio_mutex);
		return (DDI_FAILURE);
	}

	if (allow_types != 0) {
		/*
		 * Restrict the possible interrupt types at the request of the
		 * driver.
		 */
		types &= allow_types;
	}

	/*
	 * Try each potential interrupt type in descending order of preference.
	 * Note that the specification does not appear to allow for the use of
	 * classical MSI, so we are limited to either MSI-X or fixed
	 * interrupts.
	 */
	if (types & DDI_INTR_TYPE_MSIX) {
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_MSIX,
		    count) == DDI_SUCCESS) {
			goto add_handlers;
		}
	}
	if (types & DDI_INTR_TYPE_FIXED) {
		/*
		 * If fixed interrupts are all that are available, we'll just
		 * ask for one.
		 */
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_FIXED, 1) ==
		    DDI_SUCCESS) {
			goto add_handlers;
		}
	}

	dev_err(dip, CE_WARN, "interrupt allocation failed");
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);

add_handlers:
	/*
	 * Ensure that we have not been given any high-level interrupts as our
	 * interrupt handlers do not support them.
	 */
	for (int i = 0; i < vio->vio_ninterrupts; i++) {
		uint_t ipri;

		if (ddi_intr_get_pri(vio->vio_interrupts[i], &ipri) !=
		    DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "could not determine interrupt "
			    "priority");
			goto fail;
		}

		if (ipri >= ddi_intr_get_hilevel_pri()) {
			dev_err(dip, CE_WARN, "high level interrupts not "
			    "supported");
			goto fail;
		}

		/*
		 * Record the highest priority we've been allocated to use for
		 * mutex initialisation.
		 */
		if (i == 0 || ipri > vio->vio_interrupt_priority) {
			vio->vio_interrupt_priority = ipri;
		}
	}

	/*
	 * Get the interrupt capabilities from the first handle to determine
	 * whether we need to use ddi_intr_block_enable(9F).
	 */
	if (ddi_intr_get_cap(vio->vio_interrupts[0],
	    &vio->vio_interrupt_cap) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get interrupt capabilities");
		goto fail;
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		VERIFY3S(vio->vio_ninterrupts, ==, 1);
		/*
		 * For fixed interrupts, we need to use our shared handler to
		 * multiplex the per-queue handlers provided by the driver.
		 */
		if (ddi_intr_add_handler(vio->vio_interrupts[0],
		    virtio_shared_isr, (caddr_t)vio, NULL) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding shared %s interrupt "
			    "handler failed", virtio_interrupt_type_name(
			    vio->vio_interrupt_type));
			goto fail;
		}

		goto done;
	}

	VERIFY3S(vio->vio_ninterrupts, ==, count);

	uint_t n = 0;
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func == NULL) {
			continue;
		}

		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    viq->viq_func, (caddr_t)viq->viq_funcarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding interrupt %u (%s) failed",
			    n, viq->viq_name);
			goto fail;
		}

		viq->viq_handler_index = n;
		viq->viq_handler_added = B_TRUE;
		n++;
	}

done:
	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ADDED;
	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);

fail:
	virtio_interrupts_teardown(vio);
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);
}

static void
virtio_interrupts_teardown(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	virtio_interrupts_disable_locked(vio);

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		/*
		 * Remove the multiplexing interrupt handler.
		 */
		if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED) {
			int r;

			VERIFY3S(vio->vio_ninterrupts, ==, 1);

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "shared interrupt handler failed (%d)", r);
			}
		}
	} else {
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			int r;

			if (!viq->viq_handler_added) {
				continue;
			}

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[viq->viq_handler_index])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "interrupt handler (%s) failed (%d)",
				    viq->viq_name, r);
			}

			viq->viq_handler_added = B_FALSE;
		}
	}
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ADDED;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC) {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			int r;

			if ((r = ddi_intr_free(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "freeing "
				    "interrupt %u failed (%d)", i, r);
			}
		}
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * vio->vio_ninterrupts);
		vio->vio_interrupts = NULL;
		vio->vio_ninterrupts = 0;
		vio->vio_interrupt_type = 0;
		vio->vio_interrupt_cap = 0;
		vio->vio_interrupt_priority = 0;

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ALLOC;
	}
}

static void
virtio_interrupts_unwind(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT,
			    viq->viq_index);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}
	}

	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		(void) ddi_intr_block_disable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			(void) ddi_intr_disable(vio->vio_interrupts[i]);
		}
	}

	/*
	 * Disabling the interrupts makes the MSI-X fields disappear from the
	 * BAR once more.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;
}

int
virtio_interrupts_enable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED) {
		mutex_exit(&vio->vio_mutex);
		return (DDI_SUCCESS);
	}

	int r = DDI_SUCCESS;
	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		r = ddi_intr_block_enable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			if ((r = ddi_intr_enable(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				/*
				 * Disable the interrupts we have enabled so
				 * far.
				 */
				for (i--; i >= 0; i--) {
					(void) ddi_intr_disable(
					    vio->vio_interrupts[i]);
				}
				break;
			}
		}
	}

	if (r != DDI_SUCCESS) {
		mutex_exit(&vio->vio_mutex);
		return (r);
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		/*
		 * When asked to enable the interrupts, the system enables
		 * MSI-X in the PCI configuration for the device.  While
		 * enabled, the extra MSI-X configuration table fields appear
		 * between the general and the device-specific regions of the
		 * BAR.
		 */
		vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET_MSIX;

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			uint16_t qi = viq->viq_index;
			uint16_t msi = viq->viq_handler_index;

			/*
			 * Route interrupts for this queue to the assigned
			 * MSI-X vector number.
			 */
			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qi);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE, msi);

			/*
			 * The device may not actually accept the vector number
			 * we're attempting to program.  We need to confirm
			 * that configuration was successful by re-reading the
			 * configuration we just wrote.
			 */
			if (virtio_get16(vio, VIRTIO_LEGACY_MSIX_QUEUE) !=
			    msi) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector %u for "
				    "queue \"%s\" (#%u)", (uint_t)msi,
				    viq->viq_name, (uint_t)qi);

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ENABLED;

	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);
}

static void
virtio_interrupts_disable_locked(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED)) {
		return;
	}

	virtio_interrupts_unwind(vio);

	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ENABLED;
}

void
virtio_interrupts_disable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_interrupts_disable_locked(vio);
	mutex_exit(&vio->vio_mutex);
}