/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/sysctl.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/pci.h>
#include <sys/pci_cap.h>
#include <sys/pcie_impl.h>
#include <sys/ppt_dev.h>
#include <sys/mkdev.h>
#include <sys/sysmacros.h>

#include "vmm_lapic.h"

#include "iommu.h"
#include "ppt.h"

#define	MAX_MSIMSGS	32

/*
 * If the MSI-X table is located in the middle of a BAR then that MMIO
 * region gets split into two segments - one segment above the MSI-X table
 * and the other segment below the MSI-X table - with a hole in place of
 * the MSI-X table so accesses to it can be trapped and emulated.
 *
 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
 */
#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)
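
/*
 * Accounting example for the above: a device exposes at most
 * PCIR_MAX_BAR_0 + 1 (six) BARs, and the MSI-X table can live in only one
 * of them, so at most one BAR is ever split around the emulated hole.
 * Six segments plus one spare therefore always suffice.
 */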

struct pptintr_arg {
	struct pptdev	*pptdev;
	uint64_t	addr;
	uint64_t	msg_data;
};

struct pptseg {
	vm_paddr_t	gpa;
	size_t		len;
	int		wired;
};

struct pptbar {
	uint64_t	base;
	uint64_t	size;
	uint_t		type;
	ddi_acc_handle_t io_handle;
	caddr_t		io_ptr;
	uint_t		ddireg;
};

struct pptdev {
	dev_info_t		*pptd_dip;
	list_node_t		pptd_node;
	ddi_acc_handle_t	pptd_cfg;
	struct pptbar		pptd_bars[PCI_BASE_NUM];
	struct vm		*vm;
	struct pptseg		mmio[MAX_MMIOSEGS];
	struct {
		int		num_msgs;	/* guest state */
		boolean_t	is_fixed;
		size_t		inth_sz;
		ddi_intr_handle_t *inth;
		struct pptintr_arg arg[MAX_MSIMSGS];
	} msi;

	struct {
		int		num_msgs;
		size_t		inth_sz;
		size_t		arg_sz;
		ddi_intr_handle_t *inth;
		struct pptintr_arg *arg;
	} msix;
};


static major_t		ppt_major;
static void		*ppt_state;
static kmutex_t		pptdev_mtx;
static list_t		pptdev_list;

#define	PPT_MINOR_NAME	"ppt"

static ddi_device_acc_attr_t ppt_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STORECACHING_OK_ACC,
	DDI_DEFAULT_ACC
};

static int
ppt_open(dev_t *devp, int flag, int otyp, cred_t *cr)
{
	/* XXX: require extra privs? */
	return (0);
}

#define	BAR_TO_IDX(bar)	(((bar) - PCI_CONF_BASE0) / PCI_BAR_SZ_32)
#define	BAR_VALID(b)	( \
	(b) >= PCI_CONF_BASE0 && \
	(b) <= PCI_CONF_BASE5 && \
	((b) & (PCI_BAR_SZ_32-1)) == 0)
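
/*
 * Illustrative sketch (not part of this driver): a userspace consumer
 * holding a descriptor on one of these minor nodes (the hypothetical
 * "pptfd" below) could read the 16-bit vendor ID from config space via
 * PPT_CFG_READ:
 *
 *	struct ppt_cfg_io cio = { .pci_off = 0, .pci_width = 2 };
 *	if (ioctl(pptfd, PPT_CFG_READ, &cio) == 0)
 *		(void) printf("vendor = 0x%04x\n", (uint16_t)cio.pci_data);
 */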

static int
ppt_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	minor_t minor = getminor(dev);
	struct pptdev *ppt;
	void *data = (void *)arg;

	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL) {
		return (ENOENT);
	}

	switch (cmd) {
	case PPT_CFG_READ: {
		struct ppt_cfg_io cio;
		ddi_acc_handle_t cfg = ppt->pptd_cfg;

		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		switch (cio.pci_width) {
		case 4:
			cio.pci_data = pci_config_get32(cfg, cio.pci_off);
			break;
		case 2:
			cio.pci_data = pci_config_get16(cfg, cio.pci_off);
			break;
		case 1:
			cio.pci_data = pci_config_get8(cfg, cio.pci_off);
			break;
		default:
			return (EINVAL);
		}

		if (ddi_copyout(&cio, data, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_CFG_WRITE: {
		struct ppt_cfg_io cio;
		ddi_acc_handle_t cfg = ppt->pptd_cfg;

		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		switch (cio.pci_width) {
		case 4:
			pci_config_put32(cfg, cio.pci_off, cio.pci_data);
			break;
		case 2:
			pci_config_put16(cfg, cio.pci_off, cio.pci_data);
			break;
		case 1:
			pci_config_put8(cfg, cio.pci_off, cio.pci_data);
			break;
		default:
			return (EINVAL);
		}

		return (0);
	}
	case PPT_BAR_QUERY: {
		struct ppt_bar_query barg;
		struct pptbar *pbar;

		if (ddi_copyin(data, &barg, sizeof (barg), md) != 0) {
			return (EFAULT);
		}
		if (barg.pbq_baridx >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[barg.pbq_baridx];

		if (pbar->base == 0 || pbar->size == 0) {
			return (ENOENT);
		}
		barg.pbq_type = pbar->type;
		barg.pbq_base = pbar->base;
		barg.pbq_size = pbar->size;

		if (ddi_copyout(&barg, data, sizeof (barg), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_BAR_READ: {
		struct ppt_bar_io bio;
		struct pptbar *pbar;
		void *addr;
		uint_t rnum;

		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		rnum = bio.pbi_bar;
		if (rnum >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[rnum];
		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
			return (EINVAL);
		}
		addr = pbar->io_ptr + bio.pbi_off;

		switch (bio.pbi_width) {
		case 4:
			bio.pbi_data = ddi_get32(pbar->io_handle, addr);
			break;
		case 2:
			bio.pbi_data = ddi_get16(pbar->io_handle, addr);
			break;
		case 1:
			bio.pbi_data = ddi_get8(pbar->io_handle, addr);
			break;
		default:
			return (EINVAL);
		}

		if (ddi_copyout(&bio, data, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_BAR_WRITE: {
		struct ppt_bar_io bio;
		struct pptbar *pbar;
		void *addr;
		uint_t rnum;

		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		rnum = bio.pbi_bar;
		if (rnum >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[rnum];
		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
			return (EINVAL);
		}
		addr = pbar->io_ptr + bio.pbi_off;

		switch (bio.pbi_width) {
		case 4:
			ddi_put32(pbar->io_handle, addr, bio.pbi_data);
			break;
		case 2:
			ddi_put16(pbar->io_handle, addr, bio.pbi_data);
			break;
		case 1:
			ddi_put8(pbar->io_handle, addr, bio.pbi_data);
			break;
		default:
			return (EINVAL);
		}

		return (0);
	}

	default:
		return (ENOTTY);
	}

	return (0);
}

static int
ppt_find_msix_table_bar(struct pptdev *ppt)
{
	uint16_t base;
	uint32_t off;

	if (PCI_CAP_LOCATE(ppt->pptd_cfg, PCI_CAP_ID_MSI_X, &base) !=
	    DDI_SUCCESS)
		return (-1);

	off = pci_config_get32(ppt->pptd_cfg, base + PCI_MSIX_TBL_OFFSET);

	if (off == PCI_EINVAL32)
		return (-1);

	return (off & PCI_MSIX_TBL_BIR_MASK);
}

static int
ppt_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	minor_t minor;
	struct pptdev *ppt;
	int err, bar;
	uint_t ddireg;

	minor = getminor(dev);

	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL)
		return (ENXIO);

#ifdef _MULTI_DATAMODEL
	if (ddi_model_convert_from(model) != DDI_MODEL_NONE)
		return (ENXIO);
#endif

	if (off < 0 || off != P2ALIGN(off, PAGESIZE))
		return (EINVAL);

	if ((bar = ppt_find_msix_table_bar(ppt)) == -1)
		return (EINVAL);

	ddireg = ppt->pptd_bars[bar].ddireg;

	if (ddireg == 0)
		return (EINVAL);

	err = devmap_devmem_setup(dhp, ppt->pptd_dip, NULL, ddireg, off, len,
	    PROT_USER | PROT_READ | PROT_WRITE, IOMEM_DATA_CACHED, &ppt_attr);

	if (err == DDI_SUCCESS)
		*maplen = len;

	return (err);
}

static void
ppt_bar_wipe(struct pptdev *ppt)
{
	uint_t i;

	for (i = 0; i < PCI_BASE_NUM; i++) {
		struct pptbar *pbar = &ppt->pptd_bars[i];
		if (pbar->type == PCI_ADDR_IO && pbar->io_handle != NULL) {
			ddi_regs_map_free(&pbar->io_handle);
		}
	}
	bzero(&ppt->pptd_bars, sizeof (ppt->pptd_bars));
}
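
/*
 * Each "assigned-addresses" entry is a pci_regspec_t, per the OpenFirmware
 * PCI binding: the low byte of pci_phys_hi carries the config-space offset
 * of the BAR being described (e.g. 0x10 for BAR0), which PCI_REG_REG_G()
 * extracts below, while pci_phys_mid/pci_phys_low and
 * pci_size_hi/pci_size_low carry the assigned 64-bit address and size.
 */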
static int
ppt_bar_crawl(struct pptdev *ppt)
{
	pci_regspec_t *regs;
	uint_t rcount, i;
	int err = 0, rlen;

	if (ddi_getlongprop(DDI_DEV_T_ANY, ppt->pptd_dip, DDI_PROP_DONTPASS,
	    "assigned-addresses", (caddr_t)&regs, &rlen) != DDI_PROP_SUCCESS) {
		return (EIO);
	}

	VERIFY3S(rlen, >, 0);
	rcount = rlen / sizeof (pci_regspec_t);
	for (i = 0; i < rcount; i++) {
		pci_regspec_t *reg = &regs[i];
		struct pptbar *pbar;
		uint_t bar, rnum;

		DTRACE_PROBE1(ppt__crawl__reg, pci_regspec_t *, reg);
		bar = PCI_REG_REG_G(reg->pci_phys_hi);
		if (!BAR_VALID(bar)) {
			continue;
		}

		rnum = BAR_TO_IDX(bar);
		pbar = &ppt->pptd_bars[rnum];
		/* is this somehow already populated? */
		if (pbar->base != 0 || pbar->size != 0) {
			err = EEXIST;
			break;
		}

		/*
		 * Register 0 corresponds to the PCI config space.
		 * The registers which match the assigned-addresses list are
		 * offset by 1.
		 */
		pbar->ddireg = i + 1;

		pbar->type = reg->pci_phys_hi & PCI_ADDR_MASK;
		pbar->base = ((uint64_t)reg->pci_phys_mid << 32) |
		    (uint64_t)reg->pci_phys_low;
		pbar->size = ((uint64_t)reg->pci_size_hi << 32) |
		    (uint64_t)reg->pci_size_low;
		if (pbar->type == PCI_ADDR_IO) {
			err = ddi_regs_map_setup(ppt->pptd_dip, rnum,
			    &pbar->io_ptr, 0, 0, &ppt_attr, &pbar->io_handle);
			if (err != 0) {
				break;
			}
		}
	}
	kmem_free(regs, rlen);

	if (err != 0) {
		ppt_bar_wipe(ppt);
	}
	return (err);
}

static boolean_t
ppt_bar_verify_mmio(struct pptdev *ppt, uint64_t base, uint64_t size)
{
	const uint64_t map_end = base + size;

	/* Zero-length or overflow mappings are not valid */
	if (map_end <= base) {
		return (B_FALSE);
	}
	/* MMIO bounds should be page-aligned */
	if ((base & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) {
		return (B_FALSE);
	}

	for (uint_t i = 0; i < PCI_BASE_NUM; i++) {
		const struct pptbar *bar = &ppt->pptd_bars[i];
		const uint64_t bar_end = bar->base + bar->size;

		/* Only memory BARs can be mapped */
		if (bar->type != PCI_ADDR_MEM32 &&
		    bar->type != PCI_ADDR_MEM64) {
			continue;
		}

		/* Does the mapping fit within this BAR? */
		if (base < bar->base || base >= bar_end ||
		    map_end < bar->base || map_end > bar_end) {
			continue;
		}

		/* This BAR satisfies the provided map */
		return (B_TRUE);
	}
	return (B_FALSE);
}

static boolean_t
ppt_toggle_bar(struct pptdev *ppt, boolean_t enable)
{
	/*
	 * Enable/disable bus mastering and BAR decoding based on the BAR
	 * configuration. Bhyve emulates the COMMAND register so we won't see
	 * the bits changing there.
	 */
	ddi_acc_handle_t hdl;
	uint16_t cmd;

	if (pci_config_setup(ppt->pptd_dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);
	cmd = pci_config_get16(hdl, PCI_CONF_COMM);

	if (enable) {
		cmd |= PCI_COMM_ME;

		for (uint_t i = 0; i < PCI_BASE_NUM; i++) {
			const struct pptbar *bar = &ppt->pptd_bars[i];

			switch (bar->type) {
			case PCI_ADDR_MEM32:
			case PCI_ADDR_MEM64:
				cmd |= PCI_COMM_MAE;
				break;
			case PCI_ADDR_IO:
				cmd |= PCI_COMM_IO;
				break;
			}
		}
	} else {
		cmd &= ~(PCI_COMM_ME | PCI_COMM_MAE | PCI_COMM_IO);
	}

	pci_config_put16(hdl, PCI_CONF_COMM, cmd);
	pci_config_teardown(&hdl);

	return (B_TRUE);
}

static int
ppt_ddi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	struct pptdev *ppt = NULL;
	int inst;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	inst = ddi_get_instance(dip);

	if (ddi_soft_state_zalloc(ppt_state, inst) != DDI_SUCCESS) {
		goto fail;
	}
	VERIFY(ppt = ddi_get_soft_state(ppt_state, inst));
	ppt->pptd_dip = dip;
	ddi_set_driver_private(dip, ppt);

	if (pci_config_setup(dip, &ppt->pptd_cfg) != DDI_SUCCESS) {
		goto fail;
	}
	if (ppt_bar_crawl(ppt) != 0) {
		goto fail;
	}
	if (ddi_create_minor_node(dip, PPT_MINOR_NAME, S_IFCHR, inst,
	    DDI_PSEUDO, 0) != DDI_SUCCESS) {
		goto fail;
	}

	ppt_toggle_bar(ppt, B_FALSE);

	mutex_enter(&pptdev_mtx);
	list_insert_tail(&pptdev_list, ppt);
	mutex_exit(&pptdev_mtx);

	return (DDI_SUCCESS);

fail:
	if (ppt != NULL) {
		ddi_remove_minor_node(dip, NULL);
		if (ppt->pptd_cfg != NULL) {
			pci_config_teardown(&ppt->pptd_cfg);
		}
		ppt_bar_wipe(ppt);
		ddi_soft_state_free(ppt_state, inst);
	}
	return (DDI_FAILURE);
}

static int
ppt_ddi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct pptdev *ppt;
	int inst;

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ppt = ddi_get_driver_private(dip);
	inst = ddi_get_instance(dip);

	ASSERT3P(ddi_get_soft_state(ppt_state, inst), ==, ppt);

	mutex_enter(&pptdev_mtx);
	if (ppt->vm != NULL) {
		mutex_exit(&pptdev_mtx);
		return (DDI_FAILURE);
	}
	list_remove(&pptdev_list, ppt);
	mutex_exit(&pptdev_mtx);

	ddi_remove_minor_node(dip, PPT_MINOR_NAME);
	ppt_bar_wipe(ppt);
	pci_config_teardown(&ppt->pptd_cfg);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(ppt_state, inst);

	return (DDI_SUCCESS);
}

static int
ppt_ddi_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	int error = DDI_FAILURE;
	int inst = getminor((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO: {
		struct pptdev *ppt = ddi_get_soft_state(ppt_state, inst);

		if (ppt != NULL) {
			*result = (void *)ppt->pptd_dip;
			error = DDI_SUCCESS;
		}
		break;
	}
	case DDI_INFO_DEVT2INSTANCE: {
		*result = (void *)(uintptr_t)inst;
		error = DDI_SUCCESS;
		break;
	}
	default:
		break;
	}
	return (error);
}
static struct cb_ops ppt_cb_ops = {
	ppt_open,
	nulldev,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	ppt_ioctl,
	ppt_devmap,	/* devmap */
	NULL,		/* mmap */
	NULL,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,
	NULL,
	D_NEW | D_MP | D_64BIT | D_DEVMAP,
	CB_REV
};

static struct dev_ops ppt_ops = {
	DEVO_REV,
	0,
	ppt_ddi_info,
	nulldev,	/* identify */
	nulldev,	/* probe */
	ppt_ddi_attach,
	ppt_ddi_detach,
	nodev,		/* reset */
	&ppt_cb_ops,
	(struct bus_ops *)NULL
};

static struct modldrv modldrv = {
	&mod_driverops,
	"bhyve pci pass-thru",
	&ppt_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int error;

	mutex_init(&pptdev_mtx, NULL, MUTEX_DRIVER, NULL);
	list_create(&pptdev_list, sizeof (struct pptdev),
	    offsetof(struct pptdev, pptd_node));

	error = ddi_soft_state_init(&ppt_state, sizeof (struct pptdev), 0);
	if (error) {
		goto fail;
	}

	error = mod_install(&modlinkage);

	ppt_major = ddi_name_to_major("ppt");
fail:
	if (error) {
		ddi_soft_state_fini(&ppt_state);
	}
	return (error);
}

int
_fini(void)
{
	int error;

	error = mod_remove(&modlinkage);
	if (error)
		return (error);
	ddi_soft_state_fini(&ppt_state);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
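
/*
 * Quiescing a device ahead of an FLR: with bus mastering already disabled
 * by the caller, poll the Transactions Pending bit in the PCIe Device
 * Status register until the device reports idle or the time budget
 * (derived from the completion timeout, below) is exhausted.
 */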
static boolean_t
ppt_wait_for_pending_txn(dev_info_t *dip, uint_t max_delay_us)
{
	uint16_t cap_ptr, devsts;
	ddi_acc_handle_t hdl;

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS) {
		pci_config_teardown(&hdl);
		return (B_FALSE);
	}

	devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
	while ((devsts & PCIE_DEVSTS_TRANS_PENDING) != 0) {
		if (max_delay_us == 0) {
			pci_config_teardown(&hdl);
			return (B_FALSE);
		}

		/* Poll once every 100 milliseconds up to the timeout. */
		if (max_delay_us > 100000) {
			delay(drv_usectohz(100000));
			max_delay_us -= 100000;
		} else {
			delay(drv_usectohz(max_delay_us));
			max_delay_us = 0;
		}
		devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
	}

	pci_config_teardown(&hdl);
	return (B_TRUE);
}

static uint_t
ppt_max_completion_tmo_us(dev_info_t *dip)
{
	uint_t timo = 0;
	uint16_t cap_ptr;
	ddi_acc_handle_t hdl;
	uint_t timo_ranges[] = {	/* timeout ranges */
		50000,		/* 50ms */
		100,		/* 100us */
		10000,		/* 10ms */
		0,
		0,
		55000,		/* 55ms */
		210000,		/* 210ms */
		0,
		0,
		900000,		/* 900ms */
		3500000,	/* 3.5s */
		0,
		0,
		13000000,	/* 13s */
		64000000,	/* 64s */
		0
	};

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (50000);	/* default 50ms */

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
		goto out;

	if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_PCIECAP) &
	    PCIE_PCIECAP_VER_MASK) < PCIE_PCIECAP_VER_2_0)
		goto out;

	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP2) &
	    PCIE_DEVCAP2_COM_TO_RANGE_MASK) == 0)
		goto out;

	timo = timo_ranges[PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL2) &
	    PCIE_DEVCTL2_COM_TO_RANGE_MASK];

out:
	if (timo == 0)
		timo = 50000;	/* default 50ms */

	pci_config_teardown(&hdl);
	return (timo);
}

static boolean_t
ppt_flr(dev_info_t *dip, boolean_t force)
{
	uint16_t cap_ptr, ctl, cmd;
	ddi_acc_handle_t hdl;
	uint_t compl_delay = 0, max_delay_us;

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
		goto fail;

	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP) & PCIE_DEVCAP_FLR)
	    == 0)
		goto fail;

	max_delay_us = MAX(ppt_max_completion_tmo_us(dip), 10000);

	/*
	 * Disable busmastering to prevent generation of new transactions
	 * while waiting for the device to go idle.  If the idle timeout
	 * fails, the command register is restored, which will re-enable
	 * busmastering.
	 */
	cmd = pci_config_get16(hdl, PCI_CONF_COMM);
	pci_config_put16(hdl, PCI_CONF_COMM, cmd & ~PCI_COMM_ME);
	if (!ppt_wait_for_pending_txn(dip, max_delay_us)) {
		if (!force) {
			pci_config_put16(hdl, PCI_CONF_COMM, cmd);
			goto fail;
		}
		dev_err(dip, CE_WARN,
		    "?Resetting with transactions pending after %u us\n",
		    max_delay_us);

		/*
		 * Extend the post-FLR delay to cover the maximum Completion
		 * Timeout delay of anything in flight during the FLR delay.
		 * Enforce a minimum delay of at least 10ms.
		 */
		compl_delay = MAX(10, (ppt_max_completion_tmo_us(dip) / 1000));
	}

	/* Initiate the reset. */
	ctl = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL);
	(void) PCI_CAP_PUT16(hdl, 0, cap_ptr, PCIE_DEVCTL,
	    ctl | PCIE_DEVCTL_INITIATE_FLR);

	/* Wait for at least 100ms */
	delay(drv_usectohz((100 + compl_delay) * 1000));

	pci_config_teardown(&hdl);
	return (B_TRUE);

fail:
	/*
	 * TODO: If the FLR fails for some reason, we should attempt a reset
	 * using the PCI power management facilities (if possible).
	 */
	pci_config_teardown(&hdl);
	return (B_FALSE);
}

/*
 * Resolve a caller-supplied file descriptor to its pptdev.  The descriptor
 * must refer to a ppt minor node (checked against the driver's major
 * number), and the device must be owned by 'vm'; passing a NULL vm requires
 * the device to be unowned.  On success the file remains held, so the
 * caller is responsible for releasef() once finished with the device.
 */
static int
ppt_findf(struct vm *vm, int fd, struct pptdev **pptp)
{
	struct pptdev *ppt = NULL;
	file_t *fp;
	vattr_t va;
	int err = 0;

	ASSERT(MUTEX_HELD(&pptdev_mtx));

	if ((fp = getf(fd)) == NULL)
		return (EBADF);

	va.va_mask = AT_RDEV;
	if (VOP_GETATTR(fp->f_vnode, &va, NO_FOLLOW, fp->f_cred, NULL) != 0 ||
	    getmajor(va.va_rdev) != ppt_major) {
		err = EBADF;
		goto fail;
	}

	ppt = ddi_get_soft_state(ppt_state, getminor(va.va_rdev));

	if (ppt == NULL) {
		err = EBADF;
		goto fail;
	}

	if (ppt->vm != vm) {
		err = EBUSY;
		goto fail;
	}

	*pptp = ppt;
	return (0);

fail:
	releasef(fd);
	return (err);
}

static void
ppt_unmap_all_mmio(struct vm *vm, struct pptdev *ppt)
{
	int i;
	struct pptseg *seg;

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		seg = &ppt->mmio[i];
		if (seg->len == 0)
			continue;
		(void) vm_unmap_mmio(vm, seg->gpa, seg->len);
		bzero(seg, sizeof (struct pptseg));
	}
}

static void
ppt_teardown_msi(struct pptdev *ppt)
{
	int i;

	if (ppt->msi.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msi.num_msgs; i++) {
		int intr_cap;

		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			ddi_intr_block_disable(&ppt->msi.inth[i], 1);
		else
			ddi_intr_disable(ppt->msi.inth[i]);

		ddi_intr_remove_handler(ppt->msi.inth[i]);
		ddi_intr_free(ppt->msi.inth[i]);

		ppt->msi.inth[i] = NULL;
	}

	kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
	ppt->msi.inth = NULL;
	ppt->msi.inth_sz = 0;
	ppt->msi.is_fixed = B_FALSE;

	ppt->msi.num_msgs = 0;
}

static void
ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
{
	if (ppt->msix.inth != NULL && ppt->msix.inth[idx] != NULL) {
		int intr_cap;

		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			ddi_intr_block_disable(&ppt->msix.inth[idx], 1);
		else
			ddi_intr_disable(ppt->msix.inth[idx]);

		ddi_intr_remove_handler(ppt->msix.inth[idx]);
	}
}

static void
ppt_teardown_msix(struct pptdev *ppt)
{
	uint_t i;

	if (ppt->msix.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msix.num_msgs; i++)
		ppt_teardown_msix_intr(ppt, i);

	if (ppt->msix.inth) {
		for (i = 0; i < ppt->msix.num_msgs; i++)
			ddi_intr_free(ppt->msix.inth[i]);
		kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
		ppt->msix.inth = NULL;
		ppt->msix.inth_sz = 0;
		kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
		ppt->msix.arg = NULL;
		ppt->msix.arg_sz = 0;
	}

	ppt->msix.num_msgs = 0;
}

int
ppt_assigned_devices(struct vm *vm)
{
	struct pptdev *ppt;
	uint_t num = 0;

	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm == vm) {
			num++;
		}
	}
	mutex_exit(&pptdev_mtx);
	return (num);
}
boolean_t
ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
	struct pptdev *ppt;

	/* XXX: this should probably be restructured to avoid the lock */
	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm != vm) {
			continue;
		}

		for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
			struct pptseg *seg = &ppt->mmio[i];

			if (seg->len == 0)
				continue;
			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) {
				mutex_exit(&pptdev_mtx);
				return (B_TRUE);
			}
		}
	}

	mutex_exit(&pptdev_mtx);
	return (B_FALSE);
}

int
ppt_assign_device(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	/* Passing NULL requires the device to be unowned. */
	err = ppt_findf(NULL, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	if (pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
		err = EIO;
		goto done;
	}
	ppt_flr(ppt->pptd_dip, B_TRUE);

	/*
	 * Restore the device state after reset and then perform another save
	 * so the "pristine" state can be restored when the device is removed
	 * from the guest.
	 */
	if (pci_restore_config_regs(ppt->pptd_dip) != DDI_SUCCESS ||
	    pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
		err = EIO;
		goto done;
	}

	ppt_toggle_bar(ppt, B_TRUE);

	ppt->vm = vm;
	iommu_remove_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
	iommu_add_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
	pf_set_passthru(ppt->pptd_dip, B_TRUE);

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

static void
ppt_reset_pci_power_state(dev_info_t *dip)
{
	ddi_acc_handle_t cfg;
	uint16_t cap_ptr;

	if (pci_config_setup(dip, &cfg) != DDI_SUCCESS)
		return;

	if (PCI_CAP_LOCATE(cfg, PCI_CAP_ID_PM, &cap_ptr) == DDI_SUCCESS) {
		uint16_t val;

		val = PCI_CAP_GET16(cfg, 0, cap_ptr, PCI_PMCSR);
		if ((val & PCI_PMCSR_STATE_MASK) != PCI_PMCSR_D0) {
			val = (val & ~PCI_PMCSR_STATE_MASK) | PCI_PMCSR_D0;
			(void) PCI_CAP_PUT16(cfg, 0, cap_ptr, PCI_PMCSR,
			    val);
		}
	}

	pci_config_teardown(&cfg);
}

static void
ppt_do_unassign(struct pptdev *ppt)
{
	struct vm *vm = ppt->vm;

	ASSERT3P(vm, !=, NULL);
	ASSERT(MUTEX_HELD(&pptdev_mtx));

	ppt_flr(ppt->pptd_dip, B_TRUE);

	/*
	 * Restore from the state saved during device assignment.
	 * If the device power state has been altered, that must be remedied
	 * first, as it will reset register state during the transition.
	 */
	ppt_reset_pci_power_state(ppt->pptd_dip);
	(void) pci_restore_config_regs(ppt->pptd_dip);

	pf_set_passthru(ppt->pptd_dip, B_FALSE);

	ppt_unmap_all_mmio(vm, ppt);
	ppt_teardown_msi(ppt);
	ppt_teardown_msix(ppt);
	iommu_remove_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
	iommu_add_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
	ppt->vm = NULL;
}

int
ppt_unassign_device(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	ppt_do_unassign(ppt);

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

void
ppt_unassign_all(struct vm *vm)
{
	struct pptdev *ppt;

	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm == vm) {
			ppt_do_unassign(ppt);
		}
	}
	mutex_exit(&pptdev_mtx);
}

int
ppt_map_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len,
    vm_paddr_t hpa)
{
	struct pptdev *ppt;
	int err = 0;

	if ((len & PAGEOFFSET) != 0 || len == 0 || (gpa & PAGEOFFSET) != 0 ||
	    (hpa & PAGEOFFSET) != 0 || gpa + len < gpa || hpa + len < hpa) {
		return (EINVAL);
	}

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/*
	 * Ensure that the host-physical range of the requested mapping fits
	 * within one of the MMIO BARs of the device.
	 */
	if (!ppt_bar_verify_mmio(ppt, hpa, len)) {
		err = EINVAL;
		goto done;
	}

	for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
		struct pptseg *seg = &ppt->mmio[i];

		if (seg->len == 0) {
			err = vm_map_mmio(vm, gpa, len, hpa);
			if (err == 0) {
				seg->gpa = gpa;
				seg->len = len;
			}
			goto done;
		}
	}
	err = ENOSPC;

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_unmap_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len)
{
	struct pptdev *ppt;
	int err = 0;
	uint_t i;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		struct pptseg *seg = &ppt->mmio[i];

		if (seg->gpa == gpa && seg->len == len) {
			err = vm_unmap_mmio(vm, seg->gpa, seg->len);
			if (err == 0) {
				seg->gpa = 0;
				seg->len = 0;
			}
			goto out;
		}
	}
	err = ENOENT;
out:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}
static uint_t
pptintr(caddr_t arg, caddr_t unused)
{
	struct pptintr_arg *pptarg = (struct pptintr_arg *)arg;
	struct pptdev *ppt = pptarg->pptdev;

	if (ppt->vm != NULL) {
		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
	} else {
		/*
		 * XXX
		 * This is not expected to happen - panic?
		 */
	}

	/*
	 * For legacy interrupts give other filters a chance in case
	 * the interrupt was not generated by the passthrough device.
	 */
	return (ppt->msi.is_fixed ? DDI_INTR_UNCLAIMED : DDI_INTR_CLAIMED);
}

int
ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr, uint64_t msg,
    int numvec)
{
	int i, msi_count, intr_type;
	struct pptdev *ppt;
	int err = 0;

	if (numvec < 0 || numvec > MAX_MSIMSGS)
		return (EINVAL);

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/* Reject attempts to enable MSI while MSI-X is active. */
	if (ppt->msix.num_msgs != 0 && numvec != 0) {
		err = EBUSY;
		goto done;
	}

	/* Free any allocated resources */
	ppt_teardown_msi(ppt);

	if (numvec == 0) {
		/* nothing more to do */
		goto done;
	}

	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
	    &msi_count) != DDI_SUCCESS) {
		if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_FIXED,
		    &msi_count) != DDI_SUCCESS) {
			err = EINVAL;
			goto done;
		}

		intr_type = DDI_INTR_TYPE_FIXED;
		ppt->msi.is_fixed = B_TRUE;
	} else {
		intr_type = DDI_INTR_TYPE_MSI;
	}

	/*
	 * The device must be capable of supporting the number of vectors
	 * the guest wants to allocate.
	 */
	if (numvec > msi_count) {
		err = EINVAL;
		goto done;
	}

	ppt->msi.inth_sz = numvec * sizeof (ddi_intr_handle_t);
	ppt->msi.inth = kmem_zalloc(ppt->msi.inth_sz, KM_SLEEP);
	if (ddi_intr_alloc(ppt->pptd_dip, ppt->msi.inth, intr_type, 0,
	    numvec, &msi_count, 0) != DDI_SUCCESS) {
		kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
		err = EINVAL;
		goto done;
	}

	/* Verify that we got as many vectors as the guest requested */
	if (numvec != msi_count) {
		ppt_teardown_msi(ppt);
		err = EINVAL;
		goto done;
	}

	/* Set up & enable interrupt handler for each vector. */
	for (i = 0; i < numvec; i++) {
		int res, intr_cap = 0;

		ppt->msi.num_msgs = i + 1;
		ppt->msi.arg[i].pptdev = ppt;
		ppt->msi.arg[i].addr = addr;
		ppt->msi.arg[i].msg_data = msg + i;

		if (ddi_intr_add_handler(ppt->msi.inth[i], pptintr,
		    &ppt->msi.arg[i], NULL) != DDI_SUCCESS)
			break;

		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			res = ddi_intr_block_enable(&ppt->msi.inth[i], 1);
		else
			res = ddi_intr_enable(ppt->msi.inth[i]);

		if (res != DDI_SUCCESS)
			break;
	}
	if (i < numvec) {
		ppt_teardown_msi(ppt);
		err = ENXIO;
	}

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr,
    uint64_t msg, uint32_t vector_control)
{
	struct pptdev *ppt;
	int numvec, alloced;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/* Reject attempts to enable MSI-X while MSI is active. */
	if (ppt->msi.num_msgs != 0) {
		err = EBUSY;
		goto done;
	}

	/*
	 * First-time configuration:
	 *	Allocate the MSI-X table
	 *	Allocate the IRQ resources
	 *	Set up some variables in ppt->msix
	 */
	if (ppt->msix.num_msgs == 0) {
		dev_info_t *dip = ppt->pptd_dip;

		if (ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX,
		    &numvec) != DDI_SUCCESS) {
			err = EINVAL;
			goto done;
		}

		ppt->msix.num_msgs = numvec;

		ppt->msix.arg_sz = numvec * sizeof (ppt->msix.arg[0]);
		ppt->msix.arg = kmem_zalloc(ppt->msix.arg_sz, KM_SLEEP);
		ppt->msix.inth_sz = numvec * sizeof (ddi_intr_handle_t);
		ppt->msix.inth = kmem_zalloc(ppt->msix.inth_sz, KM_SLEEP);

		if (ddi_intr_alloc(dip, ppt->msix.inth, DDI_INTR_TYPE_MSIX, 0,
		    numvec, &alloced, 0) != DDI_SUCCESS) {
			kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
			kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
			ppt->msix.arg = NULL;
			ppt->msix.inth = NULL;
			ppt->msix.arg_sz = ppt->msix.inth_sz = 0;
			err = EINVAL;
			goto done;
		}

		if (numvec != alloced) {
			ppt_teardown_msix(ppt);
			err = EINVAL;
			goto done;
		}
	}

	if (idx < 0 || idx >= ppt->msix.num_msgs) {
		err = EINVAL;
		goto done;
	}

	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		int intr_cap, res;

		/* Tear down the IRQ if it's already set up */
		ppt_teardown_msix_intr(ppt, idx);

		ppt->msix.arg[idx].pptdev = ppt;
		ppt->msix.arg[idx].addr = addr;
		ppt->msix.arg[idx].msg_data = msg;

		/* Setup the MSI-X interrupt */
		if (ddi_intr_add_handler(ppt->msix.inth[idx], pptintr,
		    &ppt->msix.arg[idx], NULL) != DDI_SUCCESS) {
			err = ENXIO;
			goto done;
		}

		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			res = ddi_intr_block_enable(&ppt->msix.inth[idx], 1);
		else
			res = ddi_intr_enable(ppt->msix.inth[idx]);

		if (res != DDI_SUCCESS) {
			ddi_intr_remove_handler(ppt->msix.inth[idx]);
			err = ENXIO;
			goto done;
		}
	} else {
		/* Masked, tear it down if it's already been set up */
		ppt_teardown_msix_intr(ppt, idx);
	}

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
	    msilimit) != DDI_SUCCESS) {
		*msilimit = -1;
	}
	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSIX,
	    msixlimit) != DDI_SUCCESS) {
		*msixlimit = -1;
	}

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}

int
ppt_disable_msix(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	ppt_teardown_msix(ppt);

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}