1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 30 #include <sys/param.h> 31 #include <sys/types.h> 32 #include <sys/mman.h> 33 #include <sys/pciio.h> 34 #include <sys/ioctl.h> 35 #include <sys/stat.h> 36 37 #include <sys/pci.h> 38 39 #include <dev/io/iodev.h> 40 #include <dev/pci/pcireg.h> 41 42 #include <machine/iodev.h> 43 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <err.h> 48 #include <errno.h> 49 #include <fcntl.h> 50 #include <sysexits.h> 51 #include <unistd.h> 52 53 #include <machine/vmm.h> 54 #include <vmmapi.h> 55 #include <sys/ppt_dev.h> 56 57 #include "config.h" 58 #include "debug.h" 59 #include "pci_passthru.h" 60 #include "mem.h" 61 62 #define LEGACY_SUPPORT 1 63 64 #define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1) 65 #define MSIX_CAPLEN 12 66 67 struct passthru_softc { 68 struct pci_devinst *psc_pi; 69 /* ROM is handled like a BAR */ 70 struct pcibar psc_bar[PCI_BARMAX_WITH_ROM + 1]; 71 struct { 72 int capoff; 73 int msgctrl; 74 int emulated; 75 } psc_msi; 76 struct { 77 int capoff; 78 } psc_msix; 79 int pptfd; 80 int msi_limit; 81 int msix_limit; 82 83 cfgread_handler psc_pcir_rhandler[PCI_REGMAX + 1]; 84 cfgwrite_handler psc_pcir_whandler[PCI_REGMAX + 1]; 85 }; 86 87 static int 88 msi_caplen(int msgctrl) 89 { 90 int len; 91 92 len = 10; /* minimum length of msi capability */ 93 94 if (msgctrl & PCIM_MSICTRL_64BIT) 95 len += 4; 96 97 #if 0 98 /* 99 * Ignore the 'mask' and 'pending' bits in the MSI capability. 100 * We'll let the guest manipulate them directly. 101 */ 102 if (msgctrl & PCIM_MSICTRL_VECTOR) 103 len += 10; 104 #endif 105 106 return (len); 107 } 108 109 static uint32_t 110 passthru_read_config(const struct passthru_softc *sc, long reg, int width) 111 { 112 struct ppt_cfg_io pi; 113 114 pi.pci_off = reg; 115 pi.pci_width = width; 116 117 if (ioctl(sc->pptfd, PPT_CFG_READ, &pi) != 0) { 118 return (0); 119 } 120 return (pi.pci_data); 121 } 122 123 static void 124 passthru_write_config(const struct passthru_softc *sc, long reg, int width, 125 uint32_t data) 126 { 127 struct ppt_cfg_io pi; 128 129 pi.pci_off = reg; 130 pi.pci_width = width; 131 pi.pci_data = data; 132 133 (void) ioctl(sc->pptfd, PPT_CFG_WRITE, &pi); 134 } 135 136 static int 137 passthru_get_bar(struct passthru_softc *sc, int bar, enum pcibar_type *type, 138 uint64_t *base, uint64_t *size) 139 { 140 struct ppt_bar_query pb; 141 142 pb.pbq_baridx = bar; 143 144 if (ioctl(sc->pptfd, PPT_BAR_QUERY, &pb) != 0) { 145 return (-1); 146 } 147 148 switch (pb.pbq_type) { 149 case PCI_ADDR_IO: 150 *type = PCIBAR_IO; 151 break; 152 case PCI_ADDR_MEM32: 153 *type = PCIBAR_MEM32; 154 break; 155 case PCI_ADDR_MEM64: 156 *type = PCIBAR_MEM64; 157 break; 158 default: 159 err(1, "unrecognized BAR type: %u\n", pb.pbq_type); 160 break; 161 } 162 163 *base = pb.pbq_base; 164 *size = pb.pbq_size; 165 return (0); 166 } 167 168 static int 169 passthru_dev_open(const char *path, int *pptfdp) 170 { 171 int pptfd; 172 173 if ((pptfd = open(path, O_RDWR)) < 0) { 174 return (errno); 175 } 176 177 /* XXX: verify fd with ioctl? */ 178 *pptfdp = pptfd; 179 return (0); 180 } 181 182 #ifdef LEGACY_SUPPORT 183 static int 184 passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr) 185 { 186 int capoff; 187 struct msicap msicap; 188 u_char *capdata; 189 190 pci_populate_msicap(&msicap, msgnum, nextptr); 191 192 /* 193 * XXX 194 * Copy the msi capability structure in the last 16 bytes of the 195 * config space. This is wrong because it could shadow something 196 * useful to the device. 197 */ 198 capoff = 256 - roundup(sizeof(msicap), 4); 199 capdata = (u_char *)&msicap; 200 for (size_t i = 0; i < sizeof(msicap); i++) 201 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 202 203 return (capoff); 204 } 205 #endif /* LEGACY_SUPPORT */ 206 207 static void 208 passthru_intr_limit(struct passthru_softc *sc, struct msixcap *msixcap) 209 { 210 struct pci_devinst *pi = sc->psc_pi; 211 int off; 212 213 /* Reduce the number of MSI vectors if higher than OS limit */ 214 if ((off = sc->psc_msi.capoff) != 0 && sc->msi_limit != -1) { 215 int msi_limit, mmc; 216 217 msi_limit = 218 sc->msi_limit > 16 ? PCIM_MSICTRL_MMC_32 : 219 sc->msi_limit > 8 ? PCIM_MSICTRL_MMC_16 : 220 sc->msi_limit > 4 ? PCIM_MSICTRL_MMC_8 : 221 sc->msi_limit > 2 ? PCIM_MSICTRL_MMC_4 : 222 sc->msi_limit > 1 ? PCIM_MSICTRL_MMC_2 : 223 PCIM_MSICTRL_MMC_1; 224 mmc = sc->psc_msi.msgctrl & PCIM_MSICTRL_MMC_MASK; 225 226 if (mmc > msi_limit) { 227 sc->psc_msi.msgctrl &= ~PCIM_MSICTRL_MMC_MASK; 228 sc->psc_msi.msgctrl |= msi_limit; 229 pci_set_cfgdata16(pi, off + 2, sc->psc_msi.msgctrl); 230 } 231 } 232 233 /* Reduce the number of MSI-X vectors if higher than OS limit */ 234 if ((off = sc->psc_msix.capoff) != 0 && sc->msix_limit != -1) { 235 if (MSIX_TABLE_COUNT(msixcap->msgctrl) > sc->msix_limit) { 236 msixcap->msgctrl &= ~PCIM_MSIXCTRL_TABLE_SIZE; 237 msixcap->msgctrl |= sc->msix_limit - 1; 238 pci_set_cfgdata16(pi, off + 2, msixcap->msgctrl); 239 } 240 } 241 } 242 243 static int 244 cfginitmsi(struct passthru_softc *sc) 245 { 246 int i, ptr, capptr, cap, sts, caplen, table_size; 247 uint32_t u32; 248 struct pci_devinst *pi = sc->psc_pi; 249 struct msixcap msixcap; 250 char *msixcap_ptr; 251 252 /* 253 * Parse the capabilities and cache the location of the MSI 254 * and MSI-X capabilities. 255 */ 256 sts = passthru_read_config(sc, PCIR_STATUS, 2); 257 if (sts & PCIM_STATUS_CAPPRESENT) { 258 ptr = passthru_read_config(sc, PCIR_CAP_PTR, 1); 259 while (ptr != 0 && ptr != 0xff) { 260 cap = passthru_read_config(sc, ptr + PCICAP_ID, 1); 261 if (cap == PCIY_MSI) { 262 /* 263 * Copy the MSI capability into the config 264 * space of the emulated pci device 265 */ 266 sc->psc_msi.capoff = ptr; 267 sc->psc_msi.msgctrl = passthru_read_config(sc, 268 ptr + 2, 2); 269 sc->psc_msi.emulated = 0; 270 caplen = msi_caplen(sc->psc_msi.msgctrl); 271 capptr = ptr; 272 while (caplen > 0) { 273 u32 = passthru_read_config(sc, 274 capptr, 4); 275 pci_set_cfgdata32(pi, capptr, u32); 276 caplen -= 4; 277 capptr += 4; 278 } 279 } else if (cap == PCIY_MSIX) { 280 /* 281 * Copy the MSI-X capability 282 */ 283 sc->psc_msix.capoff = ptr; 284 caplen = 12; 285 msixcap_ptr = (char *)&msixcap; 286 capptr = ptr; 287 while (caplen > 0) { 288 u32 = passthru_read_config(sc, 289 capptr, 4); 290 memcpy(msixcap_ptr, &u32, 4); 291 pci_set_cfgdata32(pi, capptr, u32); 292 caplen -= 4; 293 capptr += 4; 294 msixcap_ptr += 4; 295 } 296 } 297 ptr = passthru_read_config(sc, ptr + PCICAP_NEXTPTR, 1); 298 } 299 } 300 301 passthru_intr_limit(sc, &msixcap); 302 303 if (sc->psc_msix.capoff != 0) { 304 pi->pi_msix.pba_bar = 305 msixcap.pba_info & PCIM_MSIX_BIR_MASK; 306 pi->pi_msix.pba_offset = 307 msixcap.pba_info & ~PCIM_MSIX_BIR_MASK; 308 pi->pi_msix.table_bar = 309 msixcap.table_info & PCIM_MSIX_BIR_MASK; 310 pi->pi_msix.table_offset = 311 msixcap.table_info & ~PCIM_MSIX_BIR_MASK; 312 pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl); 313 pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count); 314 315 /* Allocate the emulated MSI-X table array */ 316 table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 317 pi->pi_msix.table = calloc(1, table_size); 318 319 /* Mask all table entries */ 320 for (i = 0; i < pi->pi_msix.table_count; i++) { 321 pi->pi_msix.table[i].vector_control |= 322 PCIM_MSIX_VCTRL_MASK; 323 } 324 } 325 326 #ifdef LEGACY_SUPPORT 327 /* 328 * If the passthrough device does not support MSI then craft a 329 * MSI capability for it. We link the new MSI capability at the 330 * head of the list of capabilities. 331 */ 332 if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) { 333 int origptr, msiptr; 334 origptr = passthru_read_config(sc, PCIR_CAP_PTR, 1); 335 msiptr = passthru_add_msicap(pi, 1, origptr); 336 sc->psc_msi.capoff = msiptr; 337 sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2); 338 sc->psc_msi.emulated = 1; 339 pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr); 340 } 341 #endif 342 343 /* Make sure one of the capabilities is present */ 344 if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) 345 return (-1); 346 else 347 return (0); 348 } 349 350 static uint64_t 351 msix_table_read(struct passthru_softc *sc, uint64_t offset, int size) 352 { 353 struct pci_devinst *pi; 354 struct msix_table_entry *entry; 355 uint8_t *src8; 356 uint16_t *src16; 357 uint32_t *src32; 358 uint64_t *src64; 359 uint64_t data; 360 size_t entry_offset; 361 uint32_t table_offset; 362 int index, table_count; 363 364 pi = sc->psc_pi; 365 366 table_offset = pi->pi_msix.table_offset; 367 table_count = pi->pi_msix.table_count; 368 if (offset < table_offset || 369 offset >= table_offset + table_count * MSIX_TABLE_ENTRY_SIZE) { 370 switch (size) { 371 case 1: 372 src8 = (uint8_t *)(pi->pi_msix.mapped_addr + offset); 373 data = *src8; 374 break; 375 case 2: 376 src16 = (uint16_t *)(pi->pi_msix.mapped_addr + offset); 377 data = *src16; 378 break; 379 case 4: 380 src32 = (uint32_t *)(pi->pi_msix.mapped_addr + offset); 381 data = *src32; 382 break; 383 case 8: 384 src64 = (uint64_t *)(pi->pi_msix.mapped_addr + offset); 385 data = *src64; 386 break; 387 default: 388 return (-1); 389 } 390 return (data); 391 } 392 393 offset -= table_offset; 394 index = offset / MSIX_TABLE_ENTRY_SIZE; 395 assert(index < table_count); 396 397 entry = &pi->pi_msix.table[index]; 398 entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 399 400 switch (size) { 401 case 1: 402 src8 = (uint8_t *)((uint8_t *)entry + entry_offset); 403 data = *src8; 404 break; 405 case 2: 406 src16 = (uint16_t *)((uint8_t *)entry + entry_offset); 407 data = *src16; 408 break; 409 case 4: 410 src32 = (uint32_t *)((uint8_t *)entry + entry_offset); 411 data = *src32; 412 break; 413 case 8: 414 src64 = (uint64_t *)((uint8_t *)entry + entry_offset); 415 data = *src64; 416 break; 417 default: 418 return (-1); 419 } 420 421 return (data); 422 } 423 424 static void 425 msix_table_write(struct vmctx *ctx, struct passthru_softc *sc, 426 uint64_t offset, int size, uint64_t data) 427 { 428 struct pci_devinst *pi; 429 struct msix_table_entry *entry; 430 uint8_t *dest8; 431 uint16_t *dest16; 432 uint32_t *dest32; 433 uint64_t *dest64; 434 size_t entry_offset; 435 uint32_t table_offset, vector_control; 436 int index, table_count; 437 438 pi = sc->psc_pi; 439 440 table_offset = pi->pi_msix.table_offset; 441 table_count = pi->pi_msix.table_count; 442 if (offset < table_offset || 443 offset >= table_offset + table_count * MSIX_TABLE_ENTRY_SIZE) { 444 switch (size) { 445 case 1: 446 dest8 = (uint8_t *)(pi->pi_msix.mapped_addr + offset); 447 *dest8 = data; 448 break; 449 case 2: 450 dest16 = (uint16_t *)(pi->pi_msix.mapped_addr + offset); 451 *dest16 = data; 452 break; 453 case 4: 454 dest32 = (uint32_t *)(pi->pi_msix.mapped_addr + offset); 455 *dest32 = data; 456 break; 457 case 8: 458 dest64 = (uint64_t *)(pi->pi_msix.mapped_addr + offset); 459 *dest64 = data; 460 break; 461 } 462 return; 463 } 464 465 offset -= table_offset; 466 index = offset / MSIX_TABLE_ENTRY_SIZE; 467 assert(index < table_count); 468 469 entry = &pi->pi_msix.table[index]; 470 entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 471 472 /* Only 4 byte naturally-aligned writes are supported */ 473 assert(size == 4); 474 assert(entry_offset % 4 == 0); 475 476 vector_control = entry->vector_control; 477 dest32 = (uint32_t *)((uint8_t *)entry + entry_offset); 478 *dest32 = data; 479 /* If MSI-X hasn't been enabled, do nothing */ 480 if (pi->pi_msix.enabled) { 481 /* If the entry is masked, don't set it up */ 482 if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || 483 (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 484 (void) vm_setup_pptdev_msix(ctx, sc->pptfd, 485 index, entry->addr, entry->msg_data, 486 entry->vector_control); 487 } 488 } 489 } 490 491 static int 492 init_msix_table(struct vmctx *ctx __unused, struct passthru_softc *sc) 493 { 494 struct pci_devinst *pi = sc->psc_pi; 495 uint32_t table_size, table_offset; 496 int i; 497 498 i = pci_msix_table_bar(pi); 499 assert(i >= 0); 500 501 /* 502 * Map the region of the BAR containing the MSI-X table. This is 503 * necessary for two reasons: 504 * 1. The PBA may reside in the first or last page containing the MSI-X 505 * table. 506 * 2. While PCI devices are not supposed to use the page(s) containing 507 * the MSI-X table for other purposes, some do in practice. 508 */ 509 510 /* 511 * Mapping pptfd provides access to the BAR containing the MSI-X 512 * table. See ppt_devmap() in usr/src/uts/intel/io/vmm/io/ppt.c 513 * 514 * This maps the whole BAR and then mprotect(PROT_NONE) is used below 515 * to prevent access to pages that don't contain the MSI-X table. 516 * When porting this, it was tempting to just map the MSI-X table pages 517 * but that would mean updating everywhere that assumes that 518 * pi->pi_msix.mapped_addr points to the start of the BAR. For now, 519 * keep closer to upstream. 520 */ 521 pi->pi_msix.mapped_size = sc->psc_bar[i].size; 522 pi->pi_msix.mapped_addr = (uint8_t *)mmap(NULL, pi->pi_msix.mapped_size, 523 PROT_READ | PROT_WRITE, MAP_SHARED, sc->pptfd, 0); 524 if (pi->pi_msix.mapped_addr == MAP_FAILED) { 525 warn("Failed to map MSI-X table BAR on %d", sc->pptfd); 526 return (-1); 527 } 528 529 table_offset = rounddown2(pi->pi_msix.table_offset, 4096); 530 531 table_size = pi->pi_msix.table_offset - table_offset; 532 table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 533 table_size = roundup2(table_size, 4096); 534 535 /* 536 * Unmap any pages not containing the table, we do not need to emulate 537 * accesses to them. Avoid releasing address space to help ensure that 538 * a buggy out-of-bounds access causes a crash. 539 */ 540 if (table_offset != 0) 541 if (mprotect((caddr_t)pi->pi_msix.mapped_addr, table_offset, 542 PROT_NONE) != 0) 543 warn("Failed to unmap MSI-X table BAR region"); 544 if (table_offset + table_size != pi->pi_msix.mapped_size) 545 if (mprotect((caddr_t) 546 pi->pi_msix.mapped_addr + table_offset + table_size, 547 pi->pi_msix.mapped_size - (table_offset + table_size), 548 PROT_NONE) != 0) 549 warn("Failed to unmap MSI-X table BAR region"); 550 551 return (0); 552 } 553 554 static int 555 cfginitbar(struct vmctx *ctx __unused, struct passthru_softc *sc) 556 { 557 struct pci_devinst *pi = sc->psc_pi; 558 uint_t i; 559 560 /* 561 * Initialize BAR registers 562 */ 563 for (i = 0; i <= PCI_BARMAX; i++) { 564 enum pcibar_type bartype; 565 uint64_t base, size; 566 int error; 567 568 if (passthru_get_bar(sc, i, &bartype, &base, &size) != 0) { 569 continue; 570 } 571 572 if (bartype != PCIBAR_IO) { 573 if (((base | size) & PAGE_MASK) != 0) { 574 warnx("passthru device %d BAR %d: " 575 "base %#lx or size %#lx not page aligned\n", 576 sc->pptfd, i, base, size); 577 return (-1); 578 } 579 } 580 581 /* Cache information about the "real" BAR */ 582 sc->psc_bar[i].type = bartype; 583 sc->psc_bar[i].size = size; 584 sc->psc_bar[i].addr = base; 585 sc->psc_bar[i].lobits = 0; 586 587 /* Allocate the BAR in the guest I/O or MMIO space */ 588 error = pci_emul_alloc_bar(pi, i, bartype, size); 589 if (error) 590 return (-1); 591 592 /* Use same lobits as physical bar */ 593 uint8_t lobits = passthru_read_config(sc, PCIR_BAR(i), 0x01); 594 if (bartype == PCIBAR_MEM32 || bartype == PCIBAR_MEM64) { 595 lobits &= ~PCIM_BAR_MEM_BASE; 596 } else { 597 lobits &= ~PCIM_BAR_IO_BASE; 598 } 599 sc->psc_bar[i].lobits = lobits; 600 pi->pi_bar[i].lobits = lobits; 601 602 /* 603 * 64-bit BAR takes up two slots so skip the next one. 604 */ 605 if (bartype == PCIBAR_MEM64) { 606 i++; 607 assert(i <= PCI_BARMAX); 608 sc->psc_bar[i].type = PCIBAR_MEMHI64; 609 } 610 } 611 return (0); 612 } 613 614 static int 615 cfginit(struct vmctx *ctx, struct passthru_softc *sc) 616 { 617 int error; 618 struct pci_devinst *pi = sc->psc_pi; 619 uint16_t cmd; 620 uint8_t intline, intpin; 621 622 /* 623 * Copy physical PCI header to virtual config space. COMMAND, 624 * INTLINE and INTPIN shouldn't be aligned with their physical value 625 * and they are already set by pci_emul_init(). 626 */ 627 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 628 intline = pci_get_cfgdata8(pi, PCIR_INTLINE); 629 intpin = pci_get_cfgdata8(pi, PCIR_INTPIN); 630 for (int i = 0; i <= PCIR_MAXLAT; i += 4) { 631 #ifdef __FreeBSD__ 632 pci_set_cfgdata32(pi, i, read_config(&sc->psc_sel, i, 4)); 633 #else 634 pci_set_cfgdata32(pi, i, passthru_read_config(sc, i, 4)); 635 #endif 636 } 637 638 pci_set_cfgdata16(pi, PCIR_COMMAND, cmd); 639 pci_set_cfgdata8(pi, PCIR_INTLINE, intline); 640 pci_set_cfgdata8(pi, PCIR_INTPIN, intpin); 641 642 if (cfginitmsi(sc) != 0) { 643 warnx("failed to initialize MSI for PCI %d", sc->pptfd); 644 return (-1); 645 } 646 647 if (cfginitbar(ctx, sc) != 0) { 648 warnx("failed to initialize BARs for PCI %d", sc->pptfd); 649 return (-1); 650 } 651 652 if (pci_msix_table_bar(pi) >= 0) { 653 error = init_msix_table(ctx, sc); 654 if (error != 0) { 655 warnx("failed to initialize MSI-X table for PCI %d", 656 sc->pptfd); 657 goto done; 658 } 659 } 660 661 /* Emulate most PCI header register. */ 662 if ((error = set_pcir_handler(sc, 0, PCIR_MAXLAT + 1, 663 passthru_cfgread_emulate, passthru_cfgwrite_emulate)) != 0) 664 goto done; 665 666 /* Allow access to the physical status register. */ 667 if ((error = set_pcir_handler(sc, PCIR_COMMAND, 0x04, NULL, NULL)) != 0) 668 goto done; 669 670 error = 0; /* success */ 671 done: 672 return (error); 673 } 674 675 int 676 set_pcir_handler(struct passthru_softc *sc, int reg, int len, 677 cfgread_handler rhandler, cfgwrite_handler whandler) 678 { 679 if (reg > PCI_REGMAX || reg + len > PCI_REGMAX + 1) 680 return (-1); 681 682 for (int i = reg; i < reg + len; ++i) { 683 assert(sc->psc_pcir_rhandler[i] == NULL || rhandler == NULL); 684 assert(sc->psc_pcir_whandler[i] == NULL || whandler == NULL); 685 sc->psc_pcir_rhandler[i] = rhandler; 686 sc->psc_pcir_whandler[i] = whandler; 687 } 688 689 return (0); 690 } 691 692 static int 693 passthru_legacy_config(nvlist_t *nvl, const char *opt) 694 { 695 char *config, *name, *tofree, *value; 696 697 if (opt == NULL) 698 return (0); 699 700 config = tofree = strdup(opt); 701 while ((name = strsep(&config, ",")) != NULL) { 702 value = strchr(name, '='); 703 if (value != NULL) { 704 *value++ = '\0'; 705 set_config_value_node(nvl, name, value); 706 } else { 707 if (strncmp(name, "/dev/ppt", 8) != 0) { 708 EPRINTLN("passthru: invalid path \"%s\"", name); 709 free(tofree); 710 return (-1); 711 } 712 set_config_value_node(nvl, "path", name); 713 } 714 } 715 free(tofree); 716 return (0); 717 } 718 719 static int 720 passthru_init_rom(struct vmctx *const ctx __unused, 721 struct passthru_softc *const sc, const char *const romfile) 722 { 723 if (romfile == NULL) { 724 return (0); 725 } 726 727 const int fd = open(romfile, O_RDONLY); 728 if (fd < 0) { 729 warnx("%s: can't open romfile \"%s\"", __func__, romfile); 730 return (-1); 731 } 732 733 struct stat sbuf; 734 if (fstat(fd, &sbuf) < 0) { 735 warnx("%s: can't fstat romfile \"%s\"", __func__, romfile); 736 close(fd); 737 return (-1); 738 } 739 const uint64_t rom_size = sbuf.st_size; 740 741 void *const rom_data = mmap(NULL, rom_size, PROT_READ, MAP_SHARED, fd, 742 0); 743 if (rom_data == MAP_FAILED) { 744 warnx("%s: unable to mmap romfile \"%s\" (%d)", __func__, 745 romfile, errno); 746 close(fd); 747 return (-1); 748 } 749 750 void *rom_addr; 751 int error = pci_emul_alloc_rom(sc->psc_pi, rom_size, &rom_addr); 752 if (error) { 753 warnx("%s: failed to alloc rom segment", __func__); 754 munmap(rom_data, rom_size); 755 close(fd); 756 return (error); 757 } 758 memcpy(rom_addr, rom_data, rom_size); 759 760 sc->psc_bar[PCI_ROM_IDX].type = PCIBAR_ROM; 761 sc->psc_bar[PCI_ROM_IDX].addr = (uint64_t)rom_addr; 762 sc->psc_bar[PCI_ROM_IDX].size = rom_size; 763 764 munmap(rom_data, rom_size); 765 close(fd); 766 767 return (0); 768 } 769 770 static int 771 passthru_init(struct pci_devinst *pi, nvlist_t *nvl) 772 { 773 int error, memflags, pptfd; 774 struct passthru_softc *sc; 775 const char *path; 776 struct vmctx *ctx = pi->pi_vmctx; 777 778 pptfd = -1; 779 sc = NULL; 780 error = 1; 781 782 memflags = vm_get_memflags(ctx); 783 if (!(memflags & VM_MEM_F_WIRED)) { 784 warnx("passthru requires guest memory to be wired"); 785 goto done; 786 } 787 788 path = get_config_value_node(nvl, "path"); 789 if (path == NULL || passthru_dev_open(path, &pptfd) != 0) { 790 warnx("invalid passthru options"); 791 goto done; 792 } 793 794 if (vm_assign_pptdev(ctx, pptfd) != 0) { 795 warnx("PCI device at %d is not using the ppt driver", pptfd); 796 goto done; 797 } 798 799 sc = calloc(1, sizeof(struct passthru_softc)); 800 801 pi->pi_arg = sc; 802 sc->psc_pi = pi; 803 sc->pptfd = pptfd; 804 805 if ((error = vm_get_pptdev_limits(ctx, pptfd, &sc->msi_limit, 806 &sc->msix_limit)) != 0) 807 goto done; 808 809 #ifndef __FreeBSD__ 810 /* 811 * If this function uses legacy interrupt messages, then request one for 812 * the guest in case drivers expect to see it. Note that nothing in the 813 * hypervisor is currently wired up do deliver such an interrupt should 814 * the guest actually rely upon it. 815 */ 816 uint8_t intpin = passthru_read_config(sc, PCIR_INTPIN, 1); 817 if (intpin > 0 && intpin < 5) 818 pci_lintr_request(sc->psc_pi); 819 #endif 820 821 /* initialize config space */ 822 if ((error = cfginit(ctx, sc)) != 0) 823 goto done; 824 825 /* initialize ROM */ 826 if ((error = passthru_init_rom(ctx, sc, 827 get_config_value_node(nvl, "rom"))) != 0) { 828 goto done; 829 } 830 831 done: 832 if (error) { 833 free(sc); 834 if (pptfd != -1) 835 vm_unassign_pptdev(ctx, pptfd); 836 } 837 return (error); 838 } 839 840 static int 841 msicap_access(struct passthru_softc *sc, int coff) 842 { 843 int caplen; 844 845 if (sc->psc_msi.capoff == 0) 846 return (0); 847 848 caplen = msi_caplen(sc->psc_msi.msgctrl); 849 850 if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen) 851 return (1); 852 else 853 return (0); 854 } 855 856 static int 857 msixcap_access(struct passthru_softc *sc, int coff) 858 { 859 if (sc->psc_msix.capoff == 0) 860 return (0); 861 862 return (coff >= sc->psc_msix.capoff && 863 coff < sc->psc_msix.capoff + MSIX_CAPLEN); 864 } 865 866 static int 867 passthru_cfgread_default(struct passthru_softc *sc, 868 struct pci_devinst *pi __unused, int coff, int bytes, uint32_t *rv) 869 { 870 /* 871 * MSI capability is emulated. 872 */ 873 if (msicap_access(sc, coff) || msixcap_access(sc, coff)) 874 return (-1); 875 876 /* 877 * MSI-X is also emulated since a limit on interrupts may be imposed by 878 * the OS, altering the perceived register state. 879 */ 880 if (msixcap_access(sc, coff)) 881 return (-1); 882 883 /* 884 * Emulate the command register. If a single read reads both the 885 * command and status registers, read the status register from the 886 * device's config space. 887 */ 888 if (coff == PCIR_COMMAND) { 889 if (bytes <= 2) 890 return (-1); 891 *rv = passthru_read_config(sc, PCIR_STATUS, 2) << 16 | 892 pci_get_cfgdata16(pi, PCIR_COMMAND); 893 return (0); 894 } 895 896 /* Everything else just read from the device's config space */ 897 *rv = passthru_read_config(sc, coff, bytes); 898 899 return (0); 900 } 901 902 int 903 passthru_cfgread_emulate(struct passthru_softc *sc __unused, 904 struct pci_devinst *pi __unused, int coff __unused, int bytes __unused, 905 uint32_t *rv __unused) 906 { 907 return (-1); 908 } 909 910 static int 911 passthru_cfgread(struct pci_devinst *pi, int coff, int bytes, uint32_t *rv) 912 { 913 struct passthru_softc *sc; 914 915 sc = pi->pi_arg; 916 917 if (sc->psc_pcir_rhandler[coff] != NULL) 918 return (sc->psc_pcir_rhandler[coff](sc, pi, coff, bytes, rv)); 919 920 return (passthru_cfgread_default(sc, pi, coff, bytes, rv)); 921 } 922 923 static int 924 passthru_cfgwrite_default(struct passthru_softc *sc, struct pci_devinst *pi, 925 int coff, int bytes, uint32_t val) 926 { 927 int error, msix_table_entries, i; 928 uint16_t cmd_old; 929 struct vmctx *ctx = pi->pi_vmctx; 930 931 /* 932 * MSI capability is emulated 933 */ 934 if (msicap_access(sc, coff)) { 935 pci_emul_capwrite(pi, coff, bytes, val, sc->psc_msi.capoff, 936 PCIY_MSI); 937 error = vm_setup_pptdev_msi(ctx, sc->pptfd, 938 pi->pi_msi.addr, pi->pi_msi.msg_data, pi->pi_msi.maxmsgnum); 939 if (error != 0) 940 err(1, "vm_setup_pptdev_msi"); 941 return (0); 942 } 943 944 if (msixcap_access(sc, coff)) { 945 pci_emul_capwrite(pi, coff, bytes, val, sc->psc_msix.capoff, 946 PCIY_MSIX); 947 if (pi->pi_msix.enabled) { 948 msix_table_entries = pi->pi_msix.table_count; 949 for (i = 0; i < msix_table_entries; i++) { 950 error = vm_setup_pptdev_msix(ctx, 951 sc->pptfd, i, 952 pi->pi_msix.table[i].addr, 953 pi->pi_msix.table[i].msg_data, 954 pi->pi_msix.table[i].vector_control); 955 956 if (error) 957 err(1, "vm_setup_pptdev_msix"); 958 } 959 } else { 960 error = vm_disable_pptdev_msix(ctx, sc->pptfd); 961 if (error) 962 err(1, "vm_disable_pptdev_msix"); 963 } 964 return (0); 965 } 966 967 /* 968 * The command register is emulated, but the status register 969 * is passed through. 970 */ 971 if (coff == PCIR_COMMAND) { 972 if (bytes <= 2) 973 return (-1); 974 975 /* Update the physical status register. */ 976 passthru_write_config(sc, PCIR_STATUS, 2, val >> 16); 977 978 /* Update the virtual command register. */ 979 cmd_old = pci_get_cfgdata16(pi, PCIR_COMMAND); 980 pci_set_cfgdata16(pi, PCIR_COMMAND, val & 0xffff); 981 pci_emul_cmd_changed(pi, cmd_old); 982 return (0); 983 } 984 985 passthru_write_config(sc, coff, bytes, val); 986 987 return (0); 988 } 989 990 int 991 passthru_cfgwrite_emulate(struct passthru_softc *sc __unused, 992 struct pci_devinst *pi __unused, int coff __unused, int bytes __unused, 993 uint32_t val __unused) 994 { 995 return (-1); 996 } 997 998 static int 999 passthru_cfgwrite(struct pci_devinst *pi, int coff, int bytes, uint32_t val) 1000 { 1001 struct passthru_softc *sc; 1002 1003 sc = pi->pi_arg; 1004 1005 if (sc->psc_pcir_whandler[coff] != NULL) 1006 return (sc->psc_pcir_whandler[coff](sc, pi, coff, bytes, val)); 1007 1008 return (passthru_cfgwrite_default(sc, pi, coff, bytes, val)); 1009 } 1010 1011 static void 1012 passthru_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size, 1013 uint64_t value) 1014 { 1015 struct passthru_softc *sc = pi->pi_arg; 1016 struct vmctx *ctx = pi->pi_vmctx; 1017 1018 if (baridx == pci_msix_table_bar(pi)) { 1019 msix_table_write(ctx, sc, offset, size, value); 1020 } else { 1021 struct ppt_bar_io pbi; 1022 1023 assert(pi->pi_bar[baridx].type == PCIBAR_IO); 1024 1025 pbi.pbi_bar = baridx; 1026 pbi.pbi_width = size; 1027 pbi.pbi_off = offset; 1028 pbi.pbi_data = value; 1029 (void) ioctl(sc->pptfd, PPT_BAR_WRITE, &pbi); 1030 } 1031 } 1032 1033 static uint64_t 1034 passthru_read(struct pci_devinst *pi, int baridx, uint64_t offset, int size) 1035 { 1036 struct passthru_softc *sc = pi->pi_arg; 1037 uint64_t val; 1038 1039 if (baridx == pci_msix_table_bar(pi)) { 1040 val = msix_table_read(sc, offset, size); 1041 } else { 1042 struct ppt_bar_io pbi; 1043 1044 assert(pi->pi_bar[baridx].type == PCIBAR_IO); 1045 1046 pbi.pbi_bar = baridx; 1047 pbi.pbi_width = size; 1048 pbi.pbi_off = offset; 1049 if (ioctl(sc->pptfd, PPT_BAR_READ, &pbi) == 0) { 1050 val = pbi.pbi_data; 1051 } else { 1052 val = 0; 1053 } 1054 } 1055 1056 return (val); 1057 } 1058 1059 static void 1060 passthru_msix_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, 1061 int enabled, uint64_t address) 1062 { 1063 struct passthru_softc *sc; 1064 size_t remaining; 1065 uint32_t table_size, table_offset; 1066 1067 sc = pi->pi_arg; 1068 table_offset = rounddown2(pi->pi_msix.table_offset, 4096); 1069 if (table_offset > 0) { 1070 if (!enabled) { 1071 if (vm_unmap_pptdev_mmio(ctx, sc->pptfd, address, 1072 table_offset) != 0) 1073 warnx("pci_passthru: unmap_pptdev_mmio failed"); 1074 } else { 1075 if (vm_map_pptdev_mmio(ctx, sc->pptfd, address, 1076 table_offset, sc->psc_bar[baridx].addr) != 0) 1077 warnx("pci_passthru: map_pptdev_mmio failed"); 1078 } 1079 } 1080 table_size = pi->pi_msix.table_offset - table_offset; 1081 table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 1082 table_size = roundup2(table_size, 4096); 1083 remaining = pi->pi_bar[baridx].size - table_offset - table_size; 1084 if (remaining > 0) { 1085 address += table_offset + table_size; 1086 if (!enabled) { 1087 if (vm_unmap_pptdev_mmio(ctx, sc->pptfd, address, 1088 remaining) != 0) 1089 warnx("pci_passthru: unmap_pptdev_mmio failed"); 1090 } else { 1091 if (vm_map_pptdev_mmio(ctx, sc->pptfd, address, 1092 remaining, sc->psc_bar[baridx].addr + 1093 table_offset + table_size) != 0) 1094 warnx("pci_passthru: map_pptdev_mmio failed"); 1095 } 1096 } 1097 } 1098 1099 static void 1100 passthru_mmio_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, 1101 int enabled, uint64_t address) 1102 { 1103 struct passthru_softc *sc; 1104 1105 sc = pi->pi_arg; 1106 if (!enabled) { 1107 if (vm_unmap_pptdev_mmio(ctx, sc->pptfd, address, 1108 sc->psc_bar[baridx].size) != 0) 1109 warnx("pci_passthru: unmap_pptdev_mmio failed"); 1110 } else { 1111 if (vm_map_pptdev_mmio(ctx, sc->pptfd, address, 1112 sc->psc_bar[baridx].size, sc->psc_bar[baridx].addr) != 0) 1113 warnx("pci_passthru: map_pptdev_mmio failed"); 1114 } 1115 } 1116 1117 static void 1118 passthru_addr_rom(struct pci_devinst *const pi, const int idx, 1119 const int enabled) 1120 { 1121 const uint64_t addr = pi->pi_bar[idx].addr; 1122 const uint64_t size = pi->pi_bar[idx].size; 1123 1124 if (!enabled) { 1125 if (vm_munmap_memseg(pi->pi_vmctx, addr, size) != 0) { 1126 errx(4, "%s: munmap_memseg @ [%016lx - %016lx] failed", 1127 __func__, addr, addr + size); 1128 } 1129 1130 } else { 1131 if (vm_mmap_memseg(pi->pi_vmctx, addr, VM_PCIROM, 1132 pi->pi_romoffset, size, PROT_READ | PROT_EXEC) != 0) { 1133 errx(4, "%s: mmap_memseg @ [%016lx - %016lx] failed", 1134 __func__, addr, addr + size); 1135 } 1136 } 1137 } 1138 1139 static void 1140 passthru_addr(struct pci_devinst *pi, int baridx, 1141 int enabled, uint64_t address) 1142 { 1143 struct vmctx *ctx = pi->pi_vmctx; 1144 1145 switch (pi->pi_bar[baridx].type) { 1146 case PCIBAR_IO: 1147 /* IO BARs are emulated */ 1148 break; 1149 case PCIBAR_ROM: 1150 passthru_addr_rom(pi, baridx, enabled); 1151 break; 1152 case PCIBAR_MEM32: 1153 case PCIBAR_MEM64: 1154 if (baridx == pci_msix_table_bar(pi)) 1155 passthru_msix_addr(ctx, pi, baridx, enabled, address); 1156 else 1157 passthru_mmio_addr(ctx, pi, baridx, enabled, address); 1158 break; 1159 default: 1160 errx(4, "%s: invalid BAR type %d", __func__, 1161 pi->pi_bar[baridx].type); 1162 } 1163 } 1164 1165 static const struct pci_devemu passthru = { 1166 .pe_emu = "passthru", 1167 .pe_init = passthru_init, 1168 .pe_legacy_config = passthru_legacy_config, 1169 .pe_cfgwrite = passthru_cfgwrite, 1170 .pe_cfgread = passthru_cfgread, 1171 .pe_barwrite = passthru_write, 1172 .pe_barread = passthru_read, 1173 .pe_baraddr = passthru_addr, 1174 }; 1175 PCI_EMUL_SET(passthru); 1176 1177 /* 1178 * This isn't the right place for these functions which, on FreeBSD, can 1179 * read or write from arbitrary devices. They are not supported on illumos; 1180 * not least because bhyve is generally run in a non-global zone which doesn't 1181 * have access to the devinfo tree. 1182 */ 1183 uint32_t 1184 pci_host_read_config(const struct pcisel *sel __unused, long reg __unused, 1185 int width __unused) 1186 { 1187 return (-1); 1188 } 1189 1190 void 1191 pci_host_write_config(const struct pcisel *sel __unused, long reg __unused, 1192 int width __unused, uint32_t data __unused) 1193 { 1194 errx(4, "pci_host_write_config() unimplemented on illumos"); 1195 } 1196