1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #ifndef WITHOUT_CAPSICUM 36 #include <sys/capsicum.h> 37 #endif 38 #include <sys/types.h> 39 #include <sys/mman.h> 40 #include <sys/pciio.h> 41 #include <sys/ioctl.h> 42 43 #include <sys/pci.h> 44 45 #include <dev/io/iodev.h> 46 #include <dev/pci/pcireg.h> 47 48 #include <machine/iodev.h> 49 50 #ifndef WITHOUT_CAPSICUM 51 #include <capsicum_helpers.h> 52 #endif 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <err.h> 57 #include <errno.h> 58 #include <fcntl.h> 59 #include <sysexits.h> 60 #include <unistd.h> 61 62 #include <machine/vmm.h> 63 #include <vmmapi.h> 64 #include <sys/ppt_dev.h> 65 #include "pci_emul.h" 66 #include "mem.h" 67 68 #define LEGACY_SUPPORT 1 69 70 #define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1) 71 #define MSIX_CAPLEN 12 72 73 struct passthru_softc { 74 struct pci_devinst *psc_pi; 75 struct pcibar psc_bar[PCI_BARMAX + 1]; 76 struct { 77 int capoff; 78 int msgctrl; 79 int emulated; 80 } psc_msi; 81 struct { 82 int capoff; 83 } psc_msix; 84 int pptfd; 85 int msi_limit; 86 int msix_limit; 87 }; 88 89 static int 90 msi_caplen(int msgctrl) 91 { 92 int len; 93 94 len = 10; /* minimum length of msi capability */ 95 96 if (msgctrl & PCIM_MSICTRL_64BIT) 97 len += 4; 98 99 #if 0 100 /* 101 * Ignore the 'mask' and 'pending' bits in the MSI capability. 102 * We'll let the guest manipulate them directly. 103 */ 104 if (msgctrl & PCIM_MSICTRL_VECTOR) 105 len += 10; 106 #endif 107 108 return (len); 109 } 110 111 static uint32_t 112 read_config(const struct passthru_softc *sc, long reg, int width) 113 { 114 struct ppt_cfg_io pi; 115 116 pi.pci_off = reg; 117 pi.pci_width = width; 118 119 if (ioctl(sc->pptfd, PPT_CFG_READ, &pi) != 0) { 120 return (0); 121 } 122 return (pi.pci_data); 123 } 124 125 static void 126 write_config(const struct passthru_softc *sc, long reg, int width, 127 uint32_t data) 128 { 129 struct ppt_cfg_io pi; 130 131 pi.pci_off = reg; 132 pi.pci_width = width; 133 pi.pci_data = data; 134 135 (void) ioctl(sc->pptfd, PPT_CFG_WRITE, &pi); 136 } 137 138 static int 139 passthru_get_bar(struct passthru_softc *sc, int bar, enum pcibar_type *type, 140 uint64_t *base, uint64_t *size) 141 { 142 struct ppt_bar_query pb; 143 144 pb.pbq_baridx = bar; 145 146 if (ioctl(sc->pptfd, PPT_BAR_QUERY, &pb) != 0) { 147 return (-1); 148 } 149 150 switch (pb.pbq_type) { 151 case PCI_ADDR_IO: 152 *type = PCIBAR_IO; 153 break; 154 case PCI_ADDR_MEM32: 155 *type = PCIBAR_MEM32; 156 break; 157 case PCI_ADDR_MEM64: 158 *type = PCIBAR_MEM64; 159 break; 160 default: 161 err(1, "unrecognized BAR type: %u\n", pb.pbq_type); 162 break; 163 } 164 165 *base = pb.pbq_base; 166 *size = pb.pbq_size; 167 return (0); 168 } 169 170 static int 171 passthru_dev_open(const char *path, int *pptfdp) 172 { 173 int pptfd; 174 175 if ((pptfd = open(path, O_RDWR)) < 0) { 176 return (errno); 177 } 178 179 /* XXX: verify fd with ioctl? */ 180 *pptfdp = pptfd; 181 return (0); 182 } 183 184 #ifdef LEGACY_SUPPORT 185 static int 186 passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr) 187 { 188 int capoff, i; 189 struct msicap msicap; 190 u_char *capdata; 191 192 pci_populate_msicap(&msicap, msgnum, nextptr); 193 194 /* 195 * XXX 196 * Copy the msi capability structure in the last 16 bytes of the 197 * config space. This is wrong because it could shadow something 198 * useful to the device. 199 */ 200 capoff = 256 - roundup(sizeof(msicap), 4); 201 capdata = (u_char *)&msicap; 202 for (i = 0; i < sizeof(msicap); i++) 203 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 204 205 return (capoff); 206 } 207 #endif /* LEGACY_SUPPORT */ 208 209 static void 210 passthru_intr_limit(struct passthru_softc *sc, struct msixcap *msixcap) 211 { 212 struct pci_devinst *pi = sc->psc_pi; 213 int off; 214 215 /* Reduce the number of MSI vectors if higher than OS limit */ 216 if ((off = sc->psc_msi.capoff) != 0 && sc->msi_limit != -1) { 217 int msi_limit, mmc; 218 219 msi_limit = 220 sc->msi_limit > 16 ? PCIM_MSICTRL_MMC_32 : 221 sc->msi_limit > 8 ? PCIM_MSICTRL_MMC_16 : 222 sc->msi_limit > 4 ? PCIM_MSICTRL_MMC_8 : 223 sc->msi_limit > 2 ? PCIM_MSICTRL_MMC_4 : 224 sc->msi_limit > 1 ? PCIM_MSICTRL_MMC_2 : 225 PCIM_MSICTRL_MMC_1; 226 mmc = sc->psc_msi.msgctrl & PCIM_MSICTRL_MMC_MASK; 227 228 if (mmc > msi_limit) { 229 sc->psc_msi.msgctrl &= ~PCIM_MSICTRL_MMC_MASK; 230 sc->psc_msi.msgctrl |= msi_limit; 231 pci_set_cfgdata16(pi, off + 2, sc->psc_msi.msgctrl); 232 } 233 } 234 235 /* Reduce the number of MSI-X vectors if higher than OS limit */ 236 if ((off = sc->psc_msix.capoff) != 0 && sc->msix_limit != -1) { 237 if (MSIX_TABLE_COUNT(msixcap->msgctrl) > sc->msix_limit) { 238 msixcap->msgctrl &= ~PCIM_MSIXCTRL_TABLE_SIZE; 239 msixcap->msgctrl |= sc->msix_limit - 1; 240 pci_set_cfgdata16(pi, off + 2, msixcap->msgctrl); 241 } 242 } 243 } 244 245 static int 246 cfginitmsi(struct passthru_softc *sc) 247 { 248 int i, ptr, capptr, cap, sts, caplen, table_size; 249 uint32_t u32; 250 struct pci_devinst *pi = sc->psc_pi; 251 struct msixcap msixcap; 252 uint32_t *msixcap_ptr; 253 254 /* 255 * Parse the capabilities and cache the location of the MSI 256 * and MSI-X capabilities. 257 */ 258 sts = read_config(sc, PCIR_STATUS, 2); 259 if (sts & PCIM_STATUS_CAPPRESENT) { 260 ptr = read_config(sc, PCIR_CAP_PTR, 1); 261 while (ptr != 0 && ptr != 0xff) { 262 cap = read_config(sc, ptr + PCICAP_ID, 1); 263 if (cap == PCIY_MSI) { 264 /* 265 * Copy the MSI capability into the config 266 * space of the emulated pci device 267 */ 268 sc->psc_msi.capoff = ptr; 269 sc->psc_msi.msgctrl = read_config(sc, 270 ptr + 2, 2); 271 sc->psc_msi.emulated = 0; 272 caplen = msi_caplen(sc->psc_msi.msgctrl); 273 capptr = ptr; 274 while (caplen > 0) { 275 u32 = read_config(sc, capptr, 4); 276 pci_set_cfgdata32(pi, capptr, u32); 277 caplen -= 4; 278 capptr += 4; 279 } 280 } else if (cap == PCIY_MSIX) { 281 /* 282 * Copy the MSI-X capability 283 */ 284 sc->psc_msix.capoff = ptr; 285 caplen = 12; 286 msixcap_ptr = (uint32_t*) &msixcap; 287 capptr = ptr; 288 while (caplen > 0) { 289 u32 = read_config(sc, capptr, 4); 290 *msixcap_ptr = u32; 291 pci_set_cfgdata32(pi, capptr, u32); 292 caplen -= 4; 293 capptr += 4; 294 msixcap_ptr++; 295 } 296 } 297 ptr = read_config(sc, ptr + PCICAP_NEXTPTR, 1); 298 } 299 } 300 301 passthru_intr_limit(sc, &msixcap); 302 303 if (sc->psc_msix.capoff != 0) { 304 pi->pi_msix.pba_bar = 305 msixcap.pba_info & PCIM_MSIX_BIR_MASK; 306 pi->pi_msix.pba_offset = 307 msixcap.pba_info & ~PCIM_MSIX_BIR_MASK; 308 pi->pi_msix.table_bar = 309 msixcap.table_info & PCIM_MSIX_BIR_MASK; 310 pi->pi_msix.table_offset = 311 msixcap.table_info & ~PCIM_MSIX_BIR_MASK; 312 pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl); 313 pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count); 314 315 /* Allocate the emulated MSI-X table array */ 316 table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 317 pi->pi_msix.table = calloc(1, table_size); 318 319 /* Mask all table entries */ 320 for (i = 0; i < pi->pi_msix.table_count; i++) { 321 pi->pi_msix.table[i].vector_control |= 322 PCIM_MSIX_VCTRL_MASK; 323 } 324 } 325 326 #ifdef LEGACY_SUPPORT 327 /* 328 * If the passthrough device does not support MSI then craft a 329 * MSI capability for it. We link the new MSI capability at the 330 * head of the list of capabilities. 331 */ 332 if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) { 333 int origptr, msiptr; 334 origptr = read_config(sc, PCIR_CAP_PTR, 1); 335 msiptr = passthru_add_msicap(pi, 1, origptr); 336 sc->psc_msi.capoff = msiptr; 337 sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2); 338 sc->psc_msi.emulated = 1; 339 pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr); 340 } 341 #endif 342 343 /* Make sure one of the capabilities is present */ 344 if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) { 345 return (-1); 346 } else { 347 return (0); 348 } 349 } 350 351 static uint64_t 352 passthru_msix_table_read(struct passthru_softc *sc, uint64_t offset, int size) 353 { 354 struct pci_devinst *pi; 355 struct msix_table_entry *entry; 356 uint8_t *src8; 357 uint16_t *src16; 358 uint32_t *src32; 359 uint64_t *src64; 360 uint64_t data; 361 size_t entry_offset; 362 int index; 363 364 pi = sc->psc_pi; 365 if (offset >= pi->pi_msix.pba_offset && 366 offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 367 switch(size) { 368 case 1: 369 src8 = (uint8_t *)(pi->pi_msix.pba_page + offset - 370 pi->pi_msix.pba_page_offset); 371 data = *src8; 372 break; 373 case 2: 374 src16 = (uint16_t *)(pi->pi_msix.pba_page + offset - 375 pi->pi_msix.pba_page_offset); 376 data = *src16; 377 break; 378 case 4: 379 src32 = (uint32_t *)(pi->pi_msix.pba_page + offset - 380 pi->pi_msix.pba_page_offset); 381 data = *src32; 382 break; 383 case 8: 384 src64 = (uint64_t *)(pi->pi_msix.pba_page + offset - 385 pi->pi_msix.pba_page_offset); 386 data = *src64; 387 break; 388 default: 389 return (-1); 390 } 391 return (data); 392 } 393 394 if (offset < pi->pi_msix.table_offset) 395 return (-1); 396 397 offset -= pi->pi_msix.table_offset; 398 index = offset / MSIX_TABLE_ENTRY_SIZE; 399 if (index >= pi->pi_msix.table_count) 400 return (-1); 401 402 entry = &pi->pi_msix.table[index]; 403 entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 404 405 switch(size) { 406 case 1: 407 src8 = (uint8_t *)((void *)entry + entry_offset); 408 data = *src8; 409 break; 410 case 2: 411 src16 = (uint16_t *)((void *)entry + entry_offset); 412 data = *src16; 413 break; 414 case 4: 415 src32 = (uint32_t *)((void *)entry + entry_offset); 416 data = *src32; 417 break; 418 case 8: 419 src64 = (uint64_t *)((void *)entry + entry_offset); 420 data = *src64; 421 break; 422 default: 423 return (-1); 424 } 425 426 return (data); 427 } 428 429 static void 430 passthru_msix_table_write(struct vmctx *ctx, int vcpu, 431 struct passthru_softc *sc, uint64_t offset, int size, uint64_t data) 432 { 433 struct pci_devinst *pi; 434 struct msix_table_entry *entry; 435 uint8_t *dest8; 436 uint16_t *dest16; 437 uint32_t *dest32; 438 uint64_t *dest64; 439 size_t entry_offset; 440 uint32_t vector_control; 441 int index; 442 443 pi = sc->psc_pi; 444 if (offset >= pi->pi_msix.pba_offset && 445 offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 446 switch(size) { 447 case 1: 448 dest8 = (uint8_t *)(pi->pi_msix.pba_page + offset - 449 pi->pi_msix.pba_page_offset); 450 *dest8 = data; 451 break; 452 case 2: 453 dest16 = (uint16_t *)(pi->pi_msix.pba_page + offset - 454 pi->pi_msix.pba_page_offset); 455 *dest16 = data; 456 break; 457 case 4: 458 dest32 = (uint32_t *)(pi->pi_msix.pba_page + offset - 459 pi->pi_msix.pba_page_offset); 460 *dest32 = data; 461 break; 462 case 8: 463 dest64 = (uint64_t *)(pi->pi_msix.pba_page + offset - 464 pi->pi_msix.pba_page_offset); 465 *dest64 = data; 466 break; 467 default: 468 break; 469 } 470 return; 471 } 472 473 if (offset < pi->pi_msix.table_offset) 474 return; 475 476 offset -= pi->pi_msix.table_offset; 477 index = offset / MSIX_TABLE_ENTRY_SIZE; 478 if (index >= pi->pi_msix.table_count) 479 return; 480 481 entry = &pi->pi_msix.table[index]; 482 entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 483 484 /* Only 4 byte naturally-aligned writes are supported */ 485 assert(size == 4); 486 assert(entry_offset % 4 == 0); 487 488 vector_control = entry->vector_control; 489 dest32 = (uint32_t *)((void *)entry + entry_offset); 490 *dest32 = data; 491 /* If MSI-X hasn't been enabled, do nothing */ 492 if (pi->pi_msix.enabled) { 493 /* If the entry is masked, don't set it up */ 494 if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || 495 (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 496 (void) vm_setup_pptdev_msix(ctx, vcpu, sc->pptfd, 497 index, entry->addr, entry->msg_data, 498 entry->vector_control); 499 } 500 } 501 } 502 503 static int 504 init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) 505 { 506 int error, idx; 507 size_t len, remaining; 508 uint32_t table_size, table_offset; 509 uint32_t pba_size, pba_offset; 510 vm_paddr_t start; 511 struct pci_devinst *pi = sc->psc_pi; 512 513 assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0); 514 515 /* 516 * If the MSI-X table BAR maps memory intended for 517 * other uses, it is at least assured that the table 518 * either resides in its own page within the region, 519 * or it resides in a page shared with only the PBA. 520 */ 521 table_offset = rounddown2(pi->pi_msix.table_offset, 4096); 522 523 table_size = pi->pi_msix.table_offset - table_offset; 524 table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 525 table_size = roundup2(table_size, 4096); 526 527 idx = pi->pi_msix.table_bar; 528 start = pi->pi_bar[idx].addr; 529 remaining = pi->pi_bar[idx].size; 530 531 if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) { 532 pba_offset = pi->pi_msix.pba_offset; 533 pba_size = pi->pi_msix.pba_size; 534 if (pba_offset >= table_offset + table_size || 535 table_offset >= pba_offset + pba_size) { 536 /* 537 * If the PBA does not share a page with the MSI-x 538 * tables, no PBA emulation is required. 539 */ 540 pi->pi_msix.pba_page = NULL; 541 pi->pi_msix.pba_page_offset = 0; 542 } else { 543 /* 544 * The PBA overlaps with either the first or last 545 * page of the MSI-X table region. Map the 546 * appropriate page. 547 */ 548 if (pba_offset <= table_offset) 549 pi->pi_msix.pba_page_offset = table_offset; 550 else 551 pi->pi_msix.pba_page_offset = table_offset + 552 table_size - 4096; 553 pi->pi_msix.pba_page = mmap(NULL, 4096, PROT_READ | 554 PROT_WRITE, MAP_SHARED, sc->pptfd, 555 pi->pi_msix.pba_page_offset); 556 if (pi->pi_msix.pba_page == MAP_FAILED) { 557 warn("Failed to map PBA page for MSI-X on %d", 558 sc->pptfd); 559 return (-1); 560 } 561 } 562 } 563 564 /* Map everything before the MSI-X table */ 565 if (table_offset > 0) { 566 len = table_offset; 567 error = vm_map_pptdev_mmio(ctx, sc->pptfd, start, len, base); 568 if (error) 569 return (error); 570 571 base += len; 572 start += len; 573 remaining -= len; 574 } 575 576 /* Skip the MSI-X table */ 577 base += table_size; 578 start += table_size; 579 remaining -= table_size; 580 581 /* Map everything beyond the end of the MSI-X table */ 582 if (remaining > 0) { 583 len = remaining; 584 error = vm_map_pptdev_mmio(ctx, sc->pptfd, start, len, base); 585 if (error) 586 return (error); 587 } 588 589 return (0); 590 } 591 592 static int 593 cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) 594 { 595 struct pci_devinst *pi = sc->psc_pi; 596 uint_t i; 597 598 /* 599 * Initialize BAR registers 600 */ 601 for (i = 0; i <= PCI_BARMAX; i++) { 602 enum pcibar_type bartype; 603 uint64_t base, size; 604 int error; 605 606 if (passthru_get_bar(sc, i, &bartype, &base, &size) != 0) { 607 continue; 608 } 609 610 if (bartype != PCIBAR_IO) { 611 if (((base | size) & PAGE_MASK) != 0) { 612 warnx("passthru device %d BAR %d: " 613 "base %#lx or size %#lx not page aligned\n", 614 sc->pptfd, i, base, size); 615 return (-1); 616 } 617 } 618 619 /* Cache information about the "real" BAR */ 620 sc->psc_bar[i].type = bartype; 621 sc->psc_bar[i].size = size; 622 sc->psc_bar[i].addr = base; 623 624 /* Allocate the BAR in the guest I/O or MMIO space */ 625 error = pci_emul_alloc_pbar(pi, i, base, bartype, size); 626 if (error) 627 return (-1); 628 629 /* The MSI-X table needs special handling */ 630 if (i == pci_msix_table_bar(pi)) { 631 error = init_msix_table(ctx, sc, base); 632 if (error) 633 return (-1); 634 } else if (bartype != PCIBAR_IO) { 635 /* Map the physical BAR in the guest MMIO space */ 636 error = vm_map_pptdev_mmio(ctx, sc->pptfd, 637 pi->pi_bar[i].addr, pi->pi_bar[i].size, base); 638 if (error) 639 return (-1); 640 } 641 642 /* 643 * 64-bit BAR takes up two slots so skip the next one. 644 */ 645 if (bartype == PCIBAR_MEM64) { 646 i++; 647 assert(i <= PCI_BARMAX); 648 sc->psc_bar[i].type = PCIBAR_MEMHI64; 649 } 650 } 651 return (0); 652 } 653 654 static int 655 cfginit(struct vmctx *ctx, struct passthru_softc *sc) 656 { 657 struct pci_devinst *pi = sc->psc_pi; 658 659 if (cfginitmsi(sc) != 0) { 660 warnx("failed to initialize MSI for PCI %d", sc->pptfd); 661 return (-1); 662 } 663 664 if (cfginitbar(ctx, sc) != 0) { 665 warnx("failed to initialize BARs for PCI %d", sc->pptfd); 666 return (-1); 667 } 668 669 pci_set_cfgdata16(pi, PCIR_COMMAND, read_config(sc, PCIR_COMMAND, 2)); 670 671 return (0); 672 } 673 674 static int 675 passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 676 { 677 int error, memflags, pptfd; 678 struct passthru_softc *sc; 679 680 sc = NULL; 681 error = 1; 682 683 memflags = vm_get_memflags(ctx); 684 if (!(memflags & VM_MEM_F_WIRED)) { 685 warnx("passthru requires guest memory to be wired"); 686 goto done; 687 } 688 689 if (opts == NULL || passthru_dev_open(opts, &pptfd) != 0) { 690 warnx("invalid passthru options"); 691 goto done; 692 } 693 694 if (vm_assign_pptdev(ctx, pptfd) != 0) { 695 warnx("PCI device at %d is not using the ppt driver", pptfd); 696 goto done; 697 } 698 699 sc = calloc(1, sizeof(struct passthru_softc)); 700 701 pi->pi_arg = sc; 702 sc->psc_pi = pi; 703 sc->pptfd = pptfd; 704 705 if ((error = vm_get_pptdev_limits(ctx, pptfd, &sc->msi_limit, 706 &sc->msix_limit)) != 0) 707 goto done; 708 709 /* initialize config space */ 710 if ((error = cfginit(ctx, sc)) != 0) 711 goto done; 712 713 error = 0; /* success */ 714 done: 715 if (error) { 716 free(sc); 717 vm_unassign_pptdev(ctx, pptfd); 718 } 719 return (error); 720 } 721 722 static int 723 bar_access(int coff) 724 { 725 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) 726 return (1); 727 else 728 return (0); 729 } 730 731 static int 732 msicap_access(struct passthru_softc *sc, int coff) 733 { 734 int caplen; 735 736 if (sc->psc_msi.capoff == 0) 737 return (0); 738 739 caplen = msi_caplen(sc->psc_msi.msgctrl); 740 741 if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen) 742 return (1); 743 else 744 return (0); 745 } 746 747 static int 748 msixcap_access(struct passthru_softc *sc, int coff) 749 { 750 if (sc->psc_msix.capoff == 0) 751 return (0); 752 753 return (coff >= sc->psc_msix.capoff && 754 coff < sc->psc_msix.capoff + MSIX_CAPLEN); 755 } 756 757 static int 758 passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 759 int coff, int bytes, uint32_t *rv) 760 { 761 struct passthru_softc *sc; 762 763 sc = pi->pi_arg; 764 765 /* 766 * PCI BARs and MSI capability is emulated. 767 */ 768 if (bar_access(coff) || msicap_access(sc, coff)) 769 return (-1); 770 771 /* 772 * MSI-X is also emulated since a limit on interrupts may be imposed by 773 * the OS, altering the perceived register state. 774 */ 775 if (msixcap_access(sc, coff)) 776 return (-1); 777 778 #ifdef LEGACY_SUPPORT 779 /* 780 * Emulate PCIR_CAP_PTR if this device does not support MSI capability 781 * natively. 782 */ 783 if (sc->psc_msi.emulated) { 784 if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4) 785 return (-1); 786 } 787 #endif 788 789 /* 790 * Emulate the command register. If a single read reads both the 791 * command and status registers, read the status register from the 792 * device's config space. 793 */ 794 if (coff == PCIR_COMMAND) { 795 if (bytes <= 2) 796 return (-1); 797 *rv = pci_get_cfgdata16(pi, PCIR_COMMAND) << 16 | 798 read_config(sc, PCIR_STATUS, 2); 799 return (0); 800 } 801 802 /* Everything else just read from the device's config space */ 803 *rv = read_config(sc, coff, bytes); 804 805 return (0); 806 } 807 808 static int 809 passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 810 int coff, int bytes, uint32_t val) 811 { 812 int error, msix_table_entries, i; 813 struct passthru_softc *sc; 814 uint16_t cmd_old; 815 816 sc = pi->pi_arg; 817 818 /* 819 * PCI BARs are emulated 820 */ 821 if (bar_access(coff)) 822 return (-1); 823 824 /* 825 * MSI capability is emulated 826 */ 827 if (msicap_access(sc, coff)) { 828 pci_emul_capwrite(pi, coff, bytes, val, sc->psc_msi.capoff, 829 PCIY_MSI); 830 error = vm_setup_pptdev_msi(ctx, vcpu, sc->pptfd, 831 pi->pi_msi.addr, pi->pi_msi.msg_data, pi->pi_msi.maxmsgnum); 832 if (error != 0) 833 err(1, "vm_setup_pptdev_msi"); 834 return (0); 835 } 836 837 if (msixcap_access(sc, coff)) { 838 pci_emul_capwrite(pi, coff, bytes, val, sc->psc_msix.capoff, 839 PCIY_MSIX); 840 if (pi->pi_msix.enabled) { 841 msix_table_entries = pi->pi_msix.table_count; 842 for (i = 0; i < msix_table_entries; i++) { 843 error = vm_setup_pptdev_msix(ctx, vcpu, 844 sc->pptfd, i, 845 pi->pi_msix.table[i].addr, 846 pi->pi_msix.table[i].msg_data, 847 pi->pi_msix.table[i].vector_control); 848 849 if (error) 850 err(1, "vm_setup_pptdev_msix"); 851 } 852 } 853 return (0); 854 } 855 856 #ifdef LEGACY_SUPPORT 857 /* 858 * If this device does not support MSI natively then we cannot let 859 * the guest disable legacy interrupts from the device. It is the 860 * legacy interrupt that is triggering the virtual MSI to the guest. 861 */ 862 if (sc->psc_msi.emulated && pci_msi_enabled(pi)) { 863 if (coff == PCIR_COMMAND && bytes == 2) 864 val &= ~PCIM_CMD_INTxDIS; 865 } 866 #endif 867 868 write_config(sc, coff, bytes, val); 869 if (coff == PCIR_COMMAND) { 870 cmd_old = pci_get_cfgdata16(pi, PCIR_COMMAND); 871 if (bytes == 1) 872 pci_set_cfgdata8(pi, PCIR_COMMAND, val); 873 else if (bytes == 2) 874 pci_set_cfgdata16(pi, PCIR_COMMAND, val); 875 pci_emul_cmd_changed(pi, cmd_old); 876 } 877 878 return (0); 879 } 880 881 static void 882 passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 883 uint64_t offset, int size, uint64_t value) 884 { 885 struct passthru_softc *sc = pi->pi_arg; 886 887 if (baridx == pci_msix_table_bar(pi)) { 888 passthru_msix_table_write(ctx, vcpu, sc, offset, size, value); 889 } else { 890 struct ppt_bar_io pbi; 891 892 assert(pi->pi_bar[baridx].type == PCIBAR_IO); 893 894 pbi.pbi_bar = baridx; 895 pbi.pbi_width = size; 896 pbi.pbi_off = offset; 897 pbi.pbi_data = value; 898 (void) ioctl(sc->pptfd, PPT_BAR_WRITE, &pbi); 899 } 900 } 901 902 static uint64_t 903 passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 904 uint64_t offset, int size) 905 { 906 struct passthru_softc *sc = pi->pi_arg; 907 uint64_t val; 908 909 if (baridx == pci_msix_table_bar(pi)) { 910 val = passthru_msix_table_read(sc, offset, size); 911 } else { 912 struct ppt_bar_io pbi; 913 914 assert(pi->pi_bar[baridx].type == PCIBAR_IO); 915 916 pbi.pbi_bar = baridx; 917 pbi.pbi_width = size; 918 pbi.pbi_off = offset; 919 if (ioctl(sc->pptfd, PPT_BAR_READ, &pbi) == 0) { 920 val = pbi.pbi_data; 921 } else { 922 val = 0; 923 } 924 } 925 926 return (val); 927 } 928 929 struct pci_devemu passthru = { 930 .pe_emu = "passthru", 931 .pe_init = passthru_init, 932 .pe_cfgwrite = passthru_cfgwrite, 933 .pe_cfgread = passthru_cfgread, 934 .pe_barwrite = passthru_write, 935 .pe_barread = passthru_read, 936 }; 937 PCI_EMUL_SET(passthru); 938