/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/pciio.h>
#include <sys/ioctl.h>

#include <dev/io/iodev.h>
#include <dev/pci/pcireg.h>

#include <machine/iodev.h>

#include <assert.h>	/* assert() is used below */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <sysexits.h>
#include <unistd.h>

#include <machine/vmm.h>
#include <vmmapi.h>
#include "pci_emul.h"
#include "mem.h"

#ifndef _PATH_DEVPCI
#define	_PATH_DEVPCI	"/dev/pci"
#endif

#ifndef _PATH_DEVIO
#define	_PATH_DEVIO	"/dev/io"
#endif

#ifndef _PATH_MEM
#define	_PATH_MEM	"/dev/mem"
#endif

#define	LEGACY_SUPPORT	1

#define	MSIX_TABLE_COUNT(ctrl)	(((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
#define	MSIX_CAPLEN	12

static int pcifd = -1;
static int iofd = -1;
static int memfd = -1;

struct passthru_softc {
	struct pci_devinst *psc_pi;
	struct pcibar psc_bar[PCI_BARMAX + 1];
	struct {
		int		capoff;
		int		msgctrl;
		int		emulated;
	} psc_msi;
	struct {
		int		capoff;
	} psc_msix;
	struct pcisel psc_sel;
};

static int
msi_caplen(int msgctrl)
{
	int len;

	len = 10;		/* minimum length of msi capability */

	if (msgctrl & PCIM_MSICTRL_64BIT)
		len += 4;

#if 0
	/*
	 * Ignore the 'mask' and 'pending' bits in the MSI capability.
	 * We'll let the guest manipulate them directly.
	 */
	if (msgctrl & PCIM_MSICTRL_VECTOR)
		len += 10;
#endif

	return (len);
}
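
/*
 * Worked example: for a function that advertises 64-bit message
 * addresses (PCIM_MSICTRL_64BIT set, per-vector masking absent),
 * msi_caplen() returns 14 bytes: the 10-byte base structure plus the
 * 4-byte upper address dword.
 */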

static uint32_t
read_config(const struct pcisel *sel, long reg, int width)
{
	struct pci_io pi;

	bzero(&pi, sizeof(pi));
	pi.pi_sel = *sel;
	pi.pi_reg = reg;
	pi.pi_width = width;

	if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
		return (0);		/* XXX */
	else
		return (pi.pi_data);
}

static void
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
{
	struct pci_io pi;

	bzero(&pi, sizeof(pi));
	pi.pi_sel = *sel;
	pi.pi_reg = reg;
	pi.pi_width = width;
	pi.pi_data = data;

	(void)ioctl(pcifd, PCIOCWRITE, &pi);	/* XXX */
}
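
/*
 * These helpers issue the host's PCIOCREAD/PCIOCWRITE ioctls against
 * /dev/pci, so every access operates on the physical device's config
 * space.  For example, read_config(&sel, PCIR_DEVVENDOR, 4) fetches the
 * real vendor/device ID dword.
 */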

#ifdef LEGACY_SUPPORT
static int
passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
{
	int capoff, i;
	struct msicap msicap;
	u_char *capdata;

	pci_populate_msicap(&msicap, msgnum, nextptr);

	/*
	 * XXX
	 * Copy the msi capability structure in the last 16 bytes of the
	 * config space. This is wrong because it could shadow something
	 * useful to the device.
	 */
	capoff = 256 - roundup(sizeof(msicap), 4);
	capdata = (u_char *)&msicap;
	for (i = 0; i < sizeof(msicap); i++)
		pci_set_cfgdata8(pi, capoff + i, capdata[i]);

	return (capoff);
}
#endif	/* LEGACY_SUPPORT */

static int
cfginitmsi(struct passthru_softc *sc)
{
	int i, ptr, capptr, cap, sts, caplen, table_size;
	uint32_t u32;
	struct pcisel sel;
	struct pci_devinst *pi;
	struct msixcap msixcap;
	uint32_t *msixcap_ptr;

	pi = sc->psc_pi;
	sel = sc->psc_sel;

	/*
	 * Parse the capabilities and cache the location of the MSI
	 * and MSI-X capabilities.
	 */
	sts = read_config(&sel, PCIR_STATUS, 2);
	if (sts & PCIM_STATUS_CAPPRESENT) {
		ptr = read_config(&sel, PCIR_CAP_PTR, 1);
		while (ptr != 0 && ptr != 0xff) {
			cap = read_config(&sel, ptr + PCICAP_ID, 1);
			if (cap == PCIY_MSI) {
				/*
				 * Copy the MSI capability into the config
				 * space of the emulated pci device
				 */
				sc->psc_msi.capoff = ptr;
				sc->psc_msi.msgctrl = read_config(&sel,
				    ptr + 2, 2);
				sc->psc_msi.emulated = 0;
				caplen = msi_caplen(sc->psc_msi.msgctrl);
				capptr = ptr;
				while (caplen > 0) {
					u32 = read_config(&sel, capptr, 4);
					pci_set_cfgdata32(pi, capptr, u32);
					caplen -= 4;
					capptr += 4;
				}
			} else if (cap == PCIY_MSIX) {
				/*
				 * Copy the MSI-X capability
				 */
				sc->psc_msix.capoff = ptr;
				caplen = MSIX_CAPLEN;
				msixcap_ptr = (uint32_t *)&msixcap;
				capptr = ptr;
				while (caplen > 0) {
					u32 = read_config(&sel, capptr, 4);
					*msixcap_ptr = u32;
					pci_set_cfgdata32(pi, capptr, u32);
					caplen -= 4;
					capptr += 4;
					msixcap_ptr++;
				}
			}
			ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
		}
	}

	if (sc->psc_msix.capoff != 0) {
		pi->pi_msix.pba_bar =
		    msixcap.pba_info & PCIM_MSIX_BIR_MASK;
		pi->pi_msix.pba_offset =
		    msixcap.pba_info & ~PCIM_MSIX_BIR_MASK;
		pi->pi_msix.table_bar =
		    msixcap.table_info & PCIM_MSIX_BIR_MASK;
		pi->pi_msix.table_offset =
		    msixcap.table_info & ~PCIM_MSIX_BIR_MASK;
		pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl);
		pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count);

		/* Allocate the emulated MSI-X table array */
		table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
		pi->pi_msix.table = calloc(1, table_size);
		if (pi->pi_msix.table == NULL)
			return (-1);

		/* Mask all table entries */
		for (i = 0; i < pi->pi_msix.table_count; i++) {
			pi->pi_msix.table[i].vector_control |=
			    PCIM_MSIX_VCTRL_MASK;
		}
	}

#ifdef LEGACY_SUPPORT
	/*
	 * If the passthrough device does not support MSI then craft an
	 * MSI capability for it. We link the new MSI capability at the
	 * head of the list of capabilities.
	 */
	if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) {
		int origptr, msiptr;

		origptr = read_config(&sel, PCIR_CAP_PTR, 1);
		msiptr = passthru_add_msicap(pi, 1, origptr);
		sc->psc_msi.capoff = msiptr;
		sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2);
		sc->psc_msi.emulated = 1;
		pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr);
	}
#endif

	/* Make sure one of the capabilities is present */
	if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0)
		return (-1);
	else
		return (0);
}
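
/*
 * Example: an MSI-X message control word of 0x001f encodes a table size
 * field of 0x1f, so MSIX_TABLE_COUNT() reports 32 vectors; the field
 * holds N-1 per the PCI specification.
 */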

static uint64_t
msix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
{
	struct pci_devinst *pi;
	struct msix_table_entry *entry;
	uint8_t *src8;
	uint16_t *src16;
	uint32_t *src32;
	uint64_t *src64;
	uint64_t data;
	size_t entry_offset;
	int index;

	pi = sc->psc_pi;
	if (offset >= pi->pi_msix.pba_offset &&
	    offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
		switch (size) {
		case 1:
			src8 = (uint8_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			data = *src8;
			break;
		case 2:
			src16 = (uint16_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			data = *src16;
			break;
		case 4:
			src32 = (uint32_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			data = *src32;
			break;
		case 8:
			src64 = (uint64_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			data = *src64;
			break;
		default:
			return (-1);
		}
		return (data);
	}

	if (offset < pi->pi_msix.table_offset)
		return (-1);

	offset -= pi->pi_msix.table_offset;
	index = offset / MSIX_TABLE_ENTRY_SIZE;
	if (index >= pi->pi_msix.table_count)
		return (-1);

	entry = &pi->pi_msix.table[index];
	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

	switch (size) {
	case 1:
		src8 = (uint8_t *)((void *)entry + entry_offset);
		data = *src8;
		break;
	case 2:
		src16 = (uint16_t *)((void *)entry + entry_offset);
		data = *src16;
		break;
	case 4:
		src32 = (uint32_t *)((void *)entry + entry_offset);
		data = *src32;
		break;
	case 8:
		src64 = (uint64_t *)((void *)entry + entry_offset);
		data = *src64;
		break;
	default:
		return (-1);
	}

	return (data);
}
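
/*
 * Each MSI-X table entry is 16 bytes (MSIX_TABLE_ENTRY_SIZE): a 64-bit
 * message address, a 32-bit message data word and a 32-bit vector
 * control word.  entry_offset above therefore selects one of those
 * fields within the emulated entry.
 */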

static void
msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
    uint64_t offset, int size, uint64_t data)
{
	struct pci_devinst *pi;
	struct msix_table_entry *entry;
	uint8_t *dest8;
	uint16_t *dest16;
	uint32_t *dest32;
	uint64_t *dest64;
	size_t entry_offset;
	uint32_t vector_control;
	int index;

	pi = sc->psc_pi;
	if (offset >= pi->pi_msix.pba_offset &&
	    offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
		switch (size) {
		case 1:
			dest8 = (uint8_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			*dest8 = data;
			break;
		case 2:
			dest16 = (uint16_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			*dest16 = data;
			break;
		case 4:
			dest32 = (uint32_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			*dest32 = data;
			break;
		case 8:
			dest64 = (uint64_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			*dest64 = data;
			break;
		default:
			break;
		}
		return;
	}

	if (offset < pi->pi_msix.table_offset)
		return;

	offset -= pi->pi_msix.table_offset;
	index = offset / MSIX_TABLE_ENTRY_SIZE;
	if (index >= pi->pi_msix.table_count)
		return;

	entry = &pi->pi_msix.table[index];
	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

	/* Only 4 byte naturally-aligned writes are supported */
	assert(size == 4);
	assert(entry_offset % 4 == 0);

	vector_control = entry->vector_control;
	dest32 = (uint32_t *)((void *)entry + entry_offset);
	*dest32 = data;
	/* If MSI-X hasn't been enabled, do nothing */
	if (pi->pi_msix.enabled) {
		/* If the entry was and remains masked, don't set it up */
		if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
		    (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
			(void)vm_setup_pptdev_msix(ctx, vcpu,
			    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
			    sc->psc_sel.pc_func, index, entry->addr,
			    entry->msg_data, entry->vector_control);
		}
	}
}
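
/*
 * The vm_setup_pptdev_msix() call above hands the updated entry to
 * vmm(4), which programs the real vector on the host side; entries
 * that were and remain masked are deliberately left alone until the
 * guest unmasks them.
 */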

static int
init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
{
	int b, s, f;
	int error, idx;
	size_t len, remaining;
	uint32_t table_size, table_offset;
	uint32_t pba_size, pba_offset;
	vm_paddr_t start;
	struct pci_devinst *pi = sc->psc_pi;

	assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0);

	b = sc->psc_sel.pc_bus;
	s = sc->psc_sel.pc_dev;
	f = sc->psc_sel.pc_func;

	/*
	 * If the MSI-X table BAR maps memory intended for
	 * other uses, it is at least assured that the table
	 * either resides in its own page within the region,
	 * or it resides in a page shared with only the PBA.
	 */
	table_offset = rounddown2(pi->pi_msix.table_offset, 4096);

	table_size = pi->pi_msix.table_offset - table_offset;
	table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
	table_size = roundup2(table_size, 4096);

	idx = pi->pi_msix.table_bar;
	start = pi->pi_bar[idx].addr;
	remaining = pi->pi_bar[idx].size;

	if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) {
		pba_offset = pi->pi_msix.pba_offset;
		pba_size = pi->pi_msix.pba_size;
		if (pba_offset >= table_offset + table_size ||
		    table_offset >= pba_offset + pba_size) {
			/*
			 * If the PBA does not share a page with the MSI-X
			 * tables, no PBA emulation is required.
			 */
			pi->pi_msix.pba_page = NULL;
			pi->pi_msix.pba_page_offset = 0;
		} else {
			/*
			 * The PBA overlaps with either the first or last
			 * page of the MSI-X table region. Map the
			 * appropriate page.
			 */
			if (pba_offset <= table_offset)
				pi->pi_msix.pba_page_offset = table_offset;
			else
				pi->pi_msix.pba_page_offset = table_offset +
				    table_size - 4096;
			pi->pi_msix.pba_page = mmap(NULL, 4096, PROT_READ |
			    PROT_WRITE, MAP_SHARED, memfd, start +
			    pi->pi_msix.pba_page_offset);
			if (pi->pi_msix.pba_page == MAP_FAILED) {
				warn("Failed to map PBA page for MSI-X on "
				    "%d/%d/%d", b, s, f);
				return (-1);
			}
		}
	}

	/* Map everything before the MSI-X table */
	if (table_offset > 0) {
		len = table_offset;
		error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
		if (error)
			return (error);

		base += len;
		start += len;
		remaining -= len;
	}

	/* Skip the MSI-X table */
	base += table_size;
	start += table_size;
	remaining -= table_size;

	/* Map everything beyond the end of the MSI-X table */
	if (remaining > 0) {
		len = remaining;
		error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
		if (error)
			return (error);
	}

	return (0);
}
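
/*
 * Resulting layout (assuming 4KB pages), sketched for a table BAR that
 * also carries other device registers:
 *
 *   [ head of BAR ][ MSI-X table (+ PBA page, if shared) ][ tail of BAR ]
 *
 * Only the head and tail are handed to vm_map_pptdev_mmio() above; the
 * table pages are never mapped into the guest, since those accesses
 * must be emulated.
 */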

static int
cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
{
	int i, error;
	struct pci_devinst *pi;
	struct pci_bar_io bar;
	enum pcibar_type bartype;
	uint64_t base, size;

	pi = sc->psc_pi;

	/*
	 * Initialize BAR registers
	 */
	for (i = 0; i <= PCI_BARMAX; i++) {
		bzero(&bar, sizeof(bar));
		bar.pbi_sel = sc->psc_sel;
		bar.pbi_reg = PCIR_BAR(i);

		if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
			continue;

		if (PCI_BAR_IO(bar.pbi_base)) {
			bartype = PCIBAR_IO;
			base = bar.pbi_base & PCIM_BAR_IO_BASE;
		} else {
			switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
			case PCIM_BAR_MEM_64:
				bartype = PCIBAR_MEM64;
				break;
			default:
				bartype = PCIBAR_MEM32;
				break;
			}
			base = bar.pbi_base & PCIM_BAR_MEM_BASE;
		}
		size = bar.pbi_length;

		if (bartype != PCIBAR_IO) {
			if (((base | size) & PAGE_MASK) != 0) {
				warnx("passthru device %d/%d/%d BAR %d: "
				    "base %#lx or size %#lx not page aligned",
				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
				    sc->psc_sel.pc_func, i, base, size);
				return (-1);
			}
		}

		/* Cache information about the "real" BAR */
		sc->psc_bar[i].type = bartype;
		sc->psc_bar[i].size = size;
		sc->psc_bar[i].addr = base;

		/* Allocate the BAR in the guest I/O or MMIO space */
		error = pci_emul_alloc_pbar(pi, i, base, bartype, size);
		if (error)
			return (-1);

		/* The MSI-X table needs special handling */
		if (i == pci_msix_table_bar(pi)) {
			error = init_msix_table(ctx, sc, base);
			if (error)
				return (-1);
		} else if (bartype != PCIBAR_IO) {
			/* Map the physical BAR in the guest MMIO space */
			error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
			    sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
			    pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
			if (error)
				return (-1);
		}

		/*
		 * 64-bit BAR takes up two slots so skip the next one.
		 */
		if (bartype == PCIBAR_MEM64) {
			i++;
			assert(i <= PCI_BARMAX);
			sc->psc_bar[i].type = PCIBAR_MEMHI64;
		}
	}
	return (0);
}

static int
cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
{
	int error;
	struct passthru_softc *sc;

	error = 1;
	sc = pi->pi_arg;

	bzero(&sc->psc_sel, sizeof(struct pcisel));
	sc->psc_sel.pc_bus = bus;
	sc->psc_sel.pc_dev = slot;
	sc->psc_sel.pc_func = func;

	if (cfginitmsi(sc) != 0) {
		warnx("failed to initialize MSI for PCI %d/%d/%d",
		    bus, slot, func);
		goto done;
	}

	if (cfginitbar(ctx, sc) != 0) {
		warnx("failed to initialize BARs for PCI %d/%d/%d",
		    bus, slot, func);
		goto done;
	}

	error = 0;		/* success */
done:
	return (error);
}

static int
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	int bus, slot, func, error, memflags;
	struct passthru_softc *sc;
#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
	cap_ioctl_t pci_ioctls[] = { PCIOCREAD, PCIOCWRITE, PCIOCGETBAR };
	cap_ioctl_t io_ioctls[] = { IODEV_PIO };
#endif

	sc = NULL;
	error = 1;

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_IOCTL, CAP_READ, CAP_WRITE);
#endif

	memflags = vm_get_memflags(ctx);
	if (!(memflags & VM_MEM_F_WIRED)) {
		warnx("passthru requires guest memory to be wired");
		goto done;
	}

	if (pcifd < 0) {
		pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
		if (pcifd < 0) {
			warn("failed to open %s", _PATH_DEVPCI);
			goto done;
		}
	}

#ifndef WITHOUT_CAPSICUM
	if (cap_rights_limit(pcifd, &rights) == -1 && errno != ENOSYS)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	if (cap_ioctls_limit(pcifd, pci_ioctls, nitems(pci_ioctls)) == -1 &&
	    errno != ENOSYS)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	if (iofd < 0) {
		iofd = open(_PATH_DEVIO, O_RDWR, 0);
		if (iofd < 0) {
			warn("failed to open %s", _PATH_DEVIO);
			goto done;
		}
	}

#ifndef WITHOUT_CAPSICUM
	if (cap_rights_limit(iofd, &rights) == -1 && errno != ENOSYS)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	if (cap_ioctls_limit(iofd, io_ioctls, nitems(io_ioctls)) == -1 &&
	    errno != ENOSYS)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	if (memfd < 0) {
		memfd = open(_PATH_MEM, O_RDWR, 0);
		if (memfd < 0) {
			warn("failed to open %s", _PATH_MEM);
			goto done;
		}
	}

#ifndef WITHOUT_CAPSICUM
	cap_rights_clear(&rights, CAP_IOCTL);
	cap_rights_set(&rights, CAP_MMAP_RW);
	if (cap_rights_limit(memfd, &rights) == -1 && errno != ENOSYS)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	if (opts == NULL ||
	    sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3) {
		warnx("invalid passthru options");
		goto done;
	}

	if (vm_assign_pptdev(ctx, bus, slot, func) != 0) {
		warnx("PCI device at %d/%d/%d is not using the ppt(4) driver",
		    bus, slot, func);
		goto done;
	}

	sc = calloc(1, sizeof(struct passthru_softc));
	if (sc == NULL) {
		warnx("failed to allocate passthru softc");
		vm_unassign_pptdev(ctx, bus, slot, func);
		goto done;
	}

	pi->pi_arg = sc;
	sc->psc_pi = pi;

	/* initialize config space */
	if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
		goto done;

	error = 0;		/* success */
done:
	if (error) {
		/*
		 * Only unwind what was actually set up: 'sc' is non-NULL
		 * only after the device was successfully assigned, so
		 * bus/slot/func are guaranteed to be valid here.
		 */
		if (sc != NULL) {
			free(sc);
			vm_unassign_pptdev(ctx, bus, slot, func);
		}
	}
	return (error);
}
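
/*
 * For reference, a guest configured with
 *     bhyve -S -s 7,passthru,2/0/0 ...
 * reaches passthru_init() with opts "2/0/0"; -S wires guest memory,
 * which satisfies the VM_MEM_F_WIRED check above.  (The slot number 7
 * is just an example.)
 */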

static int
bar_access(int coff)
{
	if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
		return (1);
	else
		return (0);
}

static int
msicap_access(struct passthru_softc *sc, int coff)
{
	int caplen;

	if (sc->psc_msi.capoff == 0)
		return (0);

	caplen = msi_caplen(sc->psc_msi.msgctrl);

	if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen)
		return (1);
	else
		return (0);
}

static int
msixcap_access(struct passthru_softc *sc, int coff)
{
	if (sc->psc_msix.capoff == 0)
		return (0);

	return (coff >= sc->psc_msix.capoff &&
	    coff < sc->psc_msix.capoff + MSIX_CAPLEN);
}

static int
passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
    int coff, int bytes, uint32_t *rv)
{
	struct passthru_softc *sc;

	sc = pi->pi_arg;

	/*
	 * PCI BARs and the MSI capability are emulated.
	 */
	if (bar_access(coff) || msicap_access(sc, coff))
		return (-1);

#ifdef LEGACY_SUPPORT
	/*
	 * Emulate PCIR_CAP_PTR if this device does not support the MSI
	 * capability natively.
	 */
	if (sc->psc_msi.emulated) {
		if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
			return (-1);
	}
#endif

	/* Everything else is read from the device's config space */
	*rv = read_config(&sc->psc_sel, coff, bytes);

	return (0);
}
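
/*
 * Note on the return convention: a non-zero return from a cfgread
 * handler tells the generic code in pci_emul.c to satisfy the access
 * from the emulated config space instead, which is how the BAR and MSI
 * ranges above end up served from the shadow copy.
 */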

static int
passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
    int coff, int bytes, uint32_t val)
{
	int error, msix_table_entries, i;
	struct passthru_softc *sc;

	sc = pi->pi_arg;

	/*
	 * PCI BARs are emulated
	 */
	if (bar_access(coff))
		return (-1);

	/*
	 * MSI capability is emulated
	 */
	if (msicap_access(sc, coff)) {
		msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);

		error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus,
		    sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
		    pi->pi_msi.addr, pi->pi_msi.msg_data,
		    pi->pi_msi.maxmsgnum);
		if (error != 0)
			err(1, "vm_setup_pptdev_msi");
		return (0);
	}

	if (msixcap_access(sc, coff)) {
		msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val);
		if (pi->pi_msix.enabled) {
			msix_table_entries = pi->pi_msix.table_count;
			for (i = 0; i < msix_table_entries; i++) {
				error = vm_setup_pptdev_msix(ctx, vcpu,
				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
				    sc->psc_sel.pc_func, i,
				    pi->pi_msix.table[i].addr,
				    pi->pi_msix.table[i].msg_data,
				    pi->pi_msix.table[i].vector_control);

				if (error)
					err(1, "vm_setup_pptdev_msix");
			}
		}
		return (0);
	}

#ifdef LEGACY_SUPPORT
	/*
	 * If this device does not support MSI natively then we cannot let
	 * the guest disable legacy interrupts from the device. It is the
	 * legacy interrupt that is triggering the virtual MSI to the guest.
	 */
	if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
		if (coff == PCIR_COMMAND && bytes == 2)
			val &= ~PCIM_CMD_INTxDIS;
	}
#endif

	write_config(&sc->psc_sel, coff, bytes, val);

	return (0);
}

static void
passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size, uint64_t value)
{
	struct passthru_softc *sc;
	struct iodev_pio_req pio;

	sc = pi->pi_arg;

	if (baridx == pci_msix_table_bar(pi)) {
		msix_table_write(ctx, vcpu, sc, offset, size, value);
	} else {
		assert(pi->pi_bar[baridx].type == PCIBAR_IO);
		bzero(&pio, sizeof(struct iodev_pio_req));
		pio.access = IODEV_PIO_WRITE;
		pio.port = sc->psc_bar[baridx].addr + offset;
		pio.width = size;
		pio.val = value;

		(void)ioctl(iofd, IODEV_PIO, &pio);
	}
}

static uint64_t
passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size)
{
	struct passthru_softc *sc;
	struct iodev_pio_req pio;
	uint64_t val;

	sc = pi->pi_arg;

	if (baridx == pci_msix_table_bar(pi)) {
		val = msix_table_read(sc, offset, size);
	} else {
		assert(pi->pi_bar[baridx].type == PCIBAR_IO);
		bzero(&pio, sizeof(struct iodev_pio_req));
		pio.access = IODEV_PIO_READ;
		pio.port = sc->psc_bar[baridx].addr + offset;
		pio.width = size;
		pio.val = 0;

		(void)ioctl(iofd, IODEV_PIO, &pio);

		val = pio.val;
	}

	return (val);
}

struct pci_devemu passthru = {
	.pe_emu		= "passthru",
	.pe_init	= passthru_init,
	.pe_cfgwrite	= passthru_cfgwrite,
	.pe_cfgread	= passthru_cfgread,
	.pe_barwrite	= passthru_write,
	.pe_barread	= passthru_read,
};
PCI_EMUL_SET(passthru);
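
/*
 * PCI_EMUL_SET() enters this descriptor into the pci_devemu_set linker
 * set; the generic PCI emulation scans that set to resolve the
 * "passthru" model named in a bhyve -s slot specification.
 */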