1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/mman.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <stdbool.h>
#include <sysexits.h>

#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <vmmapi.h>

#include "acpi.h"
#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "inout.h"
#include "ioapic.h"
#include "mem.h"
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"

#define CONF1_ADDR_PORT	   0x0cf8
#define CONF1_DATA_PORT	   0x0cfc

#define CONF1_ENABLE	   0x80000000ul

/* Array sizes derived from the highest valid bus/slot/function number. */
#define	MAXBUSES	(PCI_BUSMAX + 1)
#define MAXSLOTS	(PCI_SLOTMAX + 1)
#define	MAXFUNCS	(PCI_FUNCMAX + 1)

#define GB		(1024 * 1024 * 1024UL)

/*
 * Bookkeeping for one PCI function of a slot.  fi_config is the config node
 * handed to the emulation's pe_init callback and fi_devi is the device
 * instance recorded once that callback succeeds (see pci_emul_init()).
 */
struct funcinfo {
	nvlist_t *fi_config;
	struct pci_devemu *fi_pde;	/* presumably the emulation chosen for this function -- confirm */
	struct pci_devinst *fi_devi;
};

/* State kept for one INTx pin of a slot (see slotinfo.si_intpins). */
struct intxinfo {
	int	ii_count;
	int	ii_pirq_pin;
	int	ii_ioapic_irq;
};

/* One slot: four INTx pins plus one funcinfo per possible function. */
struct slotinfo {
	struct intxinfo si_intpins[4];
	struct funcinfo si_funcs[MAXFUNCS];
};

/* One bus: its resource windows plus one slotinfo per possible slot. */
struct businfo {
	uint16_t iobase, iolimit;		/* I/O window */
	uint32_t membase32, memlimit32;		/* mmio window below 4GB */
	uint64_t membase64, memlimit64;		/* mmio window above 4GB */
	struct slotinfo slotinfo[MAXSLOTS];
};

/* Indexed by bus number; an entry stays NULL until init_pci() allocates it. */
static struct businfo *pci_businfo[MAXBUSES];

SET_DECLARE(pci_devemu_set, struct pci_devemu);

/*
 * Allocation cursors used when BARs are assigned addresses: each *base
 * variable holds the next free address of its window and is advanced by
 * pci_emul_alloc_resource().  The rom* variables describe the single ROM
 * segment created on demand by pci_emul_alloc_rom().
 */
static uint64_t pci_emul_iobase;
static uint8_t *pci_emul_rombase;
static uint64_t pci_emul_romoffset;
static uint8_t *pci_emul_romlim;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;

/*
 * A BAR request queued by pci_emul_alloc_bar().  The pci_bars list is kept
 * ordered from the largest to the smallest size.
 */
struct pci_bar_allocation {
	TAILQ_ENTRY(pci_bar_allocation) chain;
	struct pci_devinst *pdi;
	int idx;
	enum pcibar_type type;
	uint64_t size;
};
TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER(
    pci_bars);

/* Window from which I/O BARs are carved: [PCI_EMUL_IOBASE, PCI_EMUL_IOLIMIT). */
#define	PCI_EMUL_IOBASE		0x2000
#define	PCI_EMUL_IOLIMIT	0x10000

/* Size of the ROM segment shared by all emulated devices. */
#define PCI_EMUL_ROMSIZE 0x10000000

#define	PCI_EMUL_ECFG_BASE	0xE0000000		    /* 3.5GB */
#define	PCI_EMUL_ECFG_SIZE	(MAXBUSES * 1024 * 1024)    /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);

/*
 * OVMF always uses 0xC0000000 as base address for 32 bit PCI MMIO. Don't
 * change this address without changing it in OVMF.
 */
#define PCI_EMUL_MEMBASE32 0xC0000000
#define	PCI_EMUL_MEMLIMIT32	PCI_EMUL_ECFG_BASE
#define PCI_EMUL_MEMSIZE64	(32*GB)

static struct pci_devemu *pci_emul_finddev(const char *name);
static void pci_lintr_route(struct pci_devinst *pi);
static void pci_lintr_update(struct pci_devinst *pi);
static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot,
    int func, int coff, int bytes, uint32_t *val);

/*
 * Write 'val' to config space offset 'coff' of 'pi' using an access that is
 * 'bytes' wide.  Any width other than 1 or 2 is handled as a 4 byte write.
 */
static __inline void
CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
{

	if (bytes == 1)
		pci_set_cfgdata8(pi, coff, val);
	else if (bytes == 2)
		pci_set_cfgdata16(pi, coff, val);
	else
		pci_set_cfgdata32(pi, coff, val);
}

/*
 * Read 'bytes' bytes from config space offset 'coff' of 'pi'.  Any width
 * other than 1 or 2 is handled as a 4 byte read.
 */
static __inline uint32_t
CFGREAD(struct pci_devinst *pi, int coff, int bytes)
{

	if (bytes == 1)
		return (pci_get_cfgdata8(pi, coff));
	else if (bytes == 2)
		return (pci_get_cfgdata16(pi, coff));
	else
		return (pci_get_cfgdata32(pi, coff));
}

/*
 * I/O access
 */

/*
 * Slot options are in the form:
 *
 *  <bus>:<slot>:<func>,<emul>[,<config>]
 *  <slot>[:<func>],<emul>[,<config>]
 *
 *  slot is 0..31
 *  func is 0..7
 *  emul is a string describing the type of PCI device e.g.
virtio-net 182 * config is an optional string, depending on the device, that can be 183 * used for configuration. 184 * Examples are: 185 * 1,virtio-net,tap0 186 * 3:0,dummy 187 */ 188 static void 189 pci_parse_slot_usage(char *aopt) 190 { 191 192 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 193 } 194 195 /* 196 * Helper function to parse a list of comma-separated options where 197 * each option is formatted as "name[=value]". If no value is 198 * provided, the option is treated as a boolean and is given a value 199 * of true. 200 */ 201 int 202 pci_parse_legacy_config(nvlist_t *nvl, const char *opt) 203 { 204 char *config, *name, *tofree, *value; 205 206 if (opt == NULL) 207 return (0); 208 209 config = tofree = strdup(opt); 210 while ((name = strsep(&config, ",")) != NULL) { 211 value = strchr(name, '='); 212 if (value != NULL) { 213 *value = '\0'; 214 value++; 215 set_config_value_node(nvl, name, value); 216 } else 217 set_config_bool_node(nvl, name, true); 218 } 219 free(tofree); 220 return (0); 221 } 222 223 /* 224 * PCI device configuration is stored in MIBs that encode the device's 225 * location: 226 * 227 * pci.<bus>.<slot>.<func> 228 * 229 * Where "bus", "slot", and "func" are all decimal values without 230 * leading zeroes. Each valid device must have a "device" node which 231 * identifies the driver model of the device. 232 * 233 * Device backends can provide a parser for the "config" string. If 234 * a custom parser is not provided, pci_parse_legacy_config() is used 235 * to parse the string. 
236 */ 237 int 238 pci_parse_slot(char *opt) 239 { 240 char node_name[sizeof("pci.XXX.XX.X")]; 241 struct pci_devemu *pde; 242 char *emul, *config, *str, *cp; 243 int error, bnum, snum, fnum; 244 nvlist_t *nvl; 245 246 error = -1; 247 str = strdup(opt); 248 249 emul = config = NULL; 250 if ((cp = strchr(str, ',')) != NULL) { 251 *cp = '\0'; 252 emul = cp + 1; 253 if ((cp = strchr(emul, ',')) != NULL) { 254 *cp = '\0'; 255 config = cp + 1; 256 } 257 } else { 258 pci_parse_slot_usage(opt); 259 goto done; 260 } 261 262 /* <bus>:<slot>:<func> */ 263 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 264 bnum = 0; 265 /* <slot>:<func> */ 266 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 267 fnum = 0; 268 /* <slot> */ 269 if (sscanf(str, "%d", &snum) != 1) { 270 snum = -1; 271 } 272 } 273 } 274 275 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 276 fnum < 0 || fnum >= MAXFUNCS) { 277 pci_parse_slot_usage(opt); 278 goto done; 279 } 280 281 pde = pci_emul_finddev(emul); 282 if (pde == NULL) { 283 EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum, 284 fnum, emul); 285 goto done; 286 } 287 288 snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum, 289 fnum); 290 nvl = find_config_node(node_name); 291 if (nvl != NULL) { 292 EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum, 293 fnum); 294 goto done; 295 } 296 nvl = create_config_node(node_name); 297 if (pde->pe_alias != NULL) 298 set_config_value_node(nvl, "device", pde->pe_alias); 299 else 300 set_config_value_node(nvl, "device", pde->pe_emu); 301 302 if (pde->pe_legacy_config != NULL) 303 error = pde->pe_legacy_config(nvl, config); 304 else 305 error = pci_parse_legacy_config(nvl, config); 306 done: 307 free(str); 308 return (error); 309 } 310 311 void 312 pci_print_supported_devices() 313 { 314 struct pci_devemu **pdpp, *pdp; 315 316 SET_FOREACH(pdpp, pci_devemu_set) { 317 pdp = *pdpp; 318 printf("%s\n", pdp->pe_emu); 319 } 320 } 321 322 static int 323 
pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 324 { 325 326 if (offset < pi->pi_msix.pba_offset) 327 return (0); 328 329 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 330 return (0); 331 } 332 333 return (1); 334 } 335 336 int 337 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 338 uint64_t value) 339 { 340 int msix_entry_offset; 341 int tab_index; 342 char *dest; 343 344 /* support only 4 or 8 byte writes */ 345 if (size != 4 && size != 8) 346 return (-1); 347 348 /* 349 * Return if table index is beyond what device supports 350 */ 351 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 352 if (tab_index >= pi->pi_msix.table_count) 353 return (-1); 354 355 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 356 357 /* support only aligned writes */ 358 if ((msix_entry_offset % size) != 0) 359 return (-1); 360 361 dest = (char *)(pi->pi_msix.table + tab_index); 362 dest += msix_entry_offset; 363 364 if (size == 4) 365 *((uint32_t *)dest) = value; 366 else 367 *((uint64_t *)dest) = value; 368 369 return (0); 370 } 371 372 uint64_t 373 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 374 { 375 char *dest; 376 int msix_entry_offset; 377 int tab_index; 378 uint64_t retval = ~0; 379 380 /* 381 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 382 * table but we also allow 1 byte access to accommodate reads from 383 * ddb. 
384 */ 385 if (size != 1 && size != 4 && size != 8) 386 return (retval); 387 388 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 389 390 /* support only aligned reads */ 391 if ((msix_entry_offset % size) != 0) { 392 return (retval); 393 } 394 395 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 396 397 if (tab_index < pi->pi_msix.table_count) { 398 /* valid MSI-X Table access */ 399 dest = (char *)(pi->pi_msix.table + tab_index); 400 dest += msix_entry_offset; 401 402 if (size == 1) 403 retval = *((uint8_t *)dest); 404 else if (size == 4) 405 retval = *((uint32_t *)dest); 406 else 407 retval = *((uint64_t *)dest); 408 } else if (pci_valid_pba_offset(pi, offset)) { 409 /* return 0 for PBA access */ 410 retval = 0; 411 } 412 413 return (retval); 414 } 415 416 int 417 pci_msix_table_bar(struct pci_devinst *pi) 418 { 419 420 if (pi->pi_msix.table != NULL) 421 return (pi->pi_msix.table_bar); 422 else 423 return (-1); 424 } 425 426 int 427 pci_msix_pba_bar(struct pci_devinst *pi) 428 { 429 430 if (pi->pi_msix.table != NULL) 431 return (pi->pi_msix.pba_bar); 432 else 433 return (-1); 434 } 435 436 static int 437 pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 438 uint32_t *eax, void *arg) 439 { 440 struct pci_devinst *pdi = arg; 441 struct pci_devemu *pe = pdi->pi_d; 442 uint64_t offset; 443 int i; 444 445 for (i = 0; i <= PCI_BARMAX; i++) { 446 if (pdi->pi_bar[i].type == PCIBAR_IO && 447 port >= pdi->pi_bar[i].addr && 448 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 449 offset = port - pdi->pi_bar[i].addr; 450 if (in) 451 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 452 offset, bytes); 453 else 454 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 455 bytes, *eax); 456 return (0); 457 } 458 } 459 return (-1); 460 } 461 462 static int 463 pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 464 int size, uint64_t *val, void *arg1, long arg2) 465 { 466 struct pci_devinst *pdi = arg1; 467 struct pci_devemu *pe = 
pdi->pi_d; 468 uint64_t offset; 469 int bidx = (int) arg2; 470 471 assert(bidx <= PCI_BARMAX); 472 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 473 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 474 assert(addr >= pdi->pi_bar[bidx].addr && 475 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 476 477 offset = addr - pdi->pi_bar[bidx].addr; 478 479 if (dir == MEM_F_WRITE) { 480 if (size == 8) { 481 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 482 4, *val & 0xffffffff); 483 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 484 4, *val >> 32); 485 } else { 486 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 487 size, *val); 488 } 489 } else { 490 if (size == 8) { 491 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 492 offset, 4); 493 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 494 offset + 4, 4) << 32; 495 } else { 496 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 497 offset, size); 498 } 499 } 500 501 return (0); 502 } 503 504 505 static int 506 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 507 uint64_t *addr) 508 { 509 uint64_t base; 510 511 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 512 513 base = roundup2(*baseptr, size); 514 515 if (base + size <= limit) { 516 *addr = base; 517 *baseptr = base + size; 518 return (0); 519 } else 520 return (-1); 521 } 522 523 /* 524 * Register (or unregister) the MMIO or I/O region associated with the BAR 525 * register 'idx' of an emulated pci device. 
526 */ 527 static void 528 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 529 { 530 struct pci_devemu *pe; 531 int error; 532 struct inout_port iop; 533 struct mem_range mr; 534 535 pe = pi->pi_d; 536 switch (pi->pi_bar[idx].type) { 537 case PCIBAR_IO: 538 bzero(&iop, sizeof(struct inout_port)); 539 iop.name = pi->pi_name; 540 iop.port = pi->pi_bar[idx].addr; 541 iop.size = pi->pi_bar[idx].size; 542 if (registration) { 543 iop.flags = IOPORT_F_INOUT; 544 iop.handler = pci_emul_io_handler; 545 iop.arg = pi; 546 error = register_inout(&iop); 547 } else 548 error = unregister_inout(&iop); 549 if (pe->pe_baraddr != NULL) 550 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 551 pi->pi_bar[idx].addr); 552 break; 553 case PCIBAR_MEM32: 554 case PCIBAR_MEM64: 555 bzero(&mr, sizeof(struct mem_range)); 556 mr.name = pi->pi_name; 557 mr.base = pi->pi_bar[idx].addr; 558 mr.size = pi->pi_bar[idx].size; 559 if (registration) { 560 mr.flags = MEM_F_RW; 561 mr.handler = pci_emul_mem_handler; 562 mr.arg1 = pi; 563 mr.arg2 = idx; 564 error = register_mem(&mr); 565 } else 566 error = unregister_mem(&mr); 567 if (pe->pe_baraddr != NULL) 568 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 569 pi->pi_bar[idx].addr); 570 break; 571 case PCIBAR_ROM: 572 error = 0; 573 if (pe->pe_baraddr != NULL) 574 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 575 pi->pi_bar[idx].addr); 576 break; 577 default: 578 error = EINVAL; 579 break; 580 } 581 assert(error == 0); 582 } 583 584 static void 585 unregister_bar(struct pci_devinst *pi, int idx) 586 { 587 588 modify_bar_registration(pi, idx, 0); 589 } 590 591 static void 592 register_bar(struct pci_devinst *pi, int idx) 593 { 594 595 modify_bar_registration(pi, idx, 1); 596 } 597 598 /* Is the ROM enabled for the emulated pci device? 
*/ 599 static int 600 romen(struct pci_devinst *pi) 601 { 602 return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) == 603 PCIM_BIOS_ENABLE; 604 } 605 606 /* Are we decoding i/o port accesses for the emulated pci device? */ 607 static int 608 porten(struct pci_devinst *pi) 609 { 610 uint16_t cmd; 611 612 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 613 614 return (cmd & PCIM_CMD_PORTEN); 615 } 616 617 /* Are we decoding memory accesses for the emulated pci device? */ 618 static int 619 memen(struct pci_devinst *pi) 620 { 621 uint16_t cmd; 622 623 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 624 625 return (cmd & PCIM_CMD_MEMEN); 626 } 627 628 /* 629 * Update the MMIO or I/O address that is decoded by the BAR register. 630 * 631 * If the pci device has enabled the address space decoding then intercept 632 * the address range decoded by the BAR register. 633 */ 634 static void 635 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 636 { 637 int decode; 638 639 if (pi->pi_bar[idx].type == PCIBAR_IO) 640 decode = porten(pi); 641 else 642 decode = memen(pi); 643 644 if (decode) 645 unregister_bar(pi, idx); 646 647 switch (type) { 648 case PCIBAR_IO: 649 case PCIBAR_MEM32: 650 pi->pi_bar[idx].addr = addr; 651 break; 652 case PCIBAR_MEM64: 653 pi->pi_bar[idx].addr &= ~0xffffffffUL; 654 pi->pi_bar[idx].addr |= addr; 655 break; 656 case PCIBAR_MEMHI64: 657 pi->pi_bar[idx].addr &= 0xffffffff; 658 pi->pi_bar[idx].addr |= addr; 659 break; 660 default: 661 assert(0); 662 } 663 664 if (decode) 665 register_bar(pi, idx); 666 } 667 668 int 669 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 670 uint64_t size) 671 { 672 assert((type == PCIBAR_ROM) || (idx >= 0 && idx <= PCI_BARMAX)); 673 assert((type != PCIBAR_ROM) || (idx == PCI_ROM_IDX)); 674 675 if ((size & (size - 1)) != 0) 676 size = 1UL << flsl(size); /* round up to a power of 2 */ 677 678 /* Enforce minimum BAR sizes required by the PCI standard */ 679 if (type == 
PCIBAR_IO) { 680 if (size < 4) 681 size = 4; 682 } else if (type == PCIBAR_ROM) { 683 if (size < ~PCIM_BIOS_ADDR_MASK + 1) 684 size = ~PCIM_BIOS_ADDR_MASK + 1; 685 } else { 686 if (size < 16) 687 size = 16; 688 } 689 690 /* 691 * To reduce fragmentation of the MMIO space, we allocate the BARs by 692 * size. Therefore, don't allocate the BAR yet. We create a list of all 693 * BAR allocation which is sorted by BAR size. When all PCI devices are 694 * initialized, we will assign an address to the BARs. 695 */ 696 697 /* create a new list entry */ 698 struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar)); 699 memset(new_bar, 0, sizeof(*new_bar)); 700 new_bar->pdi = pdi; 701 new_bar->idx = idx; 702 new_bar->type = type; 703 new_bar->size = size; 704 705 /* 706 * Search for a BAR which size is lower than the size of our newly 707 * allocated BAR. 708 */ 709 struct pci_bar_allocation *bar = NULL; 710 TAILQ_FOREACH(bar, &pci_bars, chain) { 711 if (bar->size < size) { 712 break; 713 } 714 } 715 716 if (bar == NULL) { 717 /* 718 * Either the list is empty or new BAR is the smallest BAR of 719 * the list. Append it to the end of our list. 720 */ 721 TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain); 722 } else { 723 /* 724 * The found BAR is smaller than our new BAR. For that reason, 725 * insert our new BAR before the found BAR. 726 */ 727 TAILQ_INSERT_BEFORE(bar, new_bar, chain); 728 } 729 730 /* 731 * pci_passthru devices synchronize their physical and virtual command 732 * register on init. For that reason, the virtual cmd reg should be 733 * updated as early as possible. 
734 */ 735 uint16_t enbit = 0; 736 switch (type) { 737 case PCIBAR_IO: 738 enbit = PCIM_CMD_PORTEN; 739 break; 740 case PCIBAR_MEM64: 741 case PCIBAR_MEM32: 742 enbit = PCIM_CMD_MEMEN; 743 break; 744 default: 745 enbit = 0; 746 break; 747 } 748 749 const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); 750 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); 751 752 return (0); 753 } 754 755 static int 756 pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx, 757 const enum pcibar_type type, const uint64_t size) 758 { 759 int error; 760 uint64_t *baseptr, limit, addr, mask, lobits, bar; 761 762 switch (type) { 763 case PCIBAR_NONE: 764 baseptr = NULL; 765 addr = mask = lobits = 0; 766 break; 767 case PCIBAR_IO: 768 baseptr = &pci_emul_iobase; 769 limit = PCI_EMUL_IOLIMIT; 770 mask = PCIM_BAR_IO_BASE; 771 lobits = PCIM_BAR_IO_SPACE; 772 break; 773 case PCIBAR_MEM64: 774 /* 775 * XXX 776 * Some drivers do not work well if the 64-bit BAR is allocated 777 * above 4GB. Allow for this by allocating small requests under 778 * 4GB unless then allocation size is larger than some arbitrary 779 * number (128MB currently). 780 */ 781 if (size > 128 * 1024 * 1024) { 782 baseptr = &pci_emul_membase64; 783 limit = pci_emul_memlim64; 784 mask = PCIM_BAR_MEM_BASE; 785 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 786 PCIM_BAR_MEM_PREFETCH; 787 } else { 788 baseptr = &pci_emul_membase32; 789 limit = PCI_EMUL_MEMLIMIT32; 790 mask = PCIM_BAR_MEM_BASE; 791 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 792 } 793 break; 794 case PCIBAR_MEM32: 795 baseptr = &pci_emul_membase32; 796 limit = PCI_EMUL_MEMLIMIT32; 797 mask = PCIM_BAR_MEM_BASE; 798 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 799 break; 800 case PCIBAR_ROM: 801 /* do not claim memory for ROM. OVMF will do it for us. 
*/ 802 baseptr = NULL; 803 limit = 0; 804 mask = PCIM_BIOS_ADDR_MASK; 805 lobits = 0; 806 break; 807 default: 808 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 809 assert(0); 810 } 811 812 if (baseptr != NULL) { 813 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 814 if (error != 0) 815 return (error); 816 } 817 818 pdi->pi_bar[idx].type = type; 819 pdi->pi_bar[idx].addr = addr; 820 pdi->pi_bar[idx].size = size; 821 /* 822 * passthru devices are using same lobits as physical device they set 823 * this property 824 */ 825 if (pdi->pi_bar[idx].lobits != 0) { 826 lobits = pdi->pi_bar[idx].lobits; 827 } else { 828 pdi->pi_bar[idx].lobits = lobits; 829 } 830 831 /* Initialize the BAR register in config space */ 832 bar = (addr & mask) | lobits; 833 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 834 835 if (type == PCIBAR_MEM64) { 836 assert(idx + 1 <= PCI_BARMAX); 837 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 838 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 839 } 840 841 if (type != PCIBAR_ROM) { 842 register_bar(pdi, idx); 843 } 844 845 return (0); 846 } 847 848 int 849 pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size, 850 void **const addr) 851 { 852 /* allocate ROM space once on first call */ 853 if (pci_emul_rombase == 0) { 854 pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM, 855 "pcirom", PCI_EMUL_ROMSIZE); 856 if (pci_emul_rombase == MAP_FAILED) { 857 warnx("%s: failed to create rom segment", __func__); 858 return (-1); 859 } 860 pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE; 861 pci_emul_romoffset = 0; 862 } 863 864 /* ROM size should be a power of 2 and greater than 2 KB */ 865 const uint64_t rom_size = MAX(1UL << flsl(size), 866 ~PCIM_BIOS_ADDR_MASK + 1); 867 868 /* check if ROM fits into ROM space */ 869 if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE) { 870 warnx("%s: no space left in rom segment:", __func__); 871 warnx("%16lu bytes left", 872 PCI_EMUL_ROMSIZE - 
pci_emul_romoffset); 873 warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus, 874 pdi->pi_slot, pdi->pi_func); 875 return (-1); 876 } 877 878 /* allocate ROM BAR */ 879 const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM, 880 rom_size); 881 if (error) 882 return error; 883 884 /* return address */ 885 *addr = pci_emul_rombase + pci_emul_romoffset; 886 887 /* save offset into ROM Space */ 888 pdi->pi_romoffset = pci_emul_romoffset; 889 890 /* increase offset for next ROM */ 891 pci_emul_romoffset += rom_size; 892 893 return (0); 894 } 895 896 #define CAP_START_OFFSET 0x40 897 static int 898 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 899 { 900 int i, capoff, reallen; 901 uint16_t sts; 902 903 assert(caplen > 0); 904 905 reallen = roundup2(caplen, 4); /* dword aligned */ 906 907 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 908 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 909 capoff = CAP_START_OFFSET; 910 else 911 capoff = pi->pi_capend + 1; 912 913 /* Check if we have enough space */ 914 if (capoff + reallen > PCI_REGMAX + 1) 915 return (-1); 916 917 /* Set the previous capability pointer */ 918 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 919 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 920 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 921 } else 922 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 923 924 /* Copy the capability */ 925 for (i = 0; i < caplen; i++) 926 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 927 928 /* Set the next capability pointer */ 929 pci_set_cfgdata8(pi, capoff + 1, 0); 930 931 pi->pi_prevcap = capoff; 932 pi->pi_capend = capoff + reallen - 1; 933 return (0); 934 } 935 936 static struct pci_devemu * 937 pci_emul_finddev(const char *name) 938 { 939 struct pci_devemu **pdpp, *pdp; 940 941 SET_FOREACH(pdpp, pci_devemu_set) { 942 pdp = *pdpp; 943 if (!strcmp(pdp->pe_emu, name)) { 944 return (pdp); 945 } 946 } 947 948 return (NULL); 949 } 950 951 static int 952 
pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 953 int func, struct funcinfo *fi) 954 { 955 struct pci_devinst *pdi; 956 int err; 957 958 pdi = calloc(1, sizeof(struct pci_devinst)); 959 960 pdi->pi_vmctx = ctx; 961 pdi->pi_bus = bus; 962 pdi->pi_slot = slot; 963 pdi->pi_func = func; 964 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 965 pdi->pi_lintr.pin = 0; 966 pdi->pi_lintr.state = IDLE; 967 pdi->pi_lintr.pirq_pin = 0; 968 pdi->pi_lintr.ioapic_irq = 0; 969 pdi->pi_d = pde; 970 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 971 972 /* Disable legacy interrupts */ 973 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 974 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 975 976 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); 977 978 err = (*pde->pe_init)(ctx, pdi, fi->fi_config); 979 if (err == 0) 980 fi->fi_devi = pdi; 981 else 982 free(pdi); 983 984 return (err); 985 } 986 987 void 988 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 989 { 990 int mmc; 991 992 /* Number of msi messages must be a power of 2 between 1 and 32 */ 993 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 994 mmc = ffs(msgnum) - 1; 995 996 bzero(msicap, sizeof(struct msicap)); 997 msicap->capid = PCIY_MSI; 998 msicap->nextptr = nextptr; 999 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 1000 } 1001 1002 int 1003 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 1004 { 1005 struct msicap msicap; 1006 1007 pci_populate_msicap(&msicap, msgnum, 0); 1008 1009 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 1010 } 1011 1012 static void 1013 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 1014 uint32_t msix_tab_size) 1015 { 1016 1017 assert(msix_tab_size % 4096 == 0); 1018 1019 bzero(msixcap, sizeof(struct msixcap)); 1020 msixcap->capid = PCIY_MSIX; 1021 1022 /* 1023 * Message Control Register, all fields set to 1024 * zero except for the Table Size. 
1025 * Note: Table size N is encoded as N-1 1026 */ 1027 msixcap->msgctrl = msgnum - 1; 1028 1029 /* 1030 * MSI-X BAR setup: 1031 * - MSI-X table start at offset 0 1032 * - PBA table starts at a 4K aligned offset after the MSI-X table 1033 */ 1034 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 1035 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 1036 } 1037 1038 static void 1039 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 1040 { 1041 int i, table_size; 1042 1043 assert(table_entries > 0); 1044 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 1045 1046 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 1047 pi->pi_msix.table = calloc(1, table_size); 1048 1049 /* set mask bit of vector control register */ 1050 for (i = 0; i < table_entries; i++) 1051 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 1052 } 1053 1054 int 1055 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 1056 { 1057 uint32_t tab_size; 1058 struct msixcap msixcap; 1059 1060 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 1061 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 1062 1063 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 1064 1065 /* Align table size to nearest 4K */ 1066 tab_size = roundup2(tab_size, 4096); 1067 1068 pi->pi_msix.table_bar = barnum; 1069 pi->pi_msix.pba_bar = barnum; 1070 pi->pi_msix.table_offset = 0; 1071 pi->pi_msix.table_count = msgnum; 1072 pi->pi_msix.pba_offset = tab_size; 1073 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 1074 1075 pci_msix_table_init(pi, msgnum); 1076 1077 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 1078 1079 /* allocate memory for MSI-X Table and PBA */ 1080 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 1081 tab_size + pi->pi_msix.pba_size); 1082 1083 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 1084 sizeof(msixcap))); 1085 } 1086 1087 static void 1088 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1089 int bytes, uint32_t val) 1090 { 
1091 uint16_t msgctrl, rwmask; 1092 int off; 1093 1094 off = offset - capoff; 1095 /* Message Control Register */ 1096 if (off == 2 && bytes == 2) { 1097 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 1098 msgctrl = pci_get_cfgdata16(pi, offset); 1099 msgctrl &= ~rwmask; 1100 msgctrl |= val & rwmask; 1101 val = msgctrl; 1102 1103 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 1104 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 1105 pci_lintr_update(pi); 1106 } 1107 1108 CFGWRITE(pi, offset, val, bytes); 1109 } 1110 1111 static void 1112 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1113 int bytes, uint32_t val) 1114 { 1115 uint16_t msgctrl, rwmask, msgdata, mme; 1116 uint32_t addrlo; 1117 1118 /* 1119 * If guest is writing to the message control register make sure 1120 * we do not overwrite read-only fields. 1121 */ 1122 if ((offset - capoff) == 2 && bytes == 2) { 1123 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 1124 msgctrl = pci_get_cfgdata16(pi, offset); 1125 msgctrl &= ~rwmask; 1126 msgctrl |= val & rwmask; 1127 val = msgctrl; 1128 } 1129 CFGWRITE(pi, offset, val, bytes); 1130 1131 msgctrl = pci_get_cfgdata16(pi, capoff + 2); 1132 addrlo = pci_get_cfgdata32(pi, capoff + 4); 1133 if (msgctrl & PCIM_MSICTRL_64BIT) 1134 msgdata = pci_get_cfgdata16(pi, capoff + 12); 1135 else 1136 msgdata = pci_get_cfgdata16(pi, capoff + 8); 1137 1138 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 1139 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 
1 : 0; 1140 if (pi->pi_msi.enabled) { 1141 pi->pi_msi.addr = addrlo; 1142 pi->pi_msi.msg_data = msgdata; 1143 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 1144 } else { 1145 pi->pi_msi.maxmsgnum = 0; 1146 } 1147 pci_lintr_update(pi); 1148 } 1149 1150 void 1151 pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1152 int bytes, uint32_t val) 1153 { 1154 1155 /* XXX don't write to the readonly parts */ 1156 CFGWRITE(pi, offset, val, bytes); 1157 } 1158 1159 #define PCIECAP_VERSION 0x2 1160 int 1161 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 1162 { 1163 int err; 1164 struct pciecap pciecap; 1165 1166 bzero(&pciecap, sizeof(pciecap)); 1167 1168 /* 1169 * Use the integrated endpoint type for endpoints on a root complex bus. 1170 * 1171 * NB: bhyve currently only supports a single PCI bus that is the root 1172 * complex bus, so all endpoints are integrated. 1173 */ 1174 if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0)) 1175 type = PCIEM_TYPE_ROOT_INT_EP; 1176 1177 pciecap.capid = PCIY_EXPRESS; 1178 pciecap.pcie_capabilities = PCIECAP_VERSION | type; 1179 if (type != PCIEM_TYPE_ROOT_INT_EP) { 1180 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 1181 pciecap.link_status = 0x11; /* gen1, x1 */ 1182 } 1183 1184 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 1185 return (err); 1186 } 1187 1188 /* 1189 * This function assumes that 'coff' is in the capabilities region of the 1190 * config space. A capoff parameter of zero will force a search for the 1191 * offset and type. 
1192 */ 1193 void 1194 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, 1195 uint8_t capoff, int capid) 1196 { 1197 uint8_t nextoff; 1198 1199 /* Do not allow un-aligned writes */ 1200 if ((offset & (bytes - 1)) != 0) 1201 return; 1202 1203 if (capoff == 0) { 1204 /* Find the capability that we want to update */ 1205 capoff = CAP_START_OFFSET; 1206 while (1) { 1207 nextoff = pci_get_cfgdata8(pi, capoff + 1); 1208 if (nextoff == 0) 1209 break; 1210 if (offset >= capoff && offset < nextoff) 1211 break; 1212 1213 capoff = nextoff; 1214 } 1215 assert(offset >= capoff); 1216 capid = pci_get_cfgdata8(pi, capoff); 1217 } 1218 1219 /* 1220 * Capability ID and Next Capability Pointer are readonly. 1221 * However, some o/s's do 4-byte writes that include these. 1222 * For this case, trim the write back to 2 bytes and adjust 1223 * the data. 1224 */ 1225 if (offset == capoff || offset == capoff + 1) { 1226 if (offset == capoff && bytes == 4) { 1227 bytes = 2; 1228 offset += 2; 1229 val >>= 16; 1230 } else 1231 return; 1232 } 1233 1234 switch (capid) { 1235 case PCIY_MSI: 1236 msicap_cfgwrite(pi, capoff, offset, bytes, val); 1237 break; 1238 case PCIY_MSIX: 1239 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 1240 break; 1241 case PCIY_EXPRESS: 1242 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 1243 break; 1244 default: 1245 break; 1246 } 1247 } 1248 1249 static int 1250 pci_emul_iscap(struct pci_devinst *pi, int offset) 1251 { 1252 uint16_t sts; 1253 1254 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1255 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1256 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1257 return (1); 1258 } 1259 return (0); 1260 } 1261 1262 static int 1263 pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1264 int size, uint64_t *val, void *arg1, long arg2) 1265 { 1266 /* 1267 * Ignore writes; return 0xff's for reads. 
The mem read code 1268 * will take care of truncating to the correct size. 1269 */ 1270 if (dir == MEM_F_READ) { 1271 *val = 0xffffffffffffffff; 1272 } 1273 1274 return (0); 1275 } 1276 1277 static int 1278 pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1279 int bytes, uint64_t *val, void *arg1, long arg2) 1280 { 1281 int bus, slot, func, coff, in; 1282 1283 coff = addr & 0xfff; 1284 func = (addr >> 12) & 0x7; 1285 slot = (addr >> 15) & 0x1f; 1286 bus = (addr >> 20) & 0xff; 1287 in = (dir == MEM_F_READ); 1288 if (in) 1289 *val = ~0UL; 1290 pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); 1291 return (0); 1292 } 1293 1294 uint64_t 1295 pci_ecfg_base(void) 1296 { 1297 1298 return (PCI_EMUL_ECFG_BASE); 1299 } 1300 1301 #define BUSIO_ROUNDUP 32 1302 #define BUSMEM32_ROUNDUP (1024 * 1024) 1303 #define BUSMEM64_ROUNDUP (512 * 1024 * 1024) 1304 1305 int 1306 init_pci(struct vmctx *ctx) 1307 { 1308 char node_name[sizeof("pci.XXX.XX.X")]; 1309 struct mem_range mr; 1310 struct pci_devemu *pde; 1311 struct businfo *bi; 1312 struct slotinfo *si; 1313 struct funcinfo *fi; 1314 nvlist_t *nvl; 1315 const char *emul; 1316 size_t lowmem; 1317 int bus, slot, func; 1318 int error; 1319 1320 if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32) 1321 errx(EX_OSERR, "Invalid lowmem limit"); 1322 1323 pci_emul_iobase = PCI_EMUL_IOBASE; 1324 pci_emul_membase32 = PCI_EMUL_MEMBASE32; 1325 1326 pci_emul_membase64 = 4*GB + vm_get_highmem_size(ctx); 1327 pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64); 1328 pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64; 1329 1330 for (bus = 0; bus < MAXBUSES; bus++) { 1331 snprintf(node_name, sizeof(node_name), "pci.%d", bus); 1332 nvl = find_config_node(node_name); 1333 if (nvl == NULL) 1334 continue; 1335 pci_businfo[bus] = calloc(1, sizeof(struct businfo)); 1336 bi = pci_businfo[bus]; 1337 1338 /* 1339 * Keep track of the i/o and memory resources allocated to 1340 * this 
bus. 1341 */ 1342 bi->iobase = pci_emul_iobase; 1343 bi->membase32 = pci_emul_membase32; 1344 bi->membase64 = pci_emul_membase64; 1345 1346 /* first run: init devices */ 1347 for (slot = 0; slot < MAXSLOTS; slot++) { 1348 si = &bi->slotinfo[slot]; 1349 for (func = 0; func < MAXFUNCS; func++) { 1350 fi = &si->si_funcs[func]; 1351 snprintf(node_name, sizeof(node_name), 1352 "pci.%d.%d.%d", bus, slot, func); 1353 nvl = find_config_node(node_name); 1354 if (nvl == NULL) 1355 continue; 1356 1357 fi->fi_config = nvl; 1358 emul = get_config_value_node(nvl, "device"); 1359 if (emul == NULL) { 1360 EPRINTLN("pci slot %d:%d:%d: missing " 1361 "\"device\" value", bus, slot, func); 1362 return (EINVAL); 1363 } 1364 pde = pci_emul_finddev(emul); 1365 if (pde == NULL) { 1366 EPRINTLN("pci slot %d:%d:%d: unknown " 1367 "device \"%s\"", bus, slot, func, 1368 emul); 1369 return (EINVAL); 1370 } 1371 if (pde->pe_alias != NULL) { 1372 EPRINTLN("pci slot %d:%d:%d: legacy " 1373 "device \"%s\", use \"%s\" instead", 1374 bus, slot, func, emul, 1375 pde->pe_alias); 1376 return (EINVAL); 1377 } 1378 fi->fi_pde = pde; 1379 error = pci_emul_init(ctx, pde, bus, slot, 1380 func, fi); 1381 if (error) 1382 return (error); 1383 } 1384 } 1385 1386 /* second run: assign BARs and free list */ 1387 struct pci_bar_allocation *bar; 1388 struct pci_bar_allocation *bar_tmp; 1389 TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) { 1390 pci_emul_assign_bar(bar->pdi, bar->idx, bar->type, 1391 bar->size); 1392 free(bar); 1393 } 1394 TAILQ_INIT(&pci_bars); 1395 1396 /* 1397 * Add some slop to the I/O and memory resources decoded by 1398 * this bus to give a guest some flexibility if it wants to 1399 * reprogram the BARs. 
1400 */ 1401 pci_emul_iobase += BUSIO_ROUNDUP; 1402 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1403 bi->iolimit = pci_emul_iobase; 1404 1405 pci_emul_membase32 += BUSMEM32_ROUNDUP; 1406 pci_emul_membase32 = roundup2(pci_emul_membase32, 1407 BUSMEM32_ROUNDUP); 1408 bi->memlimit32 = pci_emul_membase32; 1409 1410 pci_emul_membase64 += BUSMEM64_ROUNDUP; 1411 pci_emul_membase64 = roundup2(pci_emul_membase64, 1412 BUSMEM64_ROUNDUP); 1413 bi->memlimit64 = pci_emul_membase64; 1414 } 1415 1416 /* 1417 * PCI backends are initialized before routing INTx interrupts 1418 * so that LPC devices are able to reserve ISA IRQs before 1419 * routing PIRQ pins. 1420 */ 1421 for (bus = 0; bus < MAXBUSES; bus++) { 1422 if ((bi = pci_businfo[bus]) == NULL) 1423 continue; 1424 1425 for (slot = 0; slot < MAXSLOTS; slot++) { 1426 si = &bi->slotinfo[slot]; 1427 for (func = 0; func < MAXFUNCS; func++) { 1428 fi = &si->si_funcs[func]; 1429 if (fi->fi_devi == NULL) 1430 continue; 1431 pci_lintr_route(fi->fi_devi); 1432 } 1433 } 1434 } 1435 lpc_pirq_routed(); 1436 1437 /* 1438 * The guest physical memory map looks like the following: 1439 * [0, lowmem) guest system memory 1440 * [lowmem, 0xC0000000) memory hole (may be absent) 1441 * [0xC0000000, 0xE0000000) PCI hole (32-bit BAR allocation) 1442 * [0xE0000000, 0xF0000000) PCI extended config window 1443 * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware 1444 * [4GB, 4GB + highmem) 1445 */ 1446 1447 /* 1448 * Accesses to memory addresses that are not allocated to system 1449 * memory or PCI devices return 0xff's. 
1450 */ 1451 lowmem = vm_get_lowmem_size(ctx); 1452 bzero(&mr, sizeof(struct mem_range)); 1453 mr.name = "PCI hole"; 1454 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1455 mr.base = lowmem; 1456 mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1457 mr.handler = pci_emul_fallback_handler; 1458 error = register_mem_fallback(&mr); 1459 assert(error == 0); 1460 1461 /* PCI extended config space */ 1462 bzero(&mr, sizeof(struct mem_range)); 1463 mr.name = "PCI ECFG"; 1464 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1465 mr.base = PCI_EMUL_ECFG_BASE; 1466 mr.size = PCI_EMUL_ECFG_SIZE; 1467 mr.handler = pci_emul_ecfg_handler; 1468 error = register_mem(&mr); 1469 assert(error == 0); 1470 1471 return (0); 1472 } 1473 1474 static void 1475 pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1476 void *arg) 1477 { 1478 1479 dsdt_line(" Package ()"); 1480 dsdt_line(" {"); 1481 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1482 dsdt_line(" 0x%02X,", pin - 1); 1483 dsdt_line(" Zero,"); 1484 dsdt_line(" 0x%X", ioapic_irq); 1485 dsdt_line(" },"); 1486 } 1487 1488 static void 1489 pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1490 void *arg) 1491 { 1492 char *name; 1493 1494 name = lpc_pirq_name(pirq_pin); 1495 if (name == NULL) 1496 return; 1497 dsdt_line(" Package ()"); 1498 dsdt_line(" {"); 1499 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1500 dsdt_line(" 0x%02X,", pin - 1); 1501 dsdt_line(" %s,", name); 1502 dsdt_line(" 0x00"); 1503 dsdt_line(" },"); 1504 free(name); 1505 } 1506 1507 /* 1508 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1509 * corresponding to each PCI bus. 1510 */ 1511 static void 1512 pci_bus_write_dsdt(int bus) 1513 { 1514 struct businfo *bi; 1515 struct slotinfo *si; 1516 struct pci_devinst *pi; 1517 int count, func, slot; 1518 1519 /* 1520 * If there are no devices on this 'bus' then just return. 
1521 */ 1522 if ((bi = pci_businfo[bus]) == NULL) { 1523 /* 1524 * Bus 0 is special because it decodes the I/O ports used 1525 * for PCI config space access even if there are no devices 1526 * on it. 1527 */ 1528 if (bus != 0) 1529 return; 1530 } 1531 1532 dsdt_line(" Device (PC%02X)", bus); 1533 dsdt_line(" {"); 1534 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1535 1536 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1537 dsdt_line(" {"); 1538 dsdt_line(" Return (0x%08X)", bus); 1539 dsdt_line(" }"); 1540 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1541 dsdt_line(" {"); 1542 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1543 "MaxFixed, PosDecode,"); 1544 dsdt_line(" 0x0000, // Granularity"); 1545 dsdt_line(" 0x%04X, // Range Minimum", bus); 1546 dsdt_line(" 0x%04X, // Range Maximum", bus); 1547 dsdt_line(" 0x0000, // Translation Offset"); 1548 dsdt_line(" 0x0001, // Length"); 1549 dsdt_line(" ,, )"); 1550 1551 if (bus == 0) { 1552 dsdt_indent(3); 1553 dsdt_fixed_ioport(0xCF8, 8); 1554 dsdt_unindent(3); 1555 1556 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1557 "PosDecode, EntireRange,"); 1558 dsdt_line(" 0x0000, // Granularity"); 1559 dsdt_line(" 0x0000, // Range Minimum"); 1560 dsdt_line(" 0x0CF7, // Range Maximum"); 1561 dsdt_line(" 0x0000, // Translation Offset"); 1562 dsdt_line(" 0x0CF8, // Length"); 1563 dsdt_line(" ,, , TypeStatic)"); 1564 1565 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1566 "PosDecode, EntireRange,"); 1567 dsdt_line(" 0x0000, // Granularity"); 1568 dsdt_line(" 0x0D00, // Range Minimum"); 1569 dsdt_line(" 0x%04X, // Range Maximum", 1570 PCI_EMUL_IOBASE - 1); 1571 dsdt_line(" 0x0000, // Translation Offset"); 1572 dsdt_line(" 0x%04X, // Length", 1573 PCI_EMUL_IOBASE - 0x0D00); 1574 dsdt_line(" ,, , TypeStatic)"); 1575 1576 if (bi == NULL) { 1577 dsdt_line(" })"); 1578 goto done; 1579 } 1580 } 1581 assert(bi != NULL); 1582 1583 /* i/o window */ 1584 dsdt_line(" WordIO (ResourceProducer, 
MinFixed, MaxFixed, " 1585 "PosDecode, EntireRange,"); 1586 dsdt_line(" 0x0000, // Granularity"); 1587 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1588 dsdt_line(" 0x%04X, // Range Maximum", 1589 bi->iolimit - 1); 1590 dsdt_line(" 0x0000, // Translation Offset"); 1591 dsdt_line(" 0x%04X, // Length", 1592 bi->iolimit - bi->iobase); 1593 dsdt_line(" ,, , TypeStatic)"); 1594 1595 /* mmio window (32-bit) */ 1596 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1597 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1598 dsdt_line(" 0x00000000, // Granularity"); 1599 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1600 dsdt_line(" 0x%08X, // Range Maximum\n", 1601 bi->memlimit32 - 1); 1602 dsdt_line(" 0x00000000, // Translation Offset"); 1603 dsdt_line(" 0x%08X, // Length\n", 1604 bi->memlimit32 - bi->membase32); 1605 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1606 1607 /* mmio window (64-bit) */ 1608 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1609 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1610 dsdt_line(" 0x0000000000000000, // Granularity"); 1611 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1612 dsdt_line(" 0x%016lX, // Range Maximum\n", 1613 bi->memlimit64 - 1); 1614 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1615 dsdt_line(" 0x%016lX, // Length\n", 1616 bi->memlimit64 - bi->membase64); 1617 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1618 dsdt_line(" })"); 1619 1620 count = pci_count_lintr(bus); 1621 if (count != 0) { 1622 dsdt_indent(2); 1623 dsdt_line("Name (PPRT, Package ()"); 1624 dsdt_line("{"); 1625 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1626 dsdt_line("})"); 1627 dsdt_line("Name (APRT, Package ()"); 1628 dsdt_line("{"); 1629 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1630 dsdt_line("})"); 1631 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1632 dsdt_line("{"); 1633 dsdt_line(" If (PICM)"); 1634 dsdt_line(" {"); 1635 dsdt_line(" Return (APRT)"); 1636 dsdt_line(" 
}"); 1637 dsdt_line(" Else"); 1638 dsdt_line(" {"); 1639 dsdt_line(" Return (PPRT)"); 1640 dsdt_line(" }"); 1641 dsdt_line("}"); 1642 dsdt_unindent(2); 1643 } 1644 1645 dsdt_indent(2); 1646 for (slot = 0; slot < MAXSLOTS; slot++) { 1647 si = &bi->slotinfo[slot]; 1648 for (func = 0; func < MAXFUNCS; func++) { 1649 pi = si->si_funcs[func].fi_devi; 1650 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1651 pi->pi_d->pe_write_dsdt(pi); 1652 } 1653 } 1654 dsdt_unindent(2); 1655 done: 1656 dsdt_line(" }"); 1657 } 1658 1659 void 1660 pci_write_dsdt(void) 1661 { 1662 int bus; 1663 1664 dsdt_indent(1); 1665 dsdt_line("Name (PICM, 0x00)"); 1666 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1667 dsdt_line("{"); 1668 dsdt_line(" Store (Arg0, PICM)"); 1669 dsdt_line("}"); 1670 dsdt_line(""); 1671 dsdt_line("Scope (_SB)"); 1672 dsdt_line("{"); 1673 for (bus = 0; bus < MAXBUSES; bus++) 1674 pci_bus_write_dsdt(bus); 1675 dsdt_line("}"); 1676 dsdt_unindent(1); 1677 } 1678 1679 int 1680 pci_bus_configured(int bus) 1681 { 1682 assert(bus >= 0 && bus < MAXBUSES); 1683 return (pci_businfo[bus] != NULL); 1684 } 1685 1686 int 1687 pci_msi_enabled(struct pci_devinst *pi) 1688 { 1689 return (pi->pi_msi.enabled); 1690 } 1691 1692 int 1693 pci_msi_maxmsgnum(struct pci_devinst *pi) 1694 { 1695 if (pi->pi_msi.enabled) 1696 return (pi->pi_msi.maxmsgnum); 1697 else 1698 return (0); 1699 } 1700 1701 int 1702 pci_msix_enabled(struct pci_devinst *pi) 1703 { 1704 1705 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1706 } 1707 1708 void 1709 pci_generate_msix(struct pci_devinst *pi, int index) 1710 { 1711 struct msix_table_entry *mte; 1712 1713 if (!pci_msix_enabled(pi)) 1714 return; 1715 1716 if (pi->pi_msix.function_mask) 1717 return; 1718 1719 if (index >= pi->pi_msix.table_count) 1720 return; 1721 1722 mte = &pi->pi_msix.table[index]; 1723 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1724 /* XXX Set PBA bit if interrupt is disabled */ 1725 vm_lapic_msi(pi->pi_vmctx, mte->addr, 
mte->msg_data); 1726 } 1727 } 1728 1729 void 1730 pci_generate_msi(struct pci_devinst *pi, int index) 1731 { 1732 1733 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1734 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1735 pi->pi_msi.msg_data + index); 1736 } 1737 } 1738 1739 static bool 1740 pci_lintr_permitted(struct pci_devinst *pi) 1741 { 1742 uint16_t cmd; 1743 1744 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1745 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1746 (cmd & PCIM_CMD_INTxDIS))); 1747 } 1748 1749 void 1750 pci_lintr_request(struct pci_devinst *pi) 1751 { 1752 struct businfo *bi; 1753 struct slotinfo *si; 1754 int bestpin, bestcount, pin; 1755 1756 bi = pci_businfo[pi->pi_bus]; 1757 assert(bi != NULL); 1758 1759 /* 1760 * Just allocate a pin from our slot. The pin will be 1761 * assigned IRQs later when interrupts are routed. 1762 */ 1763 si = &bi->slotinfo[pi->pi_slot]; 1764 bestpin = 0; 1765 bestcount = si->si_intpins[0].ii_count; 1766 for (pin = 1; pin < 4; pin++) { 1767 if (si->si_intpins[pin].ii_count < bestcount) { 1768 bestpin = pin; 1769 bestcount = si->si_intpins[pin].ii_count; 1770 } 1771 } 1772 1773 si->si_intpins[bestpin].ii_count++; 1774 pi->pi_lintr.pin = bestpin + 1; 1775 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1776 } 1777 1778 static void 1779 pci_lintr_route(struct pci_devinst *pi) 1780 { 1781 struct businfo *bi; 1782 struct intxinfo *ii; 1783 1784 if (pi->pi_lintr.pin == 0) 1785 return; 1786 1787 bi = pci_businfo[pi->pi_bus]; 1788 assert(bi != NULL); 1789 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1790 1791 /* 1792 * Attempt to allocate an I/O APIC pin for this intpin if one 1793 * is not yet assigned. 1794 */ 1795 if (ii->ii_ioapic_irq == 0) 1796 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); 1797 assert(ii->ii_ioapic_irq > 0); 1798 1799 /* 1800 * Attempt to allocate a PIRQ pin for this intpin if one is 1801 * not yet assigned. 
1802 */ 1803 if (ii->ii_pirq_pin == 0) 1804 ii->ii_pirq_pin = pirq_alloc_pin(pi); 1805 assert(ii->ii_pirq_pin > 0); 1806 1807 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1808 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1809 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1810 } 1811 1812 void 1813 pci_lintr_assert(struct pci_devinst *pi) 1814 { 1815 1816 assert(pi->pi_lintr.pin > 0); 1817 1818 pthread_mutex_lock(&pi->pi_lintr.lock); 1819 if (pi->pi_lintr.state == IDLE) { 1820 if (pci_lintr_permitted(pi)) { 1821 pi->pi_lintr.state = ASSERTED; 1822 pci_irq_assert(pi); 1823 } else 1824 pi->pi_lintr.state = PENDING; 1825 } 1826 pthread_mutex_unlock(&pi->pi_lintr.lock); 1827 } 1828 1829 void 1830 pci_lintr_deassert(struct pci_devinst *pi) 1831 { 1832 1833 assert(pi->pi_lintr.pin > 0); 1834 1835 pthread_mutex_lock(&pi->pi_lintr.lock); 1836 if (pi->pi_lintr.state == ASSERTED) { 1837 pi->pi_lintr.state = IDLE; 1838 pci_irq_deassert(pi); 1839 } else if (pi->pi_lintr.state == PENDING) 1840 pi->pi_lintr.state = IDLE; 1841 pthread_mutex_unlock(&pi->pi_lintr.lock); 1842 } 1843 1844 static void 1845 pci_lintr_update(struct pci_devinst *pi) 1846 { 1847 1848 pthread_mutex_lock(&pi->pi_lintr.lock); 1849 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1850 pci_irq_deassert(pi); 1851 pi->pi_lintr.state = PENDING; 1852 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1853 pi->pi_lintr.state = ASSERTED; 1854 pci_irq_assert(pi); 1855 } 1856 pthread_mutex_unlock(&pi->pi_lintr.lock); 1857 } 1858 1859 int 1860 pci_count_lintr(int bus) 1861 { 1862 int count, slot, pin; 1863 struct slotinfo *slotinfo; 1864 1865 count = 0; 1866 if (pci_businfo[bus] != NULL) { 1867 for (slot = 0; slot < MAXSLOTS; slot++) { 1868 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1869 for (pin = 0; pin < 4; pin++) { 1870 if (slotinfo->si_intpins[pin].ii_count != 0) 1871 count++; 1872 } 1873 } 1874 } 1875 return (count); 1876 } 1877 1878 void 1879 pci_walk_lintr(int 
bus, pci_lintr_cb cb, void *arg) 1880 { 1881 struct businfo *bi; 1882 struct slotinfo *si; 1883 struct intxinfo *ii; 1884 int slot, pin; 1885 1886 if ((bi = pci_businfo[bus]) == NULL) 1887 return; 1888 1889 for (slot = 0; slot < MAXSLOTS; slot++) { 1890 si = &bi->slotinfo[slot]; 1891 for (pin = 0; pin < 4; pin++) { 1892 ii = &si->si_intpins[pin]; 1893 if (ii->ii_count != 0) 1894 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 1895 ii->ii_ioapic_irq, arg); 1896 } 1897 } 1898 } 1899 1900 /* 1901 * Return 1 if the emulated device in 'slot' is a multi-function device. 1902 * Return 0 otherwise. 1903 */ 1904 static int 1905 pci_emul_is_mfdev(int bus, int slot) 1906 { 1907 struct businfo *bi; 1908 struct slotinfo *si; 1909 int f, numfuncs; 1910 1911 numfuncs = 0; 1912 if ((bi = pci_businfo[bus]) != NULL) { 1913 si = &bi->slotinfo[slot]; 1914 for (f = 0; f < MAXFUNCS; f++) { 1915 if (si->si_funcs[f].fi_devi != NULL) { 1916 numfuncs++; 1917 } 1918 } 1919 } 1920 return (numfuncs > 1); 1921 } 1922 1923 /* 1924 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1925 * whether or not is a multi-function being emulated in the pci 'slot'. 1926 */ 1927 static void 1928 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1929 { 1930 int mfdev; 1931 1932 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1933 mfdev = pci_emul_is_mfdev(bus, slot); 1934 switch (bytes) { 1935 case 1: 1936 case 2: 1937 *rv &= ~PCIM_MFDEV; 1938 if (mfdev) { 1939 *rv |= PCIM_MFDEV; 1940 } 1941 break; 1942 case 4: 1943 *rv &= ~(PCIM_MFDEV << 16); 1944 if (mfdev) { 1945 *rv |= (PCIM_MFDEV << 16); 1946 } 1947 break; 1948 } 1949 } 1950 } 1951 1952 /* 1953 * Update device state in response to changes to the PCI command 1954 * register. 
1955 */ 1956 void 1957 pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) 1958 { 1959 int i; 1960 uint16_t changed, new; 1961 1962 new = pci_get_cfgdata16(pi, PCIR_COMMAND); 1963 changed = old ^ new; 1964 1965 /* 1966 * If the MMIO or I/O address space decoding has changed then 1967 * register/unregister all BARs that decode that address space. 1968 */ 1969 for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) { 1970 switch (pi->pi_bar[i].type) { 1971 case PCIBAR_NONE: 1972 case PCIBAR_MEMHI64: 1973 break; 1974 case PCIBAR_IO: 1975 /* I/O address space decoding changed? */ 1976 if (changed & PCIM_CMD_PORTEN) { 1977 if (new & PCIM_CMD_PORTEN) 1978 register_bar(pi, i); 1979 else 1980 unregister_bar(pi, i); 1981 } 1982 break; 1983 case PCIBAR_ROM: 1984 /* skip (un-)register of ROM if it disabled */ 1985 if (!romen(pi)) 1986 break; 1987 /* fallthrough */ 1988 case PCIBAR_MEM32: 1989 case PCIBAR_MEM64: 1990 /* MMIO address space decoding changed? */ 1991 if (changed & PCIM_CMD_MEMEN) { 1992 if (new & PCIM_CMD_MEMEN) 1993 register_bar(pi, i); 1994 else 1995 unregister_bar(pi, i); 1996 } 1997 break; 1998 default: 1999 assert(0); 2000 } 2001 } 2002 2003 /* 2004 * If INTx has been unmasked and is pending, assert the 2005 * interrupt. 2006 */ 2007 pci_lintr_update(pi); 2008 } 2009 2010 static void 2011 pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) 2012 { 2013 int rshift; 2014 uint32_t cmd, old, readonly; 2015 2016 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 2017 2018 /* 2019 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. 2020 * 2021 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are 2022 * 'write 1 to clear'. However these bits are not set to '1' by 2023 * any device emulation so it is simpler to treat them as readonly. 
2024 */ 2025 rshift = (coff & 0x3) * 8; 2026 readonly = 0xFFFFF880 >> rshift; 2027 2028 old = CFGREAD(pi, coff, bytes); 2029 new &= ~readonly; 2030 new |= (old & readonly); 2031 CFGWRITE(pi, coff, new, bytes); /* update config */ 2032 2033 pci_emul_cmd_changed(pi, cmd); 2034 } 2035 2036 static void 2037 pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, 2038 int coff, int bytes, uint32_t *eax) 2039 { 2040 struct businfo *bi; 2041 struct slotinfo *si; 2042 struct pci_devinst *pi; 2043 struct pci_devemu *pe; 2044 int idx, needcfg; 2045 uint64_t addr, bar, mask; 2046 2047 if ((bi = pci_businfo[bus]) != NULL) { 2048 si = &bi->slotinfo[slot]; 2049 pi = si->si_funcs[func].fi_devi; 2050 } else 2051 pi = NULL; 2052 2053 /* 2054 * Just return if there is no device at this slot:func or if the 2055 * the guest is doing an un-aligned access. 2056 */ 2057 if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || 2058 (coff & (bytes - 1)) != 0) { 2059 if (in) 2060 *eax = 0xffffffff; 2061 return; 2062 } 2063 2064 /* 2065 * Ignore all writes beyond the standard config space and return all 2066 * ones on reads. 2067 */ 2068 if (coff >= PCI_REGMAX + 1) { 2069 if (in) { 2070 *eax = 0xffffffff; 2071 /* 2072 * Extended capabilities begin at offset 256 in config 2073 * space. Absence of extended capabilities is signaled 2074 * with all 0s in the extended capability header at 2075 * offset 256. 
2076 */ 2077 if (coff <= PCI_REGMAX + 4) 2078 *eax = 0x00000000; 2079 } 2080 return; 2081 } 2082 2083 pe = pi->pi_d; 2084 2085 /* 2086 * Config read 2087 */ 2088 if (in) { 2089 /* Let the device emulation override the default handler */ 2090 if (pe->pe_cfgread != NULL) { 2091 needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, 2092 eax); 2093 } else { 2094 needcfg = 1; 2095 } 2096 2097 if (needcfg) 2098 *eax = CFGREAD(pi, coff, bytes); 2099 2100 pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); 2101 } else { 2102 /* Let the device emulation override the default handler */ 2103 if (pe->pe_cfgwrite != NULL && 2104 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 2105 return; 2106 2107 /* 2108 * Special handling for write to BAR and ROM registers 2109 */ 2110 if ((coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) || 2111 (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4)) { 2112 /* 2113 * Ignore writes to BAR registers that are not 2114 * 4-byte aligned. 2115 */ 2116 if (bytes != 4 || (coff & 0x3) != 0) 2117 return; 2118 if (coff != PCIR_BIOS) { 2119 idx = (coff - PCIR_BAR(0)) / 4; 2120 } else { 2121 idx = PCI_ROM_IDX; 2122 } 2123 mask = ~(pi->pi_bar[idx].size - 1); 2124 switch (pi->pi_bar[idx].type) { 2125 case PCIBAR_NONE: 2126 pi->pi_bar[idx].addr = bar = 0; 2127 break; 2128 case PCIBAR_IO: 2129 addr = *eax & mask; 2130 addr &= 0xffff; 2131 bar = addr | pi->pi_bar[idx].lobits; 2132 /* 2133 * Register the new BAR value for interception 2134 */ 2135 if (addr != pi->pi_bar[idx].addr) { 2136 update_bar_address(pi, addr, idx, 2137 PCIBAR_IO); 2138 } 2139 break; 2140 case PCIBAR_MEM32: 2141 addr = bar = *eax & mask; 2142 bar |= pi->pi_bar[idx].lobits; 2143 if (addr != pi->pi_bar[idx].addr) { 2144 update_bar_address(pi, addr, idx, 2145 PCIBAR_MEM32); 2146 } 2147 break; 2148 case PCIBAR_MEM64: 2149 addr = bar = *eax & mask; 2150 bar |= pi->pi_bar[idx].lobits; 2151 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 2152 update_bar_address(pi, addr, idx, 2153 
PCIBAR_MEM64); 2154 } 2155 break; 2156 case PCIBAR_MEMHI64: 2157 mask = ~(pi->pi_bar[idx - 1].size - 1); 2158 addr = ((uint64_t)*eax << 32) & mask; 2159 bar = addr >> 32; 2160 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 2161 update_bar_address(pi, addr, idx - 1, 2162 PCIBAR_MEMHI64); 2163 } 2164 break; 2165 case PCIBAR_ROM: 2166 addr = bar = *eax & mask; 2167 if (memen(pi) && romen(pi)) { 2168 unregister_bar(pi, idx); 2169 } 2170 pi->pi_bar[idx].addr = addr; 2171 pi->pi_bar[idx].lobits = *eax & 2172 PCIM_BIOS_ENABLE; 2173 /* romen could have changed it value */ 2174 if (memen(pi) && romen(pi)) { 2175 register_bar(pi, idx); 2176 } 2177 bar |= pi->pi_bar[idx].lobits; 2178 break; 2179 default: 2180 assert(0); 2181 } 2182 pci_set_cfgdata32(pi, coff, bar); 2183 2184 } else if (pci_emul_iscap(pi, coff)) { 2185 pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); 2186 } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { 2187 pci_emul_cmdsts_write(pi, coff, *eax, bytes); 2188 } else { 2189 CFGWRITE(pi, coff, *eax, bytes); 2190 } 2191 } 2192 } 2193 2194 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 2195 2196 static int 2197 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2198 uint32_t *eax, void *arg) 2199 { 2200 uint32_t x; 2201 2202 if (bytes != 4) { 2203 if (in) 2204 *eax = (bytes == 2) ? 
0xffff : 0xff; 2205 return (0); 2206 } 2207 2208 if (in) { 2209 x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; 2210 if (cfgenable) 2211 x |= CONF1_ENABLE; 2212 *eax = x; 2213 } else { 2214 x = *eax; 2215 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 2216 cfgoff = (x & PCI_REGMAX) & ~0x03; 2217 cfgfunc = (x >> 8) & PCI_FUNCMAX; 2218 cfgslot = (x >> 11) & PCI_SLOTMAX; 2219 cfgbus = (x >> 16) & PCI_BUSMAX; 2220 } 2221 2222 return (0); 2223 } 2224 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 2225 2226 static int 2227 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2228 uint32_t *eax, void *arg) 2229 { 2230 int coff; 2231 2232 assert(bytes == 1 || bytes == 2 || bytes == 4); 2233 2234 coff = cfgoff + (port - CONF1_DATA_PORT); 2235 if (cfgenable) { 2236 pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, 2237 eax); 2238 } else { 2239 /* Ignore accesses to cfgdata if not enabled by cfgaddr */ 2240 if (in) 2241 *eax = 0xffffffff; 2242 } 2243 return (0); 2244 } 2245 2246 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 2247 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 2248 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 2249 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 2250 2251 #ifdef BHYVE_SNAPSHOT 2252 /* 2253 * Saves/restores PCI device emulated state. Returns 0 on success. 
2254 */ 2255 static int 2256 pci_snapshot_pci_dev(struct vm_snapshot_meta *meta) 2257 { 2258 struct pci_devinst *pi; 2259 int i; 2260 int ret; 2261 2262 pi = meta->dev_data; 2263 2264 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done); 2265 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done); 2266 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done); 2267 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done); 2268 2269 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done); 2270 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done); 2271 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done); 2272 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done); 2273 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done); 2274 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done); 2275 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done); 2276 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done); 2277 2278 SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata), 2279 meta, ret, done); 2280 2281 for (i = 0; i < nitems(pi->pi_bar); i++) { 2282 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done); 2283 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done); 2284 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done); 2285 } 2286 2287 /* Restore MSI-X table. 
*/ 2288 for (i = 0; i < pi->pi_msix.table_count; i++) { 2289 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr, 2290 meta, ret, done); 2291 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data, 2292 meta, ret, done); 2293 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control, 2294 meta, ret, done); 2295 } 2296 2297 done: 2298 return (ret); 2299 } 2300 2301 static int 2302 pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde, 2303 struct pci_devinst **pdi) 2304 { 2305 struct businfo *bi; 2306 struct slotinfo *si; 2307 struct funcinfo *fi; 2308 int bus, slot, func; 2309 2310 assert(dev_name != NULL); 2311 assert(pde != NULL); 2312 assert(pdi != NULL); 2313 2314 for (bus = 0; bus < MAXBUSES; bus++) { 2315 if ((bi = pci_businfo[bus]) == NULL) 2316 continue; 2317 2318 for (slot = 0; slot < MAXSLOTS; slot++) { 2319 si = &bi->slotinfo[slot]; 2320 for (func = 0; func < MAXFUNCS; func++) { 2321 fi = &si->si_funcs[func]; 2322 if (fi->fi_pde == NULL) 2323 continue; 2324 if (strcmp(dev_name, fi->fi_pde->pe_emu) != 0) 2325 continue; 2326 2327 *pde = fi->fi_pde; 2328 *pdi = fi->fi_devi; 2329 return (0); 2330 } 2331 } 2332 } 2333 2334 return (EINVAL); 2335 } 2336 2337 int 2338 pci_snapshot(struct vm_snapshot_meta *meta) 2339 { 2340 struct pci_devemu *pde; 2341 struct pci_devinst *pdi; 2342 int ret; 2343 2344 assert(meta->dev_name != NULL); 2345 2346 ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi); 2347 if (ret != 0) { 2348 fprintf(stderr, "%s: no such name: %s\r\n", 2349 __func__, meta->dev_name); 2350 memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); 2351 return (0); 2352 } 2353 2354 meta->dev_data = pdi; 2355 2356 if (pde->pe_snapshot == NULL) { 2357 fprintf(stderr, "%s: not implemented yet for: %s\r\n", 2358 __func__, meta->dev_name); 2359 return (-1); 2360 } 2361 2362 ret = pci_snapshot_pci_dev(meta); 2363 if (ret != 0) { 2364 fprintf(stderr, "%s: failed to snapshot pci dev\r\n", 2365 __func__); 2366 return (-1); 2367 } 2368 2369 ret = 
(*pde->pe_snapshot)(meta); 2370 2371 return (ret); 2372 } 2373 2374 int 2375 pci_pause(struct vmctx *ctx, const char *dev_name) 2376 { 2377 struct pci_devemu *pde; 2378 struct pci_devinst *pdi; 2379 int ret; 2380 2381 assert(dev_name != NULL); 2382 2383 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2384 if (ret != 0) { 2385 /* 2386 * It is possible to call this function without 2387 * checking that the device is inserted first. 2388 */ 2389 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2390 return (0); 2391 } 2392 2393 if (pde->pe_pause == NULL) { 2394 /* The pause/resume functionality is optional. */ 2395 fprintf(stderr, "%s: not implemented for: %s\n", 2396 __func__, dev_name); 2397 return (0); 2398 } 2399 2400 return (*pde->pe_pause)(ctx, pdi); 2401 } 2402 2403 int 2404 pci_resume(struct vmctx *ctx, const char *dev_name) 2405 { 2406 struct pci_devemu *pde; 2407 struct pci_devinst *pdi; 2408 int ret; 2409 2410 assert(dev_name != NULL); 2411 2412 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2413 if (ret != 0) { 2414 /* 2415 * It is possible to call this function without 2416 * checking that the device is inserted first. 2417 */ 2418 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2419 return (0); 2420 } 2421 2422 if (pde->pe_resume == NULL) { 2423 /* The pause/resume functionality is optional. 
*/ 2424 fprintf(stderr, "%s: not implemented for: %s\n", 2425 __func__, dev_name); 2426 return (0); 2427 } 2428 2429 return (*pde->pe_resume)(ctx, pdi); 2430 } 2431 #endif 2432 2433 #define PCI_EMUL_TEST 2434 #ifdef PCI_EMUL_TEST 2435 /* 2436 * Define a dummy test device 2437 */ 2438 #define DIOSZ 8 2439 #define DMEMSZ 4096 2440 struct pci_emul_dsoftc { 2441 uint8_t ioregs[DIOSZ]; 2442 uint8_t memregs[2][DMEMSZ]; 2443 }; 2444 2445 #define PCI_EMUL_MSI_MSGS 4 2446 #define PCI_EMUL_MSIX_MSGS 16 2447 2448 static int 2449 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) 2450 { 2451 int error; 2452 struct pci_emul_dsoftc *sc; 2453 2454 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 2455 2456 pi->pi_arg = sc; 2457 2458 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 2459 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 2460 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 2461 2462 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 2463 assert(error == 0); 2464 2465 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 2466 assert(error == 0); 2467 2468 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 2469 assert(error == 0); 2470 2471 error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); 2472 assert(error == 0); 2473 2474 return (0); 2475 } 2476 2477 static void 2478 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2479 uint64_t offset, int size, uint64_t value) 2480 { 2481 int i; 2482 struct pci_emul_dsoftc *sc = pi->pi_arg; 2483 2484 if (baridx == 0) { 2485 if (offset + size > DIOSZ) { 2486 printf("diow: iow too large, offset %ld size %d\n", 2487 offset, size); 2488 return; 2489 } 2490 2491 if (size == 1) { 2492 sc->ioregs[offset] = value & 0xff; 2493 } else if (size == 2) { 2494 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 2495 } else if (size == 4) { 2496 *(uint32_t *)&sc->ioregs[offset] = value; 2497 } else { 2498 printf("diow: iow unknown size %d\n", size); 2499 } 2500 2501 /* 2502 * Special magic value to 
generate an interrupt 2503 */ 2504 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 2505 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 2506 2507 if (value == 0xabcdef) { 2508 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 2509 pci_generate_msi(pi, i); 2510 } 2511 } 2512 2513 if (baridx == 1 || baridx == 2) { 2514 if (offset + size > DMEMSZ) { 2515 printf("diow: memw too large, offset %ld size %d\n", 2516 offset, size); 2517 return; 2518 } 2519 2520 i = baridx - 1; /* 'memregs' index */ 2521 2522 if (size == 1) { 2523 sc->memregs[i][offset] = value; 2524 } else if (size == 2) { 2525 *(uint16_t *)&sc->memregs[i][offset] = value; 2526 } else if (size == 4) { 2527 *(uint32_t *)&sc->memregs[i][offset] = value; 2528 } else if (size == 8) { 2529 *(uint64_t *)&sc->memregs[i][offset] = value; 2530 } else { 2531 printf("diow: memw unknown size %d\n", size); 2532 } 2533 2534 /* 2535 * magic interrupt ?? 2536 */ 2537 } 2538 2539 if (baridx > 2 || baridx < 0) { 2540 printf("diow: unknown bar idx %d\n", baridx); 2541 } 2542 } 2543 2544 static uint64_t 2545 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2546 uint64_t offset, int size) 2547 { 2548 struct pci_emul_dsoftc *sc = pi->pi_arg; 2549 uint32_t value; 2550 int i; 2551 2552 if (baridx == 0) { 2553 if (offset + size > DIOSZ) { 2554 printf("dior: ior too large, offset %ld size %d\n", 2555 offset, size); 2556 return (0); 2557 } 2558 2559 value = 0; 2560 if (size == 1) { 2561 value = sc->ioregs[offset]; 2562 } else if (size == 2) { 2563 value = *(uint16_t *) &sc->ioregs[offset]; 2564 } else if (size == 4) { 2565 value = *(uint32_t *) &sc->ioregs[offset]; 2566 } else { 2567 printf("dior: ior unknown size %d\n", size); 2568 } 2569 } 2570 2571 if (baridx == 1 || baridx == 2) { 2572 if (offset + size > DMEMSZ) { 2573 printf("dior: memr too large, offset %ld size %d\n", 2574 offset, size); 2575 return (0); 2576 } 2577 2578 i = baridx - 1; /* 'memregs' index */ 2579 2580 if (size == 1) { 2581 
value = sc->memregs[i][offset]; 2582 } else if (size == 2) { 2583 value = *(uint16_t *) &sc->memregs[i][offset]; 2584 } else if (size == 4) { 2585 value = *(uint32_t *) &sc->memregs[i][offset]; 2586 } else if (size == 8) { 2587 value = *(uint64_t *) &sc->memregs[i][offset]; 2588 } else { 2589 printf("dior: ior unknown size %d\n", size); 2590 } 2591 } 2592 2593 2594 if (baridx > 2 || baridx < 0) { 2595 printf("dior: unknown bar idx %d\n", baridx); 2596 return (0); 2597 } 2598 2599 return (value); 2600 } 2601 2602 #ifdef BHYVE_SNAPSHOT 2603 int 2604 pci_emul_snapshot(struct vm_snapshot_meta *meta) 2605 { 2606 2607 return (0); 2608 } 2609 #endif 2610 2611 struct pci_devemu pci_dummy = { 2612 .pe_emu = "dummy", 2613 .pe_init = pci_emul_dinit, 2614 .pe_barwrite = pci_emul_diow, 2615 .pe_barread = pci_emul_dior, 2616 #ifdef BHYVE_SNAPSHOT 2617 .pe_snapshot = pci_emul_snapshot, 2618 #endif 2619 }; 2620 PCI_EMUL_SET(pci_dummy); 2621 2622 #endif /* PCI_EMUL_TEST */ 2623