/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2014 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
 */

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/mman.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <stdbool.h>
#include <sysexits.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include "acpi.h"
#include "bhyverun.h"
#include "bootrom.h"
#include "config.h"
#include "debug.h"
#include "inout.h"
#include "ioapic.h"
#include "mem.h"
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"
#include "pci_passthru.h"
#include "qemu_fwcfg.h"

#define CONF1_ADDR_PORT 0x0cf8
#define CONF1_DATA_PORT 0x0cfc

#define CONF1_ENABLE    0x80000000ul

#define MAXBUSES    (PCI_BUSMAX + 1)
#define MAXSLOTS    (PCI_SLOTMAX + 1)
#define MAXFUNCS    (PCI_FUNCMAX + 1)

#define GB (1024 * 1024 * 1024UL)

struct funcinfo {
    nvlist_t *fi_config;
    struct pci_devemu *fi_pde;
    struct pci_devinst *fi_devi;
};

struct intxinfo {
    int ii_count;
    int ii_pirq_pin;
    int ii_ioapic_irq;
};

struct slotinfo {
    struct intxinfo si_intpins[4];
    struct funcinfo si_funcs[MAXFUNCS];
};

struct businfo {
    uint16_t iobase, iolimit;        /* I/O window */
    uint32_t membase32, memlimit32;  /* mmio window below 4GB */
    uint64_t membase64, memlimit64;  /* mmio window above 4GB */
    struct slotinfo slotinfo[MAXSLOTS];
};

static struct businfo *pci_businfo[MAXBUSES];

SET_DECLARE(pci_devemu_set, struct pci_devemu);

static uint64_t pci_emul_iobase;
static uint8_t *pci_emul_rombase;
static uint64_t pci_emul_romoffset;
static uint8_t *pci_emul_romlim;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;

struct pci_bar_allocation {
    TAILQ_ENTRY(pci_bar_allocation) chain;
    struct pci_devinst *pdi;
    int idx;
    enum pcibar_type type;
    uint64_t size;
};

static TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars =
    TAILQ_HEAD_INITIALIZER(pci_bars);

struct boot_device {
    TAILQ_ENTRY(boot_device) boot_device_chain;
    struct pci_devinst *pdi;
    int bootindex;
};
static TAILQ_HEAD(boot_list, boot_device) boot_devices = TAILQ_HEAD_INITIALIZER(
    boot_devices);

#define PCI_EMUL_IOBASE     0x2000
#define PCI_EMUL_IOLIMIT    0x10000

#define PCI_EMUL_ROMSIZE    0x10000000

#define PCI_EMUL_ECFG_BASE  0xE0000000                  /* 3.5GB */
#define PCI_EMUL_ECFG_SIZE  (MAXBUSES * 1024 * 1024)    /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
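
/*
 * The extended config window is laid out as standard ECAM: each function
 * gets a 4 KB page, and pci_emul_ecfg_handler() below decodes an access at
 * 'addr' as bus = (addr >> 20) & 0xff, slot = (addr >> 15) & 0x1f,
 * func = (addr >> 12) & 0x7, with the register offset in the low 12 bits.
 */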

/*
 * OVMF always uses 0xC0000000 as base address for 32 bit PCI MMIO. Don't
 * change this address without changing it in OVMF.
 */
#define PCI_EMUL_MEMBASE32  0xC0000000
#define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE
#define PCI_EMUL_MEMSIZE64  (32*GB)

static struct pci_devemu *pci_emul_finddev(const char *name);
static void pci_lintr_route(struct pci_devinst *pi);
static void pci_lintr_update(struct pci_devinst *pi);
static void pci_cfgrw(int in, int bus, int slot, int func, int coff,
    int bytes, uint32_t *val);

static __inline void
CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
{

    if (bytes == 1)
        pci_set_cfgdata8(pi, coff, val);
    else if (bytes == 2)
        pci_set_cfgdata16(pi, coff, val);
    else
        pci_set_cfgdata32(pi, coff, val);
}

static __inline uint32_t
CFGREAD(struct pci_devinst *pi, int coff, int bytes)
{

    if (bytes == 1)
        return (pci_get_cfgdata8(pi, coff));
    else if (bytes == 2)
        return (pci_get_cfgdata16(pi, coff));
    else
        return (pci_get_cfgdata32(pi, coff));
}

static int
is_pcir_bar(int coff)
{
    return (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1));
}

static int
is_pcir_bios(int coff)
{
    return (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4);
}

/*
 * I/O access
 */

/*
 * Slot options are in the form:
 *
 *  <bus>:<slot>:<func>,<emul>[,<config>]
 *  <slot>[:<func>],<emul>[,<config>]
 *
 *  slot is 0..31
 *  func is 0..7
 *  emul is a string describing the type of PCI device e.g. virtio-net
 *  config is an optional string, depending on the device, that can be
 *  used for configuration.
 *   Examples are:
 *     1,virtio-net,tap0
 *     3:0,dummy
 */
static void
pci_parse_slot_usage(char *aopt)
{

    EPRINTLN("Invalid PCI slot info field \"%s\"", aopt);
}

/*
 * Helper function to parse a list of comma-separated options where
 * each option is formatted as "name[=value]". If no value is
 * provided, the option is treated as a boolean and is given a value
 * of true.
 */
int
pci_parse_legacy_config(nvlist_t *nvl, const char *opt)
{
    char *config, *name, *tofree, *value;

    if (opt == NULL)
        return (0);

    config = tofree = strdup(opt);
    while ((name = strsep(&config, ",")) != NULL) {
        value = strchr(name, '=');
        if (value != NULL) {
            *value = '\0';
            value++;
            set_config_value_node(nvl, name, value);
        } else
            set_config_bool_node(nvl, name, true);
    }
    free(tofree);
    return (0);
}

/*
 * PCI device configuration is stored in MIBs that encode the device's
 * location:
 *
 * pci.<bus>.<slot>.<func>
 *
 * Where "bus", "slot", and "func" are all decimal values without
 * leading zeroes. Each valid device must have a "device" node which
 * identifies the driver model of the device.
 *
 * Device backends can provide a parser for the "config" string. If
 * a custom parser is not provided, pci_parse_legacy_config() is used
 * to parse the string.
 */
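/*
 * For example (illustrative only): the slot option "1,virtio-net,tap0"
 * creates the config node "pci.0.1.0" with "device=virtio-net", and the
 * remaining string "tap0" is handed to that backend's config parser.
 */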
int
pci_parse_slot(char *opt)
{
    char node_name[sizeof("pci.XXX.XX.X")];
    struct pci_devemu *pde;
    char *emul, *config, *str, *cp;
    int error, bnum, snum, fnum;
    nvlist_t *nvl;

    error = -1;
    str = strdup(opt);

    emul = config = NULL;
    if ((cp = strchr(str, ',')) != NULL) {
        *cp = '\0';
        emul = cp + 1;
        if ((cp = strchr(emul, ',')) != NULL) {
            *cp = '\0';
            config = cp + 1;
        }
    } else {
        pci_parse_slot_usage(opt);
        goto done;
    }

    /* <bus>:<slot>:<func> */
    if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) {
        bnum = 0;
        /* <slot>:<func> */
        if (sscanf(str, "%d:%d", &snum, &fnum) != 2) {
            fnum = 0;
            /* <slot> */
            if (sscanf(str, "%d", &snum) != 1) {
                snum = -1;
            }
        }
    }

    if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS ||
        fnum < 0 || fnum >= MAXFUNCS) {
        pci_parse_slot_usage(opt);
        goto done;
    }

    pde = pci_emul_finddev(emul);
    if (pde == NULL) {
        EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum,
            fnum, emul);
        goto done;
    }

    snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum,
        fnum);
    nvl = find_config_node(node_name);
    if (nvl != NULL) {
        EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum,
            fnum);
        goto done;
    }
    nvl = create_config_node(node_name);
    if (pde->pe_alias != NULL)
        set_config_value_node(nvl, "device", pde->pe_alias);
    else
        set_config_value_node(nvl, "device", pde->pe_emu);

    if (pde->pe_legacy_config != NULL)
        error = pde->pe_legacy_config(nvl, config);
    else
        error = pci_parse_legacy_config(nvl, config);
done:
    free(str);
    return (error);
}

void
pci_print_supported_devices(void)
{
    struct pci_devemu **pdpp, *pdp;

    SET_FOREACH(pdpp, pci_devemu_set) {
        pdp = *pdpp;
        printf("%s\n", pdp->pe_emu);
    }
}

uint32_t
pci_config_read_reg(const struct pcisel *const host_sel, nvlist_t *nvl,
    const uint32_t reg, const uint8_t size, const uint32_t def)
{
    const char *config;
    const nvlist_t *pci_regs;

    assert(size == 1 || size == 2 || size == 4);

    pci_regs = find_relative_config_node(nvl, "pcireg");
    if (pci_regs == NULL) {
        return def;
    }

    switch (reg) {
    case PCIR_DEVICE:
        config = get_config_value_node(pci_regs, "device");
        break;
    case PCIR_VENDOR:
        config = get_config_value_node(pci_regs, "vendor");
        break;
    case PCIR_REVID:
        config = get_config_value_node(pci_regs, "revid");
        break;
    case PCIR_SUBVEND_0:
        config = get_config_value_node(pci_regs, "subvendor");
        break;
    case PCIR_SUBDEV_0:
        config = get_config_value_node(pci_regs, "subdevice");
        break;
    default:
        return (-1);
    }

    if (config == NULL) {
        return def;
    } else if (host_sel != NULL && strcmp(config, "host") == 0) {
        return pci_host_read_config(host_sel, reg, size);
    } else {
        return strtol(config, NULL, 16);
    }
}

static int
pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset)
{

    if (offset < pi->pi_msix.pba_offset)
        return (0);

    if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
        return (0);
    }

    return (1);
}

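/*
 * Each MSI-X table entry occupies MSIX_TABLE_ENTRY_SIZE (16) bytes:
 * message address low/high, message data, and vector control. The
 * helpers below therefore split 'offset' into a table index and an
 * offset within one entry.
 */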
int
pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
    uint64_t value)
{
    int msix_entry_offset;
    int tab_index;
    char *dest;

    /* support only 4 or 8 byte writes */
    if (size != 4 && size != 8)
        return (-1);

    /*
     * Return if table index is beyond what device supports
     */
    tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
    if (tab_index >= pi->pi_msix.table_count)
        return (-1);

    msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

    /* support only aligned writes */
    if ((msix_entry_offset % size) != 0)
        return (-1);

    dest = (char *)(pi->pi_msix.table + tab_index);
    dest += msix_entry_offset;

    if (size == 4)
        *((uint32_t *)dest) = value;
    else
        *((uint64_t *)dest) = value;

    return (0);
}

uint64_t
pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size)
{
    char *dest;
    int msix_entry_offset;
    int tab_index;
    uint64_t retval = ~0;

    /*
     * The PCI standard only allows 4 and 8 byte accesses to the MSI-X
     * table but we also allow 1 byte access to accommodate reads from
     * ddb.
     */
    if (size != 1 && size != 4 && size != 8)
        return (retval);

    msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

    /* support only aligned reads */
    if ((msix_entry_offset % size) != 0) {
        return (retval);
    }

    tab_index = offset / MSIX_TABLE_ENTRY_SIZE;

    if (tab_index < pi->pi_msix.table_count) {
        /* valid MSI-X Table access */
        dest = (char *)(pi->pi_msix.table + tab_index);
        dest += msix_entry_offset;

        if (size == 1)
            retval = *((uint8_t *)dest);
        else if (size == 4)
            retval = *((uint32_t *)dest);
        else
            retval = *((uint64_t *)dest);
    } else if (pci_valid_pba_offset(pi, offset)) {
        /* return 0 for PBA access */
        retval = 0;
    }

    return (retval);
}

int
pci_msix_table_bar(struct pci_devinst *pi)
{

    if (pi->pi_msix.table != NULL)
        return (pi->pi_msix.table_bar);
    else
        return (-1);
}

int
pci_msix_pba_bar(struct pci_devinst *pi)
{

    if (pi->pi_msix.table != NULL)
        return (pi->pi_msix.pba_bar);
    else
        return (-1);
}

static int
pci_emul_io_handler(struct vmctx *ctx __unused, int in, int port,
    int bytes, uint32_t *eax, void *arg)
{
    struct pci_devinst *pdi = arg;
    struct pci_devemu *pe = pdi->pi_d;
    uint64_t offset;
    int i;

    assert(port >= 0);

    for (i = 0; i <= PCI_BARMAX; i++) {
        if (pdi->pi_bar[i].type == PCIBAR_IO &&
            (uint64_t)port >= pdi->pi_bar[i].addr &&
            (uint64_t)port + bytes <=
            pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
            offset = port - pdi->pi_bar[i].addr;
            if (in)
                *eax = (*pe->pe_barread)(pdi, i,
                    offset, bytes);
            else
                (*pe->pe_barwrite)(pdi, i, offset,
                    bytes, *eax);
            return (0);
        }
    }
    return (-1);
}

static int
pci_emul_mem_handler(struct vcpu *vcpu __unused, int dir,
    uint64_t addr, int size, uint64_t *val, void *arg1, long arg2)
{
    struct pci_devinst *pdi = arg1;
    struct pci_devemu *pe = pdi->pi_d;
    uint64_t offset;
    int bidx = (int)arg2;

    assert(bidx <= PCI_BARMAX);
    assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
        pdi->pi_bar[bidx].type == PCIBAR_MEM64);
    assert(addr >= pdi->pi_bar[bidx].addr &&
        addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);

    offset = addr - pdi->pi_bar[bidx].addr;

    if (dir == MEM_F_WRITE) {
        if (size == 8) {
            (*pe->pe_barwrite)(pdi, bidx, offset,
                4, *val & 0xffffffff);
            (*pe->pe_barwrite)(pdi, bidx, offset + 4,
                4, *val >> 32);
        } else {
            (*pe->pe_barwrite)(pdi, bidx, offset,
                size, *val);
        }
    } else {
        if (size == 8) {
            *val = (*pe->pe_barread)(pdi, bidx,
                offset, 4);
            *val |= (*pe->pe_barread)(pdi, bidx,
                offset + 4, 4) << 32;
        } else {
            *val = (*pe->pe_barread)(pdi, bidx,
                offset, size);
        }
    }

    return (0);
}

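/*
 * Allocate 'size' bytes from the region [*baseptr, limit). BARs must be
 * naturally aligned, i.e. a BAR's base address must be a multiple of its
 * (power-of-2) size, which is why the running base is rounded up first.
 */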
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
    uint64_t *addr)
{
    uint64_t base;

    assert((size & (size - 1)) == 0);   /* must be a power of 2 */

    base = roundup2(*baseptr, size);

    if (base + size <= limit) {
        *addr = base;
        *baseptr = base + size;
        return (0);
    } else
        return (-1);
}

/*
 * Register (or unregister) the MMIO or I/O region associated with the BAR
 * register 'idx' of an emulated pci device.
 */
static void
modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
{
    struct pci_devemu *pe;
    int error;
    struct inout_port iop;
    struct mem_range mr;

    pe = pi->pi_d;
    switch (pi->pi_bar[idx].type) {
    case PCIBAR_IO:
        bzero(&iop, sizeof(struct inout_port));
        iop.name = pi->pi_name;
        iop.port = pi->pi_bar[idx].addr;
        iop.size = pi->pi_bar[idx].size;
        if (registration) {
            iop.flags = IOPORT_F_INOUT;
            iop.handler = pci_emul_io_handler;
            iop.arg = pi;
            error = register_inout(&iop);
        } else
            error = unregister_inout(&iop);
        break;
    case PCIBAR_MEM32:
    case PCIBAR_MEM64:
        bzero(&mr, sizeof(struct mem_range));
        mr.name = pi->pi_name;
        mr.base = pi->pi_bar[idx].addr;
        mr.size = pi->pi_bar[idx].size;
        if (registration) {
            mr.flags = MEM_F_RW;
            mr.handler = pci_emul_mem_handler;
            mr.arg1 = pi;
            mr.arg2 = idx;
            error = register_mem(&mr);
        } else
            error = unregister_mem(&mr);
        break;
    case PCIBAR_ROM:
        error = 0;
        break;
    default:
        error = EINVAL;
        break;
    }
    assert(error == 0);

    if (pe->pe_baraddr != NULL)
        (*pe->pe_baraddr)(pi, idx, registration, pi->pi_bar[idx].addr);
}

static void
unregister_bar(struct pci_devinst *pi, int idx)
{

    modify_bar_registration(pi, idx, 0);
}

static void
register_bar(struct pci_devinst *pi, int idx)
{

    modify_bar_registration(pi, idx, 1);
}

/* Is the ROM enabled for the emulated pci device? */
static int
romen(struct pci_devinst *pi)
{
    return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) ==
        PCIM_BIOS_ENABLE;
}

/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
{
    uint16_t cmd;

    cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

    return (cmd & PCIM_CMD_PORTEN);
}

/* Are we decoding memory accesses for the emulated pci device? */
static int
memen(struct pci_devinst *pi)
{
    uint16_t cmd;

    cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

    return (cmd & PCIM_CMD_MEMEN);
}

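/*
 * Note that a ROM is only decoded when both PCIM_CMD_MEMEN in the command
 * register and the enable bit in the ROM BAR (see romen() above) are set;
 * pci_emul_cmd_changed() and pci_cfgrw() below both honor this.
 */
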
/*
 * Update the MMIO or I/O address that is decoded by the BAR register.
 *
 * If the pci device has enabled the address space decoding then intercept
 * the address range decoded by the BAR register.
 */
static void
update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
{
    int decode;

    if (pi->pi_bar[idx].type == PCIBAR_IO)
        decode = porten(pi);
    else
        decode = memen(pi);

    if (decode)
        unregister_bar(pi, idx);

    switch (type) {
    case PCIBAR_IO:
    case PCIBAR_MEM32:
        pi->pi_bar[idx].addr = addr;
        break;
    case PCIBAR_MEM64:
        pi->pi_bar[idx].addr &= ~0xffffffffUL;
        pi->pi_bar[idx].addr |= addr;
        break;
    case PCIBAR_MEMHI64:
        pi->pi_bar[idx].addr &= 0xffffffff;
        pi->pi_bar[idx].addr |= addr;
        break;
    default:
        assert(0);
    }

    if (decode)
        register_bar(pi, idx);
}

int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
    uint64_t size)
{
    assert((type == PCIBAR_ROM) || (idx >= 0 && idx <= PCI_BARMAX));
    assert((type != PCIBAR_ROM) || (idx == PCI_ROM_IDX));

    if ((size & (size - 1)) != 0)
        size = 1UL << flsl(size);   /* round up to a power of 2 */

    /* Enforce minimum BAR sizes required by the PCI standard */
    if (type == PCIBAR_IO) {
        if (size < 4)
            size = 4;
    } else if (type == PCIBAR_ROM) {
        if (size < ~PCIM_BIOS_ADDR_MASK + 1)
            size = ~PCIM_BIOS_ADDR_MASK + 1;
    } else {
        if (size < 16)
            size = 16;
    }

    /*
     * To reduce fragmentation of the MMIO space, we allocate the BARs by
     * size. Therefore, don't allocate the BAR yet. We create a list of all
     * BAR allocations, sorted by BAR size. When all PCI devices are
     * initialized, we will assign an address to the BARs.
     */

    /* create a new list entry */
    struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar));
    memset(new_bar, 0, sizeof(*new_bar));
    new_bar->pdi = pdi;
    new_bar->idx = idx;
    new_bar->type = type;
    new_bar->size = size;

    /*
     * Search for a BAR whose size is smaller than the size of our newly
     * allocated BAR.
     */
    struct pci_bar_allocation *bar = NULL;
    TAILQ_FOREACH(bar, &pci_bars, chain) {
        if (bar->size < size) {
            break;
        }
    }

    if (bar == NULL) {
        /*
         * Either the list is empty or the new BAR is the smallest BAR
         * of the list. Append it to the end of our list.
         */
        TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain);
    } else {
        /*
         * The found BAR is smaller than our new BAR. For that reason,
         * insert our new BAR before the found BAR.
         */
        TAILQ_INSERT_BEFORE(bar, new_bar, chain);
    }
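
    /*
     * For example (illustrative): queueing BARs of 4 KB, 16 MB and 8 KB
     * results in the list (16 MB, 8 KB, 4 KB), so the largest regions
     * are assigned addresses first and alignment padding is minimized.
     */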

#ifdef __FreeBSD__
    /*
     * Enable PCI BARs only if we don't have a boot ROM, i.e., bhyveload was
     * used to load the initial guest image. Otherwise, we rely on the boot
     * ROM to handle this.
     */
    if (!get_config_bool_default("pci.enable_bars", !bootrom_boot()))
        return (0);
#else
    /*
     * Enable PCI BARs unless specifically requested not to. Bootroms
     * generally used in illumos do not perform PCI BAR enumeration
     * themselves and so need the BARs enabled here.
     */
    if (!get_config_bool_default("pci.enable_bars", true))
        return (0);
#endif

    /*
     * pci_passthru devices synchronize their physical and virtual command
     * register on init. For that reason, the virtual cmd reg should be
     * updated as early as possible.
     */
    uint16_t enbit = 0;
    switch (type) {
    case PCIBAR_IO:
        enbit = PCIM_CMD_PORTEN;
        break;
    case PCIBAR_MEM64:
    case PCIBAR_MEM32:
        enbit = PCIM_CMD_MEMEN;
        break;
    default:
        enbit = 0;
        break;
    }

    const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
    pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);

    return (0);
}

static int
pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx,
    const enum pcibar_type type, const uint64_t size)
{
    int error;
    uint64_t *baseptr, limit, addr, mask, lobits, bar;

    switch (type) {
    case PCIBAR_NONE:
        baseptr = NULL;
        addr = mask = lobits = 0;
        break;
    case PCIBAR_IO:
        baseptr = &pci_emul_iobase;
        limit = PCI_EMUL_IOLIMIT;
        mask = PCIM_BAR_IO_BASE;
        lobits = PCIM_BAR_IO_SPACE;
        break;
    case PCIBAR_MEM64:
        /*
         * XXX
         * Some drivers do not work well if the 64-bit BAR is allocated
         * above 4GB. Allow for this by allocating small requests under
         * 4GB unless the allocation size is larger than some arbitrary
         * number (128MB currently).
         */
        if (size > 128 * 1024 * 1024) {
            baseptr = &pci_emul_membase64;
            limit = pci_emul_memlim64;
            mask = PCIM_BAR_MEM_BASE;
            lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
                PCIM_BAR_MEM_PREFETCH;
        } else {
            baseptr = &pci_emul_membase32;
            limit = PCI_EMUL_MEMLIMIT32;
            mask = PCIM_BAR_MEM_BASE;
            lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
        }
        break;
    case PCIBAR_MEM32:
        baseptr = &pci_emul_membase32;
        limit = PCI_EMUL_MEMLIMIT32;
        mask = PCIM_BAR_MEM_BASE;
        lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
        break;
    case PCIBAR_ROM:
        /* do not claim memory for ROM. OVMF will do it for us. */
        baseptr = NULL;
        limit = 0;
        mask = PCIM_BIOS_ADDR_MASK;
        lobits = 0;
        break;
    default:
        printf("pci_emul_alloc_base: invalid bar type %d\n", type);
#ifdef __FreeBSD__
        assert(0);
#else
        abort();
#endif
    }

    if (baseptr != NULL) {
        error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
        if (error != 0)
            return (error);
    } else {
        addr = 0;
    }

    pdi->pi_bar[idx].type = type;
    pdi->pi_bar[idx].addr = addr;
    pdi->pi_bar[idx].size = size;
    /*
     * passthru devices use the same lobits as the physical device; they
     * set this property themselves.
     */
    if (pdi->pi_bar[idx].lobits != 0) {
        lobits = pdi->pi_bar[idx].lobits;
    } else {
        pdi->pi_bar[idx].lobits = lobits;
    }

    /* Initialize the BAR register in config space */
    bar = (addr & mask) | lobits;
    pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);

    if (type == PCIBAR_MEM64) {
        assert(idx + 1 <= PCI_BARMAX);
        pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
        pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
    }

    switch (type) {
    case PCIBAR_IO:
        if (porten(pdi))
            register_bar(pdi, idx);
        break;
    case PCIBAR_MEM32:
    case PCIBAR_MEM64:
    case PCIBAR_MEMHI64:
        if (memen(pdi))
            register_bar(pdi, idx);
        break;
    default:
        break;
    }

    return (0);
}

int
pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size,
    void **const addr)
{
    /* allocate ROM space once on first call */
    if (pci_emul_rombase == 0) {
        pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM,
            "pcirom", PCI_EMUL_ROMSIZE);
        if (pci_emul_rombase == MAP_FAILED) {
            warnx("%s: failed to create rom segment", __func__);
            return (-1);
        }
        pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE;
        pci_emul_romoffset = 0;
    }

    /* ROM size should be a power of 2 and at least 2 KB */
    const uint64_t rom_size = MAX(1UL << flsl(size),
        ~PCIM_BIOS_ADDR_MASK + 1);

    /* check if ROM fits into ROM space */
    if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE) {
        warnx("%s: no space left in rom segment:", __func__);
        warnx("%16lu bytes left",
            PCI_EMUL_ROMSIZE - pci_emul_romoffset);
        warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus,
            pdi->pi_slot, pdi->pi_func);
        return (-1);
    }

    /* allocate ROM BAR */
    const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM,
        rom_size);
    if (error)
        return error;

    /* return address */
    *addr = pci_emul_rombase + pci_emul_romoffset;

    /* save offset into ROM Space */
    pdi->pi_romoffset = pci_emul_romoffset;

    /* increase offset for next ROM */
    pci_emul_romoffset += rom_size;

    return (0);
}

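/*
 * Boot devices are kept sorted by ascending bootindex; init_bootorder()
 * later walks this list to emit a "bootorder" file via qemu fwcfg.
 */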
int
pci_emul_add_boot_device(struct pci_devinst *pi, int bootindex)
{
    struct boot_device *new_device, *device;

    /* don't permit a negative bootindex */
    if (bootindex < 0) {
        errx(4, "Invalid bootindex %d for %s", bootindex, pi->pi_name);
    }

    /* alloc new boot device */
    new_device = calloc(1, sizeof(struct boot_device));
    if (new_device == NULL) {
        return (ENOMEM);
    }
    new_device->pdi = pi;
    new_device->bootindex = bootindex;

    /* search for boot device with higher boot index */
    TAILQ_FOREACH(device, &boot_devices, boot_device_chain) {
        if (device->bootindex == bootindex) {
            errx(4,
                "Could not set bootindex %d for %s. Bootindex already occupied by %s",
                bootindex, pi->pi_name, device->pdi->pi_name);
        } else if (device->bootindex > bootindex) {
            break;
        }
    }

    /* add boot device to queue */
    if (device == NULL) {
        TAILQ_INSERT_TAIL(&boot_devices, new_device, boot_device_chain);
    } else {
        TAILQ_INSERT_BEFORE(device, new_device, boot_device_chain);
    }

    return (0);
}

#define CAP_START_OFFSET 0x40
static int
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
{
    int i, capoff, reallen;
    uint16_t sts;

    assert(caplen > 0);

    reallen = roundup2(caplen, 4);  /* dword aligned */

    sts = pci_get_cfgdata16(pi, PCIR_STATUS);
    if ((sts & PCIM_STATUS_CAPPRESENT) == 0)
        capoff = CAP_START_OFFSET;
    else
        capoff = pi->pi_capend + 1;

    /* Check if we have enough space */
    if (capoff + reallen > PCI_REGMAX + 1)
        return (-1);

    /* Set the previous capability pointer */
    if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
        pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
        pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
    } else
        pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff);

    /* Copy the capability */
    for (i = 0; i < caplen; i++)
        pci_set_cfgdata8(pi, capoff + i, capdata[i]);

    /* Set the next capability pointer */
    pci_set_cfgdata8(pi, capoff + 1, 0);

    pi->pi_prevcap = capoff;
    pi->pi_capend = capoff + reallen - 1;
    return (0);
}

static struct pci_devemu *
pci_emul_finddev(const char *name)
{
    struct pci_devemu **pdpp, *pdp;

    SET_FOREACH(pdpp, pci_devemu_set) {
        pdp = *pdpp;
        if (!strcmp(pdp->pe_emu, name)) {
            return (pdp);
        }
    }

    return (NULL);
}

static int
pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot,
    int func, struct funcinfo *fi)
{
    struct pci_devinst *pdi;
    int err;

    pdi = calloc(1, sizeof(struct pci_devinst));

    pdi->pi_vmctx = ctx;
    pdi->pi_bus = bus;
    pdi->pi_slot = slot;
    pdi->pi_func = func;
    pthread_mutex_init(&pdi->pi_lintr.lock, NULL);
    pdi->pi_lintr.pin = 0;
    pdi->pi_lintr.state = IDLE;
    pdi->pi_lintr.pirq_pin = 0;
    pdi->pi_lintr.ioapic_irq = 0;
    pdi->pi_d = pde;
    snprintf(pdi->pi_name, PI_NAMESZ, "%s@pci.%d.%d.%d", pde->pe_emu, bus,
        slot, func);

    /* Disable legacy interrupts */
    pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
    pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);

#ifdef __FreeBSD__
    if (get_config_bool_default("pci.enable_bars", !bootrom_boot()))
        pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN);
#else
    if (get_config_bool_default("pci.enable_bars", true))
        pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN);
#endif

    err = (*pde->pe_init)(pdi, fi->fi_config);
    if (err == 0)
        fi->fi_devi = pdi;
    else
        free(pdi);

    return (err);
}

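/*
 * The Multiple Message Capable field occupies bits 3:1 of the MSI message
 * control register and holds log2 of the number of supported vectors;
 * hence mmc = ffs(msgnum) - 1 below, shifted into place with << 1.
 */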
void
pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
{
    int mmc;

    /* Number of msi messages must be a power of 2 between 1 and 32 */
    assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
    mmc = ffs(msgnum) - 1;

    bzero(msicap, sizeof(struct msicap));
    msicap->capid = PCIY_MSI;
    msicap->nextptr = nextptr;
    msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
}

int
pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
{
    struct msicap msicap;

    pci_populate_msicap(&msicap, msgnum, 0);

    return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
}

static void
pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
    uint32_t msix_tab_size)
{

    assert(msix_tab_size % 4096 == 0);

    bzero(msixcap, sizeof(struct msixcap));
    msixcap->capid = PCIY_MSIX;

    /*
     * Message Control Register, all fields set to
     * zero except for the Table Size.
     * Note: Table size N is encoded as N-1
     */
    msixcap->msgctrl = msgnum - 1;

    /*
     * MSI-X BAR setup:
     * - MSI-X table starts at offset 0
     * - PBA table starts at a 4K aligned offset after the MSI-X table
     */
    msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK;
    msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK);
}

static void
pci_msix_table_init(struct pci_devinst *pi, int table_entries)
{
    int i, table_size;

    assert(table_entries > 0);
    assert(table_entries <= MAX_MSIX_TABLE_ENTRIES);

    table_size = table_entries * MSIX_TABLE_ENTRY_SIZE;
    pi->pi_msix.table = calloc(1, table_size);

    /* set mask bit of vector control register */
    for (i = 0; i < table_entries; i++)
        pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK;
}

int
pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
{
    uint32_t tab_size;
    struct msixcap msixcap;

    assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES);
    assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0);

    tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE;

    /* Align table size to nearest 4K */
    tab_size = roundup2(tab_size, 4096);

    pi->pi_msix.table_bar = barnum;
    pi->pi_msix.pba_bar = barnum;
    pi->pi_msix.table_offset = 0;
    pi->pi_msix.table_count = msgnum;
    pi->pi_msix.pba_offset = tab_size;
    pi->pi_msix.pba_size = PBA_SIZE(msgnum);

    pci_msix_table_init(pi, msgnum);

    pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size);

    /* allocate memory for MSI-X Table and PBA */
    pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
        tab_size + pi->pi_msix.pba_size);

    return (pci_emul_add_capability(pi, (u_char *)&msixcap,
        sizeof(msixcap)));
}

static void
msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{
    uint16_t msgctrl, rwmask;
    int off;

    off = offset - capoff;
    /* Message Control Register */
    if (off == 2 && bytes == 2) {
        rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK;
        msgctrl = pci_get_cfgdata16(pi, offset);
        msgctrl &= ~rwmask;
        msgctrl |= val & rwmask;
        val = msgctrl;

        pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
        pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
        pci_lintr_update(pi);
    }

    CFGWRITE(pi, offset, val, bytes);
}

static void
msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{
    uint16_t msgctrl, rwmask, msgdata, mme;
    uint32_t addrlo;

    /*
     * If guest is writing to the message control register make sure
     * we do not overwrite read-only fields.
     */
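    /*
     * The Multiple Message Enable field (bits 6:4 of message control)
     * likewise encodes log2 of the number of vectors the guest has
     * enabled, which is why maxmsgnum is computed as 1 << (mme >> 4)
     * below.
     */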
    if ((offset - capoff) == 2 && bytes == 2) {
        rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
        msgctrl = pci_get_cfgdata16(pi, offset);
        msgctrl &= ~rwmask;
        msgctrl |= val & rwmask;
        val = msgctrl;
    }
    CFGWRITE(pi, offset, val, bytes);

    msgctrl = pci_get_cfgdata16(pi, capoff + 2);
    addrlo = pci_get_cfgdata32(pi, capoff + 4);
    if (msgctrl & PCIM_MSICTRL_64BIT)
        msgdata = pci_get_cfgdata16(pi, capoff + 12);
    else
        msgdata = pci_get_cfgdata16(pi, capoff + 8);

    mme = msgctrl & PCIM_MSICTRL_MME_MASK;
    pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
    if (pi->pi_msi.enabled) {
        pi->pi_msi.addr = addrlo;
        pi->pi_msi.msg_data = msgdata;
        pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
    } else {
        pi->pi_msi.maxmsgnum = 0;
    }
    pci_lintr_update(pi);
}

static void
pciecap_cfgwrite(struct pci_devinst *pi, int capoff __unused, int offset,
    int bytes, uint32_t val)
{

    /* XXX don't write to the readonly parts */
    CFGWRITE(pi, offset, val, bytes);
}

#define PCIECAP_VERSION 0x2
int
pci_emul_add_pciecap(struct pci_devinst *pi, int type)
{
    int err;
    struct pciecap pciecap;

    bzero(&pciecap, sizeof(pciecap));

    /*
     * Use the integrated endpoint type for endpoints on a root complex bus.
     *
     * NB: bhyve currently only supports a single PCI bus that is the root
     * complex bus, so all endpoints are integrated.
     */
    if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0))
        type = PCIEM_TYPE_ROOT_INT_EP;

    pciecap.capid = PCIY_EXPRESS;
    pciecap.pcie_capabilities = PCIECAP_VERSION | type;
    if (type != PCIEM_TYPE_ROOT_INT_EP) {
        pciecap.link_capabilities = 0x411;  /* gen1, x1 */
        pciecap.link_status = 0x11;         /* gen1, x1 */
    }

    err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap));
    return (err);
}

/*
 * This function assumes that 'coff' is in the capabilities region of the
 * config space. A capoff parameter of zero will force a search for the
 * offset and type.
 */
void
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val,
    uint8_t capoff, int capid)
{
    uint8_t nextoff;

    /* Do not allow un-aligned writes */
    if ((offset & (bytes - 1)) != 0)
        return;

    if (capoff == 0) {
        /* Find the capability that we want to update */
        capoff = CAP_START_OFFSET;
        while (1) {
            nextoff = pci_get_cfgdata8(pi, capoff + 1);
            if (nextoff == 0)
                break;
            if (offset >= capoff && offset < nextoff)
                break;

            capoff = nextoff;
        }
        assert(offset >= capoff);
        capid = pci_get_cfgdata8(pi, capoff);
    }

    /*
     * Capability ID and Next Capability Pointer are readonly.
     * However, some o/s's do 4-byte writes that include these.
     * For this case, trim the write back to 2 bytes and adjust
     * the data.
     */
    if (offset == capoff || offset == capoff + 1) {
        if (offset == capoff && bytes == 4) {
            bytes = 2;
            offset += 2;
            val >>= 16;
        } else
            return;
    }

    switch (capid) {
    case PCIY_MSI:
        msicap_cfgwrite(pi, capoff, offset, bytes, val);
        break;
    case PCIY_MSIX:
        msixcap_cfgwrite(pi, capoff, offset, bytes, val);
        break;
    case PCIY_EXPRESS:
        pciecap_cfgwrite(pi, capoff, offset, bytes, val);
        break;
    default:
        break;
    }
}

static int
pci_emul_iscap(struct pci_devinst *pi, int offset)
{
    uint16_t sts;

    sts = pci_get_cfgdata16(pi, PCIR_STATUS);
    if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
        if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend)
            return (1);
    }
    return (0);
}

static int
pci_emul_fallback_handler(struct vcpu *vcpu __unused, int dir,
    uint64_t addr __unused, int size __unused, uint64_t *val,
    void *arg1 __unused, long arg2 __unused)
{
    /*
     * Ignore writes; return 0xff's for reads. The mem read code
     * will take care of truncating to the correct size.
     */
    if (dir == MEM_F_READ) {
        *val = 0xffffffffffffffff;
    }

    return (0);
}

static int
pci_emul_ecfg_handler(struct vcpu *vcpu __unused, int dir, uint64_t addr,
    int bytes, uint64_t *val, void *arg1 __unused, long arg2 __unused)
{
    int bus, slot, func, coff, in;

    coff = addr & 0xfff;
    func = (addr >> 12) & 0x7;
    slot = (addr >> 15) & 0x1f;
    bus = (addr >> 20) & 0xff;
    in = (dir == MEM_F_READ);
    if (in)
        *val = ~0UL;
    pci_cfgrw(in, bus, slot, func, coff, bytes, (uint32_t *)val);
    return (0);
}

uint64_t
pci_ecfg_base(void)
{

    return (PCI_EMUL_ECFG_BASE);
}

static int
init_bootorder(void)
{
    struct boot_device *device;
    FILE *fp;
    char *bootorder;
    size_t bootorder_len;

    if (TAILQ_EMPTY(&boot_devices))
        return (0);

    fp = open_memstream(&bootorder, &bootorder_len);
    TAILQ_FOREACH(device, &boot_devices, boot_device_chain) {
        fprintf(fp, "/pci@i0cf8/pci@%d,%d\n",
            device->pdi->pi_slot, device->pdi->pi_func);
    }
    fclose(fp);

    return (qemu_fwcfg_add_file("bootorder", bootorder_len, bootorder));
}
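
/*
 * For example (illustrative): a boot device at slot 3, function 0 produces
 * the bootorder line "/pci@i0cf8/pci@3,0".
 */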

#define BUSIO_ROUNDUP       32
#define BUSMEM32_ROUNDUP    (1024 * 1024)
#define BUSMEM64_ROUNDUP    (512 * 1024 * 1024)

int
init_pci(struct vmctx *ctx)
{
    char node_name[sizeof("pci.XXX.XX.X")];
    struct mem_range mr;
    struct pci_devemu *pde;
    struct businfo *bi;
    struct slotinfo *si;
    struct funcinfo *fi;
    nvlist_t *nvl;
    const char *emul;
    size_t lowmem;
    int bus, slot, func;
    int error;

    if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32)
        errx(EX_OSERR, "Invalid lowmem limit");

    pci_emul_iobase = PCI_EMUL_IOBASE;
    pci_emul_membase32 = PCI_EMUL_MEMBASE32;

    pci_emul_membase64 = vm_get_highmem_base(ctx) +
        vm_get_highmem_size(ctx);
    pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64);
    pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64;

    TAILQ_INIT(&boot_devices);

    for (bus = 0; bus < MAXBUSES; bus++) {
        snprintf(node_name, sizeof(node_name), "pci.%d", bus);
        nvl = find_config_node(node_name);
        if (nvl == NULL)
            continue;
        pci_businfo[bus] = calloc(1, sizeof(struct businfo));
        bi = pci_businfo[bus];

        /*
         * Keep track of the i/o and memory resources allocated to
         * this bus.
         */
        bi->iobase = pci_emul_iobase;
        bi->membase32 = pci_emul_membase32;
        bi->membase64 = pci_emul_membase64;

        /* first run: init devices */
        for (slot = 0; slot < MAXSLOTS; slot++) {
            si = &bi->slotinfo[slot];
            for (func = 0; func < MAXFUNCS; func++) {
                fi = &si->si_funcs[func];
                snprintf(node_name, sizeof(node_name),
                    "pci.%d.%d.%d", bus, slot, func);
                nvl = find_config_node(node_name);
                if (nvl == NULL)
                    continue;

                fi->fi_config = nvl;
                emul = get_config_value_node(nvl, "device");
                if (emul == NULL) {
                    EPRINTLN("pci slot %d:%d:%d: missing "
                        "\"device\" value", bus, slot, func);
                    return (EINVAL);
                }
                pde = pci_emul_finddev(emul);
                if (pde == NULL) {
                    EPRINTLN("pci slot %d:%d:%d: unknown "
                        "device \"%s\"", bus, slot, func,
                        emul);
                    return (EINVAL);
                }
                if (pde->pe_alias != NULL) {
                    EPRINTLN("pci slot %d:%d:%d: legacy "
                        "device \"%s\", use \"%s\" instead",
                        bus, slot, func, emul,
                        pde->pe_alias);
                    return (EINVAL);
                }
                fi->fi_pde = pde;
                error = pci_emul_init(ctx, pde, bus, slot,
                    func, fi);
                if (error)
                    return (error);
            }
        }

        /* second run: assign BARs and free the list */
        struct pci_bar_allocation *bar;
        struct pci_bar_allocation *bar_tmp;
        TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) {
            pci_emul_assign_bar(bar->pdi, bar->idx, bar->type,
                bar->size);
            free(bar);
        }
        TAILQ_INIT(&pci_bars);

        /*
         * Add some slop to the I/O and memory resources decoded by
         * this bus to give a guest some flexibility if it wants to
         * reprogram the BARs.
         */
        pci_emul_iobase += BUSIO_ROUNDUP;
        pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
        bi->iolimit = pci_emul_iobase;

        pci_emul_membase32 += BUSMEM32_ROUNDUP;
        pci_emul_membase32 = roundup2(pci_emul_membase32,
            BUSMEM32_ROUNDUP);
        bi->memlimit32 = pci_emul_membase32;

        pci_emul_membase64 += BUSMEM64_ROUNDUP;
        pci_emul_membase64 = roundup2(pci_emul_membase64,
            BUSMEM64_ROUNDUP);
        bi->memlimit64 = pci_emul_membase64;
    }

    /*
     * PCI backends are initialized before routing INTx interrupts
     * so that LPC devices are able to reserve ISA IRQs before
     * routing PIRQ pins.
     */
    for (bus = 0; bus < MAXBUSES; bus++) {
        if ((bi = pci_businfo[bus]) == NULL)
            continue;

        for (slot = 0; slot < MAXSLOTS; slot++) {
            si = &bi->slotinfo[slot];
            for (func = 0; func < MAXFUNCS; func++) {
                fi = &si->si_funcs[func];
                if (fi->fi_devi == NULL)
                    continue;
                pci_lintr_route(fi->fi_devi);
            }
        }
    }
    lpc_pirq_routed();

    if ((error = init_bootorder()) != 0) {
        warnx("%s: Unable to init bootorder", __func__);
        return (error);
    }

    /*
     * The guest physical memory map looks like the following:
     * [0,           lowmem)          guest system memory
     * [lowmem,      0xC0000000)      memory hole (may be absent)
     * [0xC0000000,  0xE0000000)      PCI hole (32-bit BAR allocation)
     * [0xE0000000,  0xF0000000)      PCI extended config window
     * [0xF0000000,  4GB)             LAPIC, IOAPIC, HPET, firmware
     * [4GB,         4GB + highmem)
     */

    /*
     * Accesses to memory addresses that are not allocated to system
     * memory or PCI devices return 0xff's.
     */
    lowmem = vm_get_lowmem_size(ctx);
    bzero(&mr, sizeof(struct mem_range));
    mr.name = "PCI hole";
    mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
    mr.base = lowmem;
    mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem;
    mr.handler = pci_emul_fallback_handler;
    error = register_mem_fallback(&mr);
    assert(error == 0);

    /* PCI extended config space */
    bzero(&mr, sizeof(struct mem_range));
    mr.name = "PCI ECFG";
    mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
    mr.base = PCI_EMUL_ECFG_BASE;
    mr.size = PCI_EMUL_ECFG_SIZE;
    mr.handler = pci_emul_ecfg_handler;
    error = register_mem(&mr);
    assert(error == 0);

    return (0);
}

static void
pci_apic_prt_entry(int bus __unused, int slot, int pin, int pirq_pin __unused,
    int ioapic_irq, void *arg __unused)
{

    dsdt_line("  Package ()");
    dsdt_line("  {");
    dsdt_line("    0x%X,", slot << 16 | 0xffff);
    dsdt_line("    0x%02X,", pin - 1);
    dsdt_line("    Zero,");
    dsdt_line("    0x%X", ioapic_irq);
    dsdt_line("  },");
}

static void
pci_pirq_prt_entry(int bus __unused, int slot, int pin, int pirq_pin,
    int ioapic_irq __unused, void *arg __unused)
{
    char *name;

    name = lpc_pirq_name(pirq_pin);
    if (name == NULL)
        return;
    dsdt_line("  Package ()");
    dsdt_line("  {");
    dsdt_line("    0x%X,", slot << 16 | 0xffff);
    dsdt_line("    0x%02X,", pin - 1);
    dsdt_line("    %s,", name);
    dsdt_line("    0x00");
    dsdt_line("  },");
    free(name);
}

/*
 * A bhyve virtual machine has a flat PCI hierarchy with a root port
 * corresponding to each PCI bus.
 */
static void
pci_bus_write_dsdt(int bus)
{
    struct businfo *bi;
    struct slotinfo *si;
    struct pci_devinst *pi;
    int count, func, slot;

    /*
     * If there are no devices on this 'bus' then just return.
     */
    if ((bi = pci_businfo[bus]) == NULL) {
        /*
         * Bus 0 is special because it decodes the I/O ports used
         * for PCI config space access even if there are no devices
         * on it.
         */
        if (bus != 0)
            return;
    }

    dsdt_line("  Device (PC%02X)", bus);
    dsdt_line("  {");
    dsdt_line("    Name (_HID, EisaId (\"PNP0A03\"))");

    dsdt_line("    Method (_BBN, 0, NotSerialized)");
    dsdt_line("    {");
    dsdt_line("        Return (0x%08X)", bus);
    dsdt_line("    }");
    dsdt_line("    Name (_CRS, ResourceTemplate ()");
    dsdt_line("    {");
    dsdt_line("      WordBusNumber (ResourceProducer, MinFixed, "
        "MaxFixed, PosDecode,");
    dsdt_line("        0x0000,             // Granularity");
    dsdt_line("        0x%04X,             // Range Minimum", bus);
    dsdt_line("        0x%04X,             // Range Maximum", bus);
    dsdt_line("        0x0000,             // Translation Offset");
    dsdt_line("        0x0001,             // Length");
    dsdt_line("        ,, )");

    if (bus == 0) {
        dsdt_indent(3);
        dsdt_fixed_ioport(0xCF8, 8);
        dsdt_unindent(3);

        dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
            "PosDecode, EntireRange,");
        dsdt_line("        0x0000,             // Granularity");
        dsdt_line("        0x0000,             // Range Minimum");
        dsdt_line("        0x0CF7,             // Range Maximum");
        dsdt_line("        0x0000,             // Translation Offset");
        dsdt_line("        0x0CF8,             // Length");
        dsdt_line("        ,, , TypeStatic)");

        dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
            "PosDecode, EntireRange,");
        dsdt_line("        0x0000,             // Granularity");
        dsdt_line("        0x0D00,             // Range Minimum");
        dsdt_line("        0x%04X,             // Range Maximum",
            PCI_EMUL_IOBASE - 1);
        dsdt_line("        0x0000,             // Translation Offset");
        dsdt_line("        0x%04X,             // Length",
            PCI_EMUL_IOBASE - 0x0D00);
        dsdt_line("        ,, , TypeStatic)");

        if (bi == NULL) {
            dsdt_line("    })");
            goto done;
        }
    }
    assert(bi != NULL);

    /* i/o window */
    dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
        "PosDecode, EntireRange,");
    dsdt_line("        0x0000,             // Granularity");
    dsdt_line("        0x%04X,             // Range Minimum", bi->iobase);
    dsdt_line("        0x%04X,             // Range Maximum",
        bi->iolimit - 1);
    dsdt_line("        0x0000,             // Translation Offset");
    dsdt_line("        0x%04X,             // Length",
        bi->iolimit - bi->iobase);
    dsdt_line("        ,, , TypeStatic)");

    /* mmio window (32-bit) */
    dsdt_line("      DWordMemory (ResourceProducer, PosDecode, "
        "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
    dsdt_line("        0x00000000,         // Granularity");
    dsdt_line("        0x%08X,         // Range Minimum\n", bi->membase32);
    dsdt_line("        0x%08X,         // Range Maximum\n",
        bi->memlimit32 - 1);
    dsdt_line("        0x00000000,         // Translation Offset");
    dsdt_line("        0x%08X,         // Length\n",
        bi->memlimit32 - bi->membase32);
    dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");

    /* mmio window (64-bit) */
    dsdt_line("      QWordMemory (ResourceProducer, PosDecode, "
        "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
    dsdt_line("        0x0000000000000000, // Granularity");
    dsdt_line("        0x%016lX,           // Range Minimum\n", bi->membase64);
    dsdt_line("        0x%016lX,           // Range Maximum\n",
        bi->memlimit64 - 1);
    dsdt_line("        0x0000000000000000, // Translation Offset");
    dsdt_line("        0x%016lX,           // Length\n",
        bi->memlimit64 - bi->membase64);
    dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
    dsdt_line("    })");

    count = pci_count_lintr(bus);
    if (count != 0) {
        dsdt_indent(2);
        dsdt_line("Name (PPRT, Package ()");
        dsdt_line("{");
        pci_walk_lintr(bus, pci_pirq_prt_entry, NULL);
        dsdt_line("})");
        dsdt_line("Name (APRT, Package ()");
        dsdt_line("{");
        pci_walk_lintr(bus, pci_apic_prt_entry, NULL);
        dsdt_line("})");
        dsdt_line("Method (_PRT, 0, NotSerialized)");
        dsdt_line("{");
        dsdt_line("  If (PICM)");
        dsdt_line("  {");
        dsdt_line("    Return (APRT)");
        dsdt_line("  }");
        dsdt_line("  Else");
        dsdt_line("  {");
        dsdt_line("    Return (PPRT)");
        dsdt_line("  }");
        dsdt_line("}");
        dsdt_unindent(2);
    }

    dsdt_indent(2);
    for (slot = 0; slot < MAXSLOTS; slot++) {
        si = &bi->slotinfo[slot];
        for (func = 0; func < MAXFUNCS; func++) {
            pi = si->si_funcs[func].fi_devi;
            if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
                pi->pi_d->pe_write_dsdt(pi);
        }
    }
    dsdt_unindent(2);
done:
    dsdt_line("  }");
}

void
pci_write_dsdt(void)
{
    int bus;

    dsdt_indent(1);
    dsdt_line("Name (PICM, 0x00)");
    dsdt_line("Method (_PIC, 1, NotSerialized)");
    dsdt_line("{");
    dsdt_line("  Store (Arg0, PICM)");
    dsdt_line("}");
    dsdt_line("");
    dsdt_line("Scope (_SB)");
    dsdt_line("{");
    for (bus = 0; bus < MAXBUSES; bus++)
        pci_bus_write_dsdt(bus);
    dsdt_line("}");
    dsdt_unindent(1);
}

int
pci_bus_configured(int bus)
{
    assert(bus >= 0 && bus < MAXBUSES);
    return (pci_businfo[bus] != NULL);
}

int
pci_msi_enabled(struct pci_devinst *pi)
{
    return (pi->pi_msi.enabled);
}

int
pci_msi_maxmsgnum(struct pci_devinst *pi)
{
    if (pi->pi_msi.enabled)
        return (pi->pi_msi.maxmsgnum);
    else
        return (0);
}

int
pci_msix_enabled(struct pci_devinst *pi)
{

    return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}

void
pci_generate_msix(struct pci_devinst *pi, int index)
{
    struct msix_table_entry *mte;

    if (!pci_msix_enabled(pi))
        return;

    if (pi->pi_msix.function_mask)
        return;

    if (index >= pi->pi_msix.table_count)
        return;

    mte = &pi->pi_msix.table[index];
    if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
        /* XXX Set PBA bit if interrupt is disabled */
        vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
    }
}

void
pci_generate_msi(struct pci_devinst *pi, int index)
{

    if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
        vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
            pi->pi_msi.msg_data + index);
    }
}

static bool
pci_lintr_permitted(struct pci_devinst *pi)
{
    uint16_t cmd;

    cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
    return (!(pi->pi_msi.enabled || pi->pi_msix.enabled ||
        (cmd & PCIM_CMD_INTxDIS)));
}

void
pci_lintr_request(struct pci_devinst *pi)
{
    struct businfo *bi;
    struct slotinfo *si;
    int bestpin, bestcount, pin;

    bi = pci_businfo[pi->pi_bus];
    assert(bi != NULL);

    /*
     * Just allocate a pin from our slot. The pin will be
     * assigned IRQs later when interrupts are routed.
     */
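    /*
     * Pick the least-used of INTA#..INTD# so that functions sharing a
     * slot are spread across the four pins.
     */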
    si = &bi->slotinfo[pi->pi_slot];
    bestpin = 0;
    bestcount = si->si_intpins[0].ii_count;
    for (pin = 1; pin < 4; pin++) {
        if (si->si_intpins[pin].ii_count < bestcount) {
            bestpin = pin;
            bestcount = si->si_intpins[pin].ii_count;
        }
    }

    si->si_intpins[bestpin].ii_count++;
    pi->pi_lintr.pin = bestpin + 1;
    pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1);
}

static void
pci_lintr_route(struct pci_devinst *pi)
{
    struct businfo *bi;
    struct intxinfo *ii;

    if (pi->pi_lintr.pin == 0)
        return;

    bi = pci_businfo[pi->pi_bus];
    assert(bi != NULL);
    ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1];

    /*
     * Attempt to allocate an I/O APIC pin for this intpin if one
     * is not yet assigned.
     */
    if (ii->ii_ioapic_irq == 0)
        ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi);
    assert(ii->ii_ioapic_irq > 0);

    /*
     * Attempt to allocate a PIRQ pin for this intpin if one is
     * not yet assigned.
     */
    if (ii->ii_pirq_pin == 0)
        ii->ii_pirq_pin = pirq_alloc_pin(pi);
    assert(ii->ii_pirq_pin > 0);

    pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq;
    pi->pi_lintr.pirq_pin = ii->ii_pirq_pin;
    pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin));
}

void
pci_lintr_assert(struct pci_devinst *pi)
{

    assert(pi->pi_lintr.pin > 0);

    pthread_mutex_lock(&pi->pi_lintr.lock);
    if (pi->pi_lintr.state == IDLE) {
        if (pci_lintr_permitted(pi)) {
            pi->pi_lintr.state = ASSERTED;
            pci_irq_assert(pi);
        } else
            pi->pi_lintr.state = PENDING;
    }
    pthread_mutex_unlock(&pi->pi_lintr.lock);
}

void
pci_lintr_deassert(struct pci_devinst *pi)
{

    assert(pi->pi_lintr.pin > 0);

    pthread_mutex_lock(&pi->pi_lintr.lock);
    if (pi->pi_lintr.state == ASSERTED) {
        pi->pi_lintr.state = IDLE;
        pci_irq_deassert(pi);
    } else if (pi->pi_lintr.state == PENDING)
        pi->pi_lintr.state = IDLE;
    pthread_mutex_unlock(&pi->pi_lintr.lock);
}

static void
pci_lintr_update(struct pci_devinst *pi)
{

    pthread_mutex_lock(&pi->pi_lintr.lock);
    if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) {
        pci_irq_deassert(pi);
        pi->pi_lintr.state = PENDING;
    } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) {
        pi->pi_lintr.state = ASSERTED;
        pci_irq_assert(pi);
    }
    pthread_mutex_unlock(&pi->pi_lintr.lock);
#ifndef __FreeBSD__
    if (pi->pi_d->pe_lintrupdate != NULL) {
        pi->pi_d->pe_lintrupdate(pi);
    }
#endif /* __FreeBSD__ */
}

int
pci_count_lintr(int bus)
{
    int count, slot, pin;
    struct slotinfo *slotinfo;

    count = 0;
    if (pci_businfo[bus] != NULL) {
        for (slot = 0; slot < MAXSLOTS; slot++) {
            slotinfo = &pci_businfo[bus]->slotinfo[slot];
            for (pin = 0; pin < 4; pin++) {
                if (slotinfo->si_intpins[pin].ii_count != 0)
                    count++;
            }
        }
    }
    return (count);
}

void
pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
{
    struct businfo *bi;
    struct slotinfo *si;
    struct intxinfo *ii;
    int slot, pin;

    if ((bi = pci_businfo[bus]) == NULL)
        return;

    for (slot = 0; slot < MAXSLOTS; slot++) {
        si = &bi->slotinfo[slot];
        for (pin = 0; pin < 4; pin++) {
            ii = &si->si_intpins[pin];
            if (ii->ii_count != 0)
                cb(bus, slot, pin + 1, ii->ii_pirq_pin,
                    ii->ii_ioapic_irq, arg);
        }
    }
}

/*
 * Return 1 if the emulated device in 'slot' is a multi-function device.
 * Return 0 otherwise.
 */
static int
pci_emul_is_mfdev(int bus, int slot)
{
    struct businfo *bi;
    struct slotinfo *si;
    int f, numfuncs;

    numfuncs = 0;
    if ((bi = pci_businfo[bus]) != NULL) {
        si = &bi->slotinfo[slot];
        for (f = 0; f < MAXFUNCS; f++) {
            if (si->si_funcs[f].fi_devi != NULL) {
                numfuncs++;
            }
        }
    }
    return (numfuncs > 1);
}

/*
 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
 * whether or not a multi-function device is being emulated in the pci
 * 'slot'.
 */
static void
pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
{
    int mfdev;

    if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
        mfdev = pci_emul_is_mfdev(bus, slot);
        switch (bytes) {
        case 1:
        case 2:
            *rv &= ~PCIM_MFDEV;
            if (mfdev) {
                *rv |= PCIM_MFDEV;
            }
            break;
        case 4:
            *rv &= ~(PCIM_MFDEV << 16);
            if (mfdev) {
                *rv |= (PCIM_MFDEV << 16);
            }
            break;
        }
    }
}

/*
 * Update device state in response to changes to the PCI command
 * register.
 */
void
pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old)
{
    int i;
    uint16_t changed, new;

    new = pci_get_cfgdata16(pi, PCIR_COMMAND);
    changed = old ^ new;

    /*
     * If the MMIO or I/O address space decoding has changed then
     * register/unregister all BARs that decode that address space.
     */
    for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) {
        switch (pi->pi_bar[i].type) {
        case PCIBAR_NONE:
        case PCIBAR_MEMHI64:
            break;
        case PCIBAR_IO:
            /* I/O address space decoding changed? */
            if (changed & PCIM_CMD_PORTEN) {
                if (new & PCIM_CMD_PORTEN)
                    register_bar(pi, i);
                else
                    unregister_bar(pi, i);
            }
            break;
        case PCIBAR_ROM:
            /* skip (un-)registering the ROM if it is disabled */
            if (!romen(pi))
                break;
            /* fallthrough */
        case PCIBAR_MEM32:
        case PCIBAR_MEM64:
            /* MMIO address space decoding changed? */
            if (changed & PCIM_CMD_MEMEN) {
                if (new & PCIM_CMD_MEMEN)
                    register_bar(pi, i);
                else
                    unregister_bar(pi, i);
            }
            break;
        default:
            assert(0);
        }
    }

    /*
     * If INTx has been unmasked and is pending, assert the
     * interrupt.
     */
    pci_lintr_update(pi);
}

static void
pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
{
    int rshift;
    uint32_t cmd, old, readonly;

    cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);  /* stash old value */

    /*
     * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
     *
     * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
     * 'write 1 to clear'. However these bits are not set to '1' by
     * any device emulation so it is simpler to treat them as readonly.
     */
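    /*
     * The mask below makes the whole status word plus the reserved
     * command bits (7 and 11-15) readonly, shifted to match the byte
     * offset of the access within the command/status dword.
     */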
static void
pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
{
	int rshift;
	uint32_t cmd, old, readonly;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* stash old value */

	/*
	 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
	 *
	 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
	 * 'write 1 to clear'. However these bits are not set to '1' by
	 * any device emulation so it is simpler to treat them as readonly.
	 */
	rshift = (coff & 0x3) * 8;
	readonly = 0xFFFFF880 >> rshift;

	old = CFGREAD(pi, coff, bytes);
	new &= ~readonly;
	new |= (old & readonly);
	CFGWRITE(pi, coff, new, bytes);		/* update config */

	pci_emul_cmd_changed(pi, cmd);
}
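
/*
 * Worked example for the mask above (illustrative): a 2-byte write to
 * the command register (coff = PCIR_COMMAND, so rshift = 0) can modify
 * only ~0xF880 & 0xFFFF = 0x077F, i.e. bits 0-6 and 8-10.  A 2-byte
 * write to the status register (coff = PCIR_STATUS, so rshift = 16)
 * sees readonly = 0xFFFF and is discarded entirely.
 */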
static void
pci_cfgrw(int in, int bus, int slot, int func, int coff, int bytes,
    uint32_t *valp)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	struct pci_devemu *pe;
	int idx, needcfg;
	uint64_t addr, mask;
	uint64_t bar = 0;

	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		pi = si->si_funcs[func].fi_devi;
	} else
		pi = NULL;

	/*
	 * Just return if there is no device at this slot:func or if the
	 * guest is doing an unaligned access.
	 */
	if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) ||
	    (coff & (bytes - 1)) != 0) {
		if (in)
			*valp = 0xffffffff;
		return;
	}

	/*
	 * Ignore all writes beyond the standard config space and return all
	 * ones on reads.
	 */
	if (coff >= PCI_REGMAX + 1) {
		if (in) {
			*valp = 0xffffffff;
			/*
			 * Extended capabilities begin at offset 256 in config
			 * space. Absence of extended capabilities is signaled
			 * with all 0s in the extended capability header at
			 * offset 256.
			 */
			if (coff <= PCI_REGMAX + 4)
				*valp = 0x00000000;
		}
		return;
	}

	pe = pi->pi_d;

	/*
	 * Config read
	 */
	if (in) {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgread != NULL) {
			needcfg = pe->pe_cfgread(pi, coff, bytes, valp);
		} else {
			needcfg = 1;
		}

		if (needcfg)
			*valp = CFGREAD(pi, coff, bytes);

		pci_emul_hdrtype_fixup(bus, slot, coff, bytes, valp);
	} else {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgwrite != NULL &&
		    (*pe->pe_cfgwrite)(pi, coff, bytes, *valp) == 0)
			return;

		/*
		 * Special handling for write to BAR and ROM registers
		 */
		if (is_pcir_bar(coff) || is_pcir_bios(coff)) {
			/*
			 * Ignore writes to BAR registers that are not
			 * 4-byte aligned.
			 */
			if (bytes != 4 || (coff & 0x3) != 0)
				return;

			if (is_pcir_bar(coff)) {
				idx = (coff - PCIR_BAR(0)) / 4;
			} else if (is_pcir_bios(coff)) {
				idx = PCI_ROM_IDX;
			} else {
				errx(4, "%s: invalid BAR offset %d", __func__,
				    coff);
			}

			mask = ~(pi->pi_bar[idx].size - 1);
			switch (pi->pi_bar[idx].type) {
			case PCIBAR_NONE:
				pi->pi_bar[idx].addr = bar = 0;
				break;
			case PCIBAR_IO:
				addr = *valp & mask;
				addr &= 0xffff;
				bar = addr | pi->pi_bar[idx].lobits;
				/*
				 * Register the new BAR value for interception
				 */
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_IO);
				}
				break;
			case PCIBAR_MEM32:
				addr = bar = *valp & mask;
				bar |= pi->pi_bar[idx].lobits;
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM32);
				}
				break;
			case PCIBAR_MEM64:
				addr = bar = *valp & mask;
				bar |= pi->pi_bar[idx].lobits;
				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM64);
				}
				break;
			case PCIBAR_MEMHI64:
				mask = ~(pi->pi_bar[idx - 1].size - 1);
				addr = ((uint64_t)*valp << 32) & mask;
				bar = addr >> 32;
				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
					update_bar_address(pi, addr, idx - 1,
					    PCIBAR_MEMHI64);
				}
				break;
			case PCIBAR_ROM:
				addr = bar = *valp & mask;
				if (memen(pi) && romen(pi)) {
					unregister_bar(pi, idx);
				}
				pi->pi_bar[idx].addr = addr;
				pi->pi_bar[idx].lobits = *valp &
				    PCIM_BIOS_ENABLE;
				/* romen could have changed its value */
				if (memen(pi) && romen(pi)) {
					register_bar(pi, idx);
				}
				bar |= pi->pi_bar[idx].lobits;
				break;
			default:
				assert(0);
			}
			pci_set_cfgdata32(pi, coff, bar);

		} else if (pci_emul_iscap(pi, coff)) {
			pci_emul_capwrite(pi, coff, bytes, *valp, 0, 0);
		} else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
			pci_emul_cmdsts_write(pi, coff, *valp, bytes);
		} else {
			CFGWRITE(pi, coff, *valp, bytes);
		}
	}
}

static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;
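
/*
 * Illustrative sketch (hypothetical guest-side code): the CONF1
 * mechanism emulated by the two handlers below.  The guest latches an
 * enabled bus/slot/func/register address into port 0xcf8 and then moves
 * data through ports 0xcfc-0xcff; the low two bits of the data port
 * select the byte lane within the dword-aligned register:
 *
 *	outl(0xcf8, CONF1_ENABLE | (bus << 16) | (slot << 11) |
 *	    (func << 8) | (reg & ~0x03));
 *	val = inl(0xcfc);			4-byte config read
 *	outw(0xcfc + (reg & 2), val16);		2-byte config write
 */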
static int
pci_emul_cfgaddr(struct vmctx *ctx __unused, int in,
    int port __unused, int bytes, uint32_t *eax, void *arg __unused)
{
	uint32_t x;

	if (bytes != 4) {
		if (in)
			*eax = (bytes == 2) ? 0xffff : 0xff;
		return (0);
	}

	if (in) {
		x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff;
		if (cfgenable)
			x |= CONF1_ENABLE;
		*eax = x;
	} else {
		x = *eax;
		cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
		cfgoff = (x & PCI_REGMAX) & ~0x03;
		cfgfunc = (x >> 8) & PCI_FUNCMAX;
		cfgslot = (x >> 11) & PCI_SLOTMAX;
		cfgbus = (x >> 16) & PCI_BUSMAX;
	}

	return (0);
}
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);

static int
pci_emul_cfgdata(struct vmctx *ctx __unused, int in, int port,
    int bytes, uint32_t *eax, void *arg __unused)
{
	int coff;

	assert(bytes == 1 || bytes == 2 || bytes == 4);

	coff = cfgoff + (port - CONF1_DATA_PORT);
	if (cfgenable) {
		pci_cfgrw(in, cfgbus, cfgslot, cfgfunc, coff, bytes, eax);
	} else {
		/* Ignore accesses to cfgdata if not enabled by cfgaddr */
		if (in)
			*eax = 0xffffffff;
	}
	return (0);
}

INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);

#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
 * Define a dummy test device
 */
#define DIOSZ	8
#define DMEMSZ	4096
struct pci_emul_dsoftc {
	uint8_t	ioregs[DIOSZ];
	uint8_t	memregs[2][DMEMSZ];
};

#define	PCI_EMUL_MSI_MSGS	4
#define	PCI_EMUL_MSIX_MSGS	16

static int
pci_emul_dinit(struct pci_devinst *pi, nvlist_t *nvl __unused)
{
	int error;
	struct pci_emul_dsoftc *sc;

	sc = calloc(1, sizeof(struct pci_emul_dsoftc));

	pi->pi_arg = sc;

	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
	pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);

	error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ);
	assert(error == 0);

	return (0);
}
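
/*
 * Illustrative sketch (hypothetical guest-side code): exercising the
 * dummy device.  pci_emul_diow() below treats a 4-byte write at offset
 * 4 of the I/O BAR as a doorbell that, when the guest has enabled MSI,
 * generates message 'value % pci_msi_maxmsgnum(pi)'; the magic value
 * 0xabcdef additionally fires every allocated message:
 *
 *	outl(bar0_base + 4, 1);			MSI message 1
 *	outl(bar0_base + 4, 0xabcdef);		all allocated messages
 */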
static void
pci_emul_diow(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
    uint64_t value)
{
	int i;
	struct pci_emul_dsoftc *sc = pi->pi_arg;

	if (baridx == 0) {
		if (offset + size > DIOSZ) {
			printf("diow: iow too large, offset %ld size %d\n",
			    offset, size);
			return;
		}

		if (size == 1) {
			sc->ioregs[offset] = value & 0xff;
		} else if (size == 2) {
			*(uint16_t *)&sc->ioregs[offset] = value & 0xffff;
		} else if (size == 4) {
			*(uint32_t *)&sc->ioregs[offset] = value;
		} else {
			printf("diow: iow unknown size %d\n", size);
		}

		/*
		 * Special magic value to generate an interrupt
		 */
		if (offset == 4 && size == 4 && pci_msi_enabled(pi))
			pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));

		if (value == 0xabcdef) {
			for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
				pci_generate_msi(pi, i);
		}
	}

	if (baridx == 1 || baridx == 2) {
		if (offset + size > DMEMSZ) {
			printf("diow: memw too large, offset %ld size %d\n",
			    offset, size);
			return;
		}

		i = baridx - 1;		/* 'memregs' index */

		if (size == 1) {
			sc->memregs[i][offset] = value;
		} else if (size == 2) {
			*(uint16_t *)&sc->memregs[i][offset] = value;
		} else if (size == 4) {
			*(uint32_t *)&sc->memregs[i][offset] = value;
		} else if (size == 8) {
			*(uint64_t *)&sc->memregs[i][offset] = value;
		} else {
			printf("diow: memw unknown size %d\n", size);
		}

		/*
		 * magic interrupt ??
		 */
	}

	if (baridx > 2 || baridx < 0) {
		printf("diow: unknown bar idx %d\n", baridx);
	}
}

static uint64_t
pci_emul_dior(struct pci_devinst *pi, int baridx, uint64_t offset, int size)
{
	struct pci_emul_dsoftc *sc = pi->pi_arg;
	uint64_t value;
	int i;

	value = 0;
	if (baridx == 0) {
		if (offset + size > DIOSZ) {
			printf("dior: ior too large, offset %ld size %d\n",
			    offset, size);
			return (0);
		}

		value = 0;
		if (size == 1) {
			value = sc->ioregs[offset];
		} else if (size == 2) {
			value = *(uint16_t *) &sc->ioregs[offset];
		} else if (size == 4) {
			value = *(uint32_t *) &sc->ioregs[offset];
		} else {
			printf("dior: ior unknown size %d\n", size);
		}
	}

	if (baridx == 1 || baridx == 2) {
		if (offset + size > DMEMSZ) {
			printf("dior: memr too large, offset %ld size %d\n",
			    offset, size);
			return (0);
		}

		i = baridx - 1;		/* 'memregs' index */

		if (size == 1) {
			value = sc->memregs[i][offset];
		} else if (size == 2) {
			value = *(uint16_t *) &sc->memregs[i][offset];
		} else if (size == 4) {
			value = *(uint32_t *) &sc->memregs[i][offset];
		} else if (size == 8) {
			value = *(uint64_t *) &sc->memregs[i][offset];
		} else {
			printf("dior: memr unknown size %d\n", size);
		}
	}

	if (baridx > 2 || baridx < 0) {
		printf("dior: unknown bar idx %d\n", baridx);
		return (0);
	}

	return (value);
}

static const struct pci_devemu pci_dummy = {
	.pe_emu = "dummy",
	.pe_init = pci_emul_dinit,
	.pe_barwrite = pci_emul_diow,
	.pe_barread = pci_emul_dior,
};
PCI_EMUL_SET(pci_dummy);

#endif /* PCI_EMUL_TEST */
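
/*
 * Illustrative sketch (not part of this file; "mydev" and the IDs are
 * hypothetical): a new device emulation is registered the same way as
 * pci_dummy above -- populate a pci_devemu and add it to the linker set
 * so the PCI emulation can match it by name:
 *
 *	static int
 *	mydev_init(struct pci_devinst *pi, nvlist_t *nvl)
 *	{
 *		pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1234);
 *		pci_set_cfgdata16(pi, PCIR_DEVICE, 0x5678);
 *		return (0);
 *	}
 *
 *	static const struct pci_devemu pci_mydev = {
 *		.pe_emu = "mydev",
 *		.pe_init = mydev_init,
 *	};
 *	PCI_EMUL_SET(pci_mydev);
 *
 * The device could then be attached from the command line with e.g.
 * "bhyve -s 3,mydev ...".
 */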