1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 /* 29 * This file and its contents are supplied under the terms of the 30 * Common Development and Distribution License ("CDDL"), version 1.0. 31 * You may only use this file in accordance with the terms of version 32 * 1.0 of the CDDL. 33 * 34 * A full copy of the text of the CDDL should have accompanied this 35 * source. A copy of the CDDL is also available via the Internet at 36 * http://www.illumos.org/license/CDDL. 37 * 38 * Copyright 2014 Pluribus Networks Inc. 39 * Copyright 2018 Joyent, Inc. 40 */ 41 42 #include <sys/cdefs.h> 43 44 #include <sys/param.h> 45 #include <sys/linker_set.h> 46 #include <sys/mman.h> 47 48 #include <ctype.h> 49 #include <err.h> 50 #include <errno.h> 51 #include <pthread.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include <strings.h> 56 #include <assert.h> 57 #include <stdbool.h> 58 #include <sysexits.h> 59 60 #include <machine/vmm.h> 61 #include <vmmapi.h> 62 63 #include "acpi.h" 64 #include "bhyverun.h" 65 #include "config.h" 66 #include "debug.h" 67 #include "inout.h" 68 #include "ioapic.h" 69 #include "mem.h" 70 #include "pci_emul.h" 71 #include "pci_irq.h" 72 #include "pci_lpc.h" 73 #include "pci_passthru.h" 74 #include "qemu_fwcfg.h" 75 76 #define CONF1_ADDR_PORT 0x0cf8 77 #define CONF1_DATA_PORT 0x0cfc 78 79 #define CONF1_ENABLE 0x80000000ul 80 81 #define MAXBUSES (PCI_BUSMAX + 1) 82 #define MAXSLOTS (PCI_SLOTMAX + 1) 83 #define MAXFUNCS (PCI_FUNCMAX + 1) 84 85 #define GB (1024 * 1024 * 1024UL) 86 87 struct funcinfo { 88 nvlist_t *fi_config; 89 struct pci_devemu *fi_pde; 90 struct pci_devinst *fi_devi; 91 }; 92 93 struct intxinfo { 94 int ii_count; 95 int ii_pirq_pin; 96 int ii_ioapic_irq; 97 }; 98 99 struct slotinfo { 100 struct intxinfo si_intpins[4]; 101 struct funcinfo si_funcs[MAXFUNCS]; 102 }; 103 104 struct businfo { 105 uint16_t iobase, iolimit; /* I/O window */ 106 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 107 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 108 struct slotinfo slotinfo[MAXSLOTS]; 109 }; 110 111 static struct businfo *pci_businfo[MAXBUSES]; 112 113 SET_DECLARE(pci_devemu_set, struct pci_devemu); 114 115 static uint64_t pci_emul_iobase; 116 static uint8_t *pci_emul_rombase; 117 static uint64_t pci_emul_romoffset; 118 static uint8_t *pci_emul_romlim; 119 static uint64_t pci_emul_membase32; 120 static uint64_t pci_emul_membase64; 121 static uint64_t pci_emul_memlim64; 122 123 struct pci_bar_allocation { 124 TAILQ_ENTRY(pci_bar_allocation) chain; 125 struct pci_devinst *pdi; 126 int idx; 127 enum pcibar_type type; 128 uint64_t size; 129 }; 130 131 static TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = 132 TAILQ_HEAD_INITIALIZER(pci_bars); 133 134 struct boot_device { 135 TAILQ_ENTRY(boot_device) boot_device_chain; 136 struct pci_devinst *pdi; 137 int bootindex; 138 }; 139 static TAILQ_HEAD(boot_list, boot_device) boot_devices = TAILQ_HEAD_INITIALIZER( 140 boot_devices); 141 142 #define PCI_EMUL_IOBASE 0x2000 143 #define PCI_EMUL_IOLIMIT 0x10000 144 145 #define PCI_EMUL_ROMSIZE 0x10000000 146 147 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */ 148 #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ 149 SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); 150 151 /* 152 * OVMF always uses 0xC0000000 as base address for 32 bit PCI MMIO. Don't 153 * change this address without changing it in OVMF. 154 */ 155 #define PCI_EMUL_MEMBASE32 0xC0000000 156 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE 157 #define PCI_EMUL_MEMSIZE64 (32*GB) 158 159 static struct pci_devemu *pci_emul_finddev(const char *name); 160 static void pci_lintr_route(struct pci_devinst *pi); 161 static void pci_lintr_update(struct pci_devinst *pi); 162 static void pci_cfgrw(int in, int bus, int slot, int func, int coff, 163 int bytes, uint32_t *val); 164 165 static __inline void 166 CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) 167 { 168 169 if (bytes == 1) 170 pci_set_cfgdata8(pi, coff, val); 171 else if (bytes == 2) 172 pci_set_cfgdata16(pi, coff, val); 173 else 174 pci_set_cfgdata32(pi, coff, val); 175 } 176 177 static __inline uint32_t 178 CFGREAD(struct pci_devinst *pi, int coff, int bytes) 179 { 180 181 if (bytes == 1) 182 return (pci_get_cfgdata8(pi, coff)); 183 else if (bytes == 2) 184 return (pci_get_cfgdata16(pi, coff)); 185 else 186 return (pci_get_cfgdata32(pi, coff)); 187 } 188 189 static int 190 is_pcir_bar(int coff) 191 { 192 return (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)); 193 } 194 195 static int 196 is_pcir_bios(int coff) 197 { 198 return (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4); 199 } 200 201 /* 202 * I/O access 203 */ 204 205 /* 206 * Slot options are in the form: 207 * 208 * <bus>:<slot>:<func>,<emul>[,<config>] 209 * <slot>[:<func>],<emul>[,<config>] 210 * 211 * slot is 0..31 212 * func is 0..7 213 * emul is a string describing the type of PCI device e.g. virtio-net 214 * config is an optional string, depending on the device, that can be 215 * used for configuration. 216 * Examples are: 217 * 1,virtio-net,tap0 218 * 3:0,dummy 219 */ 220 static void 221 pci_parse_slot_usage(char *aopt) 222 { 223 224 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 225 } 226 227 /* 228 * Helper function to parse a list of comma-separated options where 229 * each option is formatted as "name[=value]". If no value is 230 * provided, the option is treated as a boolean and is given a value 231 * of true. 232 */ 233 int 234 pci_parse_legacy_config(nvlist_t *nvl, const char *opt) 235 { 236 char *config, *name, *tofree, *value; 237 238 if (opt == NULL) 239 return (0); 240 241 config = tofree = strdup(opt); 242 while ((name = strsep(&config, ",")) != NULL) { 243 value = strchr(name, '='); 244 if (value != NULL) { 245 *value = '\0'; 246 value++; 247 set_config_value_node(nvl, name, value); 248 } else 249 set_config_bool_node(nvl, name, true); 250 } 251 free(tofree); 252 return (0); 253 } 254 255 /* 256 * PCI device configuration is stored in MIBs that encode the device's 257 * location: 258 * 259 * pci.<bus>.<slot>.<func> 260 * 261 * Where "bus", "slot", and "func" are all decimal values without 262 * leading zeroes. Each valid device must have a "device" node which 263 * identifies the driver model of the device. 264 * 265 * Device backends can provide a parser for the "config" string. If 266 * a custom parser is not provided, pci_parse_legacy_config() is used 267 * to parse the string. 268 */ 269 int 270 pci_parse_slot(char *opt) 271 { 272 char node_name[sizeof("pci.XXX.XX.X")]; 273 struct pci_devemu *pde; 274 char *emul, *config, *str, *cp; 275 int error, bnum, snum, fnum; 276 nvlist_t *nvl; 277 278 error = -1; 279 str = strdup(opt); 280 281 emul = config = NULL; 282 if ((cp = strchr(str, ',')) != NULL) { 283 *cp = '\0'; 284 emul = cp + 1; 285 if ((cp = strchr(emul, ',')) != NULL) { 286 *cp = '\0'; 287 config = cp + 1; 288 } 289 } else { 290 pci_parse_slot_usage(opt); 291 goto done; 292 } 293 294 /* <bus>:<slot>:<func> */ 295 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 296 bnum = 0; 297 /* <slot>:<func> */ 298 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 299 fnum = 0; 300 /* <slot> */ 301 if (sscanf(str, "%d", &snum) != 1) { 302 snum = -1; 303 } 304 } 305 } 306 307 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 308 fnum < 0 || fnum >= MAXFUNCS) { 309 pci_parse_slot_usage(opt); 310 goto done; 311 } 312 313 pde = pci_emul_finddev(emul); 314 if (pde == NULL) { 315 EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum, 316 fnum, emul); 317 goto done; 318 } 319 320 snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum, 321 fnum); 322 nvl = find_config_node(node_name); 323 if (nvl != NULL) { 324 EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum, 325 fnum); 326 goto done; 327 } 328 nvl = create_config_node(node_name); 329 if (pde->pe_alias != NULL) 330 set_config_value_node(nvl, "device", pde->pe_alias); 331 else 332 set_config_value_node(nvl, "device", pde->pe_emu); 333 334 if (pde->pe_legacy_config != NULL) 335 error = pde->pe_legacy_config(nvl, config); 336 else 337 error = pci_parse_legacy_config(nvl, config); 338 done: 339 free(str); 340 return (error); 341 } 342 343 void 344 pci_print_supported_devices(void) 345 { 346 struct pci_devemu **pdpp, *pdp; 347 348 SET_FOREACH(pdpp, pci_devemu_set) { 349 pdp = *pdpp; 350 printf("%s\n", pdp->pe_emu); 351 } 352 } 353 354 uint32_t 355 pci_config_read_reg(const struct pcisel *const host_sel, nvlist_t *nvl, 356 const uint32_t reg, const uint8_t size, const uint32_t def) 357 { 358 const char *config; 359 const nvlist_t *pci_regs; 360 361 assert(size == 1 || size == 2 || size == 4); 362 363 pci_regs = find_relative_config_node(nvl, "pcireg"); 364 if (pci_regs == NULL) { 365 return def; 366 } 367 368 switch (reg) { 369 case PCIR_DEVICE: 370 config = get_config_value_node(pci_regs, "device"); 371 break; 372 case PCIR_VENDOR: 373 config = get_config_value_node(pci_regs, "vendor"); 374 break; 375 case PCIR_REVID: 376 config = get_config_value_node(pci_regs, "revid"); 377 break; 378 case PCIR_SUBVEND_0: 379 config = get_config_value_node(pci_regs, "subvendor"); 380 break; 381 case PCIR_SUBDEV_0: 382 config = get_config_value_node(pci_regs, "subdevice"); 383 break; 384 default: 385 return (-1); 386 } 387 388 if (config == NULL) { 389 return def; 390 } else if (host_sel != NULL && strcmp(config, "host") == 0) { 391 return read_config(host_sel, reg, size); 392 } else { 393 return strtol(config, NULL, 16); 394 } 395 } 396 397 static int 398 pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 399 { 400 401 if (offset < pi->pi_msix.pba_offset) 402 return (0); 403 404 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 405 return (0); 406 } 407 408 return (1); 409 } 410 411 int 412 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 413 uint64_t value) 414 { 415 int msix_entry_offset; 416 int tab_index; 417 char *dest; 418 419 /* support only 4 or 8 byte writes */ 420 if (size != 4 && size != 8) 421 return (-1); 422 423 /* 424 * Return if table index is beyond what device supports 425 */ 426 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 427 if (tab_index >= pi->pi_msix.table_count) 428 return (-1); 429 430 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 431 432 /* support only aligned writes */ 433 if ((msix_entry_offset % size) != 0) 434 return (-1); 435 436 dest = (char *)(pi->pi_msix.table + tab_index); 437 dest += msix_entry_offset; 438 439 if (size == 4) 440 *((uint32_t *)dest) = value; 441 else 442 *((uint64_t *)dest) = value; 443 444 return (0); 445 } 446 447 uint64_t 448 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 449 { 450 char *dest; 451 int msix_entry_offset; 452 int tab_index; 453 uint64_t retval = ~0; 454 455 /* 456 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 457 * table but we also allow 1 byte access to accommodate reads from 458 * ddb. 459 */ 460 if (size != 1 && size != 4 && size != 8) 461 return (retval); 462 463 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 464 465 /* support only aligned reads */ 466 if ((msix_entry_offset % size) != 0) { 467 return (retval); 468 } 469 470 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 471 472 if (tab_index < pi->pi_msix.table_count) { 473 /* valid MSI-X Table access */ 474 dest = (char *)(pi->pi_msix.table + tab_index); 475 dest += msix_entry_offset; 476 477 if (size == 1) 478 retval = *((uint8_t *)dest); 479 else if (size == 4) 480 retval = *((uint32_t *)dest); 481 else 482 retval = *((uint64_t *)dest); 483 } else if (pci_valid_pba_offset(pi, offset)) { 484 /* return 0 for PBA access */ 485 retval = 0; 486 } 487 488 return (retval); 489 } 490 491 int 492 pci_msix_table_bar(struct pci_devinst *pi) 493 { 494 495 if (pi->pi_msix.table != NULL) 496 return (pi->pi_msix.table_bar); 497 else 498 return (-1); 499 } 500 501 int 502 pci_msix_pba_bar(struct pci_devinst *pi) 503 { 504 505 if (pi->pi_msix.table != NULL) 506 return (pi->pi_msix.pba_bar); 507 else 508 return (-1); 509 } 510 511 static int 512 pci_emul_io_handler(struct vmctx *ctx __unused, int in, int port, 513 int bytes, uint32_t *eax, void *arg) 514 { 515 struct pci_devinst *pdi = arg; 516 struct pci_devemu *pe = pdi->pi_d; 517 uint64_t offset; 518 int i; 519 520 assert(port >= 0); 521 522 for (i = 0; i <= PCI_BARMAX; i++) { 523 if (pdi->pi_bar[i].type == PCIBAR_IO && 524 (uint64_t)port >= pdi->pi_bar[i].addr && 525 (uint64_t)port + bytes <= 526 pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 527 offset = port - pdi->pi_bar[i].addr; 528 if (in) 529 *eax = (*pe->pe_barread)(pdi, i, 530 offset, bytes); 531 else 532 (*pe->pe_barwrite)(pdi, i, offset, 533 bytes, *eax); 534 return (0); 535 } 536 } 537 return (-1); 538 } 539 540 static int 541 pci_emul_mem_handler(struct vcpu *vcpu __unused, int dir, 542 uint64_t addr, int size, uint64_t *val, void *arg1, long arg2) 543 { 544 struct pci_devinst *pdi = arg1; 545 struct pci_devemu *pe = pdi->pi_d; 546 uint64_t offset; 547 int bidx = (int) arg2; 548 549 assert(bidx <= PCI_BARMAX); 550 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 551 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 552 assert(addr >= pdi->pi_bar[bidx].addr && 553 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 554 555 offset = addr - pdi->pi_bar[bidx].addr; 556 557 if (dir == MEM_F_WRITE) { 558 if (size == 8) { 559 (*pe->pe_barwrite)(pdi, bidx, offset, 560 4, *val & 0xffffffff); 561 (*pe->pe_barwrite)(pdi, bidx, offset + 4, 562 4, *val >> 32); 563 } else { 564 (*pe->pe_barwrite)(pdi, bidx, offset, 565 size, *val); 566 } 567 } else { 568 if (size == 8) { 569 *val = (*pe->pe_barread)(pdi, bidx, 570 offset, 4); 571 *val |= (*pe->pe_barread)(pdi, bidx, 572 offset + 4, 4) << 32; 573 } else { 574 *val = (*pe->pe_barread)(pdi, bidx, 575 offset, size); 576 } 577 } 578 579 return (0); 580 } 581 582 583 static int 584 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 585 uint64_t *addr) 586 { 587 uint64_t base; 588 589 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 590 591 base = roundup2(*baseptr, size); 592 593 if (base + size <= limit) { 594 *addr = base; 595 *baseptr = base + size; 596 return (0); 597 } else 598 return (-1); 599 } 600 601 /* 602 * Register (or unregister) the MMIO or I/O region associated with the BAR 603 * register 'idx' of an emulated pci device. 604 */ 605 static void 606 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 607 { 608 struct pci_devemu *pe; 609 int error; 610 struct inout_port iop; 611 struct mem_range mr; 612 613 pe = pi->pi_d; 614 switch (pi->pi_bar[idx].type) { 615 case PCIBAR_IO: 616 bzero(&iop, sizeof(struct inout_port)); 617 iop.name = pi->pi_name; 618 iop.port = pi->pi_bar[idx].addr; 619 iop.size = pi->pi_bar[idx].size; 620 if (registration) { 621 iop.flags = IOPORT_F_INOUT; 622 iop.handler = pci_emul_io_handler; 623 iop.arg = pi; 624 error = register_inout(&iop); 625 } else 626 error = unregister_inout(&iop); 627 break; 628 case PCIBAR_MEM32: 629 case PCIBAR_MEM64: 630 bzero(&mr, sizeof(struct mem_range)); 631 mr.name = pi->pi_name; 632 mr.base = pi->pi_bar[idx].addr; 633 mr.size = pi->pi_bar[idx].size; 634 if (registration) { 635 mr.flags = MEM_F_RW; 636 mr.handler = pci_emul_mem_handler; 637 mr.arg1 = pi; 638 mr.arg2 = idx; 639 error = register_mem(&mr); 640 } else 641 error = unregister_mem(&mr); 642 break; 643 case PCIBAR_ROM: 644 error = 0; 645 break; 646 default: 647 error = EINVAL; 648 break; 649 } 650 assert(error == 0); 651 652 if (pe->pe_baraddr != NULL) 653 (*pe->pe_baraddr)(pi, idx, registration, pi->pi_bar[idx].addr); 654 } 655 656 static void 657 unregister_bar(struct pci_devinst *pi, int idx) 658 { 659 660 modify_bar_registration(pi, idx, 0); 661 } 662 663 static void 664 register_bar(struct pci_devinst *pi, int idx) 665 { 666 667 modify_bar_registration(pi, idx, 1); 668 } 669 670 /* Is the ROM enabled for the emulated pci device? */ 671 static int 672 romen(struct pci_devinst *pi) 673 { 674 return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) == 675 PCIM_BIOS_ENABLE; 676 } 677 678 /* Are we decoding i/o port accesses for the emulated pci device? */ 679 static int 680 porten(struct pci_devinst *pi) 681 { 682 uint16_t cmd; 683 684 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 685 686 return (cmd & PCIM_CMD_PORTEN); 687 } 688 689 /* Are we decoding memory accesses for the emulated pci device? */ 690 static int 691 memen(struct pci_devinst *pi) 692 { 693 uint16_t cmd; 694 695 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 696 697 return (cmd & PCIM_CMD_MEMEN); 698 } 699 700 /* 701 * Update the MMIO or I/O address that is decoded by the BAR register. 702 * 703 * If the pci device has enabled the address space decoding then intercept 704 * the address range decoded by the BAR register. 705 */ 706 static void 707 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 708 { 709 int decode; 710 711 if (pi->pi_bar[idx].type == PCIBAR_IO) 712 decode = porten(pi); 713 else 714 decode = memen(pi); 715 716 if (decode) 717 unregister_bar(pi, idx); 718 719 switch (type) { 720 case PCIBAR_IO: 721 case PCIBAR_MEM32: 722 pi->pi_bar[idx].addr = addr; 723 break; 724 case PCIBAR_MEM64: 725 pi->pi_bar[idx].addr &= ~0xffffffffUL; 726 pi->pi_bar[idx].addr |= addr; 727 break; 728 case PCIBAR_MEMHI64: 729 pi->pi_bar[idx].addr &= 0xffffffff; 730 pi->pi_bar[idx].addr |= addr; 731 break; 732 default: 733 assert(0); 734 } 735 736 if (decode) 737 register_bar(pi, idx); 738 } 739 740 int 741 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 742 uint64_t size) 743 { 744 assert((type == PCIBAR_ROM) || (idx >= 0 && idx <= PCI_BARMAX)); 745 assert((type != PCIBAR_ROM) || (idx == PCI_ROM_IDX)); 746 747 if ((size & (size - 1)) != 0) 748 size = 1UL << flsl(size); /* round up to a power of 2 */ 749 750 /* Enforce minimum BAR sizes required by the PCI standard */ 751 if (type == PCIBAR_IO) { 752 if (size < 4) 753 size = 4; 754 } else if (type == PCIBAR_ROM) { 755 if (size < ~PCIM_BIOS_ADDR_MASK + 1) 756 size = ~PCIM_BIOS_ADDR_MASK + 1; 757 } else { 758 if (size < 16) 759 size = 16; 760 } 761 762 /* 763 * To reduce fragmentation of the MMIO space, we allocate the BARs by 764 * size. Therefore, don't allocate the BAR yet. We create a list of all 765 * BAR allocation which is sorted by BAR size. When all PCI devices are 766 * initialized, we will assign an address to the BARs. 767 */ 768 769 /* create a new list entry */ 770 struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar)); 771 memset(new_bar, 0, sizeof(*new_bar)); 772 new_bar->pdi = pdi; 773 new_bar->idx = idx; 774 new_bar->type = type; 775 new_bar->size = size; 776 777 /* 778 * Search for a BAR which size is lower than the size of our newly 779 * allocated BAR. 780 */ 781 struct pci_bar_allocation *bar = NULL; 782 TAILQ_FOREACH(bar, &pci_bars, chain) { 783 if (bar->size < size) { 784 break; 785 } 786 } 787 788 if (bar == NULL) { 789 /* 790 * Either the list is empty or new BAR is the smallest BAR of 791 * the list. Append it to the end of our list. 792 */ 793 TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain); 794 } else { 795 /* 796 * The found BAR is smaller than our new BAR. For that reason, 797 * insert our new BAR before the found BAR. 798 */ 799 TAILQ_INSERT_BEFORE(bar, new_bar, chain); 800 } 801 802 /* 803 * pci_passthru devices synchronize their physical and virtual command 804 * register on init. For that reason, the virtual cmd reg should be 805 * updated as early as possible. 806 */ 807 uint16_t enbit = 0; 808 switch (type) { 809 case PCIBAR_IO: 810 enbit = PCIM_CMD_PORTEN; 811 break; 812 case PCIBAR_MEM64: 813 case PCIBAR_MEM32: 814 enbit = PCIM_CMD_MEMEN; 815 break; 816 default: 817 enbit = 0; 818 break; 819 } 820 821 const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); 822 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); 823 824 return (0); 825 } 826 827 static int 828 pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx, 829 const enum pcibar_type type, const uint64_t size) 830 { 831 int error; 832 uint64_t *baseptr, limit, addr, mask, lobits, bar; 833 834 switch (type) { 835 case PCIBAR_NONE: 836 baseptr = NULL; 837 addr = mask = lobits = 0; 838 break; 839 case PCIBAR_IO: 840 baseptr = &pci_emul_iobase; 841 limit = PCI_EMUL_IOLIMIT; 842 mask = PCIM_BAR_IO_BASE; 843 lobits = PCIM_BAR_IO_SPACE; 844 break; 845 case PCIBAR_MEM64: 846 /* 847 * XXX 848 * Some drivers do not work well if the 64-bit BAR is allocated 849 * above 4GB. Allow for this by allocating small requests under 850 * 4GB unless then allocation size is larger than some arbitrary 851 * number (128MB currently). 852 */ 853 if (size > 128 * 1024 * 1024) { 854 baseptr = &pci_emul_membase64; 855 limit = pci_emul_memlim64; 856 mask = PCIM_BAR_MEM_BASE; 857 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 858 PCIM_BAR_MEM_PREFETCH; 859 } else { 860 baseptr = &pci_emul_membase32; 861 limit = PCI_EMUL_MEMLIMIT32; 862 mask = PCIM_BAR_MEM_BASE; 863 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 864 } 865 break; 866 case PCIBAR_MEM32: 867 baseptr = &pci_emul_membase32; 868 limit = PCI_EMUL_MEMLIMIT32; 869 mask = PCIM_BAR_MEM_BASE; 870 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 871 break; 872 case PCIBAR_ROM: 873 /* do not claim memory for ROM. OVMF will do it for us. */ 874 baseptr = NULL; 875 limit = 0; 876 mask = PCIM_BIOS_ADDR_MASK; 877 lobits = 0; 878 break; 879 default: 880 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 881 #ifdef FreeBSD 882 assert(0); 883 #else 884 abort(); 885 #endif 886 } 887 888 if (baseptr != NULL) { 889 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 890 if (error != 0) 891 return (error); 892 } else { 893 addr = 0; 894 } 895 896 pdi->pi_bar[idx].type = type; 897 pdi->pi_bar[idx].addr = addr; 898 pdi->pi_bar[idx].size = size; 899 /* 900 * passthru devices are using same lobits as physical device they set 901 * this property 902 */ 903 if (pdi->pi_bar[idx].lobits != 0) { 904 lobits = pdi->pi_bar[idx].lobits; 905 } else { 906 pdi->pi_bar[idx].lobits = lobits; 907 } 908 909 /* Initialize the BAR register in config space */ 910 bar = (addr & mask) | lobits; 911 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 912 913 if (type == PCIBAR_MEM64) { 914 assert(idx + 1 <= PCI_BARMAX); 915 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 916 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 917 } 918 919 if (type != PCIBAR_ROM) { 920 register_bar(pdi, idx); 921 } 922 923 return (0); 924 } 925 926 int 927 pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size, 928 void **const addr) 929 { 930 /* allocate ROM space once on first call */ 931 if (pci_emul_rombase == 0) { 932 pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM, 933 "pcirom", PCI_EMUL_ROMSIZE); 934 if (pci_emul_rombase == MAP_FAILED) { 935 warnx("%s: failed to create rom segment", __func__); 936 return (-1); 937 } 938 pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE; 939 pci_emul_romoffset = 0; 940 } 941 942 /* ROM size should be a power of 2 and greater than 2 KB */ 943 const uint64_t rom_size = MAX(1UL << flsl(size), 944 ~PCIM_BIOS_ADDR_MASK + 1); 945 946 /* check if ROM fits into ROM space */ 947 if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE) { 948 warnx("%s: no space left in rom segment:", __func__); 949 warnx("%16lu bytes left", 950 PCI_EMUL_ROMSIZE - pci_emul_romoffset); 951 warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus, 952 pdi->pi_slot, pdi->pi_func); 953 return (-1); 954 } 955 956 /* allocate ROM BAR */ 957 const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM, 958 rom_size); 959 if (error) 960 return error; 961 962 /* return address */ 963 *addr = pci_emul_rombase + pci_emul_romoffset; 964 965 /* save offset into ROM Space */ 966 pdi->pi_romoffset = pci_emul_romoffset; 967 968 /* increase offset for next ROM */ 969 pci_emul_romoffset += rom_size; 970 971 return (0); 972 } 973 974 int 975 pci_emul_add_boot_device(struct pci_devinst *pi, int bootindex) 976 { 977 struct boot_device *new_device, *device; 978 979 /* don't permit a negative bootindex */ 980 if (bootindex < 0) { 981 errx(4, "Invalid bootindex %d for %s", bootindex, pi->pi_name); 982 } 983 984 /* alloc new boot device */ 985 new_device = calloc(1, sizeof(struct boot_device)); 986 if (new_device == NULL) { 987 return (ENOMEM); 988 } 989 new_device->pdi = pi; 990 new_device->bootindex = bootindex; 991 992 /* search for boot device with higher boot index */ 993 TAILQ_FOREACH(device, &boot_devices, boot_device_chain) { 994 if (device->bootindex == bootindex) { 995 errx(4, 996 "Could not set bootindex %d for %s. Bootindex already occupied by %s", 997 bootindex, pi->pi_name, device->pdi->pi_name); 998 } else if (device->bootindex > bootindex) { 999 break; 1000 } 1001 } 1002 1003 /* add boot device to queue */ 1004 if (device == NULL) { 1005 TAILQ_INSERT_TAIL(&boot_devices, new_device, boot_device_chain); 1006 } else { 1007 TAILQ_INSERT_BEFORE(device, new_device, boot_device_chain); 1008 } 1009 1010 return (0); 1011 } 1012 1013 #define CAP_START_OFFSET 0x40 1014 static int 1015 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 1016 { 1017 int i, capoff, reallen; 1018 uint16_t sts; 1019 1020 assert(caplen > 0); 1021 1022 reallen = roundup2(caplen, 4); /* dword aligned */ 1023 1024 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1025 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 1026 capoff = CAP_START_OFFSET; 1027 else 1028 capoff = pi->pi_capend + 1; 1029 1030 /* Check if we have enough space */ 1031 if (capoff + reallen > PCI_REGMAX + 1) 1032 return (-1); 1033 1034 /* Set the previous capability pointer */ 1035 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 1036 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 1037 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 1038 } else 1039 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 1040 1041 /* Copy the capability */ 1042 for (i = 0; i < caplen; i++) 1043 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 1044 1045 /* Set the next capability pointer */ 1046 pci_set_cfgdata8(pi, capoff + 1, 0); 1047 1048 pi->pi_prevcap = capoff; 1049 pi->pi_capend = capoff + reallen - 1; 1050 return (0); 1051 } 1052 1053 static struct pci_devemu * 1054 pci_emul_finddev(const char *name) 1055 { 1056 struct pci_devemu **pdpp, *pdp; 1057 1058 SET_FOREACH(pdpp, pci_devemu_set) { 1059 pdp = *pdpp; 1060 if (!strcmp(pdp->pe_emu, name)) { 1061 return (pdp); 1062 } 1063 } 1064 1065 return (NULL); 1066 } 1067 1068 static int 1069 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 1070 int func, struct funcinfo *fi) 1071 { 1072 struct pci_devinst *pdi; 1073 int err; 1074 1075 pdi = calloc(1, sizeof(struct pci_devinst)); 1076 1077 pdi->pi_vmctx = ctx; 1078 pdi->pi_bus = bus; 1079 pdi->pi_slot = slot; 1080 pdi->pi_func = func; 1081 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 1082 pdi->pi_lintr.pin = 0; 1083 pdi->pi_lintr.state = IDLE; 1084 pdi->pi_lintr.pirq_pin = 0; 1085 pdi->pi_lintr.ioapic_irq = 0; 1086 pdi->pi_d = pde; 1087 snprintf(pdi->pi_name, PI_NAMESZ, "%s@pci.%d.%d.%d", pde->pe_emu, bus, 1088 slot, func); 1089 1090 /* Disable legacy interrupts */ 1091 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 1092 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 1093 1094 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); 1095 1096 err = (*pde->pe_init)(pdi, fi->fi_config); 1097 if (err == 0) 1098 fi->fi_devi = pdi; 1099 else 1100 free(pdi); 1101 1102 return (err); 1103 } 1104 1105 void 1106 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 1107 { 1108 int mmc; 1109 1110 /* Number of msi messages must be a power of 2 between 1 and 32 */ 1111 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 1112 mmc = ffs(msgnum) - 1; 1113 1114 bzero(msicap, sizeof(struct msicap)); 1115 msicap->capid = PCIY_MSI; 1116 msicap->nextptr = nextptr; 1117 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 1118 } 1119 1120 int 1121 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 1122 { 1123 struct msicap msicap; 1124 1125 pci_populate_msicap(&msicap, msgnum, 0); 1126 1127 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 1128 } 1129 1130 static void 1131 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 1132 uint32_t msix_tab_size) 1133 { 1134 1135 assert(msix_tab_size % 4096 == 0); 1136 1137 bzero(msixcap, sizeof(struct msixcap)); 1138 msixcap->capid = PCIY_MSIX; 1139 1140 /* 1141 * Message Control Register, all fields set to 1142 * zero except for the Table Size. 1143 * Note: Table size N is encoded as N-1 1144 */ 1145 msixcap->msgctrl = msgnum - 1; 1146 1147 /* 1148 * MSI-X BAR setup: 1149 * - MSI-X table start at offset 0 1150 * - PBA table starts at a 4K aligned offset after the MSI-X table 1151 */ 1152 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 1153 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 1154 } 1155 1156 static void 1157 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 1158 { 1159 int i, table_size; 1160 1161 assert(table_entries > 0); 1162 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 1163 1164 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 1165 pi->pi_msix.table = calloc(1, table_size); 1166 1167 /* set mask bit of vector control register */ 1168 for (i = 0; i < table_entries; i++) 1169 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 1170 } 1171 1172 int 1173 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 1174 { 1175 uint32_t tab_size; 1176 struct msixcap msixcap; 1177 1178 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 1179 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 1180 1181 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 1182 1183 /* Align table size to nearest 4K */ 1184 tab_size = roundup2(tab_size, 4096); 1185 1186 pi->pi_msix.table_bar = barnum; 1187 pi->pi_msix.pba_bar = barnum; 1188 pi->pi_msix.table_offset = 0; 1189 pi->pi_msix.table_count = msgnum; 1190 pi->pi_msix.pba_offset = tab_size; 1191 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 1192 1193 pci_msix_table_init(pi, msgnum); 1194 1195 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 1196 1197 /* allocate memory for MSI-X Table and PBA */ 1198 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 1199 tab_size + pi->pi_msix.pba_size); 1200 1201 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 1202 sizeof(msixcap))); 1203 } 1204 1205 static void 1206 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1207 int bytes, uint32_t val) 1208 { 1209 uint16_t msgctrl, rwmask; 1210 int off; 1211 1212 off = offset - capoff; 1213 /* Message Control Register */ 1214 if (off == 2 && bytes == 2) { 1215 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 1216 msgctrl = pci_get_cfgdata16(pi, offset); 1217 msgctrl &= ~rwmask; 1218 msgctrl |= val & rwmask; 1219 val = msgctrl; 1220 1221 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 1222 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 1223 pci_lintr_update(pi); 1224 } 1225 1226 CFGWRITE(pi, offset, val, bytes); 1227 } 1228 1229 static void 1230 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1231 int bytes, uint32_t val) 1232 { 1233 uint16_t msgctrl, rwmask, msgdata, mme; 1234 uint32_t addrlo; 1235 1236 /* 1237 * If guest is writing to the message control register make sure 1238 * we do not overwrite read-only fields. 1239 */ 1240 if ((offset - capoff) == 2 && bytes == 2) { 1241 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 1242 msgctrl = pci_get_cfgdata16(pi, offset); 1243 msgctrl &= ~rwmask; 1244 msgctrl |= val & rwmask; 1245 val = msgctrl; 1246 } 1247 CFGWRITE(pi, offset, val, bytes); 1248 1249 msgctrl = pci_get_cfgdata16(pi, capoff + 2); 1250 addrlo = pci_get_cfgdata32(pi, capoff + 4); 1251 if (msgctrl & PCIM_MSICTRL_64BIT) 1252 msgdata = pci_get_cfgdata16(pi, capoff + 12); 1253 else 1254 msgdata = pci_get_cfgdata16(pi, capoff + 8); 1255 1256 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 1257 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 1258 if (pi->pi_msi.enabled) { 1259 pi->pi_msi.addr = addrlo; 1260 pi->pi_msi.msg_data = msgdata; 1261 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 1262 } else { 1263 pi->pi_msi.maxmsgnum = 0; 1264 } 1265 pci_lintr_update(pi); 1266 } 1267 1268 static void 1269 pciecap_cfgwrite(struct pci_devinst *pi, int capoff __unused, int offset, 1270 int bytes, uint32_t val) 1271 { 1272 1273 /* XXX don't write to the readonly parts */ 1274 CFGWRITE(pi, offset, val, bytes); 1275 } 1276 1277 #define PCIECAP_VERSION 0x2 1278 int 1279 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 1280 { 1281 int err; 1282 struct pciecap pciecap; 1283 1284 bzero(&pciecap, sizeof(pciecap)); 1285 1286 /* 1287 * Use the integrated endpoint type for endpoints on a root complex bus. 1288 * 1289 * NB: bhyve currently only supports a single PCI bus that is the root 1290 * complex bus, so all endpoints are integrated. 1291 */ 1292 if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0)) 1293 type = PCIEM_TYPE_ROOT_INT_EP; 1294 1295 pciecap.capid = PCIY_EXPRESS; 1296 pciecap.pcie_capabilities = PCIECAP_VERSION | type; 1297 if (type != PCIEM_TYPE_ROOT_INT_EP) { 1298 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 1299 pciecap.link_status = 0x11; /* gen1, x1 */ 1300 } 1301 1302 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 1303 return (err); 1304 } 1305 1306 /* 1307 * This function assumes that 'coff' is in the capabilities region of the 1308 * config space. A capoff parameter of zero will force a search for the 1309 * offset and type. 1310 */ 1311 void 1312 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, 1313 uint8_t capoff, int capid) 1314 { 1315 uint8_t nextoff; 1316 1317 /* Do not allow un-aligned writes */ 1318 if ((offset & (bytes - 1)) != 0) 1319 return; 1320 1321 if (capoff == 0) { 1322 /* Find the capability that we want to update */ 1323 capoff = CAP_START_OFFSET; 1324 while (1) { 1325 nextoff = pci_get_cfgdata8(pi, capoff + 1); 1326 if (nextoff == 0) 1327 break; 1328 if (offset >= capoff && offset < nextoff) 1329 break; 1330 1331 capoff = nextoff; 1332 } 1333 assert(offset >= capoff); 1334 capid = pci_get_cfgdata8(pi, capoff); 1335 } 1336 1337 /* 1338 * Capability ID and Next Capability Pointer are readonly. 1339 * However, some o/s's do 4-byte writes that include these. 1340 * For this case, trim the write back to 2 bytes and adjust 1341 * the data. 1342 */ 1343 if (offset == capoff || offset == capoff + 1) { 1344 if (offset == capoff && bytes == 4) { 1345 bytes = 2; 1346 offset += 2; 1347 val >>= 16; 1348 } else 1349 return; 1350 } 1351 1352 switch (capid) { 1353 case PCIY_MSI: 1354 msicap_cfgwrite(pi, capoff, offset, bytes, val); 1355 break; 1356 case PCIY_MSIX: 1357 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 1358 break; 1359 case PCIY_EXPRESS: 1360 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 1361 break; 1362 default: 1363 break; 1364 } 1365 } 1366 1367 static int 1368 pci_emul_iscap(struct pci_devinst *pi, int offset) 1369 { 1370 uint16_t sts; 1371 1372 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1373 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1374 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1375 return (1); 1376 } 1377 return (0); 1378 } 1379 1380 static int 1381 pci_emul_fallback_handler(struct vcpu *vcpu __unused, int dir, 1382 uint64_t addr __unused, int size __unused, uint64_t *val, 1383 void *arg1 __unused, long arg2 __unused) 1384 { 1385 /* 1386 * Ignore writes; return 0xff's for reads. The mem read code 1387 * will take care of truncating to the correct size. 1388 */ 1389 if (dir == MEM_F_READ) { 1390 *val = 0xffffffffffffffff; 1391 } 1392 1393 return (0); 1394 } 1395 1396 static int 1397 pci_emul_ecfg_handler(struct vcpu *vcpu __unused, int dir, uint64_t addr, 1398 int bytes, uint64_t *val, void *arg1 __unused, long arg2 __unused) 1399 { 1400 int bus, slot, func, coff, in; 1401 1402 coff = addr & 0xfff; 1403 func = (addr >> 12) & 0x7; 1404 slot = (addr >> 15) & 0x1f; 1405 bus = (addr >> 20) & 0xff; 1406 in = (dir == MEM_F_READ); 1407 if (in) 1408 *val = ~0UL; 1409 pci_cfgrw(in, bus, slot, func, coff, bytes, (uint32_t *)val); 1410 return (0); 1411 } 1412 1413 uint64_t 1414 pci_ecfg_base(void) 1415 { 1416 1417 return (PCI_EMUL_ECFG_BASE); 1418 } 1419 1420 static int 1421 init_bootorder(void) 1422 { 1423 struct boot_device *device; 1424 FILE *fp; 1425 char *bootorder; 1426 size_t bootorder_len; 1427 1428 if (TAILQ_EMPTY(&boot_devices)) 1429 return (0); 1430 1431 fp = open_memstream(&bootorder, &bootorder_len); 1432 TAILQ_FOREACH(device, &boot_devices, boot_device_chain) { 1433 fprintf(fp, "/pci@i0cf8/pci@%d,%d\n", 1434 device->pdi->pi_slot, device->pdi->pi_func); 1435 } 1436 fclose(fp); 1437 1438 return (qemu_fwcfg_add_file("bootorder", bootorder_len, bootorder)); 1439 } 1440 1441 #define BUSIO_ROUNDUP 32 1442 #define BUSMEM32_ROUNDUP (1024 * 1024) 1443 #define BUSMEM64_ROUNDUP (512 * 1024 * 1024) 1444 1445 int 1446 init_pci(struct vmctx *ctx) 1447 { 1448 char node_name[sizeof("pci.XXX.XX.X")]; 1449 struct mem_range mr; 1450 struct pci_devemu *pde; 1451 struct businfo *bi; 1452 struct slotinfo *si; 1453 struct funcinfo *fi; 1454 nvlist_t *nvl; 1455 const char *emul; 1456 size_t lowmem; 1457 int bus, slot, func; 1458 int error; 1459 1460 if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32) 1461 errx(EX_OSERR, "Invalid lowmem limit"); 1462 1463 pci_emul_iobase = PCI_EMUL_IOBASE; 1464 pci_emul_membase32 = PCI_EMUL_MEMBASE32; 1465 1466 pci_emul_membase64 = 4*GB + vm_get_highmem_size(ctx); 1467 pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64); 1468 pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64; 1469 1470 TAILQ_INIT(&boot_devices); 1471 1472 for (bus = 0; bus < MAXBUSES; bus++) { 1473 snprintf(node_name, sizeof(node_name), "pci.%d", bus); 1474 nvl = find_config_node(node_name); 1475 if (nvl == NULL) 1476 continue; 1477 pci_businfo[bus] = calloc(1, sizeof(struct businfo)); 1478 bi = pci_businfo[bus]; 1479 1480 /* 1481 * Keep track of the i/o and memory resources allocated to 1482 * this bus. 1483 */ 1484 bi->iobase = pci_emul_iobase; 1485 bi->membase32 = pci_emul_membase32; 1486 bi->membase64 = pci_emul_membase64; 1487 1488 /* first run: init devices */ 1489 for (slot = 0; slot < MAXSLOTS; slot++) { 1490 si = &bi->slotinfo[slot]; 1491 for (func = 0; func < MAXFUNCS; func++) { 1492 fi = &si->si_funcs[func]; 1493 snprintf(node_name, sizeof(node_name), 1494 "pci.%d.%d.%d", bus, slot, func); 1495 nvl = find_config_node(node_name); 1496 if (nvl == NULL) 1497 continue; 1498 1499 fi->fi_config = nvl; 1500 emul = get_config_value_node(nvl, "device"); 1501 if (emul == NULL) { 1502 EPRINTLN("pci slot %d:%d:%d: missing " 1503 "\"device\" value", bus, slot, func); 1504 return (EINVAL); 1505 } 1506 pde = pci_emul_finddev(emul); 1507 if (pde == NULL) { 1508 EPRINTLN("pci slot %d:%d:%d: unknown " 1509 "device \"%s\"", bus, slot, func, 1510 emul); 1511 return (EINVAL); 1512 } 1513 if (pde->pe_alias != NULL) { 1514 EPRINTLN("pci slot %d:%d:%d: legacy " 1515 "device \"%s\", use \"%s\" instead", 1516 bus, slot, func, emul, 1517 pde->pe_alias); 1518 return (EINVAL); 1519 } 1520 fi->fi_pde = pde; 1521 error = pci_emul_init(ctx, pde, bus, slot, 1522 func, fi); 1523 if (error) 1524 return (error); 1525 } 1526 } 1527 1528 /* second run: assign BARs and free list */ 1529 struct pci_bar_allocation *bar; 1530 struct pci_bar_allocation *bar_tmp; 1531 TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) { 1532 pci_emul_assign_bar(bar->pdi, bar->idx, bar->type, 1533 bar->size); 1534 free(bar); 1535 } 1536 TAILQ_INIT(&pci_bars); 1537 1538 /* 1539 * Add some slop to the I/O and memory resources decoded by 1540 * this bus to give a guest some flexibility if it wants to 1541 * reprogram the BARs. 1542 */ 1543 pci_emul_iobase += BUSIO_ROUNDUP; 1544 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1545 bi->iolimit = pci_emul_iobase; 1546 1547 pci_emul_membase32 += BUSMEM32_ROUNDUP; 1548 pci_emul_membase32 = roundup2(pci_emul_membase32, 1549 BUSMEM32_ROUNDUP); 1550 bi->memlimit32 = pci_emul_membase32; 1551 1552 pci_emul_membase64 += BUSMEM64_ROUNDUP; 1553 pci_emul_membase64 = roundup2(pci_emul_membase64, 1554 BUSMEM64_ROUNDUP); 1555 bi->memlimit64 = pci_emul_membase64; 1556 } 1557 1558 /* 1559 * PCI backends are initialized before routing INTx interrupts 1560 * so that LPC devices are able to reserve ISA IRQs before 1561 * routing PIRQ pins. 1562 */ 1563 for (bus = 0; bus < MAXBUSES; bus++) { 1564 if ((bi = pci_businfo[bus]) == NULL) 1565 continue; 1566 1567 for (slot = 0; slot < MAXSLOTS; slot++) { 1568 si = &bi->slotinfo[slot]; 1569 for (func = 0; func < MAXFUNCS; func++) { 1570 fi = &si->si_funcs[func]; 1571 if (fi->fi_devi == NULL) 1572 continue; 1573 pci_lintr_route(fi->fi_devi); 1574 } 1575 } 1576 } 1577 lpc_pirq_routed(); 1578 1579 if ((error = init_bootorder()) != 0) { 1580 warnx("%s: Unable to init bootorder", __func__); 1581 return (error); 1582 } 1583 1584 /* 1585 * The guest physical memory map looks like the following: 1586 * [0, lowmem) guest system memory 1587 * [lowmem, 0xC0000000) memory hole (may be absent) 1588 * [0xC0000000, 0xE0000000) PCI hole (32-bit BAR allocation) 1589 * [0xE0000000, 0xF0000000) PCI extended config window 1590 * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware 1591 * [4GB, 4GB + highmem) 1592 */ 1593 1594 /* 1595 * Accesses to memory addresses that are not allocated to system 1596 * memory or PCI devices return 0xff's. 1597 */ 1598 lowmem = vm_get_lowmem_size(ctx); 1599 bzero(&mr, sizeof(struct mem_range)); 1600 mr.name = "PCI hole"; 1601 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1602 mr.base = lowmem; 1603 mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1604 mr.handler = pci_emul_fallback_handler; 1605 error = register_mem_fallback(&mr); 1606 assert(error == 0); 1607 1608 /* PCI extended config space */ 1609 bzero(&mr, sizeof(struct mem_range)); 1610 mr.name = "PCI ECFG"; 1611 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1612 mr.base = PCI_EMUL_ECFG_BASE; 1613 mr.size = PCI_EMUL_ECFG_SIZE; 1614 mr.handler = pci_emul_ecfg_handler; 1615 error = register_mem(&mr); 1616 assert(error == 0); 1617 1618 return (0); 1619 } 1620 1621 static void 1622 pci_apic_prt_entry(int bus __unused, int slot, int pin, int pirq_pin __unused, 1623 int ioapic_irq, void *arg __unused) 1624 { 1625 1626 dsdt_line(" Package ()"); 1627 dsdt_line(" {"); 1628 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1629 dsdt_line(" 0x%02X,", pin - 1); 1630 dsdt_line(" Zero,"); 1631 dsdt_line(" 0x%X", ioapic_irq); 1632 dsdt_line(" },"); 1633 } 1634 1635 static void 1636 pci_pirq_prt_entry(int bus __unused, int slot, int pin, int pirq_pin, 1637 int ioapic_irq __unused, void *arg __unused) 1638 { 1639 char *name; 1640 1641 name = lpc_pirq_name(pirq_pin); 1642 if (name == NULL) 1643 return; 1644 dsdt_line(" Package ()"); 1645 dsdt_line(" {"); 1646 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1647 dsdt_line(" 0x%02X,", pin - 1); 1648 dsdt_line(" %s,", name); 1649 dsdt_line(" 0x00"); 1650 dsdt_line(" },"); 1651 free(name); 1652 } 1653 1654 /* 1655 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1656 * corresponding to each PCI bus. 1657 */ 1658 static void 1659 pci_bus_write_dsdt(int bus) 1660 { 1661 struct businfo *bi; 1662 struct slotinfo *si; 1663 struct pci_devinst *pi; 1664 int count, func, slot; 1665 1666 /* 1667 * If there are no devices on this 'bus' then just return. 1668 */ 1669 if ((bi = pci_businfo[bus]) == NULL) { 1670 /* 1671 * Bus 0 is special because it decodes the I/O ports used 1672 * for PCI config space access even if there are no devices 1673 * on it. 1674 */ 1675 if (bus != 0) 1676 return; 1677 } 1678 1679 dsdt_line(" Device (PC%02X)", bus); 1680 dsdt_line(" {"); 1681 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1682 1683 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1684 dsdt_line(" {"); 1685 dsdt_line(" Return (0x%08X)", bus); 1686 dsdt_line(" }"); 1687 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1688 dsdt_line(" {"); 1689 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1690 "MaxFixed, PosDecode,"); 1691 dsdt_line(" 0x0000, // Granularity"); 1692 dsdt_line(" 0x%04X, // Range Minimum", bus); 1693 dsdt_line(" 0x%04X, // Range Maximum", bus); 1694 dsdt_line(" 0x0000, // Translation Offset"); 1695 dsdt_line(" 0x0001, // Length"); 1696 dsdt_line(" ,, )"); 1697 1698 if (bus == 0) { 1699 dsdt_indent(3); 1700 dsdt_fixed_ioport(0xCF8, 8); 1701 dsdt_unindent(3); 1702 1703 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1704 "PosDecode, EntireRange,"); 1705 dsdt_line(" 0x0000, // Granularity"); 1706 dsdt_line(" 0x0000, // Range Minimum"); 1707 dsdt_line(" 0x0CF7, // Range Maximum"); 1708 dsdt_line(" 0x0000, // Translation Offset"); 1709 dsdt_line(" 0x0CF8, // Length"); 1710 dsdt_line(" ,, , TypeStatic)"); 1711 1712 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1713 "PosDecode, EntireRange,"); 1714 dsdt_line(" 0x0000, // Granularity"); 1715 dsdt_line(" 0x0D00, // Range Minimum"); 1716 dsdt_line(" 0x%04X, // Range Maximum", 1717 PCI_EMUL_IOBASE - 1); 1718 dsdt_line(" 0x0000, // Translation Offset"); 1719 dsdt_line(" 0x%04X, // Length", 1720 PCI_EMUL_IOBASE - 0x0D00); 1721 dsdt_line(" ,, , TypeStatic)"); 1722 1723 if (bi == NULL) { 1724 dsdt_line(" })"); 1725 goto done; 1726 } 1727 } 1728 assert(bi != NULL); 1729 1730 /* i/o window */ 1731 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1732 "PosDecode, EntireRange,"); 1733 dsdt_line(" 0x0000, // Granularity"); 1734 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1735 dsdt_line(" 0x%04X, // Range Maximum", 1736 bi->iolimit - 1); 1737 dsdt_line(" 0x0000, // Translation Offset"); 1738 dsdt_line(" 0x%04X, // Length", 1739 bi->iolimit - bi->iobase); 1740 dsdt_line(" ,, , TypeStatic)"); 1741 1742 /* mmio window (32-bit) */ 1743 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1744 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1745 dsdt_line(" 0x00000000, // Granularity"); 1746 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1747 dsdt_line(" 0x%08X, // Range Maximum\n", 1748 bi->memlimit32 - 1); 1749 dsdt_line(" 0x00000000, // Translation Offset"); 1750 dsdt_line(" 0x%08X, // Length\n", 1751 bi->memlimit32 - bi->membase32); 1752 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1753 1754 /* mmio window (64-bit) */ 1755 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1756 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1757 dsdt_line(" 0x0000000000000000, // Granularity"); 1758 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1759 dsdt_line(" 0x%016lX, // Range Maximum\n", 1760 bi->memlimit64 - 1); 1761 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1762 dsdt_line(" 0x%016lX, // Length\n", 1763 bi->memlimit64 - bi->membase64); 1764 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1765 dsdt_line(" })"); 1766 1767 count = pci_count_lintr(bus); 1768 if (count != 0) { 1769 dsdt_indent(2); 1770 dsdt_line("Name (PPRT, Package ()"); 1771 dsdt_line("{"); 1772 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1773 dsdt_line("})"); 1774 dsdt_line("Name (APRT, Package ()"); 1775 dsdt_line("{"); 1776 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1777 dsdt_line("})"); 1778 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1779 dsdt_line("{"); 1780 dsdt_line(" If (PICM)"); 1781 dsdt_line(" {"); 1782 dsdt_line(" Return (APRT)"); 1783 dsdt_line(" }"); 1784 dsdt_line(" Else"); 1785 dsdt_line(" {"); 1786 dsdt_line(" Return (PPRT)"); 1787 dsdt_line(" }"); 1788 dsdt_line("}"); 1789 dsdt_unindent(2); 1790 } 1791 1792 dsdt_indent(2); 1793 for (slot = 0; slot < MAXSLOTS; slot++) { 1794 si = &bi->slotinfo[slot]; 1795 for (func = 0; func < MAXFUNCS; func++) { 1796 pi = si->si_funcs[func].fi_devi; 1797 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1798 pi->pi_d->pe_write_dsdt(pi); 1799 } 1800 } 1801 dsdt_unindent(2); 1802 done: 1803 dsdt_line(" }"); 1804 } 1805 1806 void 1807 pci_write_dsdt(void) 1808 { 1809 int bus; 1810 1811 dsdt_indent(1); 1812 dsdt_line("Name (PICM, 0x00)"); 1813 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1814 dsdt_line("{"); 1815 dsdt_line(" Store (Arg0, PICM)"); 1816 dsdt_line("}"); 1817 dsdt_line(""); 1818 dsdt_line("Scope (_SB)"); 1819 dsdt_line("{"); 1820 for (bus = 0; bus < MAXBUSES; bus++) 1821 pci_bus_write_dsdt(bus); 1822 dsdt_line("}"); 1823 dsdt_unindent(1); 1824 } 1825 1826 int 1827 pci_bus_configured(int bus) 1828 { 1829 assert(bus >= 0 && bus < MAXBUSES); 1830 return (pci_businfo[bus] != NULL); 1831 } 1832 1833 int 1834 pci_msi_enabled(struct pci_devinst *pi) 1835 { 1836 return (pi->pi_msi.enabled); 1837 } 1838 1839 int 1840 pci_msi_maxmsgnum(struct pci_devinst *pi) 1841 { 1842 if (pi->pi_msi.enabled) 1843 return (pi->pi_msi.maxmsgnum); 1844 else 1845 return (0); 1846 } 1847 1848 int 1849 pci_msix_enabled(struct pci_devinst *pi) 1850 { 1851 1852 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1853 } 1854 1855 void 1856 pci_generate_msix(struct pci_devinst *pi, int index) 1857 { 1858 struct msix_table_entry *mte; 1859 1860 if (!pci_msix_enabled(pi)) 1861 return; 1862 1863 if (pi->pi_msix.function_mask) 1864 return; 1865 1866 if (index >= pi->pi_msix.table_count) 1867 return; 1868 1869 mte = &pi->pi_msix.table[index]; 1870 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1871 /* XXX Set PBA bit if interrupt is disabled */ 1872 vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); 1873 } 1874 } 1875 1876 void 1877 pci_generate_msi(struct pci_devinst *pi, int index) 1878 { 1879 1880 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1881 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1882 pi->pi_msi.msg_data + index); 1883 } 1884 } 1885 1886 static bool 1887 pci_lintr_permitted(struct pci_devinst *pi) 1888 { 1889 uint16_t cmd; 1890 1891 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1892 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1893 (cmd & PCIM_CMD_INTxDIS))); 1894 } 1895 1896 void 1897 pci_lintr_request(struct pci_devinst *pi) 1898 { 1899 struct businfo *bi; 1900 struct slotinfo *si; 1901 int bestpin, bestcount, pin; 1902 1903 bi = pci_businfo[pi->pi_bus]; 1904 assert(bi != NULL); 1905 1906 /* 1907 * Just allocate a pin from our slot. The pin will be 1908 * assigned IRQs later when interrupts are routed. 1909 */ 1910 si = &bi->slotinfo[pi->pi_slot]; 1911 bestpin = 0; 1912 bestcount = si->si_intpins[0].ii_count; 1913 for (pin = 1; pin < 4; pin++) { 1914 if (si->si_intpins[pin].ii_count < bestcount) { 1915 bestpin = pin; 1916 bestcount = si->si_intpins[pin].ii_count; 1917 } 1918 } 1919 1920 si->si_intpins[bestpin].ii_count++; 1921 pi->pi_lintr.pin = bestpin + 1; 1922 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1923 } 1924 1925 static void 1926 pci_lintr_route(struct pci_devinst *pi) 1927 { 1928 struct businfo *bi; 1929 struct intxinfo *ii; 1930 1931 if (pi->pi_lintr.pin == 0) 1932 return; 1933 1934 bi = pci_businfo[pi->pi_bus]; 1935 assert(bi != NULL); 1936 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1937 1938 /* 1939 * Attempt to allocate an I/O APIC pin for this intpin if one 1940 * is not yet assigned. 1941 */ 1942 if (ii->ii_ioapic_irq == 0) 1943 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); 1944 assert(ii->ii_ioapic_irq > 0); 1945 1946 /* 1947 * Attempt to allocate a PIRQ pin for this intpin if one is 1948 * not yet assigned. 1949 */ 1950 if (ii->ii_pirq_pin == 0) 1951 ii->ii_pirq_pin = pirq_alloc_pin(pi); 1952 assert(ii->ii_pirq_pin > 0); 1953 1954 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1955 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1956 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1957 } 1958 1959 void 1960 pci_lintr_assert(struct pci_devinst *pi) 1961 { 1962 1963 assert(pi->pi_lintr.pin > 0); 1964 1965 pthread_mutex_lock(&pi->pi_lintr.lock); 1966 if (pi->pi_lintr.state == IDLE) { 1967 if (pci_lintr_permitted(pi)) { 1968 pi->pi_lintr.state = ASSERTED; 1969 pci_irq_assert(pi); 1970 } else 1971 pi->pi_lintr.state = PENDING; 1972 } 1973 pthread_mutex_unlock(&pi->pi_lintr.lock); 1974 } 1975 1976 void 1977 pci_lintr_deassert(struct pci_devinst *pi) 1978 { 1979 1980 assert(pi->pi_lintr.pin > 0); 1981 1982 pthread_mutex_lock(&pi->pi_lintr.lock); 1983 if (pi->pi_lintr.state == ASSERTED) { 1984 pi->pi_lintr.state = IDLE; 1985 pci_irq_deassert(pi); 1986 } else if (pi->pi_lintr.state == PENDING) 1987 pi->pi_lintr.state = IDLE; 1988 pthread_mutex_unlock(&pi->pi_lintr.lock); 1989 } 1990 1991 static void 1992 pci_lintr_update(struct pci_devinst *pi) 1993 { 1994 1995 pthread_mutex_lock(&pi->pi_lintr.lock); 1996 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1997 pci_irq_deassert(pi); 1998 pi->pi_lintr.state = PENDING; 1999 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 2000 pi->pi_lintr.state = ASSERTED; 2001 pci_irq_assert(pi); 2002 } 2003 pthread_mutex_unlock(&pi->pi_lintr.lock); 2004 #ifndef __FreeBSD__ 2005 if (pi->pi_d->pe_lintrupdate != NULL) { 2006 pi->pi_d->pe_lintrupdate(pi); 2007 } 2008 #endif /* __FreeBSD__ */ 2009 } 2010 2011 int 2012 pci_count_lintr(int bus) 2013 { 2014 int count, slot, pin; 2015 struct slotinfo *slotinfo; 2016 2017 count = 0; 2018 if (pci_businfo[bus] != NULL) { 2019 for (slot = 0; slot < MAXSLOTS; slot++) { 2020 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 2021 for (pin = 0; pin < 4; pin++) { 2022 if (slotinfo->si_intpins[pin].ii_count != 0) 2023 count++; 2024 } 2025 } 2026 } 2027 return (count); 2028 } 2029 2030 void 2031 pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) 2032 { 2033 struct businfo *bi; 2034 struct slotinfo *si; 2035 struct intxinfo *ii; 2036 int slot, pin; 2037 2038 if ((bi = pci_businfo[bus]) == NULL) 2039 return; 2040 2041 for (slot = 0; slot < MAXSLOTS; slot++) { 2042 si = &bi->slotinfo[slot]; 2043 for (pin = 0; pin < 4; pin++) { 2044 ii = &si->si_intpins[pin]; 2045 if (ii->ii_count != 0) 2046 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 2047 ii->ii_ioapic_irq, arg); 2048 } 2049 } 2050 } 2051 2052 /* 2053 * Return 1 if the emulated device in 'slot' is a multi-function device. 2054 * Return 0 otherwise. 2055 */ 2056 static int 2057 pci_emul_is_mfdev(int bus, int slot) 2058 { 2059 struct businfo *bi; 2060 struct slotinfo *si; 2061 int f, numfuncs; 2062 2063 numfuncs = 0; 2064 if ((bi = pci_businfo[bus]) != NULL) { 2065 si = &bi->slotinfo[slot]; 2066 for (f = 0; f < MAXFUNCS; f++) { 2067 if (si->si_funcs[f].fi_devi != NULL) { 2068 numfuncs++; 2069 } 2070 } 2071 } 2072 return (numfuncs > 1); 2073 } 2074 2075 /* 2076 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 2077 * whether or not is a multi-function being emulated in the pci 'slot'. 2078 */ 2079 static void 2080 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 2081 { 2082 int mfdev; 2083 2084 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 2085 mfdev = pci_emul_is_mfdev(bus, slot); 2086 switch (bytes) { 2087 case 1: 2088 case 2: 2089 *rv &= ~PCIM_MFDEV; 2090 if (mfdev) { 2091 *rv |= PCIM_MFDEV; 2092 } 2093 break; 2094 case 4: 2095 *rv &= ~(PCIM_MFDEV << 16); 2096 if (mfdev) { 2097 *rv |= (PCIM_MFDEV << 16); 2098 } 2099 break; 2100 } 2101 } 2102 } 2103 2104 /* 2105 * Update device state in response to changes to the PCI command 2106 * register. 2107 */ 2108 void 2109 pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) 2110 { 2111 int i; 2112 uint16_t changed, new; 2113 2114 new = pci_get_cfgdata16(pi, PCIR_COMMAND); 2115 changed = old ^ new; 2116 2117 /* 2118 * If the MMIO or I/O address space decoding has changed then 2119 * register/unregister all BARs that decode that address space. 2120 */ 2121 for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) { 2122 switch (pi->pi_bar[i].type) { 2123 case PCIBAR_NONE: 2124 case PCIBAR_MEMHI64: 2125 break; 2126 case PCIBAR_IO: 2127 /* I/O address space decoding changed? */ 2128 if (changed & PCIM_CMD_PORTEN) { 2129 if (new & PCIM_CMD_PORTEN) 2130 register_bar(pi, i); 2131 else 2132 unregister_bar(pi, i); 2133 } 2134 break; 2135 case PCIBAR_ROM: 2136 /* skip (un-)register of ROM if it disabled */ 2137 if (!romen(pi)) 2138 break; 2139 /* fallthrough */ 2140 case PCIBAR_MEM32: 2141 case PCIBAR_MEM64: 2142 /* MMIO address space decoding changed? */ 2143 if (changed & PCIM_CMD_MEMEN) { 2144 if (new & PCIM_CMD_MEMEN) 2145 register_bar(pi, i); 2146 else 2147 unregister_bar(pi, i); 2148 } 2149 break; 2150 default: 2151 assert(0); 2152 } 2153 } 2154 2155 /* 2156 * If INTx has been unmasked and is pending, assert the 2157 * interrupt. 2158 */ 2159 pci_lintr_update(pi); 2160 } 2161 2162 static void 2163 pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) 2164 { 2165 int rshift; 2166 uint32_t cmd, old, readonly; 2167 2168 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 2169 2170 /* 2171 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. 2172 * 2173 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are 2174 * 'write 1 to clear'. However these bits are not set to '1' by 2175 * any device emulation so it is simpler to treat them as readonly. 2176 */ 2177 rshift = (coff & 0x3) * 8; 2178 readonly = 0xFFFFF880 >> rshift; 2179 2180 old = CFGREAD(pi, coff, bytes); 2181 new &= ~readonly; 2182 new |= (old & readonly); 2183 CFGWRITE(pi, coff, new, bytes); /* update config */ 2184 2185 pci_emul_cmd_changed(pi, cmd); 2186 } 2187 2188 static void 2189 pci_cfgrw(int in, int bus, int slot, int func, int coff, int bytes, 2190 uint32_t *valp) 2191 { 2192 struct businfo *bi; 2193 struct slotinfo *si; 2194 struct pci_devinst *pi; 2195 struct pci_devemu *pe; 2196 int idx, needcfg; 2197 uint64_t addr, mask; 2198 uint64_t bar = 0; 2199 2200 if ((bi = pci_businfo[bus]) != NULL) { 2201 si = &bi->slotinfo[slot]; 2202 pi = si->si_funcs[func].fi_devi; 2203 } else 2204 pi = NULL; 2205 2206 /* 2207 * Just return if there is no device at this slot:func or if the 2208 * the guest is doing an un-aligned access. 2209 */ 2210 if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || 2211 (coff & (bytes - 1)) != 0) { 2212 if (in) 2213 *valp = 0xffffffff; 2214 return; 2215 } 2216 2217 /* 2218 * Ignore all writes beyond the standard config space and return all 2219 * ones on reads. 2220 */ 2221 if (coff >= PCI_REGMAX + 1) { 2222 if (in) { 2223 *valp = 0xffffffff; 2224 /* 2225 * Extended capabilities begin at offset 256 in config 2226 * space. Absence of extended capabilities is signaled 2227 * with all 0s in the extended capability header at 2228 * offset 256. 2229 */ 2230 if (coff <= PCI_REGMAX + 4) 2231 *valp = 0x00000000; 2232 } 2233 return; 2234 } 2235 2236 pe = pi->pi_d; 2237 2238 /* 2239 * Config read 2240 */ 2241 if (in) { 2242 /* Let the device emulation override the default handler */ 2243 if (pe->pe_cfgread != NULL) { 2244 needcfg = pe->pe_cfgread(pi, coff, bytes, valp); 2245 } else { 2246 needcfg = 1; 2247 } 2248 2249 if (needcfg) 2250 *valp = CFGREAD(pi, coff, bytes); 2251 2252 pci_emul_hdrtype_fixup(bus, slot, coff, bytes, valp); 2253 } else { 2254 /* Let the device emulation override the default handler */ 2255 if (pe->pe_cfgwrite != NULL && 2256 (*pe->pe_cfgwrite)(pi, coff, bytes, *valp) == 0) 2257 return; 2258 2259 /* 2260 * Special handling for write to BAR and ROM registers 2261 */ 2262 if (is_pcir_bar(coff) || is_pcir_bios(coff)) { 2263 /* 2264 * Ignore writes to BAR registers that are not 2265 * 4-byte aligned. 2266 */ 2267 if (bytes != 4 || (coff & 0x3) != 0) 2268 return; 2269 2270 if (is_pcir_bar(coff)) { 2271 idx = (coff - PCIR_BAR(0)) / 4; 2272 } else if (is_pcir_bios(coff)) { 2273 idx = PCI_ROM_IDX; 2274 } else { 2275 errx(4, "%s: invalid BAR offset %d", __func__, 2276 coff); 2277 } 2278 2279 mask = ~(pi->pi_bar[idx].size - 1); 2280 switch (pi->pi_bar[idx].type) { 2281 case PCIBAR_NONE: 2282 pi->pi_bar[idx].addr = bar = 0; 2283 break; 2284 case PCIBAR_IO: 2285 addr = *valp & mask; 2286 addr &= 0xffff; 2287 bar = addr | pi->pi_bar[idx].lobits; 2288 /* 2289 * Register the new BAR value for interception 2290 */ 2291 if (addr != pi->pi_bar[idx].addr) { 2292 update_bar_address(pi, addr, idx, 2293 PCIBAR_IO); 2294 } 2295 break; 2296 case PCIBAR_MEM32: 2297 addr = bar = *valp & mask; 2298 bar |= pi->pi_bar[idx].lobits; 2299 if (addr != pi->pi_bar[idx].addr) { 2300 update_bar_address(pi, addr, idx, 2301 PCIBAR_MEM32); 2302 } 2303 break; 2304 case PCIBAR_MEM64: 2305 addr = bar = *valp & mask; 2306 bar |= pi->pi_bar[idx].lobits; 2307 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 2308 update_bar_address(pi, addr, idx, 2309 PCIBAR_MEM64); 2310 } 2311 break; 2312 case PCIBAR_MEMHI64: 2313 mask = ~(pi->pi_bar[idx - 1].size - 1); 2314 addr = ((uint64_t)*valp << 32) & mask; 2315 bar = addr >> 32; 2316 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 2317 update_bar_address(pi, addr, idx - 1, 2318 PCIBAR_MEMHI64); 2319 } 2320 break; 2321 case PCIBAR_ROM: 2322 addr = bar = *valp & mask; 2323 if (memen(pi) && romen(pi)) { 2324 unregister_bar(pi, idx); 2325 } 2326 pi->pi_bar[idx].addr = addr; 2327 pi->pi_bar[idx].lobits = *valp & 2328 PCIM_BIOS_ENABLE; 2329 /* romen could have changed it value */ 2330 if (memen(pi) && romen(pi)) { 2331 register_bar(pi, idx); 2332 } 2333 bar |= pi->pi_bar[idx].lobits; 2334 break; 2335 default: 2336 assert(0); 2337 } 2338 pci_set_cfgdata32(pi, coff, bar); 2339 2340 } else if (pci_emul_iscap(pi, coff)) { 2341 pci_emul_capwrite(pi, coff, bytes, *valp, 0, 0); 2342 } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { 2343 pci_emul_cmdsts_write(pi, coff, *valp, bytes); 2344 } else { 2345 CFGWRITE(pi, coff, *valp, bytes); 2346 } 2347 } 2348 } 2349 2350 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 2351 2352 static int 2353 pci_emul_cfgaddr(struct vmctx *ctx __unused, int in, 2354 int port __unused, int bytes, uint32_t *eax, void *arg __unused) 2355 { 2356 uint32_t x; 2357 2358 if (bytes != 4) { 2359 if (in) 2360 *eax = (bytes == 2) ? 0xffff : 0xff; 2361 return (0); 2362 } 2363 2364 if (in) { 2365 x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; 2366 if (cfgenable) 2367 x |= CONF1_ENABLE; 2368 *eax = x; 2369 } else { 2370 x = *eax; 2371 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 2372 cfgoff = (x & PCI_REGMAX) & ~0x03; 2373 cfgfunc = (x >> 8) & PCI_FUNCMAX; 2374 cfgslot = (x >> 11) & PCI_SLOTMAX; 2375 cfgbus = (x >> 16) & PCI_BUSMAX; 2376 } 2377 2378 return (0); 2379 } 2380 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 2381 2382 static int 2383 pci_emul_cfgdata(struct vmctx *ctx __unused, int in, int port, 2384 int bytes, uint32_t *eax, void *arg __unused) 2385 { 2386 int coff; 2387 2388 assert(bytes == 1 || bytes == 2 || bytes == 4); 2389 2390 coff = cfgoff + (port - CONF1_DATA_PORT); 2391 if (cfgenable) { 2392 pci_cfgrw(in, cfgbus, cfgslot, cfgfunc, coff, bytes, eax); 2393 } else { 2394 /* Ignore accesses to cfgdata if not enabled by cfgaddr */ 2395 if (in) 2396 *eax = 0xffffffff; 2397 } 2398 return (0); 2399 } 2400 2401 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 2402 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 2403 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 2404 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 2405 2406 #define PCI_EMUL_TEST 2407 #ifdef PCI_EMUL_TEST 2408 /* 2409 * Define a dummy test device 2410 */ 2411 #define DIOSZ 8 2412 #define DMEMSZ 4096 2413 struct pci_emul_dsoftc { 2414 uint8_t ioregs[DIOSZ]; 2415 uint8_t memregs[2][DMEMSZ]; 2416 }; 2417 2418 #define PCI_EMUL_MSI_MSGS 4 2419 #define PCI_EMUL_MSIX_MSGS 16 2420 2421 static int 2422 pci_emul_dinit(struct pci_devinst *pi, nvlist_t *nvl __unused) 2423 { 2424 int error; 2425 struct pci_emul_dsoftc *sc; 2426 2427 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 2428 2429 pi->pi_arg = sc; 2430 2431 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 2432 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 2433 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 2434 2435 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 2436 assert(error == 0); 2437 2438 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 2439 assert(error == 0); 2440 2441 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 2442 assert(error == 0); 2443 2444 error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); 2445 assert(error == 0); 2446 2447 return (0); 2448 } 2449 2450 static void 2451 pci_emul_diow(struct pci_devinst *pi, int baridx, uint64_t offset, int size, 2452 uint64_t value) 2453 { 2454 int i; 2455 struct pci_emul_dsoftc *sc = pi->pi_arg; 2456 2457 if (baridx == 0) { 2458 if (offset + size > DIOSZ) { 2459 printf("diow: iow too large, offset %ld size %d\n", 2460 offset, size); 2461 return; 2462 } 2463 2464 if (size == 1) { 2465 sc->ioregs[offset] = value & 0xff; 2466 } else if (size == 2) { 2467 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 2468 } else if (size == 4) { 2469 *(uint32_t *)&sc->ioregs[offset] = value; 2470 } else { 2471 printf("diow: iow unknown size %d\n", size); 2472 } 2473 2474 /* 2475 * Special magic value to generate an interrupt 2476 */ 2477 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 2478 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 2479 2480 if (value == 0xabcdef) { 2481 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 2482 pci_generate_msi(pi, i); 2483 } 2484 } 2485 2486 if (baridx == 1 || baridx == 2) { 2487 if (offset + size > DMEMSZ) { 2488 printf("diow: memw too large, offset %ld size %d\n", 2489 offset, size); 2490 return; 2491 } 2492 2493 i = baridx - 1; /* 'memregs' index */ 2494 2495 if (size == 1) { 2496 sc->memregs[i][offset] = value; 2497 } else if (size == 2) { 2498 *(uint16_t *)&sc->memregs[i][offset] = value; 2499 } else if (size == 4) { 2500 *(uint32_t *)&sc->memregs[i][offset] = value; 2501 } else if (size == 8) { 2502 *(uint64_t *)&sc->memregs[i][offset] = value; 2503 } else { 2504 printf("diow: memw unknown size %d\n", size); 2505 } 2506 2507 /* 2508 * magic interrupt ?? 2509 */ 2510 } 2511 2512 if (baridx > 2 || baridx < 0) { 2513 printf("diow: unknown bar idx %d\n", baridx); 2514 } 2515 } 2516 2517 static uint64_t 2518 pci_emul_dior(struct pci_devinst *pi, int baridx, uint64_t offset, int size) 2519 { 2520 struct pci_emul_dsoftc *sc = pi->pi_arg; 2521 uint32_t value; 2522 int i; 2523 2524 value = 0; 2525 if (baridx == 0) { 2526 if (offset + size > DIOSZ) { 2527 printf("dior: ior too large, offset %ld size %d\n", 2528 offset, size); 2529 return (0); 2530 } 2531 2532 value = 0; 2533 if (size == 1) { 2534 value = sc->ioregs[offset]; 2535 } else if (size == 2) { 2536 value = *(uint16_t *) &sc->ioregs[offset]; 2537 } else if (size == 4) { 2538 value = *(uint32_t *) &sc->ioregs[offset]; 2539 } else { 2540 printf("dior: ior unknown size %d\n", size); 2541 } 2542 } 2543 2544 if (baridx == 1 || baridx == 2) { 2545 if (offset + size > DMEMSZ) { 2546 printf("dior: memr too large, offset %ld size %d\n", 2547 offset, size); 2548 return (0); 2549 } 2550 2551 i = baridx - 1; /* 'memregs' index */ 2552 2553 if (size == 1) { 2554 value = sc->memregs[i][offset]; 2555 } else if (size == 2) { 2556 value = *(uint16_t *) &sc->memregs[i][offset]; 2557 } else if (size == 4) { 2558 value = *(uint32_t *) &sc->memregs[i][offset]; 2559 } else if (size == 8) { 2560 value = *(uint64_t *) &sc->memregs[i][offset]; 2561 } else { 2562 printf("dior: ior unknown size %d\n", size); 2563 } 2564 } 2565 2566 2567 if (baridx > 2 || baridx < 0) { 2568 printf("dior: unknown bar idx %d\n", baridx); 2569 return (0); 2570 } 2571 2572 return (value); 2573 } 2574 2575 static const struct pci_devemu pci_dummy = { 2576 .pe_emu = "dummy", 2577 .pe_init = pci_emul_dinit, 2578 .pe_barwrite = pci_emul_diow, 2579 .pe_barread = pci_emul_dior, 2580 }; 2581 PCI_EMUL_SET(pci_dummy); 2582 2583 #endif /* PCI_EMUL_TEST */ 2584