1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 /* 31 * This file and its contents are supplied under the terms of the 32 * Common Development and Distribution License ("CDDL"), version 1.0. 33 * You may only use this file in accordance with the terms of version 34 * 1.0 of the CDDL. 35 * 36 * A full copy of the text of the CDDL should have accompanied this 37 * source. A copy of the CDDL is also available via the Internet at 38 * http://www.illumos.org/license/CDDL. 39 * 40 * Copyright 2014 Pluribus Networks Inc. 41 * Copyright 2018 Joyent, Inc. 42 */ 43 44 #include <sys/cdefs.h> 45 __FBSDID("$FreeBSD$"); 46 47 #include <sys/param.h> 48 #include <sys/linker_set.h> 49 #include <sys/mman.h> 50 51 #include <ctype.h> 52 #include <err.h> 53 #include <errno.h> 54 #include <pthread.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <strings.h> 59 #include <assert.h> 60 #include <stdbool.h> 61 #include <sysexits.h> 62 63 #include <machine/vmm.h> 64 #include <vmmapi.h> 65 66 #include "acpi.h" 67 #include "bhyverun.h" 68 #include "config.h" 69 #include "debug.h" 70 #include "inout.h" 71 #include "ioapic.h" 72 #include "mem.h" 73 #include "pci_emul.h" 74 #include "pci_irq.h" 75 #include "pci_lpc.h" 76 77 #define CONF1_ADDR_PORT 0x0cf8 78 #define CONF1_DATA_PORT 0x0cfc 79 80 #define CONF1_ENABLE 0x80000000ul 81 82 #define MAXBUSES (PCI_BUSMAX + 1) 83 #define MAXSLOTS (PCI_SLOTMAX + 1) 84 #define MAXFUNCS (PCI_FUNCMAX + 1) 85 86 #define GB (1024 * 1024 * 1024UL) 87 88 struct funcinfo { 89 nvlist_t *fi_config; 90 struct pci_devemu *fi_pde; 91 struct pci_devinst *fi_devi; 92 }; 93 94 struct intxinfo { 95 int ii_count; 96 int ii_pirq_pin; 97 int ii_ioapic_irq; 98 }; 99 100 struct slotinfo { 101 struct intxinfo si_intpins[4]; 102 struct funcinfo si_funcs[MAXFUNCS]; 103 }; 104 105 struct businfo { 106 uint16_t iobase, iolimit; /* I/O window */ 107 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 108 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 109 struct slotinfo slotinfo[MAXSLOTS]; 110 }; 111 112 static struct businfo *pci_businfo[MAXBUSES]; 113 114 SET_DECLARE(pci_devemu_set, struct pci_devemu); 115 116 static uint64_t pci_emul_iobase; 117 static uint8_t *pci_emul_rombase; 118 static uint64_t pci_emul_romoffset; 119 static uint8_t *pci_emul_romlim; 120 static uint64_t pci_emul_membase32; 121 static uint64_t pci_emul_membase64; 122 static uint64_t pci_emul_memlim64; 123 124 struct pci_bar_allocation { 125 TAILQ_ENTRY(pci_bar_allocation) chain; 126 struct pci_devinst *pdi; 127 int idx; 128 enum pcibar_type type; 129 uint64_t size; 130 }; 131 132 static TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = 133 TAILQ_HEAD_INITIALIZER(pci_bars); 134 135 #define PCI_EMUL_IOBASE 0x2000 136 #define PCI_EMUL_IOLIMIT 0x10000 137 138 #define PCI_EMUL_ROMSIZE 0x10000000 139 140 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */ 141 #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ 142 SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); 143 144 /* 145 * OVMF always uses 0xC0000000 as base address for 32 bit PCI MMIO. Don't 146 * change this address without changing it in OVMF. 147 */ 148 #define PCI_EMUL_MEMBASE32 0xC0000000 149 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE 150 #define PCI_EMUL_MEMSIZE64 (32*GB) 151 152 static struct pci_devemu *pci_emul_finddev(const char *name); 153 static void pci_lintr_route(struct pci_devinst *pi); 154 static void pci_lintr_update(struct pci_devinst *pi); 155 static void pci_cfgrw(struct vmctx *ctx, int in, int bus, int slot, 156 int func, int coff, int bytes, uint32_t *val); 157 158 static __inline void 159 CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) 160 { 161 162 if (bytes == 1) 163 pci_set_cfgdata8(pi, coff, val); 164 else if (bytes == 2) 165 pci_set_cfgdata16(pi, coff, val); 166 else 167 pci_set_cfgdata32(pi, coff, val); 168 } 169 170 static __inline uint32_t 171 CFGREAD(struct pci_devinst *pi, int coff, int bytes) 172 { 173 174 if (bytes == 1) 175 return (pci_get_cfgdata8(pi, coff)); 176 else if (bytes == 2) 177 return (pci_get_cfgdata16(pi, coff)); 178 else 179 return (pci_get_cfgdata32(pi, coff)); 180 } 181 182 static int 183 is_pcir_bar(int coff) 184 { 185 return (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)); 186 } 187 188 static int 189 is_pcir_bios(int coff) 190 { 191 return (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4); 192 } 193 194 /* 195 * I/O access 196 */ 197 198 /* 199 * Slot options are in the form: 200 * 201 * <bus>:<slot>:<func>,<emul>[,<config>] 202 * <slot>[:<func>],<emul>[,<config>] 203 * 204 * slot is 0..31 205 * func is 0..7 206 * emul is a string describing the type of PCI device e.g. virtio-net 207 * config is an optional string, depending on the device, that can be 208 * used for configuration. 209 * Examples are: 210 * 1,virtio-net,tap0 211 * 3:0,dummy 212 */ 213 static void 214 pci_parse_slot_usage(char *aopt) 215 { 216 217 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 218 } 219 220 /* 221 * Helper function to parse a list of comma-separated options where 222 * each option is formatted as "name[=value]". If no value is 223 * provided, the option is treated as a boolean and is given a value 224 * of true. 225 */ 226 int 227 pci_parse_legacy_config(nvlist_t *nvl, const char *opt) 228 { 229 char *config, *name, *tofree, *value; 230 231 if (opt == NULL) 232 return (0); 233 234 config = tofree = strdup(opt); 235 while ((name = strsep(&config, ",")) != NULL) { 236 value = strchr(name, '='); 237 if (value != NULL) { 238 *value = '\0'; 239 value++; 240 set_config_value_node(nvl, name, value); 241 } else 242 set_config_bool_node(nvl, name, true); 243 } 244 free(tofree); 245 return (0); 246 } 247 248 /* 249 * PCI device configuration is stored in MIBs that encode the device's 250 * location: 251 * 252 * pci.<bus>.<slot>.<func> 253 * 254 * Where "bus", "slot", and "func" are all decimal values without 255 * leading zeroes. Each valid device must have a "device" node which 256 * identifies the driver model of the device. 257 * 258 * Device backends can provide a parser for the "config" string. If 259 * a custom parser is not provided, pci_parse_legacy_config() is used 260 * to parse the string. 261 */ 262 int 263 pci_parse_slot(char *opt) 264 { 265 char node_name[sizeof("pci.XXX.XX.X")]; 266 struct pci_devemu *pde; 267 char *emul, *config, *str, *cp; 268 int error, bnum, snum, fnum; 269 nvlist_t *nvl; 270 271 error = -1; 272 str = strdup(opt); 273 274 emul = config = NULL; 275 if ((cp = strchr(str, ',')) != NULL) { 276 *cp = '\0'; 277 emul = cp + 1; 278 if ((cp = strchr(emul, ',')) != NULL) { 279 *cp = '\0'; 280 config = cp + 1; 281 } 282 } else { 283 pci_parse_slot_usage(opt); 284 goto done; 285 } 286 287 /* <bus>:<slot>:<func> */ 288 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 289 bnum = 0; 290 /* <slot>:<func> */ 291 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 292 fnum = 0; 293 /* <slot> */ 294 if (sscanf(str, "%d", &snum) != 1) { 295 snum = -1; 296 } 297 } 298 } 299 300 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 301 fnum < 0 || fnum >= MAXFUNCS) { 302 pci_parse_slot_usage(opt); 303 goto done; 304 } 305 306 pde = pci_emul_finddev(emul); 307 if (pde == NULL) { 308 EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum, 309 fnum, emul); 310 goto done; 311 } 312 313 snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum, 314 fnum); 315 nvl = find_config_node(node_name); 316 if (nvl != NULL) { 317 EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum, 318 fnum); 319 goto done; 320 } 321 nvl = create_config_node(node_name); 322 if (pde->pe_alias != NULL) 323 set_config_value_node(nvl, "device", pde->pe_alias); 324 else 325 set_config_value_node(nvl, "device", pde->pe_emu); 326 327 if (pde->pe_legacy_config != NULL) 328 error = pde->pe_legacy_config(nvl, config); 329 else 330 error = pci_parse_legacy_config(nvl, config); 331 done: 332 free(str); 333 return (error); 334 } 335 336 void 337 pci_print_supported_devices(void) 338 { 339 struct pci_devemu **pdpp, *pdp; 340 341 SET_FOREACH(pdpp, pci_devemu_set) { 342 pdp = *pdpp; 343 printf("%s\n", pdp->pe_emu); 344 } 345 } 346 347 static int 348 pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 349 { 350 351 if (offset < pi->pi_msix.pba_offset) 352 return (0); 353 354 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 355 return (0); 356 } 357 358 return (1); 359 } 360 361 int 362 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 363 uint64_t value) 364 { 365 int msix_entry_offset; 366 int tab_index; 367 char *dest; 368 369 /* support only 4 or 8 byte writes */ 370 if (size != 4 && size != 8) 371 return (-1); 372 373 /* 374 * Return if table index is beyond what device supports 375 */ 376 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 377 if (tab_index >= pi->pi_msix.table_count) 378 return (-1); 379 380 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 381 382 /* support only aligned writes */ 383 if ((msix_entry_offset % size) != 0) 384 return (-1); 385 386 dest = (char *)(pi->pi_msix.table + tab_index); 387 dest += msix_entry_offset; 388 389 if (size == 4) 390 *((uint32_t *)dest) = value; 391 else 392 *((uint64_t *)dest) = value; 393 394 return (0); 395 } 396 397 uint64_t 398 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 399 { 400 char *dest; 401 int msix_entry_offset; 402 int tab_index; 403 uint64_t retval = ~0; 404 405 /* 406 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 407 * table but we also allow 1 byte access to accommodate reads from 408 * ddb. 409 */ 410 if (size != 1 && size != 4 && size != 8) 411 return (retval); 412 413 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 414 415 /* support only aligned reads */ 416 if ((msix_entry_offset % size) != 0) { 417 return (retval); 418 } 419 420 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 421 422 if (tab_index < pi->pi_msix.table_count) { 423 /* valid MSI-X Table access */ 424 dest = (char *)(pi->pi_msix.table + tab_index); 425 dest += msix_entry_offset; 426 427 if (size == 1) 428 retval = *((uint8_t *)dest); 429 else if (size == 4) 430 retval = *((uint32_t *)dest); 431 else 432 retval = *((uint64_t *)dest); 433 } else if (pci_valid_pba_offset(pi, offset)) { 434 /* return 0 for PBA access */ 435 retval = 0; 436 } 437 438 return (retval); 439 } 440 441 int 442 pci_msix_table_bar(struct pci_devinst *pi) 443 { 444 445 if (pi->pi_msix.table != NULL) 446 return (pi->pi_msix.table_bar); 447 else 448 return (-1); 449 } 450 451 int 452 pci_msix_pba_bar(struct pci_devinst *pi) 453 { 454 455 if (pi->pi_msix.table != NULL) 456 return (pi->pi_msix.pba_bar); 457 else 458 return (-1); 459 } 460 461 static int 462 pci_emul_io_handler(struct vmctx *ctx, int in, int port, 463 int bytes, uint32_t *eax, void *arg) 464 { 465 struct pci_devinst *pdi = arg; 466 struct pci_devemu *pe = pdi->pi_d; 467 uint64_t offset; 468 int i; 469 470 assert(port >= 0); 471 472 for (i = 0; i <= PCI_BARMAX; i++) { 473 if (pdi->pi_bar[i].type == PCIBAR_IO && 474 (uint64_t)port >= pdi->pi_bar[i].addr && 475 (uint64_t)port + bytes <= 476 pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 477 offset = port - pdi->pi_bar[i].addr; 478 if (in) 479 *eax = (*pe->pe_barread)(ctx, pdi, i, 480 offset, bytes); 481 else 482 (*pe->pe_barwrite)(ctx, pdi, i, offset, 483 bytes, *eax); 484 return (0); 485 } 486 } 487 return (-1); 488 } 489 490 static int 491 pci_emul_mem_handler(struct vmctx *ctx, int vcpu __unused, int dir, 492 uint64_t addr, int size, uint64_t *val, void *arg1, long arg2) 493 { 494 struct pci_devinst *pdi = arg1; 495 struct pci_devemu *pe = pdi->pi_d; 496 uint64_t offset; 497 int bidx = (int) arg2; 498 499 assert(bidx <= PCI_BARMAX); 500 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 501 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 502 assert(addr >= pdi->pi_bar[bidx].addr && 503 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 504 505 offset = addr - pdi->pi_bar[bidx].addr; 506 507 if (dir == MEM_F_WRITE) { 508 if (size == 8) { 509 (*pe->pe_barwrite)(ctx, pdi, bidx, offset, 510 4, *val & 0xffffffff); 511 (*pe->pe_barwrite)(ctx, pdi, bidx, offset + 4, 512 4, *val >> 32); 513 } else { 514 (*pe->pe_barwrite)(ctx, pdi, bidx, offset, 515 size, *val); 516 } 517 } else { 518 if (size == 8) { 519 *val = (*pe->pe_barread)(ctx, pdi, bidx, 520 offset, 4); 521 *val |= (*pe->pe_barread)(ctx, pdi, bidx, 522 offset + 4, 4) << 32; 523 } else { 524 *val = (*pe->pe_barread)(ctx, pdi, bidx, 525 offset, size); 526 } 527 } 528 529 return (0); 530 } 531 532 533 static int 534 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 535 uint64_t *addr) 536 { 537 uint64_t base; 538 539 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 540 541 base = roundup2(*baseptr, size); 542 543 if (base + size <= limit) { 544 *addr = base; 545 *baseptr = base + size; 546 return (0); 547 } else 548 return (-1); 549 } 550 551 /* 552 * Register (or unregister) the MMIO or I/O region associated with the BAR 553 * register 'idx' of an emulated pci device. 554 */ 555 static void 556 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 557 { 558 struct pci_devemu *pe; 559 int error; 560 struct inout_port iop; 561 struct mem_range mr; 562 563 pe = pi->pi_d; 564 switch (pi->pi_bar[idx].type) { 565 case PCIBAR_IO: 566 bzero(&iop, sizeof(struct inout_port)); 567 iop.name = pi->pi_name; 568 iop.port = pi->pi_bar[idx].addr; 569 iop.size = pi->pi_bar[idx].size; 570 if (registration) { 571 iop.flags = IOPORT_F_INOUT; 572 iop.handler = pci_emul_io_handler; 573 iop.arg = pi; 574 error = register_inout(&iop); 575 } else 576 error = unregister_inout(&iop); 577 if (pe->pe_baraddr != NULL) 578 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 579 pi->pi_bar[idx].addr); 580 break; 581 case PCIBAR_MEM32: 582 case PCIBAR_MEM64: 583 bzero(&mr, sizeof(struct mem_range)); 584 mr.name = pi->pi_name; 585 mr.base = pi->pi_bar[idx].addr; 586 mr.size = pi->pi_bar[idx].size; 587 if (registration) { 588 mr.flags = MEM_F_RW; 589 mr.handler = pci_emul_mem_handler; 590 mr.arg1 = pi; 591 mr.arg2 = idx; 592 error = register_mem(&mr); 593 } else 594 error = unregister_mem(&mr); 595 if (pe->pe_baraddr != NULL) 596 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 597 pi->pi_bar[idx].addr); 598 break; 599 case PCIBAR_ROM: 600 error = 0; 601 if (pe->pe_baraddr != NULL) 602 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 603 pi->pi_bar[idx].addr); 604 break; 605 default: 606 error = EINVAL; 607 break; 608 } 609 assert(error == 0); 610 } 611 612 static void 613 unregister_bar(struct pci_devinst *pi, int idx) 614 { 615 616 modify_bar_registration(pi, idx, 0); 617 } 618 619 static void 620 register_bar(struct pci_devinst *pi, int idx) 621 { 622 623 modify_bar_registration(pi, idx, 1); 624 } 625 626 /* Is the ROM enabled for the emulated pci device? */ 627 static int 628 romen(struct pci_devinst *pi) 629 { 630 return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) == 631 PCIM_BIOS_ENABLE; 632 } 633 634 /* Are we decoding i/o port accesses for the emulated pci device? */ 635 static int 636 porten(struct pci_devinst *pi) 637 { 638 uint16_t cmd; 639 640 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 641 642 return (cmd & PCIM_CMD_PORTEN); 643 } 644 645 /* Are we decoding memory accesses for the emulated pci device? */ 646 static int 647 memen(struct pci_devinst *pi) 648 { 649 uint16_t cmd; 650 651 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 652 653 return (cmd & PCIM_CMD_MEMEN); 654 } 655 656 /* 657 * Update the MMIO or I/O address that is decoded by the BAR register. 658 * 659 * If the pci device has enabled the address space decoding then intercept 660 * the address range decoded by the BAR register. 661 */ 662 static void 663 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 664 { 665 int decode; 666 667 if (pi->pi_bar[idx].type == PCIBAR_IO) 668 decode = porten(pi); 669 else 670 decode = memen(pi); 671 672 if (decode) 673 unregister_bar(pi, idx); 674 675 switch (type) { 676 case PCIBAR_IO: 677 case PCIBAR_MEM32: 678 pi->pi_bar[idx].addr = addr; 679 break; 680 case PCIBAR_MEM64: 681 pi->pi_bar[idx].addr &= ~0xffffffffUL; 682 pi->pi_bar[idx].addr |= addr; 683 break; 684 case PCIBAR_MEMHI64: 685 pi->pi_bar[idx].addr &= 0xffffffff; 686 pi->pi_bar[idx].addr |= addr; 687 break; 688 default: 689 assert(0); 690 } 691 692 if (decode) 693 register_bar(pi, idx); 694 } 695 696 int 697 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 698 uint64_t size) 699 { 700 assert((type == PCIBAR_ROM) || (idx >= 0 && idx <= PCI_BARMAX)); 701 assert((type != PCIBAR_ROM) || (idx == PCI_ROM_IDX)); 702 703 if ((size & (size - 1)) != 0) 704 size = 1UL << flsl(size); /* round up to a power of 2 */ 705 706 /* Enforce minimum BAR sizes required by the PCI standard */ 707 if (type == PCIBAR_IO) { 708 if (size < 4) 709 size = 4; 710 } else if (type == PCIBAR_ROM) { 711 if (size < ~PCIM_BIOS_ADDR_MASK + 1) 712 size = ~PCIM_BIOS_ADDR_MASK + 1; 713 } else { 714 if (size < 16) 715 size = 16; 716 } 717 718 /* 719 * To reduce fragmentation of the MMIO space, we allocate the BARs by 720 * size. Therefore, don't allocate the BAR yet. We create a list of all 721 * BAR allocation which is sorted by BAR size. When all PCI devices are 722 * initialized, we will assign an address to the BARs. 723 */ 724 725 /* create a new list entry */ 726 struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar)); 727 memset(new_bar, 0, sizeof(*new_bar)); 728 new_bar->pdi = pdi; 729 new_bar->idx = idx; 730 new_bar->type = type; 731 new_bar->size = size; 732 733 /* 734 * Search for a BAR which size is lower than the size of our newly 735 * allocated BAR. 736 */ 737 struct pci_bar_allocation *bar = NULL; 738 TAILQ_FOREACH(bar, &pci_bars, chain) { 739 if (bar->size < size) { 740 break; 741 } 742 } 743 744 if (bar == NULL) { 745 /* 746 * Either the list is empty or new BAR is the smallest BAR of 747 * the list. Append it to the end of our list. 748 */ 749 TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain); 750 } else { 751 /* 752 * The found BAR is smaller than our new BAR. For that reason, 753 * insert our new BAR before the found BAR. 754 */ 755 TAILQ_INSERT_BEFORE(bar, new_bar, chain); 756 } 757 758 /* 759 * pci_passthru devices synchronize their physical and virtual command 760 * register on init. For that reason, the virtual cmd reg should be 761 * updated as early as possible. 762 */ 763 uint16_t enbit = 0; 764 switch (type) { 765 case PCIBAR_IO: 766 enbit = PCIM_CMD_PORTEN; 767 break; 768 case PCIBAR_MEM64: 769 case PCIBAR_MEM32: 770 enbit = PCIM_CMD_MEMEN; 771 break; 772 default: 773 enbit = 0; 774 break; 775 } 776 777 const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); 778 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); 779 780 return (0); 781 } 782 783 static int 784 pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx, 785 const enum pcibar_type type, const uint64_t size) 786 { 787 int error; 788 uint64_t *baseptr, limit, addr, mask, lobits, bar; 789 790 switch (type) { 791 case PCIBAR_NONE: 792 baseptr = NULL; 793 addr = mask = lobits = 0; 794 break; 795 case PCIBAR_IO: 796 baseptr = &pci_emul_iobase; 797 limit = PCI_EMUL_IOLIMIT; 798 mask = PCIM_BAR_IO_BASE; 799 lobits = PCIM_BAR_IO_SPACE; 800 break; 801 case PCIBAR_MEM64: 802 /* 803 * XXX 804 * Some drivers do not work well if the 64-bit BAR is allocated 805 * above 4GB. Allow for this by allocating small requests under 806 * 4GB unless then allocation size is larger than some arbitrary 807 * number (128MB currently). 808 */ 809 if (size > 128 * 1024 * 1024) { 810 baseptr = &pci_emul_membase64; 811 limit = pci_emul_memlim64; 812 mask = PCIM_BAR_MEM_BASE; 813 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 814 PCIM_BAR_MEM_PREFETCH; 815 } else { 816 baseptr = &pci_emul_membase32; 817 limit = PCI_EMUL_MEMLIMIT32; 818 mask = PCIM_BAR_MEM_BASE; 819 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 820 } 821 break; 822 case PCIBAR_MEM32: 823 baseptr = &pci_emul_membase32; 824 limit = PCI_EMUL_MEMLIMIT32; 825 mask = PCIM_BAR_MEM_BASE; 826 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 827 break; 828 case PCIBAR_ROM: 829 /* do not claim memory for ROM. OVMF will do it for us. */ 830 baseptr = NULL; 831 limit = 0; 832 mask = PCIM_BIOS_ADDR_MASK; 833 lobits = 0; 834 break; 835 default: 836 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 837 #ifdef FreeBSD 838 assert(0); 839 #else 840 abort(); 841 #endif 842 } 843 844 if (baseptr != NULL) { 845 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 846 if (error != 0) 847 return (error); 848 } else { 849 addr = 0; 850 } 851 852 pdi->pi_bar[idx].type = type; 853 pdi->pi_bar[idx].addr = addr; 854 pdi->pi_bar[idx].size = size; 855 /* 856 * passthru devices are using same lobits as physical device they set 857 * this property 858 */ 859 if (pdi->pi_bar[idx].lobits != 0) { 860 lobits = pdi->pi_bar[idx].lobits; 861 } else { 862 pdi->pi_bar[idx].lobits = lobits; 863 } 864 865 /* Initialize the BAR register in config space */ 866 bar = (addr & mask) | lobits; 867 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 868 869 if (type == PCIBAR_MEM64) { 870 assert(idx + 1 <= PCI_BARMAX); 871 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 872 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 873 } 874 875 if (type != PCIBAR_ROM) { 876 register_bar(pdi, idx); 877 } 878 879 return (0); 880 } 881 882 int 883 pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size, 884 void **const addr) 885 { 886 /* allocate ROM space once on first call */ 887 if (pci_emul_rombase == 0) { 888 pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM, 889 "pcirom", PCI_EMUL_ROMSIZE); 890 if (pci_emul_rombase == MAP_FAILED) { 891 warnx("%s: failed to create rom segment", __func__); 892 return (-1); 893 } 894 pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE; 895 pci_emul_romoffset = 0; 896 } 897 898 /* ROM size should be a power of 2 and greater than 2 KB */ 899 const uint64_t rom_size = MAX(1UL << flsl(size), 900 ~PCIM_BIOS_ADDR_MASK + 1); 901 902 /* check if ROM fits into ROM space */ 903 if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE) { 904 warnx("%s: no space left in rom segment:", __func__); 905 warnx("%16lu bytes left", 906 PCI_EMUL_ROMSIZE - pci_emul_romoffset); 907 warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus, 908 pdi->pi_slot, pdi->pi_func); 909 return (-1); 910 } 911 912 /* allocate ROM BAR */ 913 const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM, 914 rom_size); 915 if (error) 916 return error; 917 918 /* return address */ 919 *addr = pci_emul_rombase + pci_emul_romoffset; 920 921 /* save offset into ROM Space */ 922 pdi->pi_romoffset = pci_emul_romoffset; 923 924 /* increase offset for next ROM */ 925 pci_emul_romoffset += rom_size; 926 927 return (0); 928 } 929 930 #define CAP_START_OFFSET 0x40 931 static int 932 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 933 { 934 int i, capoff, reallen; 935 uint16_t sts; 936 937 assert(caplen > 0); 938 939 reallen = roundup2(caplen, 4); /* dword aligned */ 940 941 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 942 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 943 capoff = CAP_START_OFFSET; 944 else 945 capoff = pi->pi_capend + 1; 946 947 /* Check if we have enough space */ 948 if (capoff + reallen > PCI_REGMAX + 1) 949 return (-1); 950 951 /* Set the previous capability pointer */ 952 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 953 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 954 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 955 } else 956 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 957 958 /* Copy the capability */ 959 for (i = 0; i < caplen; i++) 960 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 961 962 /* Set the next capability pointer */ 963 pci_set_cfgdata8(pi, capoff + 1, 0); 964 965 pi->pi_prevcap = capoff; 966 pi->pi_capend = capoff + reallen - 1; 967 return (0); 968 } 969 970 static struct pci_devemu * 971 pci_emul_finddev(const char *name) 972 { 973 struct pci_devemu **pdpp, *pdp; 974 975 SET_FOREACH(pdpp, pci_devemu_set) { 976 pdp = *pdpp; 977 if (!strcmp(pdp->pe_emu, name)) { 978 return (pdp); 979 } 980 } 981 982 return (NULL); 983 } 984 985 static int 986 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 987 int func, struct funcinfo *fi) 988 { 989 struct pci_devinst *pdi; 990 int err; 991 992 pdi = calloc(1, sizeof(struct pci_devinst)); 993 994 pdi->pi_vmctx = ctx; 995 pdi->pi_bus = bus; 996 pdi->pi_slot = slot; 997 pdi->pi_func = func; 998 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 999 pdi->pi_lintr.pin = 0; 1000 pdi->pi_lintr.state = IDLE; 1001 pdi->pi_lintr.pirq_pin = 0; 1002 pdi->pi_lintr.ioapic_irq = 0; 1003 pdi->pi_d = pde; 1004 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 1005 1006 /* Disable legacy interrupts */ 1007 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 1008 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 1009 1010 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); 1011 1012 err = (*pde->pe_init)(ctx, pdi, fi->fi_config); 1013 if (err == 0) 1014 fi->fi_devi = pdi; 1015 else 1016 free(pdi); 1017 1018 return (err); 1019 } 1020 1021 void 1022 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 1023 { 1024 int mmc; 1025 1026 /* Number of msi messages must be a power of 2 between 1 and 32 */ 1027 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 1028 mmc = ffs(msgnum) - 1; 1029 1030 bzero(msicap, sizeof(struct msicap)); 1031 msicap->capid = PCIY_MSI; 1032 msicap->nextptr = nextptr; 1033 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 1034 } 1035 1036 int 1037 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 1038 { 1039 struct msicap msicap; 1040 1041 pci_populate_msicap(&msicap, msgnum, 0); 1042 1043 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 1044 } 1045 1046 static void 1047 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 1048 uint32_t msix_tab_size) 1049 { 1050 1051 assert(msix_tab_size % 4096 == 0); 1052 1053 bzero(msixcap, sizeof(struct msixcap)); 1054 msixcap->capid = PCIY_MSIX; 1055 1056 /* 1057 * Message Control Register, all fields set to 1058 * zero except for the Table Size. 1059 * Note: Table size N is encoded as N-1 1060 */ 1061 msixcap->msgctrl = msgnum - 1; 1062 1063 /* 1064 * MSI-X BAR setup: 1065 * - MSI-X table start at offset 0 1066 * - PBA table starts at a 4K aligned offset after the MSI-X table 1067 */ 1068 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 1069 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 1070 } 1071 1072 static void 1073 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 1074 { 1075 int i, table_size; 1076 1077 assert(table_entries > 0); 1078 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 1079 1080 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 1081 pi->pi_msix.table = calloc(1, table_size); 1082 1083 /* set mask bit of vector control register */ 1084 for (i = 0; i < table_entries; i++) 1085 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 1086 } 1087 1088 int 1089 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 1090 { 1091 uint32_t tab_size; 1092 struct msixcap msixcap; 1093 1094 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 1095 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 1096 1097 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 1098 1099 /* Align table size to nearest 4K */ 1100 tab_size = roundup2(tab_size, 4096); 1101 1102 pi->pi_msix.table_bar = barnum; 1103 pi->pi_msix.pba_bar = barnum; 1104 pi->pi_msix.table_offset = 0; 1105 pi->pi_msix.table_count = msgnum; 1106 pi->pi_msix.pba_offset = tab_size; 1107 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 1108 1109 pci_msix_table_init(pi, msgnum); 1110 1111 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 1112 1113 /* allocate memory for MSI-X Table and PBA */ 1114 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 1115 tab_size + pi->pi_msix.pba_size); 1116 1117 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 1118 sizeof(msixcap))); 1119 } 1120 1121 static void 1122 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1123 int bytes, uint32_t val) 1124 { 1125 uint16_t msgctrl, rwmask; 1126 int off; 1127 1128 off = offset - capoff; 1129 /* Message Control Register */ 1130 if (off == 2 && bytes == 2) { 1131 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 1132 msgctrl = pci_get_cfgdata16(pi, offset); 1133 msgctrl &= ~rwmask; 1134 msgctrl |= val & rwmask; 1135 val = msgctrl; 1136 1137 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 1138 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 1139 pci_lintr_update(pi); 1140 } 1141 1142 CFGWRITE(pi, offset, val, bytes); 1143 } 1144 1145 static void 1146 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1147 int bytes, uint32_t val) 1148 { 1149 uint16_t msgctrl, rwmask, msgdata, mme; 1150 uint32_t addrlo; 1151 1152 /* 1153 * If guest is writing to the message control register make sure 1154 * we do not overwrite read-only fields. 1155 */ 1156 if ((offset - capoff) == 2 && bytes == 2) { 1157 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 1158 msgctrl = pci_get_cfgdata16(pi, offset); 1159 msgctrl &= ~rwmask; 1160 msgctrl |= val & rwmask; 1161 val = msgctrl; 1162 } 1163 CFGWRITE(pi, offset, val, bytes); 1164 1165 msgctrl = pci_get_cfgdata16(pi, capoff + 2); 1166 addrlo = pci_get_cfgdata32(pi, capoff + 4); 1167 if (msgctrl & PCIM_MSICTRL_64BIT) 1168 msgdata = pci_get_cfgdata16(pi, capoff + 12); 1169 else 1170 msgdata = pci_get_cfgdata16(pi, capoff + 8); 1171 1172 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 1173 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 1174 if (pi->pi_msi.enabled) { 1175 pi->pi_msi.addr = addrlo; 1176 pi->pi_msi.msg_data = msgdata; 1177 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 1178 } else { 1179 pi->pi_msi.maxmsgnum = 0; 1180 } 1181 pci_lintr_update(pi); 1182 } 1183 1184 static void 1185 pciecap_cfgwrite(struct pci_devinst *pi, int capoff __unused, int offset, 1186 int bytes, uint32_t val) 1187 { 1188 1189 /* XXX don't write to the readonly parts */ 1190 CFGWRITE(pi, offset, val, bytes); 1191 } 1192 1193 #define PCIECAP_VERSION 0x2 1194 int 1195 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 1196 { 1197 int err; 1198 struct pciecap pciecap; 1199 1200 bzero(&pciecap, sizeof(pciecap)); 1201 1202 /* 1203 * Use the integrated endpoint type for endpoints on a root complex bus. 1204 * 1205 * NB: bhyve currently only supports a single PCI bus that is the root 1206 * complex bus, so all endpoints are integrated. 1207 */ 1208 if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0)) 1209 type = PCIEM_TYPE_ROOT_INT_EP; 1210 1211 pciecap.capid = PCIY_EXPRESS; 1212 pciecap.pcie_capabilities = PCIECAP_VERSION | type; 1213 if (type != PCIEM_TYPE_ROOT_INT_EP) { 1214 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 1215 pciecap.link_status = 0x11; /* gen1, x1 */ 1216 } 1217 1218 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 1219 return (err); 1220 } 1221 1222 /* 1223 * This function assumes that 'coff' is in the capabilities region of the 1224 * config space. A capoff parameter of zero will force a search for the 1225 * offset and type. 1226 */ 1227 void 1228 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, 1229 uint8_t capoff, int capid) 1230 { 1231 uint8_t nextoff; 1232 1233 /* Do not allow un-aligned writes */ 1234 if ((offset & (bytes - 1)) != 0) 1235 return; 1236 1237 if (capoff == 0) { 1238 /* Find the capability that we want to update */ 1239 capoff = CAP_START_OFFSET; 1240 while (1) { 1241 nextoff = pci_get_cfgdata8(pi, capoff + 1); 1242 if (nextoff == 0) 1243 break; 1244 if (offset >= capoff && offset < nextoff) 1245 break; 1246 1247 capoff = nextoff; 1248 } 1249 assert(offset >= capoff); 1250 capid = pci_get_cfgdata8(pi, capoff); 1251 } 1252 1253 /* 1254 * Capability ID and Next Capability Pointer are readonly. 1255 * However, some o/s's do 4-byte writes that include these. 1256 * For this case, trim the write back to 2 bytes and adjust 1257 * the data. 1258 */ 1259 if (offset == capoff || offset == capoff + 1) { 1260 if (offset == capoff && bytes == 4) { 1261 bytes = 2; 1262 offset += 2; 1263 val >>= 16; 1264 } else 1265 return; 1266 } 1267 1268 switch (capid) { 1269 case PCIY_MSI: 1270 msicap_cfgwrite(pi, capoff, offset, bytes, val); 1271 break; 1272 case PCIY_MSIX: 1273 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 1274 break; 1275 case PCIY_EXPRESS: 1276 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 1277 break; 1278 default: 1279 break; 1280 } 1281 } 1282 1283 static int 1284 pci_emul_iscap(struct pci_devinst *pi, int offset) 1285 { 1286 uint16_t sts; 1287 1288 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1289 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1290 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1291 return (1); 1292 } 1293 return (0); 1294 } 1295 1296 static int 1297 pci_emul_fallback_handler(struct vmctx *ctx __unused, int vcpu __unused, 1298 int dir, uint64_t addr __unused, int size __unused, uint64_t *val, 1299 void *arg1 __unused, long arg2 __unused) 1300 { 1301 /* 1302 * Ignore writes; return 0xff's for reads. The mem read code 1303 * will take care of truncating to the correct size. 1304 */ 1305 if (dir == MEM_F_READ) { 1306 *val = 0xffffffffffffffff; 1307 } 1308 1309 return (0); 1310 } 1311 1312 static int 1313 pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu __unused, int dir, 1314 uint64_t addr, int bytes, uint64_t *val, void *arg1 __unused, 1315 long arg2 __unused) 1316 { 1317 int bus, slot, func, coff, in; 1318 1319 coff = addr & 0xfff; 1320 func = (addr >> 12) & 0x7; 1321 slot = (addr >> 15) & 0x1f; 1322 bus = (addr >> 20) & 0xff; 1323 in = (dir == MEM_F_READ); 1324 if (in) 1325 *val = ~0UL; 1326 pci_cfgrw(ctx, in, bus, slot, func, coff, bytes, (uint32_t *)val); 1327 return (0); 1328 } 1329 1330 uint64_t 1331 pci_ecfg_base(void) 1332 { 1333 1334 return (PCI_EMUL_ECFG_BASE); 1335 } 1336 1337 #define BUSIO_ROUNDUP 32 1338 #define BUSMEM32_ROUNDUP (1024 * 1024) 1339 #define BUSMEM64_ROUNDUP (512 * 1024 * 1024) 1340 1341 int 1342 init_pci(struct vmctx *ctx) 1343 { 1344 char node_name[sizeof("pci.XXX.XX.X")]; 1345 struct mem_range mr; 1346 struct pci_devemu *pde; 1347 struct businfo *bi; 1348 struct slotinfo *si; 1349 struct funcinfo *fi; 1350 nvlist_t *nvl; 1351 const char *emul; 1352 size_t lowmem; 1353 int bus, slot, func; 1354 int error; 1355 1356 if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32) 1357 errx(EX_OSERR, "Invalid lowmem limit"); 1358 1359 pci_emul_iobase = PCI_EMUL_IOBASE; 1360 pci_emul_membase32 = PCI_EMUL_MEMBASE32; 1361 1362 pci_emul_membase64 = 4*GB + vm_get_highmem_size(ctx); 1363 pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64); 1364 pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64; 1365 1366 for (bus = 0; bus < MAXBUSES; bus++) { 1367 snprintf(node_name, sizeof(node_name), "pci.%d", bus); 1368 nvl = find_config_node(node_name); 1369 if (nvl == NULL) 1370 continue; 1371 pci_businfo[bus] = calloc(1, sizeof(struct businfo)); 1372 bi = pci_businfo[bus]; 1373 1374 /* 1375 * Keep track of the i/o and memory resources allocated to 1376 * this bus. 1377 */ 1378 bi->iobase = pci_emul_iobase; 1379 bi->membase32 = pci_emul_membase32; 1380 bi->membase64 = pci_emul_membase64; 1381 1382 /* first run: init devices */ 1383 for (slot = 0; slot < MAXSLOTS; slot++) { 1384 si = &bi->slotinfo[slot]; 1385 for (func = 0; func < MAXFUNCS; func++) { 1386 fi = &si->si_funcs[func]; 1387 snprintf(node_name, sizeof(node_name), 1388 "pci.%d.%d.%d", bus, slot, func); 1389 nvl = find_config_node(node_name); 1390 if (nvl == NULL) 1391 continue; 1392 1393 fi->fi_config = nvl; 1394 emul = get_config_value_node(nvl, "device"); 1395 if (emul == NULL) { 1396 EPRINTLN("pci slot %d:%d:%d: missing " 1397 "\"device\" value", bus, slot, func); 1398 return (EINVAL); 1399 } 1400 pde = pci_emul_finddev(emul); 1401 if (pde == NULL) { 1402 EPRINTLN("pci slot %d:%d:%d: unknown " 1403 "device \"%s\"", bus, slot, func, 1404 emul); 1405 return (EINVAL); 1406 } 1407 if (pde->pe_alias != NULL) { 1408 EPRINTLN("pci slot %d:%d:%d: legacy " 1409 "device \"%s\", use \"%s\" instead", 1410 bus, slot, func, emul, 1411 pde->pe_alias); 1412 return (EINVAL); 1413 } 1414 fi->fi_pde = pde; 1415 error = pci_emul_init(ctx, pde, bus, slot, 1416 func, fi); 1417 if (error) 1418 return (error); 1419 } 1420 } 1421 1422 /* second run: assign BARs and free list */ 1423 struct pci_bar_allocation *bar; 1424 struct pci_bar_allocation *bar_tmp; 1425 TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) { 1426 pci_emul_assign_bar(bar->pdi, bar->idx, bar->type, 1427 bar->size); 1428 free(bar); 1429 } 1430 TAILQ_INIT(&pci_bars); 1431 1432 /* 1433 * Add some slop to the I/O and memory resources decoded by 1434 * this bus to give a guest some flexibility if it wants to 1435 * reprogram the BARs. 1436 */ 1437 pci_emul_iobase += BUSIO_ROUNDUP; 1438 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1439 bi->iolimit = pci_emul_iobase; 1440 1441 pci_emul_membase32 += BUSMEM32_ROUNDUP; 1442 pci_emul_membase32 = roundup2(pci_emul_membase32, 1443 BUSMEM32_ROUNDUP); 1444 bi->memlimit32 = pci_emul_membase32; 1445 1446 pci_emul_membase64 += BUSMEM64_ROUNDUP; 1447 pci_emul_membase64 = roundup2(pci_emul_membase64, 1448 BUSMEM64_ROUNDUP); 1449 bi->memlimit64 = pci_emul_membase64; 1450 } 1451 1452 /* 1453 * PCI backends are initialized before routing INTx interrupts 1454 * so that LPC devices are able to reserve ISA IRQs before 1455 * routing PIRQ pins. 1456 */ 1457 for (bus = 0; bus < MAXBUSES; bus++) { 1458 if ((bi = pci_businfo[bus]) == NULL) 1459 continue; 1460 1461 for (slot = 0; slot < MAXSLOTS; slot++) { 1462 si = &bi->slotinfo[slot]; 1463 for (func = 0; func < MAXFUNCS; func++) { 1464 fi = &si->si_funcs[func]; 1465 if (fi->fi_devi == NULL) 1466 continue; 1467 pci_lintr_route(fi->fi_devi); 1468 } 1469 } 1470 } 1471 lpc_pirq_routed(); 1472 1473 /* 1474 * The guest physical memory map looks like the following: 1475 * [0, lowmem) guest system memory 1476 * [lowmem, 0xC0000000) memory hole (may be absent) 1477 * [0xC0000000, 0xE0000000) PCI hole (32-bit BAR allocation) 1478 * [0xE0000000, 0xF0000000) PCI extended config window 1479 * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware 1480 * [4GB, 4GB + highmem) 1481 */ 1482 1483 /* 1484 * Accesses to memory addresses that are not allocated to system 1485 * memory or PCI devices return 0xff's. 1486 */ 1487 lowmem = vm_get_lowmem_size(ctx); 1488 bzero(&mr, sizeof(struct mem_range)); 1489 mr.name = "PCI hole"; 1490 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1491 mr.base = lowmem; 1492 mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1493 mr.handler = pci_emul_fallback_handler; 1494 error = register_mem_fallback(&mr); 1495 assert(error == 0); 1496 1497 /* PCI extended config space */ 1498 bzero(&mr, sizeof(struct mem_range)); 1499 mr.name = "PCI ECFG"; 1500 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1501 mr.base = PCI_EMUL_ECFG_BASE; 1502 mr.size = PCI_EMUL_ECFG_SIZE; 1503 mr.handler = pci_emul_ecfg_handler; 1504 error = register_mem(&mr); 1505 assert(error == 0); 1506 1507 return (0); 1508 } 1509 1510 static void 1511 pci_apic_prt_entry(int bus __unused, int slot, int pin, int pirq_pin __unused, 1512 int ioapic_irq, void *arg __unused) 1513 { 1514 1515 dsdt_line(" Package ()"); 1516 dsdt_line(" {"); 1517 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1518 dsdt_line(" 0x%02X,", pin - 1); 1519 dsdt_line(" Zero,"); 1520 dsdt_line(" 0x%X", ioapic_irq); 1521 dsdt_line(" },"); 1522 } 1523 1524 static void 1525 pci_pirq_prt_entry(int bus __unused, int slot, int pin, int pirq_pin, 1526 int ioapic_irq __unused, void *arg __unused) 1527 { 1528 char *name; 1529 1530 name = lpc_pirq_name(pirq_pin); 1531 if (name == NULL) 1532 return; 1533 dsdt_line(" Package ()"); 1534 dsdt_line(" {"); 1535 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1536 dsdt_line(" 0x%02X,", pin - 1); 1537 dsdt_line(" %s,", name); 1538 dsdt_line(" 0x00"); 1539 dsdt_line(" },"); 1540 free(name); 1541 } 1542 1543 /* 1544 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1545 * corresponding to each PCI bus. 1546 */ 1547 static void 1548 pci_bus_write_dsdt(int bus) 1549 { 1550 struct businfo *bi; 1551 struct slotinfo *si; 1552 struct pci_devinst *pi; 1553 int count, func, slot; 1554 1555 /* 1556 * If there are no devices on this 'bus' then just return. 1557 */ 1558 if ((bi = pci_businfo[bus]) == NULL) { 1559 /* 1560 * Bus 0 is special because it decodes the I/O ports used 1561 * for PCI config space access even if there are no devices 1562 * on it. 1563 */ 1564 if (bus != 0) 1565 return; 1566 } 1567 1568 dsdt_line(" Device (PC%02X)", bus); 1569 dsdt_line(" {"); 1570 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1571 1572 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1573 dsdt_line(" {"); 1574 dsdt_line(" Return (0x%08X)", bus); 1575 dsdt_line(" }"); 1576 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1577 dsdt_line(" {"); 1578 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1579 "MaxFixed, PosDecode,"); 1580 dsdt_line(" 0x0000, // Granularity"); 1581 dsdt_line(" 0x%04X, // Range Minimum", bus); 1582 dsdt_line(" 0x%04X, // Range Maximum", bus); 1583 dsdt_line(" 0x0000, // Translation Offset"); 1584 dsdt_line(" 0x0001, // Length"); 1585 dsdt_line(" ,, )"); 1586 1587 if (bus == 0) { 1588 dsdt_indent(3); 1589 dsdt_fixed_ioport(0xCF8, 8); 1590 dsdt_unindent(3); 1591 1592 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1593 "PosDecode, EntireRange,"); 1594 dsdt_line(" 0x0000, // Granularity"); 1595 dsdt_line(" 0x0000, // Range Minimum"); 1596 dsdt_line(" 0x0CF7, // Range Maximum"); 1597 dsdt_line(" 0x0000, // Translation Offset"); 1598 dsdt_line(" 0x0CF8, // Length"); 1599 dsdt_line(" ,, , TypeStatic)"); 1600 1601 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1602 "PosDecode, EntireRange,"); 1603 dsdt_line(" 0x0000, // Granularity"); 1604 dsdt_line(" 0x0D00, // Range Minimum"); 1605 dsdt_line(" 0x%04X, // Range Maximum", 1606 PCI_EMUL_IOBASE - 1); 1607 dsdt_line(" 0x0000, // Translation Offset"); 1608 dsdt_line(" 0x%04X, // Length", 1609 PCI_EMUL_IOBASE - 0x0D00); 1610 dsdt_line(" ,, , TypeStatic)"); 1611 1612 if (bi == NULL) { 1613 dsdt_line(" })"); 1614 goto done; 1615 } 1616 } 1617 assert(bi != NULL); 1618 1619 /* i/o window */ 1620 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1621 "PosDecode, EntireRange,"); 1622 dsdt_line(" 0x0000, // Granularity"); 1623 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1624 dsdt_line(" 0x%04X, // Range Maximum", 1625 bi->iolimit - 1); 1626 dsdt_line(" 0x0000, // Translation Offset"); 1627 dsdt_line(" 0x%04X, // Length", 1628 bi->iolimit - bi->iobase); 1629 dsdt_line(" ,, , TypeStatic)"); 1630 1631 /* mmio window (32-bit) */ 1632 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1633 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1634 dsdt_line(" 0x00000000, // Granularity"); 1635 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1636 dsdt_line(" 0x%08X, // Range Maximum\n", 1637 bi->memlimit32 - 1); 1638 dsdt_line(" 0x00000000, // Translation Offset"); 1639 dsdt_line(" 0x%08X, // Length\n", 1640 bi->memlimit32 - bi->membase32); 1641 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1642 1643 /* mmio window (64-bit) */ 1644 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1645 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1646 dsdt_line(" 0x0000000000000000, // Granularity"); 1647 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1648 dsdt_line(" 0x%016lX, // Range Maximum\n", 1649 bi->memlimit64 - 1); 1650 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1651 dsdt_line(" 0x%016lX, // Length\n", 1652 bi->memlimit64 - bi->membase64); 1653 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1654 dsdt_line(" })"); 1655 1656 count = pci_count_lintr(bus); 1657 if (count != 0) { 1658 dsdt_indent(2); 1659 dsdt_line("Name (PPRT, Package ()"); 1660 dsdt_line("{"); 1661 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1662 dsdt_line("})"); 1663 dsdt_line("Name (APRT, Package ()"); 1664 dsdt_line("{"); 1665 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1666 dsdt_line("})"); 1667 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1668 dsdt_line("{"); 1669 dsdt_line(" If (PICM)"); 1670 dsdt_line(" {"); 1671 dsdt_line(" Return (APRT)"); 1672 dsdt_line(" }"); 1673 dsdt_line(" Else"); 1674 dsdt_line(" {"); 1675 dsdt_line(" Return (PPRT)"); 1676 dsdt_line(" }"); 1677 dsdt_line("}"); 1678 dsdt_unindent(2); 1679 } 1680 1681 dsdt_indent(2); 1682 for (slot = 0; slot < MAXSLOTS; slot++) { 1683 si = &bi->slotinfo[slot]; 1684 for (func = 0; func < MAXFUNCS; func++) { 1685 pi = si->si_funcs[func].fi_devi; 1686 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1687 pi->pi_d->pe_write_dsdt(pi); 1688 } 1689 } 1690 dsdt_unindent(2); 1691 done: 1692 dsdt_line(" }"); 1693 } 1694 1695 void 1696 pci_write_dsdt(void) 1697 { 1698 int bus; 1699 1700 dsdt_indent(1); 1701 dsdt_line("Name (PICM, 0x00)"); 1702 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1703 dsdt_line("{"); 1704 dsdt_line(" Store (Arg0, PICM)"); 1705 dsdt_line("}"); 1706 dsdt_line(""); 1707 dsdt_line("Scope (_SB)"); 1708 dsdt_line("{"); 1709 for (bus = 0; bus < MAXBUSES; bus++) 1710 pci_bus_write_dsdt(bus); 1711 dsdt_line("}"); 1712 dsdt_unindent(1); 1713 } 1714 1715 int 1716 pci_bus_configured(int bus) 1717 { 1718 assert(bus >= 0 && bus < MAXBUSES); 1719 return (pci_businfo[bus] != NULL); 1720 } 1721 1722 int 1723 pci_msi_enabled(struct pci_devinst *pi) 1724 { 1725 return (pi->pi_msi.enabled); 1726 } 1727 1728 int 1729 pci_msi_maxmsgnum(struct pci_devinst *pi) 1730 { 1731 if (pi->pi_msi.enabled) 1732 return (pi->pi_msi.maxmsgnum); 1733 else 1734 return (0); 1735 } 1736 1737 int 1738 pci_msix_enabled(struct pci_devinst *pi) 1739 { 1740 1741 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1742 } 1743 1744 void 1745 pci_generate_msix(struct pci_devinst *pi, int index) 1746 { 1747 struct msix_table_entry *mte; 1748 1749 if (!pci_msix_enabled(pi)) 1750 return; 1751 1752 if (pi->pi_msix.function_mask) 1753 return; 1754 1755 if (index >= pi->pi_msix.table_count) 1756 return; 1757 1758 mte = &pi->pi_msix.table[index]; 1759 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1760 /* XXX Set PBA bit if interrupt is disabled */ 1761 vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); 1762 } 1763 } 1764 1765 void 1766 pci_generate_msi(struct pci_devinst *pi, int index) 1767 { 1768 1769 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1770 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1771 pi->pi_msi.msg_data + index); 1772 } 1773 } 1774 1775 static bool 1776 pci_lintr_permitted(struct pci_devinst *pi) 1777 { 1778 uint16_t cmd; 1779 1780 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1781 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1782 (cmd & PCIM_CMD_INTxDIS))); 1783 } 1784 1785 void 1786 pci_lintr_request(struct pci_devinst *pi) 1787 { 1788 struct businfo *bi; 1789 struct slotinfo *si; 1790 int bestpin, bestcount, pin; 1791 1792 bi = pci_businfo[pi->pi_bus]; 1793 assert(bi != NULL); 1794 1795 /* 1796 * Just allocate a pin from our slot. The pin will be 1797 * assigned IRQs later when interrupts are routed. 1798 */ 1799 si = &bi->slotinfo[pi->pi_slot]; 1800 bestpin = 0; 1801 bestcount = si->si_intpins[0].ii_count; 1802 for (pin = 1; pin < 4; pin++) { 1803 if (si->si_intpins[pin].ii_count < bestcount) { 1804 bestpin = pin; 1805 bestcount = si->si_intpins[pin].ii_count; 1806 } 1807 } 1808 1809 si->si_intpins[bestpin].ii_count++; 1810 pi->pi_lintr.pin = bestpin + 1; 1811 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1812 } 1813 1814 static void 1815 pci_lintr_route(struct pci_devinst *pi) 1816 { 1817 struct businfo *bi; 1818 struct intxinfo *ii; 1819 1820 if (pi->pi_lintr.pin == 0) 1821 return; 1822 1823 bi = pci_businfo[pi->pi_bus]; 1824 assert(bi != NULL); 1825 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1826 1827 /* 1828 * Attempt to allocate an I/O APIC pin for this intpin if one 1829 * is not yet assigned. 1830 */ 1831 if (ii->ii_ioapic_irq == 0) 1832 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); 1833 assert(ii->ii_ioapic_irq > 0); 1834 1835 /* 1836 * Attempt to allocate a PIRQ pin for this intpin if one is 1837 * not yet assigned. 1838 */ 1839 if (ii->ii_pirq_pin == 0) 1840 ii->ii_pirq_pin = pirq_alloc_pin(pi); 1841 assert(ii->ii_pirq_pin > 0); 1842 1843 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1844 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1845 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1846 } 1847 1848 void 1849 pci_lintr_assert(struct pci_devinst *pi) 1850 { 1851 1852 assert(pi->pi_lintr.pin > 0); 1853 1854 pthread_mutex_lock(&pi->pi_lintr.lock); 1855 if (pi->pi_lintr.state == IDLE) { 1856 if (pci_lintr_permitted(pi)) { 1857 pi->pi_lintr.state = ASSERTED; 1858 pci_irq_assert(pi); 1859 } else 1860 pi->pi_lintr.state = PENDING; 1861 } 1862 pthread_mutex_unlock(&pi->pi_lintr.lock); 1863 } 1864 1865 void 1866 pci_lintr_deassert(struct pci_devinst *pi) 1867 { 1868 1869 assert(pi->pi_lintr.pin > 0); 1870 1871 pthread_mutex_lock(&pi->pi_lintr.lock); 1872 if (pi->pi_lintr.state == ASSERTED) { 1873 pi->pi_lintr.state = IDLE; 1874 pci_irq_deassert(pi); 1875 } else if (pi->pi_lintr.state == PENDING) 1876 pi->pi_lintr.state = IDLE; 1877 pthread_mutex_unlock(&pi->pi_lintr.lock); 1878 } 1879 1880 static void 1881 pci_lintr_update(struct pci_devinst *pi) 1882 { 1883 1884 pthread_mutex_lock(&pi->pi_lintr.lock); 1885 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1886 pci_irq_deassert(pi); 1887 pi->pi_lintr.state = PENDING; 1888 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1889 pi->pi_lintr.state = ASSERTED; 1890 pci_irq_assert(pi); 1891 } 1892 pthread_mutex_unlock(&pi->pi_lintr.lock); 1893 #ifndef __FreeBSD__ 1894 if (pi->pi_d->pe_lintrupdate != NULL) { 1895 pi->pi_d->pe_lintrupdate(pi); 1896 } 1897 #endif /* __FreeBSD__ */ 1898 } 1899 1900 int 1901 pci_count_lintr(int bus) 1902 { 1903 int count, slot, pin; 1904 struct slotinfo *slotinfo; 1905 1906 count = 0; 1907 if (pci_businfo[bus] != NULL) { 1908 for (slot = 0; slot < MAXSLOTS; slot++) { 1909 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1910 for (pin = 0; pin < 4; pin++) { 1911 if (slotinfo->si_intpins[pin].ii_count != 0) 1912 count++; 1913 } 1914 } 1915 } 1916 return (count); 1917 } 1918 1919 void 1920 pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) 1921 { 1922 struct businfo *bi; 1923 struct slotinfo *si; 1924 struct intxinfo *ii; 1925 int slot, pin; 1926 1927 if ((bi = pci_businfo[bus]) == NULL) 1928 return; 1929 1930 for (slot = 0; slot < MAXSLOTS; slot++) { 1931 si = &bi->slotinfo[slot]; 1932 for (pin = 0; pin < 4; pin++) { 1933 ii = &si->si_intpins[pin]; 1934 if (ii->ii_count != 0) 1935 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 1936 ii->ii_ioapic_irq, arg); 1937 } 1938 } 1939 } 1940 1941 /* 1942 * Return 1 if the emulated device in 'slot' is a multi-function device. 1943 * Return 0 otherwise. 1944 */ 1945 static int 1946 pci_emul_is_mfdev(int bus, int slot) 1947 { 1948 struct businfo *bi; 1949 struct slotinfo *si; 1950 int f, numfuncs; 1951 1952 numfuncs = 0; 1953 if ((bi = pci_businfo[bus]) != NULL) { 1954 si = &bi->slotinfo[slot]; 1955 for (f = 0; f < MAXFUNCS; f++) { 1956 if (si->si_funcs[f].fi_devi != NULL) { 1957 numfuncs++; 1958 } 1959 } 1960 } 1961 return (numfuncs > 1); 1962 } 1963 1964 /* 1965 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1966 * whether or not is a multi-function being emulated in the pci 'slot'. 1967 */ 1968 static void 1969 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1970 { 1971 int mfdev; 1972 1973 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1974 mfdev = pci_emul_is_mfdev(bus, slot); 1975 switch (bytes) { 1976 case 1: 1977 case 2: 1978 *rv &= ~PCIM_MFDEV; 1979 if (mfdev) { 1980 *rv |= PCIM_MFDEV; 1981 } 1982 break; 1983 case 4: 1984 *rv &= ~(PCIM_MFDEV << 16); 1985 if (mfdev) { 1986 *rv |= (PCIM_MFDEV << 16); 1987 } 1988 break; 1989 } 1990 } 1991 } 1992 1993 /* 1994 * Update device state in response to changes to the PCI command 1995 * register. 1996 */ 1997 void 1998 pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) 1999 { 2000 int i; 2001 uint16_t changed, new; 2002 2003 new = pci_get_cfgdata16(pi, PCIR_COMMAND); 2004 changed = old ^ new; 2005 2006 /* 2007 * If the MMIO or I/O address space decoding has changed then 2008 * register/unregister all BARs that decode that address space. 2009 */ 2010 for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) { 2011 switch (pi->pi_bar[i].type) { 2012 case PCIBAR_NONE: 2013 case PCIBAR_MEMHI64: 2014 break; 2015 case PCIBAR_IO: 2016 /* I/O address space decoding changed? */ 2017 if (changed & PCIM_CMD_PORTEN) { 2018 if (new & PCIM_CMD_PORTEN) 2019 register_bar(pi, i); 2020 else 2021 unregister_bar(pi, i); 2022 } 2023 break; 2024 case PCIBAR_ROM: 2025 /* skip (un-)register of ROM if it disabled */ 2026 if (!romen(pi)) 2027 break; 2028 /* fallthrough */ 2029 case PCIBAR_MEM32: 2030 case PCIBAR_MEM64: 2031 /* MMIO address space decoding changed? */ 2032 if (changed & PCIM_CMD_MEMEN) { 2033 if (new & PCIM_CMD_MEMEN) 2034 register_bar(pi, i); 2035 else 2036 unregister_bar(pi, i); 2037 } 2038 break; 2039 default: 2040 assert(0); 2041 } 2042 } 2043 2044 /* 2045 * If INTx has been unmasked and is pending, assert the 2046 * interrupt. 2047 */ 2048 pci_lintr_update(pi); 2049 } 2050 2051 static void 2052 pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) 2053 { 2054 int rshift; 2055 uint32_t cmd, old, readonly; 2056 2057 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 2058 2059 /* 2060 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. 2061 * 2062 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are 2063 * 'write 1 to clear'. However these bits are not set to '1' by 2064 * any device emulation so it is simpler to treat them as readonly. 2065 */ 2066 rshift = (coff & 0x3) * 8; 2067 readonly = 0xFFFFF880 >> rshift; 2068 2069 old = CFGREAD(pi, coff, bytes); 2070 new &= ~readonly; 2071 new |= (old & readonly); 2072 CFGWRITE(pi, coff, new, bytes); /* update config */ 2073 2074 pci_emul_cmd_changed(pi, cmd); 2075 } 2076 2077 static void 2078 pci_cfgrw(struct vmctx *ctx, int in, int bus, int slot, int func, 2079 int coff, int bytes, uint32_t *eax) 2080 { 2081 struct businfo *bi; 2082 struct slotinfo *si; 2083 struct pci_devinst *pi; 2084 struct pci_devemu *pe; 2085 int idx, needcfg; 2086 uint64_t addr, mask; 2087 uint64_t bar = 0; 2088 2089 if ((bi = pci_businfo[bus]) != NULL) { 2090 si = &bi->slotinfo[slot]; 2091 pi = si->si_funcs[func].fi_devi; 2092 } else 2093 pi = NULL; 2094 2095 /* 2096 * Just return if there is no device at this slot:func or if the 2097 * the guest is doing an un-aligned access. 2098 */ 2099 if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || 2100 (coff & (bytes - 1)) != 0) { 2101 if (in) 2102 *eax = 0xffffffff; 2103 return; 2104 } 2105 2106 /* 2107 * Ignore all writes beyond the standard config space and return all 2108 * ones on reads. 2109 */ 2110 if (coff >= PCI_REGMAX + 1) { 2111 if (in) { 2112 *eax = 0xffffffff; 2113 /* 2114 * Extended capabilities begin at offset 256 in config 2115 * space. Absence of extended capabilities is signaled 2116 * with all 0s in the extended capability header at 2117 * offset 256. 2118 */ 2119 if (coff <= PCI_REGMAX + 4) 2120 *eax = 0x00000000; 2121 } 2122 return; 2123 } 2124 2125 pe = pi->pi_d; 2126 2127 /* 2128 * Config read 2129 */ 2130 if (in) { 2131 /* Let the device emulation override the default handler */ 2132 if (pe->pe_cfgread != NULL) { 2133 needcfg = pe->pe_cfgread(ctx, pi, coff, bytes, eax); 2134 } else { 2135 needcfg = 1; 2136 } 2137 2138 if (needcfg) 2139 *eax = CFGREAD(pi, coff, bytes); 2140 2141 pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); 2142 } else { 2143 /* Let the device emulation override the default handler */ 2144 if (pe->pe_cfgwrite != NULL && 2145 (*pe->pe_cfgwrite)(ctx, pi, coff, bytes, *eax) == 0) 2146 return; 2147 2148 /* 2149 * Special handling for write to BAR and ROM registers 2150 */ 2151 if (is_pcir_bar(coff) || is_pcir_bios(coff)) { 2152 /* 2153 * Ignore writes to BAR registers that are not 2154 * 4-byte aligned. 2155 */ 2156 if (bytes != 4 || (coff & 0x3) != 0) 2157 return; 2158 2159 if (is_pcir_bar(coff)) { 2160 idx = (coff - PCIR_BAR(0)) / 4; 2161 } else if (is_pcir_bios(coff)) { 2162 idx = PCI_ROM_IDX; 2163 } else { 2164 errx(4, "%s: invalid BAR offset %d", __func__, 2165 coff); 2166 } 2167 2168 mask = ~(pi->pi_bar[idx].size - 1); 2169 switch (pi->pi_bar[idx].type) { 2170 case PCIBAR_NONE: 2171 pi->pi_bar[idx].addr = bar = 0; 2172 break; 2173 case PCIBAR_IO: 2174 addr = *eax & mask; 2175 addr &= 0xffff; 2176 bar = addr | pi->pi_bar[idx].lobits; 2177 /* 2178 * Register the new BAR value for interception 2179 */ 2180 if (addr != pi->pi_bar[idx].addr) { 2181 update_bar_address(pi, addr, idx, 2182 PCIBAR_IO); 2183 } 2184 break; 2185 case PCIBAR_MEM32: 2186 addr = bar = *eax & mask; 2187 bar |= pi->pi_bar[idx].lobits; 2188 if (addr != pi->pi_bar[idx].addr) { 2189 update_bar_address(pi, addr, idx, 2190 PCIBAR_MEM32); 2191 } 2192 break; 2193 case PCIBAR_MEM64: 2194 addr = bar = *eax & mask; 2195 bar |= pi->pi_bar[idx].lobits; 2196 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 2197 update_bar_address(pi, addr, idx, 2198 PCIBAR_MEM64); 2199 } 2200 break; 2201 case PCIBAR_MEMHI64: 2202 mask = ~(pi->pi_bar[idx - 1].size - 1); 2203 addr = ((uint64_t)*eax << 32) & mask; 2204 bar = addr >> 32; 2205 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 2206 update_bar_address(pi, addr, idx - 1, 2207 PCIBAR_MEMHI64); 2208 } 2209 break; 2210 case PCIBAR_ROM: 2211 addr = bar = *eax & mask; 2212 if (memen(pi) && romen(pi)) { 2213 unregister_bar(pi, idx); 2214 } 2215 pi->pi_bar[idx].addr = addr; 2216 pi->pi_bar[idx].lobits = *eax & 2217 PCIM_BIOS_ENABLE; 2218 /* romen could have changed it value */ 2219 if (memen(pi) && romen(pi)) { 2220 register_bar(pi, idx); 2221 } 2222 bar |= pi->pi_bar[idx].lobits; 2223 break; 2224 default: 2225 assert(0); 2226 } 2227 pci_set_cfgdata32(pi, coff, bar); 2228 2229 } else if (pci_emul_iscap(pi, coff)) { 2230 pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); 2231 } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { 2232 pci_emul_cmdsts_write(pi, coff, *eax, bytes); 2233 } else { 2234 CFGWRITE(pi, coff, *eax, bytes); 2235 } 2236 } 2237 } 2238 2239 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 2240 2241 static int 2242 pci_emul_cfgaddr(struct vmctx *ctx __unused, int in, 2243 int port __unused, int bytes, uint32_t *eax, void *arg __unused) 2244 { 2245 uint32_t x; 2246 2247 if (bytes != 4) { 2248 if (in) 2249 *eax = (bytes == 2) ? 0xffff : 0xff; 2250 return (0); 2251 } 2252 2253 if (in) { 2254 x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; 2255 if (cfgenable) 2256 x |= CONF1_ENABLE; 2257 *eax = x; 2258 } else { 2259 x = *eax; 2260 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 2261 cfgoff = (x & PCI_REGMAX) & ~0x03; 2262 cfgfunc = (x >> 8) & PCI_FUNCMAX; 2263 cfgslot = (x >> 11) & PCI_SLOTMAX; 2264 cfgbus = (x >> 16) & PCI_BUSMAX; 2265 } 2266 2267 return (0); 2268 } 2269 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 2270 2271 static int 2272 pci_emul_cfgdata(struct vmctx *ctx, int in, int port, 2273 int bytes, uint32_t *eax, void *arg __unused) 2274 { 2275 int coff; 2276 2277 assert(bytes == 1 || bytes == 2 || bytes == 4); 2278 2279 coff = cfgoff + (port - CONF1_DATA_PORT); 2280 if (cfgenable) { 2281 pci_cfgrw(ctx, in, cfgbus, cfgslot, cfgfunc, coff, bytes, 2282 eax); 2283 } else { 2284 /* Ignore accesses to cfgdata if not enabled by cfgaddr */ 2285 if (in) 2286 *eax = 0xffffffff; 2287 } 2288 return (0); 2289 } 2290 2291 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 2292 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 2293 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 2294 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 2295 2296 #define PCI_EMUL_TEST 2297 #ifdef PCI_EMUL_TEST 2298 /* 2299 * Define a dummy test device 2300 */ 2301 #define DIOSZ 8 2302 #define DMEMSZ 4096 2303 struct pci_emul_dsoftc { 2304 uint8_t ioregs[DIOSZ]; 2305 uint8_t memregs[2][DMEMSZ]; 2306 }; 2307 2308 #define PCI_EMUL_MSI_MSGS 4 2309 #define PCI_EMUL_MSIX_MSGS 16 2310 2311 static int 2312 pci_emul_dinit(struct vmctx *ctx __unused, struct pci_devinst *pi, 2313 nvlist_t *nvl __unused) 2314 { 2315 int error; 2316 struct pci_emul_dsoftc *sc; 2317 2318 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 2319 2320 pi->pi_arg = sc; 2321 2322 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 2323 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 2324 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 2325 2326 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 2327 assert(error == 0); 2328 2329 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 2330 assert(error == 0); 2331 2332 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 2333 assert(error == 0); 2334 2335 error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); 2336 assert(error == 0); 2337 2338 return (0); 2339 } 2340 2341 static void 2342 pci_emul_diow(struct vmctx *ctx __unused, 2343 struct pci_devinst *pi, int baridx, uint64_t offset, int size, 2344 uint64_t value) 2345 { 2346 int i; 2347 struct pci_emul_dsoftc *sc = pi->pi_arg; 2348 2349 if (baridx == 0) { 2350 if (offset + size > DIOSZ) { 2351 printf("diow: iow too large, offset %ld size %d\n", 2352 offset, size); 2353 return; 2354 } 2355 2356 if (size == 1) { 2357 sc->ioregs[offset] = value & 0xff; 2358 } else if (size == 2) { 2359 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 2360 } else if (size == 4) { 2361 *(uint32_t *)&sc->ioregs[offset] = value; 2362 } else { 2363 printf("diow: iow unknown size %d\n", size); 2364 } 2365 2366 /* 2367 * Special magic value to generate an interrupt 2368 */ 2369 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 2370 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 2371 2372 if (value == 0xabcdef) { 2373 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 2374 pci_generate_msi(pi, i); 2375 } 2376 } 2377 2378 if (baridx == 1 || baridx == 2) { 2379 if (offset + size > DMEMSZ) { 2380 printf("diow: memw too large, offset %ld size %d\n", 2381 offset, size); 2382 return; 2383 } 2384 2385 i = baridx - 1; /* 'memregs' index */ 2386 2387 if (size == 1) { 2388 sc->memregs[i][offset] = value; 2389 } else if (size == 2) { 2390 *(uint16_t *)&sc->memregs[i][offset] = value; 2391 } else if (size == 4) { 2392 *(uint32_t *)&sc->memregs[i][offset] = value; 2393 } else if (size == 8) { 2394 *(uint64_t *)&sc->memregs[i][offset] = value; 2395 } else { 2396 printf("diow: memw unknown size %d\n", size); 2397 } 2398 2399 /* 2400 * magic interrupt ?? 2401 */ 2402 } 2403 2404 if (baridx > 2 || baridx < 0) { 2405 printf("diow: unknown bar idx %d\n", baridx); 2406 } 2407 } 2408 2409 static uint64_t 2410 pci_emul_dior(struct vmctx *ctx __unused, 2411 struct pci_devinst *pi, int baridx, uint64_t offset, int size) 2412 { 2413 struct pci_emul_dsoftc *sc = pi->pi_arg; 2414 uint32_t value; 2415 int i; 2416 2417 value = 0; 2418 if (baridx == 0) { 2419 if (offset + size > DIOSZ) { 2420 printf("dior: ior too large, offset %ld size %d\n", 2421 offset, size); 2422 return (0); 2423 } 2424 2425 value = 0; 2426 if (size == 1) { 2427 value = sc->ioregs[offset]; 2428 } else if (size == 2) { 2429 value = *(uint16_t *) &sc->ioregs[offset]; 2430 } else if (size == 4) { 2431 value = *(uint32_t *) &sc->ioregs[offset]; 2432 } else { 2433 printf("dior: ior unknown size %d\n", size); 2434 } 2435 } 2436 2437 if (baridx == 1 || baridx == 2) { 2438 if (offset + size > DMEMSZ) { 2439 printf("dior: memr too large, offset %ld size %d\n", 2440 offset, size); 2441 return (0); 2442 } 2443 2444 i = baridx - 1; /* 'memregs' index */ 2445 2446 if (size == 1) { 2447 value = sc->memregs[i][offset]; 2448 } else if (size == 2) { 2449 value = *(uint16_t *) &sc->memregs[i][offset]; 2450 } else if (size == 4) { 2451 value = *(uint32_t *) &sc->memregs[i][offset]; 2452 } else if (size == 8) { 2453 value = *(uint64_t *) &sc->memregs[i][offset]; 2454 } else { 2455 printf("dior: ior unknown size %d\n", size); 2456 } 2457 } 2458 2459 2460 if (baridx > 2 || baridx < 0) { 2461 printf("dior: unknown bar idx %d\n", baridx); 2462 return (0); 2463 } 2464 2465 return (value); 2466 } 2467 2468 static const struct pci_devemu pci_dummy = { 2469 .pe_emu = "dummy", 2470 .pe_init = pci_emul_dinit, 2471 .pe_barwrite = pci_emul_diow, 2472 .pe_barread = pci_emul_dior, 2473 }; 2474 PCI_EMUL_SET(pci_dummy); 2475 2476 #endif /* PCI_EMUL_TEST */ 2477