1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>

#include <ctype.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <stdbool.h>

#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <vmmapi.h>

#include "acpi.h"
#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "inout.h"
#include "ioapic.h"
#include "mem.h"
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"

/*
 * I/O ports and enable bit of the port-based PCI configuration access.
 * NOTE(review): the users of these macros are outside this part of the
 * file; confirm their exact role there.
 */
#define	CONF1_ADDR_PORT	   0x0cf8
#define	CONF1_DATA_PORT	   0x0cfc

#define	CONF1_ENABLE	   0x80000000ul

/* Array dimensions: one more than the largest valid bus/slot/function. */
#define	MAXBUSES	(PCI_BUSMAX + 1)
#define	MAXSLOTS	(PCI_SLOTMAX + 1)
#define	MAXFUNCS	(PCI_FUNCMAX + 1)

/*
 * Per-function bookkeeping, filled in by init_pci() and pci_emul_init().
 */
struct funcinfo {
	nvlist_t *fi_config;		/* "pci.<bus>.<slot>.<func>" node */
	struct pci_devemu *fi_pde;	/* device model selected by "device" */
	struct pci_devinst *fi_devi;	/* instance; set only if pe_init succeeds */
};

/*
 * Per-pin legacy interrupt state.
 * NOTE(review): the code that maintains these fields is outside this part
 * of the file; the names suggest a usage count plus the PIRQ pin and
 * I/O APIC IRQ the pin is routed to -- confirm against pci_lintr_*().
 */
struct intxinfo {
	int	ii_count;
	int	ii_pirq_pin;
	int	ii_ioapic_irq;
};

/* Per-slot state: four interrupt pin records and one entry per function. */
struct slotinfo {
	struct intxinfo si_intpins[4];
	struct funcinfo si_funcs[MAXFUNCS];
};

/*
 * Per-bus state.  init_pci() records the allocator positions before and
 * after initializing the bus's devices as the base/limit of each window.
 */
struct businfo {
	uint16_t iobase, iolimit;		/* I/O window */
	uint32_t membase32, memlimit32;		/* mmio window below 4GB */
	uint64_t membase64, memlimit64;		/* mmio window above 4GB */
	struct slotinfo slotinfo[MAXSLOTS];
};

/* Indexed by bus number; NULL for a bus that has no "pci.<bus>" node. */
static struct businfo *pci_businfo[MAXBUSES];

SET_DECLARE(pci_devemu_set, struct pci_devemu);

/*
 * Allocation cursors used by pci_emul_alloc_bar(): each holds the next
 * candidate address in its window and is advanced by
 * pci_emul_alloc_resource().  pci_emul_memlim64 is the upper bound of
 * the 64-bit window.
 */
static uint64_t pci_emul_iobase;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;

/* Bounds of the I/O port range handed out to BARs. */
#define	PCI_EMUL_IOBASE		0x2000
#define	PCI_EMUL_IOLIMIT	0x10000

/* Guest physical base address of the extended config space window. */
#define	PCI_EMUL_ECFG_BASE	0xE0000000		    /* 3.5GB */
#define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ 114 SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); 115 116 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE 117 118 static struct pci_devemu *pci_emul_finddev(const char *name); 119 static void pci_lintr_route(struct pci_devinst *pi); 120 static void pci_lintr_update(struct pci_devinst *pi); 121 static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, 122 int func, int coff, int bytes, uint32_t *val); 123 124 static __inline void 125 CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) 126 { 127 128 if (bytes == 1) 129 pci_set_cfgdata8(pi, coff, val); 130 else if (bytes == 2) 131 pci_set_cfgdata16(pi, coff, val); 132 else 133 pci_set_cfgdata32(pi, coff, val); 134 } 135 136 static __inline uint32_t 137 CFGREAD(struct pci_devinst *pi, int coff, int bytes) 138 { 139 140 if (bytes == 1) 141 return (pci_get_cfgdata8(pi, coff)); 142 else if (bytes == 2) 143 return (pci_get_cfgdata16(pi, coff)); 144 else 145 return (pci_get_cfgdata32(pi, coff)); 146 } 147 148 /* 149 * I/O access 150 */ 151 152 /* 153 * Slot options are in the form: 154 * 155 * <bus>:<slot>:<func>,<emul>[,<config>] 156 * <slot>[:<func>],<emul>[,<config>] 157 * 158 * slot is 0..31 159 * func is 0..7 160 * emul is a string describing the type of PCI device e.g. virtio-net 161 * config is an optional string, depending on the device, that can be 162 * used for configuration. 163 * Examples are: 164 * 1,virtio-net,tap0 165 * 3:0,dummy 166 */ 167 static void 168 pci_parse_slot_usage(char *aopt) 169 { 170 171 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 172 } 173 174 /* 175 * Helper function to parse a list of comma-separated options where 176 * each option is formatted as "name[=value]". If no value is 177 * provided, the option is treated as a boolean and is given a value 178 * of true. 
179 */ 180 int 181 pci_parse_legacy_config(nvlist_t *nvl, const char *opt) 182 { 183 char *config, *name, *tofree, *value; 184 185 if (opt == NULL) 186 return (0); 187 188 config = tofree = strdup(opt); 189 while ((name = strsep(&config, ",")) != NULL) { 190 value = strchr(name, '='); 191 if (value != NULL) { 192 *value = '\0'; 193 value++; 194 set_config_value_node(nvl, name, value); 195 } else 196 set_config_bool_node(nvl, name, true); 197 } 198 free(tofree); 199 return (0); 200 } 201 202 /* 203 * PCI device configuration is stored in MIBs that encode the device's 204 * location: 205 * 206 * pci.<bus>.<slot>.<func> 207 * 208 * Where "bus", "slot", and "func" are all decimal values without 209 * leading zeroes. Each valid device must have a "device" node which 210 * identifies the driver model of the device. 211 * 212 * Device backends can provide a parser for the "config" string. If 213 * a custom parser is not provided, pci_parse_legacy_config() is used 214 * to parse the string. 215 */ 216 int 217 pci_parse_slot(char *opt) 218 { 219 char node_name[sizeof("pci.XXX.XX.X")]; 220 struct pci_devemu *pde; 221 char *emul, *config, *str, *cp; 222 int error, bnum, snum, fnum; 223 nvlist_t *nvl; 224 225 error = -1; 226 str = strdup(opt); 227 228 emul = config = NULL; 229 if ((cp = strchr(str, ',')) != NULL) { 230 *cp = '\0'; 231 emul = cp + 1; 232 if ((cp = strchr(emul, ',')) != NULL) { 233 *cp = '\0'; 234 config = cp + 1; 235 } 236 } else { 237 pci_parse_slot_usage(opt); 238 goto done; 239 } 240 241 /* <bus>:<slot>:<func> */ 242 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 243 bnum = 0; 244 /* <slot>:<func> */ 245 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 246 fnum = 0; 247 /* <slot> */ 248 if (sscanf(str, "%d", &snum) != 1) { 249 snum = -1; 250 } 251 } 252 } 253 254 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 255 fnum < 0 || fnum >= MAXFUNCS) { 256 pci_parse_slot_usage(opt); 257 goto done; 258 } 259 260 pde = 
pci_emul_finddev(emul); 261 if (pde == NULL) { 262 EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum, 263 fnum, emul); 264 goto done; 265 } 266 267 snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum, 268 fnum); 269 nvl = find_config_node(node_name); 270 if (nvl != NULL) { 271 EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum, 272 fnum); 273 goto done; 274 } 275 nvl = create_config_node(node_name); 276 if (pde->pe_alias != NULL) 277 set_config_value_node(nvl, "device", pde->pe_alias); 278 else 279 set_config_value_node(nvl, "device", pde->pe_emu); 280 281 if (pde->pe_legacy_config != NULL) 282 error = pde->pe_legacy_config(nvl, config); 283 else 284 error = pci_parse_legacy_config(nvl, config); 285 done: 286 free(str); 287 return (error); 288 } 289 290 void 291 pci_print_supported_devices() 292 { 293 struct pci_devemu **pdpp, *pdp; 294 295 SET_FOREACH(pdpp, pci_devemu_set) { 296 pdp = *pdpp; 297 printf("%s\n", pdp->pe_emu); 298 } 299 } 300 301 static int 302 pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 303 { 304 305 if (offset < pi->pi_msix.pba_offset) 306 return (0); 307 308 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 309 return (0); 310 } 311 312 return (1); 313 } 314 315 int 316 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 317 uint64_t value) 318 { 319 int msix_entry_offset; 320 int tab_index; 321 char *dest; 322 323 /* support only 4 or 8 byte writes */ 324 if (size != 4 && size != 8) 325 return (-1); 326 327 /* 328 * Return if table index is beyond what device supports 329 */ 330 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 331 if (tab_index >= pi->pi_msix.table_count) 332 return (-1); 333 334 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 335 336 /* support only aligned writes */ 337 if ((msix_entry_offset % size) != 0) 338 return (-1); 339 340 dest = (char *)(pi->pi_msix.table + tab_index); 341 dest += msix_entry_offset; 342 343 if (size == 4) 344 
*((uint32_t *)dest) = value; 345 else 346 *((uint64_t *)dest) = value; 347 348 return (0); 349 } 350 351 uint64_t 352 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 353 { 354 char *dest; 355 int msix_entry_offset; 356 int tab_index; 357 uint64_t retval = ~0; 358 359 /* 360 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 361 * table but we also allow 1 byte access to accommodate reads from 362 * ddb. 363 */ 364 if (size != 1 && size != 4 && size != 8) 365 return (retval); 366 367 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 368 369 /* support only aligned reads */ 370 if ((msix_entry_offset % size) != 0) { 371 return (retval); 372 } 373 374 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 375 376 if (tab_index < pi->pi_msix.table_count) { 377 /* valid MSI-X Table access */ 378 dest = (char *)(pi->pi_msix.table + tab_index); 379 dest += msix_entry_offset; 380 381 if (size == 1) 382 retval = *((uint8_t *)dest); 383 else if (size == 4) 384 retval = *((uint32_t *)dest); 385 else 386 retval = *((uint64_t *)dest); 387 } else if (pci_valid_pba_offset(pi, offset)) { 388 /* return 0 for PBA access */ 389 retval = 0; 390 } 391 392 return (retval); 393 } 394 395 int 396 pci_msix_table_bar(struct pci_devinst *pi) 397 { 398 399 if (pi->pi_msix.table != NULL) 400 return (pi->pi_msix.table_bar); 401 else 402 return (-1); 403 } 404 405 int 406 pci_msix_pba_bar(struct pci_devinst *pi) 407 { 408 409 if (pi->pi_msix.table != NULL) 410 return (pi->pi_msix.pba_bar); 411 else 412 return (-1); 413 } 414 415 static int 416 pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 417 uint32_t *eax, void *arg) 418 { 419 struct pci_devinst *pdi = arg; 420 struct pci_devemu *pe = pdi->pi_d; 421 uint64_t offset; 422 int i; 423 424 for (i = 0; i <= PCI_BARMAX; i++) { 425 if (pdi->pi_bar[i].type == PCIBAR_IO && 426 port >= pdi->pi_bar[i].addr && 427 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 428 offset = port - 
pdi->pi_bar[i].addr; 429 if (in) 430 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 431 offset, bytes); 432 else 433 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 434 bytes, *eax); 435 return (0); 436 } 437 } 438 return (-1); 439 } 440 441 static int 442 pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 443 int size, uint64_t *val, void *arg1, long arg2) 444 { 445 struct pci_devinst *pdi = arg1; 446 struct pci_devemu *pe = pdi->pi_d; 447 uint64_t offset; 448 int bidx = (int) arg2; 449 450 assert(bidx <= PCI_BARMAX); 451 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 452 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 453 assert(addr >= pdi->pi_bar[bidx].addr && 454 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 455 456 offset = addr - pdi->pi_bar[bidx].addr; 457 458 if (dir == MEM_F_WRITE) { 459 if (size == 8) { 460 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 461 4, *val & 0xffffffff); 462 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 463 4, *val >> 32); 464 } else { 465 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 466 size, *val); 467 } 468 } else { 469 if (size == 8) { 470 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 471 offset, 4); 472 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 473 offset + 4, 4) << 32; 474 } else { 475 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 476 offset, size); 477 } 478 } 479 480 return (0); 481 } 482 483 484 static int 485 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 486 uint64_t *addr) 487 { 488 uint64_t base; 489 490 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 491 492 base = roundup2(*baseptr, size); 493 494 if (base + size <= limit) { 495 *addr = base; 496 *baseptr = base + size; 497 return (0); 498 } else 499 return (-1); 500 } 501 502 /* 503 * Register (or unregister) the MMIO or I/O region associated with the BAR 504 * register 'idx' of an emulated pci device. 
505 */ 506 static void 507 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 508 { 509 struct pci_devemu *pe; 510 int error; 511 struct inout_port iop; 512 struct mem_range mr; 513 514 pe = pi->pi_d; 515 switch (pi->pi_bar[idx].type) { 516 case PCIBAR_IO: 517 bzero(&iop, sizeof(struct inout_port)); 518 iop.name = pi->pi_name; 519 iop.port = pi->pi_bar[idx].addr; 520 iop.size = pi->pi_bar[idx].size; 521 if (registration) { 522 iop.flags = IOPORT_F_INOUT; 523 iop.handler = pci_emul_io_handler; 524 iop.arg = pi; 525 error = register_inout(&iop); 526 } else 527 error = unregister_inout(&iop); 528 if (pe->pe_baraddr != NULL) 529 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 530 pi->pi_bar[idx].addr); 531 break; 532 case PCIBAR_MEM32: 533 case PCIBAR_MEM64: 534 bzero(&mr, sizeof(struct mem_range)); 535 mr.name = pi->pi_name; 536 mr.base = pi->pi_bar[idx].addr; 537 mr.size = pi->pi_bar[idx].size; 538 if (registration) { 539 mr.flags = MEM_F_RW; 540 mr.handler = pci_emul_mem_handler; 541 mr.arg1 = pi; 542 mr.arg2 = idx; 543 error = register_mem(&mr); 544 } else 545 error = unregister_mem(&mr); 546 if (pe->pe_baraddr != NULL) 547 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 548 pi->pi_bar[idx].addr); 549 break; 550 default: 551 error = EINVAL; 552 break; 553 } 554 assert(error == 0); 555 } 556 557 static void 558 unregister_bar(struct pci_devinst *pi, int idx) 559 { 560 561 modify_bar_registration(pi, idx, 0); 562 } 563 564 static void 565 register_bar(struct pci_devinst *pi, int idx) 566 { 567 568 modify_bar_registration(pi, idx, 1); 569 } 570 571 /* Are we decoding i/o port accesses for the emulated pci device? */ 572 static int 573 porten(struct pci_devinst *pi) 574 { 575 uint16_t cmd; 576 577 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 578 579 return (cmd & PCIM_CMD_PORTEN); 580 } 581 582 /* Are we decoding memory accesses for the emulated pci device? 
*/ 583 static int 584 memen(struct pci_devinst *pi) 585 { 586 uint16_t cmd; 587 588 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 589 590 return (cmd & PCIM_CMD_MEMEN); 591 } 592 593 /* 594 * Update the MMIO or I/O address that is decoded by the BAR register. 595 * 596 * If the pci device has enabled the address space decoding then intercept 597 * the address range decoded by the BAR register. 598 */ 599 static void 600 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 601 { 602 int decode; 603 604 if (pi->pi_bar[idx].type == PCIBAR_IO) 605 decode = porten(pi); 606 else 607 decode = memen(pi); 608 609 if (decode) 610 unregister_bar(pi, idx); 611 612 switch (type) { 613 case PCIBAR_IO: 614 case PCIBAR_MEM32: 615 pi->pi_bar[idx].addr = addr; 616 break; 617 case PCIBAR_MEM64: 618 pi->pi_bar[idx].addr &= ~0xffffffffUL; 619 pi->pi_bar[idx].addr |= addr; 620 break; 621 case PCIBAR_MEMHI64: 622 pi->pi_bar[idx].addr &= 0xffffffff; 623 pi->pi_bar[idx].addr |= addr; 624 break; 625 default: 626 assert(0); 627 } 628 629 if (decode) 630 register_bar(pi, idx); 631 } 632 633 int 634 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 635 uint64_t size) 636 { 637 int error; 638 uint64_t *baseptr, limit, addr, mask, lobits, bar; 639 uint16_t cmd, enbit; 640 641 assert(idx >= 0 && idx <= PCI_BARMAX); 642 643 if ((size & (size - 1)) != 0) 644 size = 1UL << flsl(size); /* round up to a power of 2 */ 645 646 /* Enforce minimum BAR sizes required by the PCI standard */ 647 if (type == PCIBAR_IO) { 648 if (size < 4) 649 size = 4; 650 } else { 651 if (size < 16) 652 size = 16; 653 } 654 655 switch (type) { 656 case PCIBAR_NONE: 657 baseptr = NULL; 658 addr = mask = lobits = enbit = 0; 659 break; 660 case PCIBAR_IO: 661 baseptr = &pci_emul_iobase; 662 limit = PCI_EMUL_IOLIMIT; 663 mask = PCIM_BAR_IO_BASE; 664 lobits = PCIM_BAR_IO_SPACE; 665 enbit = PCIM_CMD_PORTEN; 666 break; 667 case PCIBAR_MEM64: 668 /* 669 * XXX 670 * Some drivers do not 
work well if the 64-bit BAR is allocated 671 * above 4GB. Allow for this by allocating small requests under 672 * 4GB unless then allocation size is larger than some arbitrary 673 * number (128MB currently). 674 */ 675 if (size > 128 * 1024 * 1024) { 676 baseptr = &pci_emul_membase64; 677 limit = pci_emul_memlim64; 678 mask = PCIM_BAR_MEM_BASE; 679 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 680 PCIM_BAR_MEM_PREFETCH; 681 } else { 682 baseptr = &pci_emul_membase32; 683 limit = PCI_EMUL_MEMLIMIT32; 684 mask = PCIM_BAR_MEM_BASE; 685 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 686 } 687 enbit = PCIM_CMD_MEMEN; 688 break; 689 case PCIBAR_MEM32: 690 baseptr = &pci_emul_membase32; 691 limit = PCI_EMUL_MEMLIMIT32; 692 mask = PCIM_BAR_MEM_BASE; 693 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 694 enbit = PCIM_CMD_MEMEN; 695 break; 696 default: 697 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 698 assert(0); 699 } 700 701 if (baseptr != NULL) { 702 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 703 if (error != 0) 704 return (error); 705 } 706 707 pdi->pi_bar[idx].type = type; 708 pdi->pi_bar[idx].addr = addr; 709 pdi->pi_bar[idx].size = size; 710 /* 711 * passthru devices are using same lobits as physical device they set 712 * this property 713 */ 714 if (pdi->pi_bar[idx].lobits != 0) { 715 lobits = pdi->pi_bar[idx].lobits; 716 } else { 717 pdi->pi_bar[idx].lobits = lobits; 718 } 719 720 /* Initialize the BAR register in config space */ 721 bar = (addr & mask) | lobits; 722 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 723 724 if (type == PCIBAR_MEM64) { 725 assert(idx + 1 <= PCI_BARMAX); 726 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 727 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 728 } 729 730 cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); 731 if ((cmd & enbit) != enbit) 732 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); 733 register_bar(pdi, idx); 734 735 return (0); 736 } 737 738 #define CAP_START_OFFSET 0x40 739 static 
int 740 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 741 { 742 int i, capoff, reallen; 743 uint16_t sts; 744 745 assert(caplen > 0); 746 747 reallen = roundup2(caplen, 4); /* dword aligned */ 748 749 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 750 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 751 capoff = CAP_START_OFFSET; 752 else 753 capoff = pi->pi_capend + 1; 754 755 /* Check if we have enough space */ 756 if (capoff + reallen > PCI_REGMAX + 1) 757 return (-1); 758 759 /* Set the previous capability pointer */ 760 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 761 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 762 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 763 } else 764 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 765 766 /* Copy the capability */ 767 for (i = 0; i < caplen; i++) 768 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 769 770 /* Set the next capability pointer */ 771 pci_set_cfgdata8(pi, capoff + 1, 0); 772 773 pi->pi_prevcap = capoff; 774 pi->pi_capend = capoff + reallen - 1; 775 return (0); 776 } 777 778 static struct pci_devemu * 779 pci_emul_finddev(const char *name) 780 { 781 struct pci_devemu **pdpp, *pdp; 782 783 SET_FOREACH(pdpp, pci_devemu_set) { 784 pdp = *pdpp; 785 if (!strcmp(pdp->pe_emu, name)) { 786 return (pdp); 787 } 788 } 789 790 return (NULL); 791 } 792 793 static int 794 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 795 int func, struct funcinfo *fi) 796 { 797 struct pci_devinst *pdi; 798 int err; 799 800 pdi = calloc(1, sizeof(struct pci_devinst)); 801 802 pdi->pi_vmctx = ctx; 803 pdi->pi_bus = bus; 804 pdi->pi_slot = slot; 805 pdi->pi_func = func; 806 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 807 pdi->pi_lintr.pin = 0; 808 pdi->pi_lintr.state = IDLE; 809 pdi->pi_lintr.pirq_pin = 0; 810 pdi->pi_lintr.ioapic_irq = 0; 811 pdi->pi_d = pde; 812 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 813 814 /* Disable legacy interrupts */ 815 
pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 816 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 817 818 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); 819 820 err = (*pde->pe_init)(ctx, pdi, fi->fi_config); 821 if (err == 0) 822 fi->fi_devi = pdi; 823 else 824 free(pdi); 825 826 return (err); 827 } 828 829 void 830 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 831 { 832 int mmc; 833 834 /* Number of msi messages must be a power of 2 between 1 and 32 */ 835 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 836 mmc = ffs(msgnum) - 1; 837 838 bzero(msicap, sizeof(struct msicap)); 839 msicap->capid = PCIY_MSI; 840 msicap->nextptr = nextptr; 841 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 842 } 843 844 int 845 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 846 { 847 struct msicap msicap; 848 849 pci_populate_msicap(&msicap, msgnum, 0); 850 851 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 852 } 853 854 static void 855 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 856 uint32_t msix_tab_size) 857 { 858 859 assert(msix_tab_size % 4096 == 0); 860 861 bzero(msixcap, sizeof(struct msixcap)); 862 msixcap->capid = PCIY_MSIX; 863 864 /* 865 * Message Control Register, all fields set to 866 * zero except for the Table Size. 
867 * Note: Table size N is encoded as N-1 868 */ 869 msixcap->msgctrl = msgnum - 1; 870 871 /* 872 * MSI-X BAR setup: 873 * - MSI-X table start at offset 0 874 * - PBA table starts at a 4K aligned offset after the MSI-X table 875 */ 876 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 877 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 878 } 879 880 static void 881 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 882 { 883 int i, table_size; 884 885 assert(table_entries > 0); 886 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 887 888 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 889 pi->pi_msix.table = calloc(1, table_size); 890 891 /* set mask bit of vector control register */ 892 for (i = 0; i < table_entries; i++) 893 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 894 } 895 896 int 897 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 898 { 899 uint32_t tab_size; 900 struct msixcap msixcap; 901 902 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 903 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 904 905 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 906 907 /* Align table size to nearest 4K */ 908 tab_size = roundup2(tab_size, 4096); 909 910 pi->pi_msix.table_bar = barnum; 911 pi->pi_msix.pba_bar = barnum; 912 pi->pi_msix.table_offset = 0; 913 pi->pi_msix.table_count = msgnum; 914 pi->pi_msix.pba_offset = tab_size; 915 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 916 917 pci_msix_table_init(pi, msgnum); 918 919 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 920 921 /* allocate memory for MSI-X Table and PBA */ 922 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 923 tab_size + pi->pi_msix.pba_size); 924 925 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 926 sizeof(msixcap))); 927 } 928 929 static void 930 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 931 int bytes, uint32_t val) 932 { 933 uint16_t msgctrl, rwmask; 934 int off; 935 936 off = offset - 
capoff; 937 /* Message Control Register */ 938 if (off == 2 && bytes == 2) { 939 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 940 msgctrl = pci_get_cfgdata16(pi, offset); 941 msgctrl &= ~rwmask; 942 msgctrl |= val & rwmask; 943 val = msgctrl; 944 945 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 946 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 947 pci_lintr_update(pi); 948 } 949 950 CFGWRITE(pi, offset, val, bytes); 951 } 952 953 static void 954 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 955 int bytes, uint32_t val) 956 { 957 uint16_t msgctrl, rwmask, msgdata, mme; 958 uint32_t addrlo; 959 960 /* 961 * If guest is writing to the message control register make sure 962 * we do not overwrite read-only fields. 963 */ 964 if ((offset - capoff) == 2 && bytes == 2) { 965 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 966 msgctrl = pci_get_cfgdata16(pi, offset); 967 msgctrl &= ~rwmask; 968 msgctrl |= val & rwmask; 969 val = msgctrl; 970 } 971 CFGWRITE(pi, offset, val, bytes); 972 973 msgctrl = pci_get_cfgdata16(pi, capoff + 2); 974 addrlo = pci_get_cfgdata32(pi, capoff + 4); 975 if (msgctrl & PCIM_MSICTRL_64BIT) 976 msgdata = pci_get_cfgdata16(pi, capoff + 12); 977 else 978 msgdata = pci_get_cfgdata16(pi, capoff + 8); 979 980 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 981 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 
1 : 0; 982 if (pi->pi_msi.enabled) { 983 pi->pi_msi.addr = addrlo; 984 pi->pi_msi.msg_data = msgdata; 985 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 986 } else { 987 pi->pi_msi.maxmsgnum = 0; 988 } 989 pci_lintr_update(pi); 990 } 991 992 void 993 pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 994 int bytes, uint32_t val) 995 { 996 997 /* XXX don't write to the readonly parts */ 998 CFGWRITE(pi, offset, val, bytes); 999 } 1000 1001 #define PCIECAP_VERSION 0x2 1002 int 1003 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 1004 { 1005 int err; 1006 struct pciecap pciecap; 1007 1008 bzero(&pciecap, sizeof(pciecap)); 1009 1010 /* 1011 * Use the integrated endpoint type for endpoints on a root complex bus. 1012 * 1013 * NB: bhyve currently only supports a single PCI bus that is the root 1014 * complex bus, so all endpoints are integrated. 1015 */ 1016 if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0)) 1017 type = PCIEM_TYPE_ROOT_INT_EP; 1018 1019 pciecap.capid = PCIY_EXPRESS; 1020 pciecap.pcie_capabilities = PCIECAP_VERSION | type; 1021 if (type != PCIEM_TYPE_ROOT_INT_EP) { 1022 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 1023 pciecap.link_status = 0x11; /* gen1, x1 */ 1024 } 1025 1026 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 1027 return (err); 1028 } 1029 1030 /* 1031 * This function assumes that 'coff' is in the capabilities region of the 1032 * config space. A capoff parameter of zero will force a search for the 1033 * offset and type. 
1034 */ 1035 void 1036 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, 1037 uint8_t capoff, int capid) 1038 { 1039 uint8_t nextoff; 1040 1041 /* Do not allow un-aligned writes */ 1042 if ((offset & (bytes - 1)) != 0) 1043 return; 1044 1045 if (capoff == 0) { 1046 /* Find the capability that we want to update */ 1047 capoff = CAP_START_OFFSET; 1048 while (1) { 1049 nextoff = pci_get_cfgdata8(pi, capoff + 1); 1050 if (nextoff == 0) 1051 break; 1052 if (offset >= capoff && offset < nextoff) 1053 break; 1054 1055 capoff = nextoff; 1056 } 1057 assert(offset >= capoff); 1058 capid = pci_get_cfgdata8(pi, capoff); 1059 } 1060 1061 /* 1062 * Capability ID and Next Capability Pointer are readonly. 1063 * However, some o/s's do 4-byte writes that include these. 1064 * For this case, trim the write back to 2 bytes and adjust 1065 * the data. 1066 */ 1067 if (offset == capoff || offset == capoff + 1) { 1068 if (offset == capoff && bytes == 4) { 1069 bytes = 2; 1070 offset += 2; 1071 val >>= 16; 1072 } else 1073 return; 1074 } 1075 1076 switch (capid) { 1077 case PCIY_MSI: 1078 msicap_cfgwrite(pi, capoff, offset, bytes, val); 1079 break; 1080 case PCIY_MSIX: 1081 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 1082 break; 1083 case PCIY_EXPRESS: 1084 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 1085 break; 1086 default: 1087 break; 1088 } 1089 } 1090 1091 static int 1092 pci_emul_iscap(struct pci_devinst *pi, int offset) 1093 { 1094 uint16_t sts; 1095 1096 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1097 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1098 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1099 return (1); 1100 } 1101 return (0); 1102 } 1103 1104 static int 1105 pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1106 int size, uint64_t *val, void *arg1, long arg2) 1107 { 1108 /* 1109 * Ignore writes; return 0xff's for reads. 
The mem read code 1110 * will take care of truncating to the correct size. 1111 */ 1112 if (dir == MEM_F_READ) { 1113 *val = 0xffffffffffffffff; 1114 } 1115 1116 return (0); 1117 } 1118 1119 static int 1120 pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1121 int bytes, uint64_t *val, void *arg1, long arg2) 1122 { 1123 int bus, slot, func, coff, in; 1124 1125 coff = addr & 0xfff; 1126 func = (addr >> 12) & 0x7; 1127 slot = (addr >> 15) & 0x1f; 1128 bus = (addr >> 20) & 0xff; 1129 in = (dir == MEM_F_READ); 1130 if (in) 1131 *val = ~0UL; 1132 pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); 1133 return (0); 1134 } 1135 1136 uint64_t 1137 pci_ecfg_base(void) 1138 { 1139 1140 return (PCI_EMUL_ECFG_BASE); 1141 } 1142 1143 #define BUSIO_ROUNDUP 32 1144 #define BUSMEM_ROUNDUP (1024 * 1024) 1145 1146 int 1147 init_pci(struct vmctx *ctx) 1148 { 1149 char node_name[sizeof("pci.XXX.XX.X")]; 1150 struct mem_range mr; 1151 struct pci_devemu *pde; 1152 struct businfo *bi; 1153 struct slotinfo *si; 1154 struct funcinfo *fi; 1155 nvlist_t *nvl; 1156 const char *emul; 1157 size_t lowmem; 1158 uint64_t cpu_maxphysaddr, pci_emul_memresv64; 1159 u_int regs[4]; 1160 int bus, slot, func, error; 1161 1162 pci_emul_iobase = PCI_EMUL_IOBASE; 1163 pci_emul_membase32 = vm_get_lowmem_limit(ctx); 1164 1165 do_cpuid(0x80000008, regs); 1166 cpu_maxphysaddr = 1ULL << (regs[0] & 0xff); 1167 if (cpu_maxphysaddr > VM_MAXUSER_ADDRESS_LA48) 1168 cpu_maxphysaddr = VM_MAXUSER_ADDRESS_LA48; 1169 pci_emul_memresv64 = cpu_maxphysaddr / 4; 1170 /* 1171 * Max power of 2 that is less then 1172 * cpu_maxphysaddr - pci_emul_memresv64. 
1173 */ 1174 pci_emul_membase64 = 1ULL << (flsl(cpu_maxphysaddr - 1175 pci_emul_memresv64) - 1); 1176 pci_emul_memlim64 = cpu_maxphysaddr; 1177 1178 for (bus = 0; bus < MAXBUSES; bus++) { 1179 snprintf(node_name, sizeof(node_name), "pci.%d", bus); 1180 nvl = find_config_node(node_name); 1181 if (nvl == NULL) 1182 continue; 1183 pci_businfo[bus] = calloc(1, sizeof(struct businfo)); 1184 bi = pci_businfo[bus]; 1185 1186 /* 1187 * Keep track of the i/o and memory resources allocated to 1188 * this bus. 1189 */ 1190 bi->iobase = pci_emul_iobase; 1191 bi->membase32 = pci_emul_membase32; 1192 bi->membase64 = pci_emul_membase64; 1193 1194 for (slot = 0; slot < MAXSLOTS; slot++) { 1195 si = &bi->slotinfo[slot]; 1196 for (func = 0; func < MAXFUNCS; func++) { 1197 fi = &si->si_funcs[func]; 1198 snprintf(node_name, sizeof(node_name), 1199 "pci.%d.%d.%d", bus, slot, func); 1200 nvl = find_config_node(node_name); 1201 if (nvl == NULL) 1202 continue; 1203 1204 fi->fi_config = nvl; 1205 emul = get_config_value_node(nvl, "device"); 1206 if (emul == NULL) { 1207 EPRINTLN("pci slot %d:%d:%d: missing " 1208 "\"device\" value", bus, slot, func); 1209 return (EINVAL); 1210 } 1211 pde = pci_emul_finddev(emul); 1212 if (pde == NULL) { 1213 EPRINTLN("pci slot %d:%d:%d: unknown " 1214 "device \"%s\"", bus, slot, func, 1215 emul); 1216 return (EINVAL); 1217 } 1218 if (pde->pe_alias != NULL) { 1219 EPRINTLN("pci slot %d:%d:%d: legacy " 1220 "device \"%s\", use \"%s\" instead", 1221 bus, slot, func, emul, 1222 pde->pe_alias); 1223 return (EINVAL); 1224 } 1225 fi->fi_pde = pde; 1226 error = pci_emul_init(ctx, pde, bus, slot, 1227 func, fi); 1228 if (error) 1229 return (error); 1230 } 1231 } 1232 1233 /* 1234 * Add some slop to the I/O and memory resources decoded by 1235 * this bus to give a guest some flexibility if it wants to 1236 * reprogram the BARs. 
1237 */ 1238 pci_emul_iobase += BUSIO_ROUNDUP; 1239 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1240 bi->iolimit = pci_emul_iobase; 1241 1242 pci_emul_membase32 += BUSMEM_ROUNDUP; 1243 pci_emul_membase32 = roundup2(pci_emul_membase32, 1244 BUSMEM_ROUNDUP); 1245 bi->memlimit32 = pci_emul_membase32; 1246 1247 pci_emul_membase64 += BUSMEM_ROUNDUP; 1248 pci_emul_membase64 = roundup2(pci_emul_membase64, 1249 BUSMEM_ROUNDUP); 1250 bi->memlimit64 = pci_emul_membase64; 1251 } 1252 1253 /* 1254 * PCI backends are initialized before routing INTx interrupts 1255 * so that LPC devices are able to reserve ISA IRQs before 1256 * routing PIRQ pins. 1257 */ 1258 for (bus = 0; bus < MAXBUSES; bus++) { 1259 if ((bi = pci_businfo[bus]) == NULL) 1260 continue; 1261 1262 for (slot = 0; slot < MAXSLOTS; slot++) { 1263 si = &bi->slotinfo[slot]; 1264 for (func = 0; func < MAXFUNCS; func++) { 1265 fi = &si->si_funcs[func]; 1266 if (fi->fi_devi == NULL) 1267 continue; 1268 pci_lintr_route(fi->fi_devi); 1269 } 1270 } 1271 } 1272 lpc_pirq_routed(); 1273 1274 /* 1275 * The guest physical memory map looks like the following: 1276 * [0, lowmem) guest system memory 1277 * [lowmem, lowmem_limit) memory hole (may be absent) 1278 * [lowmem_limit, 0xE0000000) PCI hole (32-bit BAR allocation) 1279 * [0xE0000000, 0xF0000000) PCI extended config window 1280 * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware 1281 * [4GB, 4GB + highmem) 1282 */ 1283 1284 /* 1285 * Accesses to memory addresses that are not allocated to system 1286 * memory or PCI devices return 0xff's. 
1287 */ 1288 lowmem = vm_get_lowmem_size(ctx); 1289 bzero(&mr, sizeof(struct mem_range)); 1290 mr.name = "PCI hole"; 1291 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1292 mr.base = lowmem; 1293 mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1294 mr.handler = pci_emul_fallback_handler; 1295 error = register_mem_fallback(&mr); 1296 assert(error == 0); 1297 1298 /* PCI extended config space */ 1299 bzero(&mr, sizeof(struct mem_range)); 1300 mr.name = "PCI ECFG"; 1301 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1302 mr.base = PCI_EMUL_ECFG_BASE; 1303 mr.size = PCI_EMUL_ECFG_SIZE; 1304 mr.handler = pci_emul_ecfg_handler; 1305 error = register_mem(&mr); 1306 assert(error == 0); 1307 1308 return (0); 1309 } 1310 1311 static void 1312 pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1313 void *arg) 1314 { 1315 1316 dsdt_line(" Package ()"); 1317 dsdt_line(" {"); 1318 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1319 dsdt_line(" 0x%02X,", pin - 1); 1320 dsdt_line(" Zero,"); 1321 dsdt_line(" 0x%X", ioapic_irq); 1322 dsdt_line(" },"); 1323 } 1324 1325 static void 1326 pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1327 void *arg) 1328 { 1329 char *name; 1330 1331 name = lpc_pirq_name(pirq_pin); 1332 if (name == NULL) 1333 return; 1334 dsdt_line(" Package ()"); 1335 dsdt_line(" {"); 1336 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1337 dsdt_line(" 0x%02X,", pin - 1); 1338 dsdt_line(" %s,", name); 1339 dsdt_line(" 0x00"); 1340 dsdt_line(" },"); 1341 free(name); 1342 } 1343 1344 /* 1345 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1346 * corresponding to each PCI bus. 1347 */ 1348 static void 1349 pci_bus_write_dsdt(int bus) 1350 { 1351 struct businfo *bi; 1352 struct slotinfo *si; 1353 struct pci_devinst *pi; 1354 int count, func, slot; 1355 1356 /* 1357 * If there are no devices on this 'bus' then just return. 
1358 */ 1359 if ((bi = pci_businfo[bus]) == NULL) { 1360 /* 1361 * Bus 0 is special because it decodes the I/O ports used 1362 * for PCI config space access even if there are no devices 1363 * on it. 1364 */ 1365 if (bus != 0) 1366 return; 1367 } 1368 1369 dsdt_line(" Device (PC%02X)", bus); 1370 dsdt_line(" {"); 1371 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1372 1373 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1374 dsdt_line(" {"); 1375 dsdt_line(" Return (0x%08X)", bus); 1376 dsdt_line(" }"); 1377 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1378 dsdt_line(" {"); 1379 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1380 "MaxFixed, PosDecode,"); 1381 dsdt_line(" 0x0000, // Granularity"); 1382 dsdt_line(" 0x%04X, // Range Minimum", bus); 1383 dsdt_line(" 0x%04X, // Range Maximum", bus); 1384 dsdt_line(" 0x0000, // Translation Offset"); 1385 dsdt_line(" 0x0001, // Length"); 1386 dsdt_line(" ,, )"); 1387 1388 if (bus == 0) { 1389 dsdt_indent(3); 1390 dsdt_fixed_ioport(0xCF8, 8); 1391 dsdt_unindent(3); 1392 1393 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1394 "PosDecode, EntireRange,"); 1395 dsdt_line(" 0x0000, // Granularity"); 1396 dsdt_line(" 0x0000, // Range Minimum"); 1397 dsdt_line(" 0x0CF7, // Range Maximum"); 1398 dsdt_line(" 0x0000, // Translation Offset"); 1399 dsdt_line(" 0x0CF8, // Length"); 1400 dsdt_line(" ,, , TypeStatic)"); 1401 1402 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1403 "PosDecode, EntireRange,"); 1404 dsdt_line(" 0x0000, // Granularity"); 1405 dsdt_line(" 0x0D00, // Range Minimum"); 1406 dsdt_line(" 0x%04X, // Range Maximum", 1407 PCI_EMUL_IOBASE - 1); 1408 dsdt_line(" 0x0000, // Translation Offset"); 1409 dsdt_line(" 0x%04X, // Length", 1410 PCI_EMUL_IOBASE - 0x0D00); 1411 dsdt_line(" ,, , TypeStatic)"); 1412 1413 if (bi == NULL) { 1414 dsdt_line(" })"); 1415 goto done; 1416 } 1417 } 1418 assert(bi != NULL); 1419 1420 /* i/o window */ 1421 dsdt_line(" WordIO (ResourceProducer, 
MinFixed, MaxFixed, " 1422 "PosDecode, EntireRange,"); 1423 dsdt_line(" 0x0000, // Granularity"); 1424 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1425 dsdt_line(" 0x%04X, // Range Maximum", 1426 bi->iolimit - 1); 1427 dsdt_line(" 0x0000, // Translation Offset"); 1428 dsdt_line(" 0x%04X, // Length", 1429 bi->iolimit - bi->iobase); 1430 dsdt_line(" ,, , TypeStatic)"); 1431 1432 /* mmio window (32-bit) */ 1433 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1434 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1435 dsdt_line(" 0x00000000, // Granularity"); 1436 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1437 dsdt_line(" 0x%08X, // Range Maximum\n", 1438 bi->memlimit32 - 1); 1439 dsdt_line(" 0x00000000, // Translation Offset"); 1440 dsdt_line(" 0x%08X, // Length\n", 1441 bi->memlimit32 - bi->membase32); 1442 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1443 1444 /* mmio window (64-bit) */ 1445 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1446 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1447 dsdt_line(" 0x0000000000000000, // Granularity"); 1448 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1449 dsdt_line(" 0x%016lX, // Range Maximum\n", 1450 bi->memlimit64 - 1); 1451 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1452 dsdt_line(" 0x%016lX, // Length\n", 1453 bi->memlimit64 - bi->membase64); 1454 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1455 dsdt_line(" })"); 1456 1457 count = pci_count_lintr(bus); 1458 if (count != 0) { 1459 dsdt_indent(2); 1460 dsdt_line("Name (PPRT, Package ()"); 1461 dsdt_line("{"); 1462 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1463 dsdt_line("})"); 1464 dsdt_line("Name (APRT, Package ()"); 1465 dsdt_line("{"); 1466 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1467 dsdt_line("})"); 1468 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1469 dsdt_line("{"); 1470 dsdt_line(" If (PICM)"); 1471 dsdt_line(" {"); 1472 dsdt_line(" Return (APRT)"); 1473 dsdt_line(" 
}"); 1474 dsdt_line(" Else"); 1475 dsdt_line(" {"); 1476 dsdt_line(" Return (PPRT)"); 1477 dsdt_line(" }"); 1478 dsdt_line("}"); 1479 dsdt_unindent(2); 1480 } 1481 1482 dsdt_indent(2); 1483 for (slot = 0; slot < MAXSLOTS; slot++) { 1484 si = &bi->slotinfo[slot]; 1485 for (func = 0; func < MAXFUNCS; func++) { 1486 pi = si->si_funcs[func].fi_devi; 1487 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1488 pi->pi_d->pe_write_dsdt(pi); 1489 } 1490 } 1491 dsdt_unindent(2); 1492 done: 1493 dsdt_line(" }"); 1494 } 1495 1496 void 1497 pci_write_dsdt(void) 1498 { 1499 int bus; 1500 1501 dsdt_indent(1); 1502 dsdt_line("Name (PICM, 0x00)"); 1503 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1504 dsdt_line("{"); 1505 dsdt_line(" Store (Arg0, PICM)"); 1506 dsdt_line("}"); 1507 dsdt_line(""); 1508 dsdt_line("Scope (_SB)"); 1509 dsdt_line("{"); 1510 for (bus = 0; bus < MAXBUSES; bus++) 1511 pci_bus_write_dsdt(bus); 1512 dsdt_line("}"); 1513 dsdt_unindent(1); 1514 } 1515 1516 int 1517 pci_bus_configured(int bus) 1518 { 1519 assert(bus >= 0 && bus < MAXBUSES); 1520 return (pci_businfo[bus] != NULL); 1521 } 1522 1523 int 1524 pci_msi_enabled(struct pci_devinst *pi) 1525 { 1526 return (pi->pi_msi.enabled); 1527 } 1528 1529 int 1530 pci_msi_maxmsgnum(struct pci_devinst *pi) 1531 { 1532 if (pi->pi_msi.enabled) 1533 return (pi->pi_msi.maxmsgnum); 1534 else 1535 return (0); 1536 } 1537 1538 int 1539 pci_msix_enabled(struct pci_devinst *pi) 1540 { 1541 1542 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1543 } 1544 1545 void 1546 pci_generate_msix(struct pci_devinst *pi, int index) 1547 { 1548 struct msix_table_entry *mte; 1549 1550 if (!pci_msix_enabled(pi)) 1551 return; 1552 1553 if (pi->pi_msix.function_mask) 1554 return; 1555 1556 if (index >= pi->pi_msix.table_count) 1557 return; 1558 1559 mte = &pi->pi_msix.table[index]; 1560 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1561 /* XXX Set PBA bit if interrupt is disabled */ 1562 vm_lapic_msi(pi->pi_vmctx, mte->addr, 
mte->msg_data); 1563 } 1564 } 1565 1566 void 1567 pci_generate_msi(struct pci_devinst *pi, int index) 1568 { 1569 1570 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1571 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1572 pi->pi_msi.msg_data + index); 1573 } 1574 } 1575 1576 static bool 1577 pci_lintr_permitted(struct pci_devinst *pi) 1578 { 1579 uint16_t cmd; 1580 1581 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1582 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1583 (cmd & PCIM_CMD_INTxDIS))); 1584 } 1585 1586 void 1587 pci_lintr_request(struct pci_devinst *pi) 1588 { 1589 struct businfo *bi; 1590 struct slotinfo *si; 1591 int bestpin, bestcount, pin; 1592 1593 bi = pci_businfo[pi->pi_bus]; 1594 assert(bi != NULL); 1595 1596 /* 1597 * Just allocate a pin from our slot. The pin will be 1598 * assigned IRQs later when interrupts are routed. 1599 */ 1600 si = &bi->slotinfo[pi->pi_slot]; 1601 bestpin = 0; 1602 bestcount = si->si_intpins[0].ii_count; 1603 for (pin = 1; pin < 4; pin++) { 1604 if (si->si_intpins[pin].ii_count < bestcount) { 1605 bestpin = pin; 1606 bestcount = si->si_intpins[pin].ii_count; 1607 } 1608 } 1609 1610 si->si_intpins[bestpin].ii_count++; 1611 pi->pi_lintr.pin = bestpin + 1; 1612 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1613 } 1614 1615 static void 1616 pci_lintr_route(struct pci_devinst *pi) 1617 { 1618 struct businfo *bi; 1619 struct intxinfo *ii; 1620 1621 if (pi->pi_lintr.pin == 0) 1622 return; 1623 1624 bi = pci_businfo[pi->pi_bus]; 1625 assert(bi != NULL); 1626 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1627 1628 /* 1629 * Attempt to allocate an I/O APIC pin for this intpin if one 1630 * is not yet assigned. 1631 */ 1632 if (ii->ii_ioapic_irq == 0) 1633 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); 1634 assert(ii->ii_ioapic_irq > 0); 1635 1636 /* 1637 * Attempt to allocate a PIRQ pin for this intpin if one is 1638 * not yet assigned. 
1639 */ 1640 if (ii->ii_pirq_pin == 0) 1641 ii->ii_pirq_pin = pirq_alloc_pin(pi); 1642 assert(ii->ii_pirq_pin > 0); 1643 1644 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1645 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1646 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1647 } 1648 1649 void 1650 pci_lintr_assert(struct pci_devinst *pi) 1651 { 1652 1653 assert(pi->pi_lintr.pin > 0); 1654 1655 pthread_mutex_lock(&pi->pi_lintr.lock); 1656 if (pi->pi_lintr.state == IDLE) { 1657 if (pci_lintr_permitted(pi)) { 1658 pi->pi_lintr.state = ASSERTED; 1659 pci_irq_assert(pi); 1660 } else 1661 pi->pi_lintr.state = PENDING; 1662 } 1663 pthread_mutex_unlock(&pi->pi_lintr.lock); 1664 } 1665 1666 void 1667 pci_lintr_deassert(struct pci_devinst *pi) 1668 { 1669 1670 assert(pi->pi_lintr.pin > 0); 1671 1672 pthread_mutex_lock(&pi->pi_lintr.lock); 1673 if (pi->pi_lintr.state == ASSERTED) { 1674 pi->pi_lintr.state = IDLE; 1675 pci_irq_deassert(pi); 1676 } else if (pi->pi_lintr.state == PENDING) 1677 pi->pi_lintr.state = IDLE; 1678 pthread_mutex_unlock(&pi->pi_lintr.lock); 1679 } 1680 1681 static void 1682 pci_lintr_update(struct pci_devinst *pi) 1683 { 1684 1685 pthread_mutex_lock(&pi->pi_lintr.lock); 1686 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1687 pci_irq_deassert(pi); 1688 pi->pi_lintr.state = PENDING; 1689 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1690 pi->pi_lintr.state = ASSERTED; 1691 pci_irq_assert(pi); 1692 } 1693 pthread_mutex_unlock(&pi->pi_lintr.lock); 1694 } 1695 1696 int 1697 pci_count_lintr(int bus) 1698 { 1699 int count, slot, pin; 1700 struct slotinfo *slotinfo; 1701 1702 count = 0; 1703 if (pci_businfo[bus] != NULL) { 1704 for (slot = 0; slot < MAXSLOTS; slot++) { 1705 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1706 for (pin = 0; pin < 4; pin++) { 1707 if (slotinfo->si_intpins[pin].ii_count != 0) 1708 count++; 1709 } 1710 } 1711 } 1712 return (count); 1713 } 1714 1715 void 1716 pci_walk_lintr(int 
bus, pci_lintr_cb cb, void *arg) 1717 { 1718 struct businfo *bi; 1719 struct slotinfo *si; 1720 struct intxinfo *ii; 1721 int slot, pin; 1722 1723 if ((bi = pci_businfo[bus]) == NULL) 1724 return; 1725 1726 for (slot = 0; slot < MAXSLOTS; slot++) { 1727 si = &bi->slotinfo[slot]; 1728 for (pin = 0; pin < 4; pin++) { 1729 ii = &si->si_intpins[pin]; 1730 if (ii->ii_count != 0) 1731 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 1732 ii->ii_ioapic_irq, arg); 1733 } 1734 } 1735 } 1736 1737 /* 1738 * Return 1 if the emulated device in 'slot' is a multi-function device. 1739 * Return 0 otherwise. 1740 */ 1741 static int 1742 pci_emul_is_mfdev(int bus, int slot) 1743 { 1744 struct businfo *bi; 1745 struct slotinfo *si; 1746 int f, numfuncs; 1747 1748 numfuncs = 0; 1749 if ((bi = pci_businfo[bus]) != NULL) { 1750 si = &bi->slotinfo[slot]; 1751 for (f = 0; f < MAXFUNCS; f++) { 1752 if (si->si_funcs[f].fi_devi != NULL) { 1753 numfuncs++; 1754 } 1755 } 1756 } 1757 return (numfuncs > 1); 1758 } 1759 1760 /* 1761 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1762 * whether or not is a multi-function being emulated in the pci 'slot'. 1763 */ 1764 static void 1765 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1766 { 1767 int mfdev; 1768 1769 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1770 mfdev = pci_emul_is_mfdev(bus, slot); 1771 switch (bytes) { 1772 case 1: 1773 case 2: 1774 *rv &= ~PCIM_MFDEV; 1775 if (mfdev) { 1776 *rv |= PCIM_MFDEV; 1777 } 1778 break; 1779 case 4: 1780 *rv &= ~(PCIM_MFDEV << 16); 1781 if (mfdev) { 1782 *rv |= (PCIM_MFDEV << 16); 1783 } 1784 break; 1785 } 1786 } 1787 } 1788 1789 /* 1790 * Update device state in response to changes to the PCI command 1791 * register. 
1792 */ 1793 void 1794 pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) 1795 { 1796 int i; 1797 uint16_t changed, new; 1798 1799 new = pci_get_cfgdata16(pi, PCIR_COMMAND); 1800 changed = old ^ new; 1801 1802 /* 1803 * If the MMIO or I/O address space decoding has changed then 1804 * register/unregister all BARs that decode that address space. 1805 */ 1806 for (i = 0; i <= PCI_BARMAX; i++) { 1807 switch (pi->pi_bar[i].type) { 1808 case PCIBAR_NONE: 1809 case PCIBAR_MEMHI64: 1810 break; 1811 case PCIBAR_IO: 1812 /* I/O address space decoding changed? */ 1813 if (changed & PCIM_CMD_PORTEN) { 1814 if (new & PCIM_CMD_PORTEN) 1815 register_bar(pi, i); 1816 else 1817 unregister_bar(pi, i); 1818 } 1819 break; 1820 case PCIBAR_MEM32: 1821 case PCIBAR_MEM64: 1822 /* MMIO address space decoding changed? */ 1823 if (changed & PCIM_CMD_MEMEN) { 1824 if (new & PCIM_CMD_MEMEN) 1825 register_bar(pi, i); 1826 else 1827 unregister_bar(pi, i); 1828 } 1829 break; 1830 default: 1831 assert(0); 1832 } 1833 } 1834 1835 /* 1836 * If INTx has been unmasked and is pending, assert the 1837 * interrupt. 1838 */ 1839 pci_lintr_update(pi); 1840 } 1841 1842 static void 1843 pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) 1844 { 1845 int rshift; 1846 uint32_t cmd, old, readonly; 1847 1848 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 1849 1850 /* 1851 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. 1852 * 1853 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are 1854 * 'write 1 to clear'. However these bits are not set to '1' by 1855 * any device emulation so it is simpler to treat them as readonly. 
1856 */ 1857 rshift = (coff & 0x3) * 8; 1858 readonly = 0xFFFFF880 >> rshift; 1859 1860 old = CFGREAD(pi, coff, bytes); 1861 new &= ~readonly; 1862 new |= (old & readonly); 1863 CFGWRITE(pi, coff, new, bytes); /* update config */ 1864 1865 pci_emul_cmd_changed(pi, cmd); 1866 } 1867 1868 static void 1869 pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, 1870 int coff, int bytes, uint32_t *eax) 1871 { 1872 struct businfo *bi; 1873 struct slotinfo *si; 1874 struct pci_devinst *pi; 1875 struct pci_devemu *pe; 1876 int idx, needcfg; 1877 uint64_t addr, bar, mask; 1878 1879 if ((bi = pci_businfo[bus]) != NULL) { 1880 si = &bi->slotinfo[slot]; 1881 pi = si->si_funcs[func].fi_devi; 1882 } else 1883 pi = NULL; 1884 1885 /* 1886 * Just return if there is no device at this slot:func or if the 1887 * the guest is doing an un-aligned access. 1888 */ 1889 if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || 1890 (coff & (bytes - 1)) != 0) { 1891 if (in) 1892 *eax = 0xffffffff; 1893 return; 1894 } 1895 1896 /* 1897 * Ignore all writes beyond the standard config space and return all 1898 * ones on reads. 1899 */ 1900 if (coff >= PCI_REGMAX + 1) { 1901 if (in) { 1902 *eax = 0xffffffff; 1903 /* 1904 * Extended capabilities begin at offset 256 in config 1905 * space. Absence of extended capabilities is signaled 1906 * with all 0s in the extended capability header at 1907 * offset 256. 
1908 */ 1909 if (coff <= PCI_REGMAX + 4) 1910 *eax = 0x00000000; 1911 } 1912 return; 1913 } 1914 1915 pe = pi->pi_d; 1916 1917 /* 1918 * Config read 1919 */ 1920 if (in) { 1921 /* Let the device emulation override the default handler */ 1922 if (pe->pe_cfgread != NULL) { 1923 needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, 1924 eax); 1925 } else { 1926 needcfg = 1; 1927 } 1928 1929 if (needcfg) 1930 *eax = CFGREAD(pi, coff, bytes); 1931 1932 pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); 1933 } else { 1934 /* Let the device emulation override the default handler */ 1935 if (pe->pe_cfgwrite != NULL && 1936 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 1937 return; 1938 1939 /* 1940 * Special handling for write to BAR registers 1941 */ 1942 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { 1943 /* 1944 * Ignore writes to BAR registers that are not 1945 * 4-byte aligned. 1946 */ 1947 if (bytes != 4 || (coff & 0x3) != 0) 1948 return; 1949 idx = (coff - PCIR_BAR(0)) / 4; 1950 mask = ~(pi->pi_bar[idx].size - 1); 1951 switch (pi->pi_bar[idx].type) { 1952 case PCIBAR_NONE: 1953 pi->pi_bar[idx].addr = bar = 0; 1954 break; 1955 case PCIBAR_IO: 1956 addr = *eax & mask; 1957 addr &= 0xffff; 1958 bar = addr | pi->pi_bar[idx].lobits; 1959 /* 1960 * Register the new BAR value for interception 1961 */ 1962 if (addr != pi->pi_bar[idx].addr) { 1963 update_bar_address(pi, addr, idx, 1964 PCIBAR_IO); 1965 } 1966 break; 1967 case PCIBAR_MEM32: 1968 addr = bar = *eax & mask; 1969 bar |= pi->pi_bar[idx].lobits; 1970 if (addr != pi->pi_bar[idx].addr) { 1971 update_bar_address(pi, addr, idx, 1972 PCIBAR_MEM32); 1973 } 1974 break; 1975 case PCIBAR_MEM64: 1976 addr = bar = *eax & mask; 1977 bar |= pi->pi_bar[idx].lobits; 1978 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 1979 update_bar_address(pi, addr, idx, 1980 PCIBAR_MEM64); 1981 } 1982 break; 1983 case PCIBAR_MEMHI64: 1984 mask = ~(pi->pi_bar[idx - 1].size - 1); 1985 addr = ((uint64_t)*eax << 32) & 
mask; 1986 bar = addr >> 32; 1987 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 1988 update_bar_address(pi, addr, idx - 1, 1989 PCIBAR_MEMHI64); 1990 } 1991 break; 1992 default: 1993 assert(0); 1994 } 1995 pci_set_cfgdata32(pi, coff, bar); 1996 1997 } else if (pci_emul_iscap(pi, coff)) { 1998 pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); 1999 } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { 2000 pci_emul_cmdsts_write(pi, coff, *eax, bytes); 2001 } else { 2002 CFGWRITE(pi, coff, *eax, bytes); 2003 } 2004 } 2005 } 2006 2007 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 2008 2009 static int 2010 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2011 uint32_t *eax, void *arg) 2012 { 2013 uint32_t x; 2014 2015 if (bytes != 4) { 2016 if (in) 2017 *eax = (bytes == 2) ? 0xffff : 0xff; 2018 return (0); 2019 } 2020 2021 if (in) { 2022 x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; 2023 if (cfgenable) 2024 x |= CONF1_ENABLE; 2025 *eax = x; 2026 } else { 2027 x = *eax; 2028 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 2029 cfgoff = (x & PCI_REGMAX) & ~0x03; 2030 cfgfunc = (x >> 8) & PCI_FUNCMAX; 2031 cfgslot = (x >> 11) & PCI_SLOTMAX; 2032 cfgbus = (x >> 16) & PCI_BUSMAX; 2033 } 2034 2035 return (0); 2036 } 2037 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 2038 2039 static int 2040 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2041 uint32_t *eax, void *arg) 2042 { 2043 int coff; 2044 2045 assert(bytes == 1 || bytes == 2 || bytes == 4); 2046 2047 coff = cfgoff + (port - CONF1_DATA_PORT); 2048 if (cfgenable) { 2049 pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, 2050 eax); 2051 } else { 2052 /* Ignore accesses to cfgdata if not enabled by cfgaddr */ 2053 if (in) 2054 *eax = 0xffffffff; 2055 } 2056 return (0); 2057 } 2058 2059 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 2060 INOUT_PORT(pci_cfgdata, 
CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 2061 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 2062 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 2063 2064 #ifdef BHYVE_SNAPSHOT 2065 /* 2066 * Saves/restores PCI device emulated state. Returns 0 on success. 2067 */ 2068 static int 2069 pci_snapshot_pci_dev(struct vm_snapshot_meta *meta) 2070 { 2071 struct pci_devinst *pi; 2072 int i; 2073 int ret; 2074 2075 pi = meta->dev_data; 2076 2077 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done); 2078 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done); 2079 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done); 2080 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done); 2081 2082 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done); 2083 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done); 2084 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done); 2085 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done); 2086 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done); 2087 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done); 2088 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done); 2089 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done); 2090 2091 SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata), 2092 meta, ret, done); 2093 2094 for (i = 0; i < nitems(pi->pi_bar); i++) { 2095 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done); 2096 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done); 2097 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done); 2098 } 2099 2100 /* Restore MSI-X table. 
*/ 2101 for (i = 0; i < pi->pi_msix.table_count; i++) { 2102 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr, 2103 meta, ret, done); 2104 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data, 2105 meta, ret, done); 2106 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control, 2107 meta, ret, done); 2108 } 2109 2110 done: 2111 return (ret); 2112 } 2113 2114 static int 2115 pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde, 2116 struct pci_devinst **pdi) 2117 { 2118 struct businfo *bi; 2119 struct slotinfo *si; 2120 struct funcinfo *fi; 2121 int bus, slot, func; 2122 2123 assert(dev_name != NULL); 2124 assert(pde != NULL); 2125 assert(pdi != NULL); 2126 2127 for (bus = 0; bus < MAXBUSES; bus++) { 2128 if ((bi = pci_businfo[bus]) == NULL) 2129 continue; 2130 2131 for (slot = 0; slot < MAXSLOTS; slot++) { 2132 si = &bi->slotinfo[slot]; 2133 for (func = 0; func < MAXFUNCS; func++) { 2134 fi = &si->si_funcs[func]; 2135 if (fi->fi_pde == NULL) 2136 continue; 2137 if (strcmp(dev_name, fi->fi_pde->pe_emu) != 0) 2138 continue; 2139 2140 *pde = fi->fi_pde; 2141 *pdi = fi->fi_devi; 2142 return (0); 2143 } 2144 } 2145 } 2146 2147 return (EINVAL); 2148 } 2149 2150 int 2151 pci_snapshot(struct vm_snapshot_meta *meta) 2152 { 2153 struct pci_devemu *pde; 2154 struct pci_devinst *pdi; 2155 int ret; 2156 2157 assert(meta->dev_name != NULL); 2158 2159 ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi); 2160 if (ret != 0) { 2161 fprintf(stderr, "%s: no such name: %s\r\n", 2162 __func__, meta->dev_name); 2163 memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); 2164 return (0); 2165 } 2166 2167 meta->dev_data = pdi; 2168 2169 if (pde->pe_snapshot == NULL) { 2170 fprintf(stderr, "%s: not implemented yet for: %s\r\n", 2171 __func__, meta->dev_name); 2172 return (-1); 2173 } 2174 2175 ret = pci_snapshot_pci_dev(meta); 2176 if (ret != 0) { 2177 fprintf(stderr, "%s: failed to snapshot pci dev\r\n", 2178 __func__); 2179 return (-1); 2180 } 2181 2182 ret = 
(*pde->pe_snapshot)(meta); 2183 2184 return (ret); 2185 } 2186 2187 int 2188 pci_pause(struct vmctx *ctx, const char *dev_name) 2189 { 2190 struct pci_devemu *pde; 2191 struct pci_devinst *pdi; 2192 int ret; 2193 2194 assert(dev_name != NULL); 2195 2196 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2197 if (ret != 0) { 2198 /* 2199 * It is possible to call this function without 2200 * checking that the device is inserted first. 2201 */ 2202 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2203 return (0); 2204 } 2205 2206 if (pde->pe_pause == NULL) { 2207 /* The pause/resume functionality is optional. */ 2208 fprintf(stderr, "%s: not implemented for: %s\n", 2209 __func__, dev_name); 2210 return (0); 2211 } 2212 2213 return (*pde->pe_pause)(ctx, pdi); 2214 } 2215 2216 int 2217 pci_resume(struct vmctx *ctx, const char *dev_name) 2218 { 2219 struct pci_devemu *pde; 2220 struct pci_devinst *pdi; 2221 int ret; 2222 2223 assert(dev_name != NULL); 2224 2225 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2226 if (ret != 0) { 2227 /* 2228 * It is possible to call this function without 2229 * checking that the device is inserted first. 2230 */ 2231 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2232 return (0); 2233 } 2234 2235 if (pde->pe_resume == NULL) { 2236 /* The pause/resume functionality is optional. 
*/ 2237 fprintf(stderr, "%s: not implemented for: %s\n", 2238 __func__, dev_name); 2239 return (0); 2240 } 2241 2242 return (*pde->pe_resume)(ctx, pdi); 2243 } 2244 #endif 2245 2246 #define PCI_EMUL_TEST 2247 #ifdef PCI_EMUL_TEST 2248 /* 2249 * Define a dummy test device 2250 */ 2251 #define DIOSZ 8 2252 #define DMEMSZ 4096 2253 struct pci_emul_dsoftc { 2254 uint8_t ioregs[DIOSZ]; 2255 uint8_t memregs[2][DMEMSZ]; 2256 }; 2257 2258 #define PCI_EMUL_MSI_MSGS 4 2259 #define PCI_EMUL_MSIX_MSGS 16 2260 2261 static int 2262 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) 2263 { 2264 int error; 2265 struct pci_emul_dsoftc *sc; 2266 2267 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 2268 2269 pi->pi_arg = sc; 2270 2271 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 2272 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 2273 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 2274 2275 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 2276 assert(error == 0); 2277 2278 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 2279 assert(error == 0); 2280 2281 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 2282 assert(error == 0); 2283 2284 error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); 2285 assert(error == 0); 2286 2287 return (0); 2288 } 2289 2290 static void 2291 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2292 uint64_t offset, int size, uint64_t value) 2293 { 2294 int i; 2295 struct pci_emul_dsoftc *sc = pi->pi_arg; 2296 2297 if (baridx == 0) { 2298 if (offset + size > DIOSZ) { 2299 printf("diow: iow too large, offset %ld size %d\n", 2300 offset, size); 2301 return; 2302 } 2303 2304 if (size == 1) { 2305 sc->ioregs[offset] = value & 0xff; 2306 } else if (size == 2) { 2307 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 2308 } else if (size == 4) { 2309 *(uint32_t *)&sc->ioregs[offset] = value; 2310 } else { 2311 printf("diow: iow unknown size %d\n", size); 2312 } 2313 2314 /* 2315 * Special magic value to 
generate an interrupt 2316 */ 2317 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 2318 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 2319 2320 if (value == 0xabcdef) { 2321 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 2322 pci_generate_msi(pi, i); 2323 } 2324 } 2325 2326 if (baridx == 1 || baridx == 2) { 2327 if (offset + size > DMEMSZ) { 2328 printf("diow: memw too large, offset %ld size %d\n", 2329 offset, size); 2330 return; 2331 } 2332 2333 i = baridx - 1; /* 'memregs' index */ 2334 2335 if (size == 1) { 2336 sc->memregs[i][offset] = value; 2337 } else if (size == 2) { 2338 *(uint16_t *)&sc->memregs[i][offset] = value; 2339 } else if (size == 4) { 2340 *(uint32_t *)&sc->memregs[i][offset] = value; 2341 } else if (size == 8) { 2342 *(uint64_t *)&sc->memregs[i][offset] = value; 2343 } else { 2344 printf("diow: memw unknown size %d\n", size); 2345 } 2346 2347 /* 2348 * magic interrupt ?? 2349 */ 2350 } 2351 2352 if (baridx > 2 || baridx < 0) { 2353 printf("diow: unknown bar idx %d\n", baridx); 2354 } 2355 } 2356 2357 static uint64_t 2358 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2359 uint64_t offset, int size) 2360 { 2361 struct pci_emul_dsoftc *sc = pi->pi_arg; 2362 uint32_t value; 2363 int i; 2364 2365 if (baridx == 0) { 2366 if (offset + size > DIOSZ) { 2367 printf("dior: ior too large, offset %ld size %d\n", 2368 offset, size); 2369 return (0); 2370 } 2371 2372 value = 0; 2373 if (size == 1) { 2374 value = sc->ioregs[offset]; 2375 } else if (size == 2) { 2376 value = *(uint16_t *) &sc->ioregs[offset]; 2377 } else if (size == 4) { 2378 value = *(uint32_t *) &sc->ioregs[offset]; 2379 } else { 2380 printf("dior: ior unknown size %d\n", size); 2381 } 2382 } 2383 2384 if (baridx == 1 || baridx == 2) { 2385 if (offset + size > DMEMSZ) { 2386 printf("dior: memr too large, offset %ld size %d\n", 2387 offset, size); 2388 return (0); 2389 } 2390 2391 i = baridx - 1; /* 'memregs' index */ 2392 2393 if (size == 1) { 2394 
value = sc->memregs[i][offset]; 2395 } else if (size == 2) { 2396 value = *(uint16_t *) &sc->memregs[i][offset]; 2397 } else if (size == 4) { 2398 value = *(uint32_t *) &sc->memregs[i][offset]; 2399 } else if (size == 8) { 2400 value = *(uint64_t *) &sc->memregs[i][offset]; 2401 } else { 2402 printf("dior: ior unknown size %d\n", size); 2403 } 2404 } 2405 2406 2407 if (baridx > 2 || baridx < 0) { 2408 printf("dior: unknown bar idx %d\n", baridx); 2409 return (0); 2410 } 2411 2412 return (value); 2413 } 2414 2415 #ifdef BHYVE_SNAPSHOT 2416 int 2417 pci_emul_snapshot(struct vm_snapshot_meta *meta) 2418 { 2419 2420 return (0); 2421 } 2422 #endif 2423 2424 struct pci_devemu pci_dummy = { 2425 .pe_emu = "dummy", 2426 .pe_init = pci_emul_dinit, 2427 .pe_barwrite = pci_emul_diow, 2428 .pe_barread = pci_emul_dior, 2429 #ifdef BHYVE_SNAPSHOT 2430 .pe_snapshot = pci_emul_snapshot, 2431 #endif 2432 }; 2433 PCI_EMUL_SET(pci_dummy); 2434 2435 #endif /* PCI_EMUL_TEST */ 2436