1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/linker_set.h> 36 37 #include <ctype.h> 38 #include <err.h> 39 #include <errno.h> 40 #include <pthread.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <strings.h> 45 #include <assert.h> 46 #include <stdbool.h> 47 #include <sysexits.h> 48 49 #include <machine/vmm.h> 50 #include <machine/vmm_snapshot.h> 51 #include <vmmapi.h> 52 53 #include "acpi.h" 54 #include "bhyverun.h" 55 #include "config.h" 56 #include "debug.h" 57 #include "inout.h" 58 #include "ioapic.h" 59 #include "mem.h" 60 #include "pci_emul.h" 61 #include "pci_irq.h" 62 #include "pci_lpc.h" 63 64 #define CONF1_ADDR_PORT 0x0cf8 65 #define CONF1_DATA_PORT 0x0cfc 66 67 #define CONF1_ENABLE 0x80000000ul 68 69 #define MAXBUSES (PCI_BUSMAX + 1) 70 #define MAXSLOTS (PCI_SLOTMAX + 1) 71 #define MAXFUNCS (PCI_FUNCMAX + 1) 72 73 #define GB (1024 * 1024 * 1024UL) 74 75 struct funcinfo { 76 nvlist_t *fi_config; 77 struct pci_devemu *fi_pde; 78 struct pci_devinst *fi_devi; 79 }; 80 81 struct intxinfo { 82 int ii_count; 83 int ii_pirq_pin; 84 int ii_ioapic_irq; 85 }; 86 87 struct slotinfo { 88 struct intxinfo si_intpins[4]; 89 struct funcinfo si_funcs[MAXFUNCS]; 90 }; 91 92 struct businfo { 93 uint16_t iobase, iolimit; /* I/O window */ 94 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 95 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 96 struct slotinfo slotinfo[MAXSLOTS]; 97 }; 98 99 static struct businfo *pci_businfo[MAXBUSES]; 100 101 SET_DECLARE(pci_devemu_set, struct pci_devemu); 102 103 static uint64_t pci_emul_iobase; 104 static uint64_t pci_emul_membase32; 105 static uint64_t pci_emul_membase64; 106 static uint64_t pci_emul_memlim64; 107 108 #define PCI_EMUL_IOBASE 0x2000 109 #define PCI_EMUL_IOLIMIT 0x10000 110 111 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */ 112 #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB 
per bus */ 113 SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); 114 115 /* 116 * OVMF always uses 0xC0000000 as base address for 32 bit PCI MMIO. Don't 117 * change this address without changing it in OVMF. 118 */ 119 #define PCI_EMUL_MEMBASE32 0xC0000000 120 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE 121 #define PCI_EMUL_MEMSIZE64 (32*GB) 122 123 static struct pci_devemu *pci_emul_finddev(const char *name); 124 static void pci_lintr_route(struct pci_devinst *pi); 125 static void pci_lintr_update(struct pci_devinst *pi); 126 static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, 127 int func, int coff, int bytes, uint32_t *val); 128 129 static __inline void 130 CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) 131 { 132 133 if (bytes == 1) 134 pci_set_cfgdata8(pi, coff, val); 135 else if (bytes == 2) 136 pci_set_cfgdata16(pi, coff, val); 137 else 138 pci_set_cfgdata32(pi, coff, val); 139 } 140 141 static __inline uint32_t 142 CFGREAD(struct pci_devinst *pi, int coff, int bytes) 143 { 144 145 if (bytes == 1) 146 return (pci_get_cfgdata8(pi, coff)); 147 else if (bytes == 2) 148 return (pci_get_cfgdata16(pi, coff)); 149 else 150 return (pci_get_cfgdata32(pi, coff)); 151 } 152 153 /* 154 * I/O access 155 */ 156 157 /* 158 * Slot options are in the form: 159 * 160 * <bus>:<slot>:<func>,<emul>[,<config>] 161 * <slot>[:<func>],<emul>[,<config>] 162 * 163 * slot is 0..31 164 * func is 0..7 165 * emul is a string describing the type of PCI device e.g. virtio-net 166 * config is an optional string, depending on the device, that can be 167 * used for configuration. 168 * Examples are: 169 * 1,virtio-net,tap0 170 * 3:0,dummy 171 */ 172 static void 173 pci_parse_slot_usage(char *aopt) 174 { 175 176 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 177 } 178 179 /* 180 * Helper function to parse a list of comma-separated options where 181 * each option is formatted as "name[=value]". 
If no value is
 * provided, the option is treated as a boolean and is given a value
 * of true.
 *
 * 'nvl' is the configuration node that receives the parsed options and
 * 'opt' is the comma-separated option string.  A NULL 'opt' is accepted
 * and leaves 'nvl' untouched.
 *
 * Returns 0 on success, or -1 if a private copy of 'opt' could not be
 * allocated.
 */
int
pci_parse_legacy_config(nvlist_t *nvl, const char *opt)
{
	char *config, *name, *tofree, *value;

	if (opt == NULL)
		return (0);

	/* strsep() modifies the string it walks, so work on a copy. */
	config = tofree = strdup(opt);
	if (tofree == NULL) {
		/*
		 * Without this check strsep() is handed a NULL string, the
		 * loop below never runs and every option is silently
		 * dropped while success is reported to the caller.
		 */
		return (-1);
	}

	while ((name = strsep(&config, ",")) != NULL) {
		value = strchr(name, '=');
		if (value != NULL) {
			/* Split "name=value" in place at the '='. */
			*value = '\0';
			value++;
			set_config_value_node(nvl, name, value);
		} else
			set_config_bool_node(nvl, name, true);
	}
	free(tofree);
	return (0);
}

/*
 * PCI device configuration is stored in MIBs that encode the device's
 * location:
 *
 * pci.<bus>.<slot>.<func>
 *
 * Where "bus", "slot", and "func" are all decimal values without
 * leading zeroes.  Each valid device must have a "device" node which
 * identifies the driver model of the device.
 *
 * Device backends can provide a parser for the "config" string.  If
 * a custom parser is not provided, pci_parse_legacy_config() is used
 * to parse the string.
220 */ 221 int 222 pci_parse_slot(char *opt) 223 { 224 char node_name[sizeof("pci.XXX.XX.X")]; 225 struct pci_devemu *pde; 226 char *emul, *config, *str, *cp; 227 int error, bnum, snum, fnum; 228 nvlist_t *nvl; 229 230 error = -1; 231 str = strdup(opt); 232 233 emul = config = NULL; 234 if ((cp = strchr(str, ',')) != NULL) { 235 *cp = '\0'; 236 emul = cp + 1; 237 if ((cp = strchr(emul, ',')) != NULL) { 238 *cp = '\0'; 239 config = cp + 1; 240 } 241 } else { 242 pci_parse_slot_usage(opt); 243 goto done; 244 } 245 246 /* <bus>:<slot>:<func> */ 247 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 248 bnum = 0; 249 /* <slot>:<func> */ 250 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 251 fnum = 0; 252 /* <slot> */ 253 if (sscanf(str, "%d", &snum) != 1) { 254 snum = -1; 255 } 256 } 257 } 258 259 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 260 fnum < 0 || fnum >= MAXFUNCS) { 261 pci_parse_slot_usage(opt); 262 goto done; 263 } 264 265 pde = pci_emul_finddev(emul); 266 if (pde == NULL) { 267 EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum, 268 fnum, emul); 269 goto done; 270 } 271 272 snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum, 273 fnum); 274 nvl = find_config_node(node_name); 275 if (nvl != NULL) { 276 EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum, 277 fnum); 278 goto done; 279 } 280 nvl = create_config_node(node_name); 281 if (pde->pe_alias != NULL) 282 set_config_value_node(nvl, "device", pde->pe_alias); 283 else 284 set_config_value_node(nvl, "device", pde->pe_emu); 285 286 if (pde->pe_legacy_config != NULL) 287 error = pde->pe_legacy_config(nvl, config); 288 else 289 error = pci_parse_legacy_config(nvl, config); 290 done: 291 free(str); 292 return (error); 293 } 294 295 void 296 pci_print_supported_devices() 297 { 298 struct pci_devemu **pdpp, *pdp; 299 300 SET_FOREACH(pdpp, pci_devemu_set) { 301 pdp = *pdpp; 302 printf("%s\n", pdp->pe_emu); 303 } 304 } 305 306 static int 307 
pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 308 { 309 310 if (offset < pi->pi_msix.pba_offset) 311 return (0); 312 313 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 314 return (0); 315 } 316 317 return (1); 318 } 319 320 int 321 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 322 uint64_t value) 323 { 324 int msix_entry_offset; 325 int tab_index; 326 char *dest; 327 328 /* support only 4 or 8 byte writes */ 329 if (size != 4 && size != 8) 330 return (-1); 331 332 /* 333 * Return if table index is beyond what device supports 334 */ 335 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 336 if (tab_index >= pi->pi_msix.table_count) 337 return (-1); 338 339 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 340 341 /* support only aligned writes */ 342 if ((msix_entry_offset % size) != 0) 343 return (-1); 344 345 dest = (char *)(pi->pi_msix.table + tab_index); 346 dest += msix_entry_offset; 347 348 if (size == 4) 349 *((uint32_t *)dest) = value; 350 else 351 *((uint64_t *)dest) = value; 352 353 return (0); 354 } 355 356 uint64_t 357 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 358 { 359 char *dest; 360 int msix_entry_offset; 361 int tab_index; 362 uint64_t retval = ~0; 363 364 /* 365 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 366 * table but we also allow 1 byte access to accommodate reads from 367 * ddb. 
368 */ 369 if (size != 1 && size != 4 && size != 8) 370 return (retval); 371 372 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 373 374 /* support only aligned reads */ 375 if ((msix_entry_offset % size) != 0) { 376 return (retval); 377 } 378 379 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 380 381 if (tab_index < pi->pi_msix.table_count) { 382 /* valid MSI-X Table access */ 383 dest = (char *)(pi->pi_msix.table + tab_index); 384 dest += msix_entry_offset; 385 386 if (size == 1) 387 retval = *((uint8_t *)dest); 388 else if (size == 4) 389 retval = *((uint32_t *)dest); 390 else 391 retval = *((uint64_t *)dest); 392 } else if (pci_valid_pba_offset(pi, offset)) { 393 /* return 0 for PBA access */ 394 retval = 0; 395 } 396 397 return (retval); 398 } 399 400 int 401 pci_msix_table_bar(struct pci_devinst *pi) 402 { 403 404 if (pi->pi_msix.table != NULL) 405 return (pi->pi_msix.table_bar); 406 else 407 return (-1); 408 } 409 410 int 411 pci_msix_pba_bar(struct pci_devinst *pi) 412 { 413 414 if (pi->pi_msix.table != NULL) 415 return (pi->pi_msix.pba_bar); 416 else 417 return (-1); 418 } 419 420 static int 421 pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 422 uint32_t *eax, void *arg) 423 { 424 struct pci_devinst *pdi = arg; 425 struct pci_devemu *pe = pdi->pi_d; 426 uint64_t offset; 427 int i; 428 429 for (i = 0; i <= PCI_BARMAX; i++) { 430 if (pdi->pi_bar[i].type == PCIBAR_IO && 431 port >= pdi->pi_bar[i].addr && 432 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 433 offset = port - pdi->pi_bar[i].addr; 434 if (in) 435 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 436 offset, bytes); 437 else 438 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 439 bytes, *eax); 440 return (0); 441 } 442 } 443 return (-1); 444 } 445 446 static int 447 pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 448 int size, uint64_t *val, void *arg1, long arg2) 449 { 450 struct pci_devinst *pdi = arg1; 451 struct pci_devemu *pe = 
pdi->pi_d; 452 uint64_t offset; 453 int bidx = (int) arg2; 454 455 assert(bidx <= PCI_BARMAX); 456 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 457 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 458 assert(addr >= pdi->pi_bar[bidx].addr && 459 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 460 461 offset = addr - pdi->pi_bar[bidx].addr; 462 463 if (dir == MEM_F_WRITE) { 464 if (size == 8) { 465 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 466 4, *val & 0xffffffff); 467 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 468 4, *val >> 32); 469 } else { 470 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 471 size, *val); 472 } 473 } else { 474 if (size == 8) { 475 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 476 offset, 4); 477 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 478 offset + 4, 4) << 32; 479 } else { 480 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 481 offset, size); 482 } 483 } 484 485 return (0); 486 } 487 488 489 static int 490 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 491 uint64_t *addr) 492 { 493 uint64_t base; 494 495 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 496 497 base = roundup2(*baseptr, size); 498 499 if (base + size <= limit) { 500 *addr = base; 501 *baseptr = base + size; 502 return (0); 503 } else 504 return (-1); 505 } 506 507 /* 508 * Register (or unregister) the MMIO or I/O region associated with the BAR 509 * register 'idx' of an emulated pci device. 
510 */ 511 static void 512 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 513 { 514 struct pci_devemu *pe; 515 int error; 516 struct inout_port iop; 517 struct mem_range mr; 518 519 pe = pi->pi_d; 520 switch (pi->pi_bar[idx].type) { 521 case PCIBAR_IO: 522 bzero(&iop, sizeof(struct inout_port)); 523 iop.name = pi->pi_name; 524 iop.port = pi->pi_bar[idx].addr; 525 iop.size = pi->pi_bar[idx].size; 526 if (registration) { 527 iop.flags = IOPORT_F_INOUT; 528 iop.handler = pci_emul_io_handler; 529 iop.arg = pi; 530 error = register_inout(&iop); 531 } else 532 error = unregister_inout(&iop); 533 if (pe->pe_baraddr != NULL) 534 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 535 pi->pi_bar[idx].addr); 536 break; 537 case PCIBAR_MEM32: 538 case PCIBAR_MEM64: 539 bzero(&mr, sizeof(struct mem_range)); 540 mr.name = pi->pi_name; 541 mr.base = pi->pi_bar[idx].addr; 542 mr.size = pi->pi_bar[idx].size; 543 if (registration) { 544 mr.flags = MEM_F_RW; 545 mr.handler = pci_emul_mem_handler; 546 mr.arg1 = pi; 547 mr.arg2 = idx; 548 error = register_mem(&mr); 549 } else 550 error = unregister_mem(&mr); 551 if (pe->pe_baraddr != NULL) 552 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 553 pi->pi_bar[idx].addr); 554 break; 555 default: 556 error = EINVAL; 557 break; 558 } 559 assert(error == 0); 560 } 561 562 static void 563 unregister_bar(struct pci_devinst *pi, int idx) 564 { 565 566 modify_bar_registration(pi, idx, 0); 567 } 568 569 static void 570 register_bar(struct pci_devinst *pi, int idx) 571 { 572 573 modify_bar_registration(pi, idx, 1); 574 } 575 576 /* Are we decoding i/o port accesses for the emulated pci device? */ 577 static int 578 porten(struct pci_devinst *pi) 579 { 580 uint16_t cmd; 581 582 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 583 584 return (cmd & PCIM_CMD_PORTEN); 585 } 586 587 /* Are we decoding memory accesses for the emulated pci device? 
*/ 588 static int 589 memen(struct pci_devinst *pi) 590 { 591 uint16_t cmd; 592 593 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 594 595 return (cmd & PCIM_CMD_MEMEN); 596 } 597 598 /* 599 * Update the MMIO or I/O address that is decoded by the BAR register. 600 * 601 * If the pci device has enabled the address space decoding then intercept 602 * the address range decoded by the BAR register. 603 */ 604 static void 605 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 606 { 607 int decode; 608 609 if (pi->pi_bar[idx].type == PCIBAR_IO) 610 decode = porten(pi); 611 else 612 decode = memen(pi); 613 614 if (decode) 615 unregister_bar(pi, idx); 616 617 switch (type) { 618 case PCIBAR_IO: 619 case PCIBAR_MEM32: 620 pi->pi_bar[idx].addr = addr; 621 break; 622 case PCIBAR_MEM64: 623 pi->pi_bar[idx].addr &= ~0xffffffffUL; 624 pi->pi_bar[idx].addr |= addr; 625 break; 626 case PCIBAR_MEMHI64: 627 pi->pi_bar[idx].addr &= 0xffffffff; 628 pi->pi_bar[idx].addr |= addr; 629 break; 630 default: 631 assert(0); 632 } 633 634 if (decode) 635 register_bar(pi, idx); 636 } 637 638 int 639 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 640 uint64_t size) 641 { 642 int error; 643 uint64_t *baseptr, limit, addr, mask, lobits, bar; 644 uint16_t cmd, enbit; 645 646 assert(idx >= 0 && idx <= PCI_BARMAX); 647 648 if ((size & (size - 1)) != 0) 649 size = 1UL << flsl(size); /* round up to a power of 2 */ 650 651 /* Enforce minimum BAR sizes required by the PCI standard */ 652 if (type == PCIBAR_IO) { 653 if (size < 4) 654 size = 4; 655 } else { 656 if (size < 16) 657 size = 16; 658 } 659 660 switch (type) { 661 case PCIBAR_NONE: 662 baseptr = NULL; 663 addr = mask = lobits = enbit = 0; 664 break; 665 case PCIBAR_IO: 666 baseptr = &pci_emul_iobase; 667 limit = PCI_EMUL_IOLIMIT; 668 mask = PCIM_BAR_IO_BASE; 669 lobits = PCIM_BAR_IO_SPACE; 670 enbit = PCIM_CMD_PORTEN; 671 break; 672 case PCIBAR_MEM64: 673 /* 674 * XXX 675 * Some drivers do not 
work well if the 64-bit BAR is allocated 676 * above 4GB. Allow for this by allocating small requests under 677 * 4GB unless then allocation size is larger than some arbitrary 678 * number (128MB currently). 679 */ 680 if (size > 128 * 1024 * 1024) { 681 baseptr = &pci_emul_membase64; 682 limit = pci_emul_memlim64; 683 mask = PCIM_BAR_MEM_BASE; 684 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 685 PCIM_BAR_MEM_PREFETCH; 686 } else { 687 baseptr = &pci_emul_membase32; 688 limit = PCI_EMUL_MEMLIMIT32; 689 mask = PCIM_BAR_MEM_BASE; 690 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 691 } 692 enbit = PCIM_CMD_MEMEN; 693 break; 694 case PCIBAR_MEM32: 695 baseptr = &pci_emul_membase32; 696 limit = PCI_EMUL_MEMLIMIT32; 697 mask = PCIM_BAR_MEM_BASE; 698 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 699 enbit = PCIM_CMD_MEMEN; 700 break; 701 default: 702 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 703 assert(0); 704 } 705 706 if (baseptr != NULL) { 707 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 708 if (error != 0) 709 return (error); 710 } 711 712 pdi->pi_bar[idx].type = type; 713 pdi->pi_bar[idx].addr = addr; 714 pdi->pi_bar[idx].size = size; 715 /* 716 * passthru devices are using same lobits as physical device they set 717 * this property 718 */ 719 if (pdi->pi_bar[idx].lobits != 0) { 720 lobits = pdi->pi_bar[idx].lobits; 721 } else { 722 pdi->pi_bar[idx].lobits = lobits; 723 } 724 725 /* Initialize the BAR register in config space */ 726 bar = (addr & mask) | lobits; 727 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 728 729 if (type == PCIBAR_MEM64) { 730 assert(idx + 1 <= PCI_BARMAX); 731 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 732 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 733 } 734 735 cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); 736 if ((cmd & enbit) != enbit) 737 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); 738 register_bar(pdi, idx); 739 740 return (0); 741 } 742 743 #define CAP_START_OFFSET 0x40 744 static 
int 745 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 746 { 747 int i, capoff, reallen; 748 uint16_t sts; 749 750 assert(caplen > 0); 751 752 reallen = roundup2(caplen, 4); /* dword aligned */ 753 754 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 755 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 756 capoff = CAP_START_OFFSET; 757 else 758 capoff = pi->pi_capend + 1; 759 760 /* Check if we have enough space */ 761 if (capoff + reallen > PCI_REGMAX + 1) 762 return (-1); 763 764 /* Set the previous capability pointer */ 765 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 766 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 767 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 768 } else 769 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 770 771 /* Copy the capability */ 772 for (i = 0; i < caplen; i++) 773 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 774 775 /* Set the next capability pointer */ 776 pci_set_cfgdata8(pi, capoff + 1, 0); 777 778 pi->pi_prevcap = capoff; 779 pi->pi_capend = capoff + reallen - 1; 780 return (0); 781 } 782 783 static struct pci_devemu * 784 pci_emul_finddev(const char *name) 785 { 786 struct pci_devemu **pdpp, *pdp; 787 788 SET_FOREACH(pdpp, pci_devemu_set) { 789 pdp = *pdpp; 790 if (!strcmp(pdp->pe_emu, name)) { 791 return (pdp); 792 } 793 } 794 795 return (NULL); 796 } 797 798 static int 799 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 800 int func, struct funcinfo *fi) 801 { 802 struct pci_devinst *pdi; 803 int err; 804 805 pdi = calloc(1, sizeof(struct pci_devinst)); 806 807 pdi->pi_vmctx = ctx; 808 pdi->pi_bus = bus; 809 pdi->pi_slot = slot; 810 pdi->pi_func = func; 811 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 812 pdi->pi_lintr.pin = 0; 813 pdi->pi_lintr.state = IDLE; 814 pdi->pi_lintr.pirq_pin = 0; 815 pdi->pi_lintr.ioapic_irq = 0; 816 pdi->pi_d = pde; 817 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 818 819 /* Disable legacy interrupts */ 820 
pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 821 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 822 823 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); 824 825 err = (*pde->pe_init)(ctx, pdi, fi->fi_config); 826 if (err == 0) 827 fi->fi_devi = pdi; 828 else 829 free(pdi); 830 831 return (err); 832 } 833 834 void 835 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 836 { 837 int mmc; 838 839 /* Number of msi messages must be a power of 2 between 1 and 32 */ 840 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 841 mmc = ffs(msgnum) - 1; 842 843 bzero(msicap, sizeof(struct msicap)); 844 msicap->capid = PCIY_MSI; 845 msicap->nextptr = nextptr; 846 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 847 } 848 849 int 850 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 851 { 852 struct msicap msicap; 853 854 pci_populate_msicap(&msicap, msgnum, 0); 855 856 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 857 } 858 859 static void 860 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 861 uint32_t msix_tab_size) 862 { 863 864 assert(msix_tab_size % 4096 == 0); 865 866 bzero(msixcap, sizeof(struct msixcap)); 867 msixcap->capid = PCIY_MSIX; 868 869 /* 870 * Message Control Register, all fields set to 871 * zero except for the Table Size. 
872 * Note: Table size N is encoded as N-1 873 */ 874 msixcap->msgctrl = msgnum - 1; 875 876 /* 877 * MSI-X BAR setup: 878 * - MSI-X table start at offset 0 879 * - PBA table starts at a 4K aligned offset after the MSI-X table 880 */ 881 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 882 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 883 } 884 885 static void 886 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 887 { 888 int i, table_size; 889 890 assert(table_entries > 0); 891 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 892 893 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 894 pi->pi_msix.table = calloc(1, table_size); 895 896 /* set mask bit of vector control register */ 897 for (i = 0; i < table_entries; i++) 898 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 899 } 900 901 int 902 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 903 { 904 uint32_t tab_size; 905 struct msixcap msixcap; 906 907 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 908 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 909 910 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 911 912 /* Align table size to nearest 4K */ 913 tab_size = roundup2(tab_size, 4096); 914 915 pi->pi_msix.table_bar = barnum; 916 pi->pi_msix.pba_bar = barnum; 917 pi->pi_msix.table_offset = 0; 918 pi->pi_msix.table_count = msgnum; 919 pi->pi_msix.pba_offset = tab_size; 920 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 921 922 pci_msix_table_init(pi, msgnum); 923 924 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 925 926 /* allocate memory for MSI-X Table and PBA */ 927 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 928 tab_size + pi->pi_msix.pba_size); 929 930 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 931 sizeof(msixcap))); 932 } 933 934 static void 935 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 936 int bytes, uint32_t val) 937 { 938 uint16_t msgctrl, rwmask; 939 int off; 940 941 off = offset - 
capoff; 942 /* Message Control Register */ 943 if (off == 2 && bytes == 2) { 944 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 945 msgctrl = pci_get_cfgdata16(pi, offset); 946 msgctrl &= ~rwmask; 947 msgctrl |= val & rwmask; 948 val = msgctrl; 949 950 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 951 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 952 pci_lintr_update(pi); 953 } 954 955 CFGWRITE(pi, offset, val, bytes); 956 } 957 958 static void 959 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 960 int bytes, uint32_t val) 961 { 962 uint16_t msgctrl, rwmask, msgdata, mme; 963 uint32_t addrlo; 964 965 /* 966 * If guest is writing to the message control register make sure 967 * we do not overwrite read-only fields. 968 */ 969 if ((offset - capoff) == 2 && bytes == 2) { 970 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 971 msgctrl = pci_get_cfgdata16(pi, offset); 972 msgctrl &= ~rwmask; 973 msgctrl |= val & rwmask; 974 val = msgctrl; 975 } 976 CFGWRITE(pi, offset, val, bytes); 977 978 msgctrl = pci_get_cfgdata16(pi, capoff + 2); 979 addrlo = pci_get_cfgdata32(pi, capoff + 4); 980 if (msgctrl & PCIM_MSICTRL_64BIT) 981 msgdata = pci_get_cfgdata16(pi, capoff + 12); 982 else 983 msgdata = pci_get_cfgdata16(pi, capoff + 8); 984 985 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 986 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 
1 : 0; 987 if (pi->pi_msi.enabled) { 988 pi->pi_msi.addr = addrlo; 989 pi->pi_msi.msg_data = msgdata; 990 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 991 } else { 992 pi->pi_msi.maxmsgnum = 0; 993 } 994 pci_lintr_update(pi); 995 } 996 997 void 998 pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 999 int bytes, uint32_t val) 1000 { 1001 1002 /* XXX don't write to the readonly parts */ 1003 CFGWRITE(pi, offset, val, bytes); 1004 } 1005 1006 #define PCIECAP_VERSION 0x2 1007 int 1008 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 1009 { 1010 int err; 1011 struct pciecap pciecap; 1012 1013 bzero(&pciecap, sizeof(pciecap)); 1014 1015 /* 1016 * Use the integrated endpoint type for endpoints on a root complex bus. 1017 * 1018 * NB: bhyve currently only supports a single PCI bus that is the root 1019 * complex bus, so all endpoints are integrated. 1020 */ 1021 if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0)) 1022 type = PCIEM_TYPE_ROOT_INT_EP; 1023 1024 pciecap.capid = PCIY_EXPRESS; 1025 pciecap.pcie_capabilities = PCIECAP_VERSION | type; 1026 if (type != PCIEM_TYPE_ROOT_INT_EP) { 1027 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 1028 pciecap.link_status = 0x11; /* gen1, x1 */ 1029 } 1030 1031 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 1032 return (err); 1033 } 1034 1035 /* 1036 * This function assumes that 'coff' is in the capabilities region of the 1037 * config space. A capoff parameter of zero will force a search for the 1038 * offset and type. 
1039 */ 1040 void 1041 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, 1042 uint8_t capoff, int capid) 1043 { 1044 uint8_t nextoff; 1045 1046 /* Do not allow un-aligned writes */ 1047 if ((offset & (bytes - 1)) != 0) 1048 return; 1049 1050 if (capoff == 0) { 1051 /* Find the capability that we want to update */ 1052 capoff = CAP_START_OFFSET; 1053 while (1) { 1054 nextoff = pci_get_cfgdata8(pi, capoff + 1); 1055 if (nextoff == 0) 1056 break; 1057 if (offset >= capoff && offset < nextoff) 1058 break; 1059 1060 capoff = nextoff; 1061 } 1062 assert(offset >= capoff); 1063 capid = pci_get_cfgdata8(pi, capoff); 1064 } 1065 1066 /* 1067 * Capability ID and Next Capability Pointer are readonly. 1068 * However, some o/s's do 4-byte writes that include these. 1069 * For this case, trim the write back to 2 bytes and adjust 1070 * the data. 1071 */ 1072 if (offset == capoff || offset == capoff + 1) { 1073 if (offset == capoff && bytes == 4) { 1074 bytes = 2; 1075 offset += 2; 1076 val >>= 16; 1077 } else 1078 return; 1079 } 1080 1081 switch (capid) { 1082 case PCIY_MSI: 1083 msicap_cfgwrite(pi, capoff, offset, bytes, val); 1084 break; 1085 case PCIY_MSIX: 1086 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 1087 break; 1088 case PCIY_EXPRESS: 1089 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 1090 break; 1091 default: 1092 break; 1093 } 1094 } 1095 1096 static int 1097 pci_emul_iscap(struct pci_devinst *pi, int offset) 1098 { 1099 uint16_t sts; 1100 1101 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1102 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1103 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1104 return (1); 1105 } 1106 return (0); 1107 } 1108 1109 static int 1110 pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1111 int size, uint64_t *val, void *arg1, long arg2) 1112 { 1113 /* 1114 * Ignore writes; return 0xff's for reads. 
The mem read code 1115 * will take care of truncating to the correct size. 1116 */ 1117 if (dir == MEM_F_READ) { 1118 *val = 0xffffffffffffffff; 1119 } 1120 1121 return (0); 1122 } 1123 1124 static int 1125 pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1126 int bytes, uint64_t *val, void *arg1, long arg2) 1127 { 1128 int bus, slot, func, coff, in; 1129 1130 coff = addr & 0xfff; 1131 func = (addr >> 12) & 0x7; 1132 slot = (addr >> 15) & 0x1f; 1133 bus = (addr >> 20) & 0xff; 1134 in = (dir == MEM_F_READ); 1135 if (in) 1136 *val = ~0UL; 1137 pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); 1138 return (0); 1139 } 1140 1141 uint64_t 1142 pci_ecfg_base(void) 1143 { 1144 1145 return (PCI_EMUL_ECFG_BASE); 1146 } 1147 1148 #define BUSIO_ROUNDUP 32 1149 #define BUSMEM_ROUNDUP (1024 * 1024) 1150 1151 int 1152 init_pci(struct vmctx *ctx) 1153 { 1154 char node_name[sizeof("pci.XXX.XX.X")]; 1155 struct mem_range mr; 1156 struct pci_devemu *pde; 1157 struct businfo *bi; 1158 struct slotinfo *si; 1159 struct funcinfo *fi; 1160 nvlist_t *nvl; 1161 const char *emul; 1162 size_t lowmem; 1163 int bus, slot, func; 1164 int error; 1165 1166 if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32) 1167 errx(EX_OSERR, "Invalid lowmem limit"); 1168 1169 pci_emul_iobase = PCI_EMUL_IOBASE; 1170 pci_emul_membase32 = PCI_EMUL_MEMBASE32; 1171 1172 pci_emul_membase64 = 4*GB + vm_get_highmem_size(ctx); 1173 pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64); 1174 pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64; 1175 1176 for (bus = 0; bus < MAXBUSES; bus++) { 1177 snprintf(node_name, sizeof(node_name), "pci.%d", bus); 1178 nvl = find_config_node(node_name); 1179 if (nvl == NULL) 1180 continue; 1181 pci_businfo[bus] = calloc(1, sizeof(struct businfo)); 1182 bi = pci_businfo[bus]; 1183 1184 /* 1185 * Keep track of the i/o and memory resources allocated to 1186 * this bus. 
1187 */ 1188 bi->iobase = pci_emul_iobase; 1189 bi->membase32 = pci_emul_membase32; 1190 bi->membase64 = pci_emul_membase64; 1191 1192 for (slot = 0; slot < MAXSLOTS; slot++) { 1193 si = &bi->slotinfo[slot]; 1194 for (func = 0; func < MAXFUNCS; func++) { 1195 fi = &si->si_funcs[func]; 1196 snprintf(node_name, sizeof(node_name), 1197 "pci.%d.%d.%d", bus, slot, func); 1198 nvl = find_config_node(node_name); 1199 if (nvl == NULL) 1200 continue; 1201 1202 fi->fi_config = nvl; 1203 emul = get_config_value_node(nvl, "device"); 1204 if (emul == NULL) { 1205 EPRINTLN("pci slot %d:%d:%d: missing " 1206 "\"device\" value", bus, slot, func); 1207 return (EINVAL); 1208 } 1209 pde = pci_emul_finddev(emul); 1210 if (pde == NULL) { 1211 EPRINTLN("pci slot %d:%d:%d: unknown " 1212 "device \"%s\"", bus, slot, func, 1213 emul); 1214 return (EINVAL); 1215 } 1216 if (pde->pe_alias != NULL) { 1217 EPRINTLN("pci slot %d:%d:%d: legacy " 1218 "device \"%s\", use \"%s\" instead", 1219 bus, slot, func, emul, 1220 pde->pe_alias); 1221 return (EINVAL); 1222 } 1223 fi->fi_pde = pde; 1224 error = pci_emul_init(ctx, pde, bus, slot, 1225 func, fi); 1226 if (error) 1227 return (error); 1228 } 1229 } 1230 1231 /* 1232 * Add some slop to the I/O and memory resources decoded by 1233 * this bus to give a guest some flexibility if it wants to 1234 * reprogram the BARs. 
1235 */ 1236 pci_emul_iobase += BUSIO_ROUNDUP; 1237 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1238 bi->iolimit = pci_emul_iobase; 1239 1240 pci_emul_membase32 += BUSMEM_ROUNDUP; 1241 pci_emul_membase32 = roundup2(pci_emul_membase32, 1242 BUSMEM_ROUNDUP); 1243 bi->memlimit32 = pci_emul_membase32; 1244 1245 pci_emul_membase64 += BUSMEM_ROUNDUP; 1246 pci_emul_membase64 = roundup2(pci_emul_membase64, 1247 BUSMEM_ROUNDUP); 1248 bi->memlimit64 = pci_emul_membase64; 1249 } 1250 1251 /* 1252 * PCI backends are initialized before routing INTx interrupts 1253 * so that LPC devices are able to reserve ISA IRQs before 1254 * routing PIRQ pins. 1255 */ 1256 for (bus = 0; bus < MAXBUSES; bus++) { 1257 if ((bi = pci_businfo[bus]) == NULL) 1258 continue; 1259 1260 for (slot = 0; slot < MAXSLOTS; slot++) { 1261 si = &bi->slotinfo[slot]; 1262 for (func = 0; func < MAXFUNCS; func++) { 1263 fi = &si->si_funcs[func]; 1264 if (fi->fi_devi == NULL) 1265 continue; 1266 pci_lintr_route(fi->fi_devi); 1267 } 1268 } 1269 } 1270 lpc_pirq_routed(); 1271 1272 /* 1273 * The guest physical memory map looks like the following: 1274 * [0, lowmem) guest system memory 1275 * [lowmem, 0xC0000000) memory hole (may be absent) 1276 * [0xC0000000, 0xE0000000) PCI hole (32-bit BAR allocation) 1277 * [0xE0000000, 0xF0000000) PCI extended config window 1278 * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware 1279 * [4GB, 4GB + highmem) 1280 */ 1281 1282 /* 1283 * Accesses to memory addresses that are not allocated to system 1284 * memory or PCI devices return 0xff's. 
1285 */ 1286 lowmem = vm_get_lowmem_size(ctx); 1287 bzero(&mr, sizeof(struct mem_range)); 1288 mr.name = "PCI hole"; 1289 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1290 mr.base = lowmem; 1291 mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1292 mr.handler = pci_emul_fallback_handler; 1293 error = register_mem_fallback(&mr); 1294 assert(error == 0); 1295 1296 /* PCI extended config space */ 1297 bzero(&mr, sizeof(struct mem_range)); 1298 mr.name = "PCI ECFG"; 1299 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1300 mr.base = PCI_EMUL_ECFG_BASE; 1301 mr.size = PCI_EMUL_ECFG_SIZE; 1302 mr.handler = pci_emul_ecfg_handler; 1303 error = register_mem(&mr); 1304 assert(error == 0); 1305 1306 return (0); 1307 } 1308 1309 static void 1310 pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1311 void *arg) 1312 { 1313 1314 dsdt_line(" Package ()"); 1315 dsdt_line(" {"); 1316 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1317 dsdt_line(" 0x%02X,", pin - 1); 1318 dsdt_line(" Zero,"); 1319 dsdt_line(" 0x%X", ioapic_irq); 1320 dsdt_line(" },"); 1321 } 1322 1323 static void 1324 pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1325 void *arg) 1326 { 1327 char *name; 1328 1329 name = lpc_pirq_name(pirq_pin); 1330 if (name == NULL) 1331 return; 1332 dsdt_line(" Package ()"); 1333 dsdt_line(" {"); 1334 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1335 dsdt_line(" 0x%02X,", pin - 1); 1336 dsdt_line(" %s,", name); 1337 dsdt_line(" 0x00"); 1338 dsdt_line(" },"); 1339 free(name); 1340 } 1341 1342 /* 1343 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1344 * corresponding to each PCI bus. 1345 */ 1346 static void 1347 pci_bus_write_dsdt(int bus) 1348 { 1349 struct businfo *bi; 1350 struct slotinfo *si; 1351 struct pci_devinst *pi; 1352 int count, func, slot; 1353 1354 /* 1355 * If there are no devices on this 'bus' then just return. 
1356 */ 1357 if ((bi = pci_businfo[bus]) == NULL) { 1358 /* 1359 * Bus 0 is special because it decodes the I/O ports used 1360 * for PCI config space access even if there are no devices 1361 * on it. 1362 */ 1363 if (bus != 0) 1364 return; 1365 } 1366 1367 dsdt_line(" Device (PC%02X)", bus); 1368 dsdt_line(" {"); 1369 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1370 1371 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1372 dsdt_line(" {"); 1373 dsdt_line(" Return (0x%08X)", bus); 1374 dsdt_line(" }"); 1375 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1376 dsdt_line(" {"); 1377 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1378 "MaxFixed, PosDecode,"); 1379 dsdt_line(" 0x0000, // Granularity"); 1380 dsdt_line(" 0x%04X, // Range Minimum", bus); 1381 dsdt_line(" 0x%04X, // Range Maximum", bus); 1382 dsdt_line(" 0x0000, // Translation Offset"); 1383 dsdt_line(" 0x0001, // Length"); 1384 dsdt_line(" ,, )"); 1385 1386 if (bus == 0) { 1387 dsdt_indent(3); 1388 dsdt_fixed_ioport(0xCF8, 8); 1389 dsdt_unindent(3); 1390 1391 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1392 "PosDecode, EntireRange,"); 1393 dsdt_line(" 0x0000, // Granularity"); 1394 dsdt_line(" 0x0000, // Range Minimum"); 1395 dsdt_line(" 0x0CF7, // Range Maximum"); 1396 dsdt_line(" 0x0000, // Translation Offset"); 1397 dsdt_line(" 0x0CF8, // Length"); 1398 dsdt_line(" ,, , TypeStatic)"); 1399 1400 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1401 "PosDecode, EntireRange,"); 1402 dsdt_line(" 0x0000, // Granularity"); 1403 dsdt_line(" 0x0D00, // Range Minimum"); 1404 dsdt_line(" 0x%04X, // Range Maximum", 1405 PCI_EMUL_IOBASE - 1); 1406 dsdt_line(" 0x0000, // Translation Offset"); 1407 dsdt_line(" 0x%04X, // Length", 1408 PCI_EMUL_IOBASE - 0x0D00); 1409 dsdt_line(" ,, , TypeStatic)"); 1410 1411 if (bi == NULL) { 1412 dsdt_line(" })"); 1413 goto done; 1414 } 1415 } 1416 assert(bi != NULL); 1417 1418 /* i/o window */ 1419 dsdt_line(" WordIO (ResourceProducer, 
MinFixed, MaxFixed, " 1420 "PosDecode, EntireRange,"); 1421 dsdt_line(" 0x0000, // Granularity"); 1422 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1423 dsdt_line(" 0x%04X, // Range Maximum", 1424 bi->iolimit - 1); 1425 dsdt_line(" 0x0000, // Translation Offset"); 1426 dsdt_line(" 0x%04X, // Length", 1427 bi->iolimit - bi->iobase); 1428 dsdt_line(" ,, , TypeStatic)"); 1429 1430 /* mmio window (32-bit) */ 1431 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1432 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1433 dsdt_line(" 0x00000000, // Granularity"); 1434 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1435 dsdt_line(" 0x%08X, // Range Maximum\n", 1436 bi->memlimit32 - 1); 1437 dsdt_line(" 0x00000000, // Translation Offset"); 1438 dsdt_line(" 0x%08X, // Length\n", 1439 bi->memlimit32 - bi->membase32); 1440 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1441 1442 /* mmio window (64-bit) */ 1443 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1444 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1445 dsdt_line(" 0x0000000000000000, // Granularity"); 1446 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1447 dsdt_line(" 0x%016lX, // Range Maximum\n", 1448 bi->memlimit64 - 1); 1449 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1450 dsdt_line(" 0x%016lX, // Length\n", 1451 bi->memlimit64 - bi->membase64); 1452 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1453 dsdt_line(" })"); 1454 1455 count = pci_count_lintr(bus); 1456 if (count != 0) { 1457 dsdt_indent(2); 1458 dsdt_line("Name (PPRT, Package ()"); 1459 dsdt_line("{"); 1460 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1461 dsdt_line("})"); 1462 dsdt_line("Name (APRT, Package ()"); 1463 dsdt_line("{"); 1464 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1465 dsdt_line("})"); 1466 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1467 dsdt_line("{"); 1468 dsdt_line(" If (PICM)"); 1469 dsdt_line(" {"); 1470 dsdt_line(" Return (APRT)"); 1471 dsdt_line(" 
}"); 1472 dsdt_line(" Else"); 1473 dsdt_line(" {"); 1474 dsdt_line(" Return (PPRT)"); 1475 dsdt_line(" }"); 1476 dsdt_line("}"); 1477 dsdt_unindent(2); 1478 } 1479 1480 dsdt_indent(2); 1481 for (slot = 0; slot < MAXSLOTS; slot++) { 1482 si = &bi->slotinfo[slot]; 1483 for (func = 0; func < MAXFUNCS; func++) { 1484 pi = si->si_funcs[func].fi_devi; 1485 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1486 pi->pi_d->pe_write_dsdt(pi); 1487 } 1488 } 1489 dsdt_unindent(2); 1490 done: 1491 dsdt_line(" }"); 1492 } 1493 1494 void 1495 pci_write_dsdt(void) 1496 { 1497 int bus; 1498 1499 dsdt_indent(1); 1500 dsdt_line("Name (PICM, 0x00)"); 1501 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1502 dsdt_line("{"); 1503 dsdt_line(" Store (Arg0, PICM)"); 1504 dsdt_line("}"); 1505 dsdt_line(""); 1506 dsdt_line("Scope (_SB)"); 1507 dsdt_line("{"); 1508 for (bus = 0; bus < MAXBUSES; bus++) 1509 pci_bus_write_dsdt(bus); 1510 dsdt_line("}"); 1511 dsdt_unindent(1); 1512 } 1513 1514 int 1515 pci_bus_configured(int bus) 1516 { 1517 assert(bus >= 0 && bus < MAXBUSES); 1518 return (pci_businfo[bus] != NULL); 1519 } 1520 1521 int 1522 pci_msi_enabled(struct pci_devinst *pi) 1523 { 1524 return (pi->pi_msi.enabled); 1525 } 1526 1527 int 1528 pci_msi_maxmsgnum(struct pci_devinst *pi) 1529 { 1530 if (pi->pi_msi.enabled) 1531 return (pi->pi_msi.maxmsgnum); 1532 else 1533 return (0); 1534 } 1535 1536 int 1537 pci_msix_enabled(struct pci_devinst *pi) 1538 { 1539 1540 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1541 } 1542 1543 void 1544 pci_generate_msix(struct pci_devinst *pi, int index) 1545 { 1546 struct msix_table_entry *mte; 1547 1548 if (!pci_msix_enabled(pi)) 1549 return; 1550 1551 if (pi->pi_msix.function_mask) 1552 return; 1553 1554 if (index >= pi->pi_msix.table_count) 1555 return; 1556 1557 mte = &pi->pi_msix.table[index]; 1558 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1559 /* XXX Set PBA bit if interrupt is disabled */ 1560 vm_lapic_msi(pi->pi_vmctx, mte->addr, 
mte->msg_data); 1561 } 1562 } 1563 1564 void 1565 pci_generate_msi(struct pci_devinst *pi, int index) 1566 { 1567 1568 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1569 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1570 pi->pi_msi.msg_data + index); 1571 } 1572 } 1573 1574 static bool 1575 pci_lintr_permitted(struct pci_devinst *pi) 1576 { 1577 uint16_t cmd; 1578 1579 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1580 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1581 (cmd & PCIM_CMD_INTxDIS))); 1582 } 1583 1584 void 1585 pci_lintr_request(struct pci_devinst *pi) 1586 { 1587 struct businfo *bi; 1588 struct slotinfo *si; 1589 int bestpin, bestcount, pin; 1590 1591 bi = pci_businfo[pi->pi_bus]; 1592 assert(bi != NULL); 1593 1594 /* 1595 * Just allocate a pin from our slot. The pin will be 1596 * assigned IRQs later when interrupts are routed. 1597 */ 1598 si = &bi->slotinfo[pi->pi_slot]; 1599 bestpin = 0; 1600 bestcount = si->si_intpins[0].ii_count; 1601 for (pin = 1; pin < 4; pin++) { 1602 if (si->si_intpins[pin].ii_count < bestcount) { 1603 bestpin = pin; 1604 bestcount = si->si_intpins[pin].ii_count; 1605 } 1606 } 1607 1608 si->si_intpins[bestpin].ii_count++; 1609 pi->pi_lintr.pin = bestpin + 1; 1610 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1611 } 1612 1613 static void 1614 pci_lintr_route(struct pci_devinst *pi) 1615 { 1616 struct businfo *bi; 1617 struct intxinfo *ii; 1618 1619 if (pi->pi_lintr.pin == 0) 1620 return; 1621 1622 bi = pci_businfo[pi->pi_bus]; 1623 assert(bi != NULL); 1624 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1625 1626 /* 1627 * Attempt to allocate an I/O APIC pin for this intpin if one 1628 * is not yet assigned. 1629 */ 1630 if (ii->ii_ioapic_irq == 0) 1631 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); 1632 assert(ii->ii_ioapic_irq > 0); 1633 1634 /* 1635 * Attempt to allocate a PIRQ pin for this intpin if one is 1636 * not yet assigned. 
1637 */ 1638 if (ii->ii_pirq_pin == 0) 1639 ii->ii_pirq_pin = pirq_alloc_pin(pi); 1640 assert(ii->ii_pirq_pin > 0); 1641 1642 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1643 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1644 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1645 } 1646 1647 void 1648 pci_lintr_assert(struct pci_devinst *pi) 1649 { 1650 1651 assert(pi->pi_lintr.pin > 0); 1652 1653 pthread_mutex_lock(&pi->pi_lintr.lock); 1654 if (pi->pi_lintr.state == IDLE) { 1655 if (pci_lintr_permitted(pi)) { 1656 pi->pi_lintr.state = ASSERTED; 1657 pci_irq_assert(pi); 1658 } else 1659 pi->pi_lintr.state = PENDING; 1660 } 1661 pthread_mutex_unlock(&pi->pi_lintr.lock); 1662 } 1663 1664 void 1665 pci_lintr_deassert(struct pci_devinst *pi) 1666 { 1667 1668 assert(pi->pi_lintr.pin > 0); 1669 1670 pthread_mutex_lock(&pi->pi_lintr.lock); 1671 if (pi->pi_lintr.state == ASSERTED) { 1672 pi->pi_lintr.state = IDLE; 1673 pci_irq_deassert(pi); 1674 } else if (pi->pi_lintr.state == PENDING) 1675 pi->pi_lintr.state = IDLE; 1676 pthread_mutex_unlock(&pi->pi_lintr.lock); 1677 } 1678 1679 static void 1680 pci_lintr_update(struct pci_devinst *pi) 1681 { 1682 1683 pthread_mutex_lock(&pi->pi_lintr.lock); 1684 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1685 pci_irq_deassert(pi); 1686 pi->pi_lintr.state = PENDING; 1687 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1688 pi->pi_lintr.state = ASSERTED; 1689 pci_irq_assert(pi); 1690 } 1691 pthread_mutex_unlock(&pi->pi_lintr.lock); 1692 } 1693 1694 int 1695 pci_count_lintr(int bus) 1696 { 1697 int count, slot, pin; 1698 struct slotinfo *slotinfo; 1699 1700 count = 0; 1701 if (pci_businfo[bus] != NULL) { 1702 for (slot = 0; slot < MAXSLOTS; slot++) { 1703 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1704 for (pin = 0; pin < 4; pin++) { 1705 if (slotinfo->si_intpins[pin].ii_count != 0) 1706 count++; 1707 } 1708 } 1709 } 1710 return (count); 1711 } 1712 1713 void 1714 pci_walk_lintr(int 
bus, pci_lintr_cb cb, void *arg) 1715 { 1716 struct businfo *bi; 1717 struct slotinfo *si; 1718 struct intxinfo *ii; 1719 int slot, pin; 1720 1721 if ((bi = pci_businfo[bus]) == NULL) 1722 return; 1723 1724 for (slot = 0; slot < MAXSLOTS; slot++) { 1725 si = &bi->slotinfo[slot]; 1726 for (pin = 0; pin < 4; pin++) { 1727 ii = &si->si_intpins[pin]; 1728 if (ii->ii_count != 0) 1729 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 1730 ii->ii_ioapic_irq, arg); 1731 } 1732 } 1733 } 1734 1735 /* 1736 * Return 1 if the emulated device in 'slot' is a multi-function device. 1737 * Return 0 otherwise. 1738 */ 1739 static int 1740 pci_emul_is_mfdev(int bus, int slot) 1741 { 1742 struct businfo *bi; 1743 struct slotinfo *si; 1744 int f, numfuncs; 1745 1746 numfuncs = 0; 1747 if ((bi = pci_businfo[bus]) != NULL) { 1748 si = &bi->slotinfo[slot]; 1749 for (f = 0; f < MAXFUNCS; f++) { 1750 if (si->si_funcs[f].fi_devi != NULL) { 1751 numfuncs++; 1752 } 1753 } 1754 } 1755 return (numfuncs > 1); 1756 } 1757 1758 /* 1759 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1760 * whether or not is a multi-function being emulated in the pci 'slot'. 1761 */ 1762 static void 1763 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1764 { 1765 int mfdev; 1766 1767 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1768 mfdev = pci_emul_is_mfdev(bus, slot); 1769 switch (bytes) { 1770 case 1: 1771 case 2: 1772 *rv &= ~PCIM_MFDEV; 1773 if (mfdev) { 1774 *rv |= PCIM_MFDEV; 1775 } 1776 break; 1777 case 4: 1778 *rv &= ~(PCIM_MFDEV << 16); 1779 if (mfdev) { 1780 *rv |= (PCIM_MFDEV << 16); 1781 } 1782 break; 1783 } 1784 } 1785 } 1786 1787 /* 1788 * Update device state in response to changes to the PCI command 1789 * register. 
1790 */ 1791 void 1792 pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) 1793 { 1794 int i; 1795 uint16_t changed, new; 1796 1797 new = pci_get_cfgdata16(pi, PCIR_COMMAND); 1798 changed = old ^ new; 1799 1800 /* 1801 * If the MMIO or I/O address space decoding has changed then 1802 * register/unregister all BARs that decode that address space. 1803 */ 1804 for (i = 0; i <= PCI_BARMAX; i++) { 1805 switch (pi->pi_bar[i].type) { 1806 case PCIBAR_NONE: 1807 case PCIBAR_MEMHI64: 1808 break; 1809 case PCIBAR_IO: 1810 /* I/O address space decoding changed? */ 1811 if (changed & PCIM_CMD_PORTEN) { 1812 if (new & PCIM_CMD_PORTEN) 1813 register_bar(pi, i); 1814 else 1815 unregister_bar(pi, i); 1816 } 1817 break; 1818 case PCIBAR_MEM32: 1819 case PCIBAR_MEM64: 1820 /* MMIO address space decoding changed? */ 1821 if (changed & PCIM_CMD_MEMEN) { 1822 if (new & PCIM_CMD_MEMEN) 1823 register_bar(pi, i); 1824 else 1825 unregister_bar(pi, i); 1826 } 1827 break; 1828 default: 1829 assert(0); 1830 } 1831 } 1832 1833 /* 1834 * If INTx has been unmasked and is pending, assert the 1835 * interrupt. 1836 */ 1837 pci_lintr_update(pi); 1838 } 1839 1840 static void 1841 pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) 1842 { 1843 int rshift; 1844 uint32_t cmd, old, readonly; 1845 1846 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 1847 1848 /* 1849 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. 1850 * 1851 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are 1852 * 'write 1 to clear'. However these bits are not set to '1' by 1853 * any device emulation so it is simpler to treat them as readonly. 
1854 */ 1855 rshift = (coff & 0x3) * 8; 1856 readonly = 0xFFFFF880 >> rshift; 1857 1858 old = CFGREAD(pi, coff, bytes); 1859 new &= ~readonly; 1860 new |= (old & readonly); 1861 CFGWRITE(pi, coff, new, bytes); /* update config */ 1862 1863 pci_emul_cmd_changed(pi, cmd); 1864 } 1865 1866 static void 1867 pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, 1868 int coff, int bytes, uint32_t *eax) 1869 { 1870 struct businfo *bi; 1871 struct slotinfo *si; 1872 struct pci_devinst *pi; 1873 struct pci_devemu *pe; 1874 int idx, needcfg; 1875 uint64_t addr, bar, mask; 1876 1877 if ((bi = pci_businfo[bus]) != NULL) { 1878 si = &bi->slotinfo[slot]; 1879 pi = si->si_funcs[func].fi_devi; 1880 } else 1881 pi = NULL; 1882 1883 /* 1884 * Just return if there is no device at this slot:func or if the 1885 * the guest is doing an un-aligned access. 1886 */ 1887 if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || 1888 (coff & (bytes - 1)) != 0) { 1889 if (in) 1890 *eax = 0xffffffff; 1891 return; 1892 } 1893 1894 /* 1895 * Ignore all writes beyond the standard config space and return all 1896 * ones on reads. 1897 */ 1898 if (coff >= PCI_REGMAX + 1) { 1899 if (in) { 1900 *eax = 0xffffffff; 1901 /* 1902 * Extended capabilities begin at offset 256 in config 1903 * space. Absence of extended capabilities is signaled 1904 * with all 0s in the extended capability header at 1905 * offset 256. 
1906 */ 1907 if (coff <= PCI_REGMAX + 4) 1908 *eax = 0x00000000; 1909 } 1910 return; 1911 } 1912 1913 pe = pi->pi_d; 1914 1915 /* 1916 * Config read 1917 */ 1918 if (in) { 1919 /* Let the device emulation override the default handler */ 1920 if (pe->pe_cfgread != NULL) { 1921 needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, 1922 eax); 1923 } else { 1924 needcfg = 1; 1925 } 1926 1927 if (needcfg) 1928 *eax = CFGREAD(pi, coff, bytes); 1929 1930 pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); 1931 } else { 1932 /* Let the device emulation override the default handler */ 1933 if (pe->pe_cfgwrite != NULL && 1934 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 1935 return; 1936 1937 /* 1938 * Special handling for write to BAR registers 1939 */ 1940 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { 1941 /* 1942 * Ignore writes to BAR registers that are not 1943 * 4-byte aligned. 1944 */ 1945 if (bytes != 4 || (coff & 0x3) != 0) 1946 return; 1947 idx = (coff - PCIR_BAR(0)) / 4; 1948 mask = ~(pi->pi_bar[idx].size - 1); 1949 switch (pi->pi_bar[idx].type) { 1950 case PCIBAR_NONE: 1951 pi->pi_bar[idx].addr = bar = 0; 1952 break; 1953 case PCIBAR_IO: 1954 addr = *eax & mask; 1955 addr &= 0xffff; 1956 bar = addr | pi->pi_bar[idx].lobits; 1957 /* 1958 * Register the new BAR value for interception 1959 */ 1960 if (addr != pi->pi_bar[idx].addr) { 1961 update_bar_address(pi, addr, idx, 1962 PCIBAR_IO); 1963 } 1964 break; 1965 case PCIBAR_MEM32: 1966 addr = bar = *eax & mask; 1967 bar |= pi->pi_bar[idx].lobits; 1968 if (addr != pi->pi_bar[idx].addr) { 1969 update_bar_address(pi, addr, idx, 1970 PCIBAR_MEM32); 1971 } 1972 break; 1973 case PCIBAR_MEM64: 1974 addr = bar = *eax & mask; 1975 bar |= pi->pi_bar[idx].lobits; 1976 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 1977 update_bar_address(pi, addr, idx, 1978 PCIBAR_MEM64); 1979 } 1980 break; 1981 case PCIBAR_MEMHI64: 1982 mask = ~(pi->pi_bar[idx - 1].size - 1); 1983 addr = ((uint64_t)*eax << 32) & 
mask; 1984 bar = addr >> 32; 1985 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 1986 update_bar_address(pi, addr, idx - 1, 1987 PCIBAR_MEMHI64); 1988 } 1989 break; 1990 default: 1991 assert(0); 1992 } 1993 pci_set_cfgdata32(pi, coff, bar); 1994 1995 } else if (pci_emul_iscap(pi, coff)) { 1996 pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); 1997 } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { 1998 pci_emul_cmdsts_write(pi, coff, *eax, bytes); 1999 } else { 2000 CFGWRITE(pi, coff, *eax, bytes); 2001 } 2002 } 2003 } 2004 2005 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 2006 2007 static int 2008 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2009 uint32_t *eax, void *arg) 2010 { 2011 uint32_t x; 2012 2013 if (bytes != 4) { 2014 if (in) 2015 *eax = (bytes == 2) ? 0xffff : 0xff; 2016 return (0); 2017 } 2018 2019 if (in) { 2020 x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; 2021 if (cfgenable) 2022 x |= CONF1_ENABLE; 2023 *eax = x; 2024 } else { 2025 x = *eax; 2026 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 2027 cfgoff = (x & PCI_REGMAX) & ~0x03; 2028 cfgfunc = (x >> 8) & PCI_FUNCMAX; 2029 cfgslot = (x >> 11) & PCI_SLOTMAX; 2030 cfgbus = (x >> 16) & PCI_BUSMAX; 2031 } 2032 2033 return (0); 2034 } 2035 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 2036 2037 static int 2038 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2039 uint32_t *eax, void *arg) 2040 { 2041 int coff; 2042 2043 assert(bytes == 1 || bytes == 2 || bytes == 4); 2044 2045 coff = cfgoff + (port - CONF1_DATA_PORT); 2046 if (cfgenable) { 2047 pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, 2048 eax); 2049 } else { 2050 /* Ignore accesses to cfgdata if not enabled by cfgaddr */ 2051 if (in) 2052 *eax = 0xffffffff; 2053 } 2054 return (0); 2055 } 2056 2057 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 2058 INOUT_PORT(pci_cfgdata, 
CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 2059 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 2060 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 2061 2062 #ifdef BHYVE_SNAPSHOT 2063 /* 2064 * Saves/restores PCI device emulated state. Returns 0 on success. 2065 */ 2066 static int 2067 pci_snapshot_pci_dev(struct vm_snapshot_meta *meta) 2068 { 2069 struct pci_devinst *pi; 2070 int i; 2071 int ret; 2072 2073 pi = meta->dev_data; 2074 2075 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done); 2076 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done); 2077 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done); 2078 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done); 2079 2080 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done); 2081 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done); 2082 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done); 2083 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done); 2084 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done); 2085 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done); 2086 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done); 2087 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done); 2088 2089 SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata), 2090 meta, ret, done); 2091 2092 for (i = 0; i < nitems(pi->pi_bar); i++) { 2093 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done); 2094 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done); 2095 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done); 2096 } 2097 2098 /* Restore MSI-X table. 
*/ 2099 for (i = 0; i < pi->pi_msix.table_count; i++) { 2100 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr, 2101 meta, ret, done); 2102 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data, 2103 meta, ret, done); 2104 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control, 2105 meta, ret, done); 2106 } 2107 2108 done: 2109 return (ret); 2110 } 2111 2112 static int 2113 pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde, 2114 struct pci_devinst **pdi) 2115 { 2116 struct businfo *bi; 2117 struct slotinfo *si; 2118 struct funcinfo *fi; 2119 int bus, slot, func; 2120 2121 assert(dev_name != NULL); 2122 assert(pde != NULL); 2123 assert(pdi != NULL); 2124 2125 for (bus = 0; bus < MAXBUSES; bus++) { 2126 if ((bi = pci_businfo[bus]) == NULL) 2127 continue; 2128 2129 for (slot = 0; slot < MAXSLOTS; slot++) { 2130 si = &bi->slotinfo[slot]; 2131 for (func = 0; func < MAXFUNCS; func++) { 2132 fi = &si->si_funcs[func]; 2133 if (fi->fi_pde == NULL) 2134 continue; 2135 if (strcmp(dev_name, fi->fi_pde->pe_emu) != 0) 2136 continue; 2137 2138 *pde = fi->fi_pde; 2139 *pdi = fi->fi_devi; 2140 return (0); 2141 } 2142 } 2143 } 2144 2145 return (EINVAL); 2146 } 2147 2148 int 2149 pci_snapshot(struct vm_snapshot_meta *meta) 2150 { 2151 struct pci_devemu *pde; 2152 struct pci_devinst *pdi; 2153 int ret; 2154 2155 assert(meta->dev_name != NULL); 2156 2157 ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi); 2158 if (ret != 0) { 2159 fprintf(stderr, "%s: no such name: %s\r\n", 2160 __func__, meta->dev_name); 2161 memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); 2162 return (0); 2163 } 2164 2165 meta->dev_data = pdi; 2166 2167 if (pde->pe_snapshot == NULL) { 2168 fprintf(stderr, "%s: not implemented yet for: %s\r\n", 2169 __func__, meta->dev_name); 2170 return (-1); 2171 } 2172 2173 ret = pci_snapshot_pci_dev(meta); 2174 if (ret != 0) { 2175 fprintf(stderr, "%s: failed to snapshot pci dev\r\n", 2176 __func__); 2177 return (-1); 2178 } 2179 2180 ret = 
(*pde->pe_snapshot)(meta); 2181 2182 return (ret); 2183 } 2184 2185 int 2186 pci_pause(struct vmctx *ctx, const char *dev_name) 2187 { 2188 struct pci_devemu *pde; 2189 struct pci_devinst *pdi; 2190 int ret; 2191 2192 assert(dev_name != NULL); 2193 2194 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2195 if (ret != 0) { 2196 /* 2197 * It is possible to call this function without 2198 * checking that the device is inserted first. 2199 */ 2200 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2201 return (0); 2202 } 2203 2204 if (pde->pe_pause == NULL) { 2205 /* The pause/resume functionality is optional. */ 2206 fprintf(stderr, "%s: not implemented for: %s\n", 2207 __func__, dev_name); 2208 return (0); 2209 } 2210 2211 return (*pde->pe_pause)(ctx, pdi); 2212 } 2213 2214 int 2215 pci_resume(struct vmctx *ctx, const char *dev_name) 2216 { 2217 struct pci_devemu *pde; 2218 struct pci_devinst *pdi; 2219 int ret; 2220 2221 assert(dev_name != NULL); 2222 2223 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2224 if (ret != 0) { 2225 /* 2226 * It is possible to call this function without 2227 * checking that the device is inserted first. 2228 */ 2229 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2230 return (0); 2231 } 2232 2233 if (pde->pe_resume == NULL) { 2234 /* The pause/resume functionality is optional. 
*/ 2235 fprintf(stderr, "%s: not implemented for: %s\n", 2236 __func__, dev_name); 2237 return (0); 2238 } 2239 2240 return (*pde->pe_resume)(ctx, pdi); 2241 } 2242 #endif 2243 2244 #define PCI_EMUL_TEST 2245 #ifdef PCI_EMUL_TEST 2246 /* 2247 * Define a dummy test device 2248 */ 2249 #define DIOSZ 8 2250 #define DMEMSZ 4096 2251 struct pci_emul_dsoftc { 2252 uint8_t ioregs[DIOSZ]; 2253 uint8_t memregs[2][DMEMSZ]; 2254 }; 2255 2256 #define PCI_EMUL_MSI_MSGS 4 2257 #define PCI_EMUL_MSIX_MSGS 16 2258 2259 static int 2260 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) 2261 { 2262 int error; 2263 struct pci_emul_dsoftc *sc; 2264 2265 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 2266 2267 pi->pi_arg = sc; 2268 2269 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 2270 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 2271 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 2272 2273 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 2274 assert(error == 0); 2275 2276 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 2277 assert(error == 0); 2278 2279 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 2280 assert(error == 0); 2281 2282 error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); 2283 assert(error == 0); 2284 2285 return (0); 2286 } 2287 2288 static void 2289 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2290 uint64_t offset, int size, uint64_t value) 2291 { 2292 int i; 2293 struct pci_emul_dsoftc *sc = pi->pi_arg; 2294 2295 if (baridx == 0) { 2296 if (offset + size > DIOSZ) { 2297 printf("diow: iow too large, offset %ld size %d\n", 2298 offset, size); 2299 return; 2300 } 2301 2302 if (size == 1) { 2303 sc->ioregs[offset] = value & 0xff; 2304 } else if (size == 2) { 2305 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 2306 } else if (size == 4) { 2307 *(uint32_t *)&sc->ioregs[offset] = value; 2308 } else { 2309 printf("diow: iow unknown size %d\n", size); 2310 } 2311 2312 /* 2313 * Special magic value to 
generate an interrupt 2314 */ 2315 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 2316 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 2317 2318 if (value == 0xabcdef) { 2319 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 2320 pci_generate_msi(pi, i); 2321 } 2322 } 2323 2324 if (baridx == 1 || baridx == 2) { 2325 if (offset + size > DMEMSZ) { 2326 printf("diow: memw too large, offset %ld size %d\n", 2327 offset, size); 2328 return; 2329 } 2330 2331 i = baridx - 1; /* 'memregs' index */ 2332 2333 if (size == 1) { 2334 sc->memregs[i][offset] = value; 2335 } else if (size == 2) { 2336 *(uint16_t *)&sc->memregs[i][offset] = value; 2337 } else if (size == 4) { 2338 *(uint32_t *)&sc->memregs[i][offset] = value; 2339 } else if (size == 8) { 2340 *(uint64_t *)&sc->memregs[i][offset] = value; 2341 } else { 2342 printf("diow: memw unknown size %d\n", size); 2343 } 2344 2345 /* 2346 * magic interrupt ?? 2347 */ 2348 } 2349 2350 if (baridx > 2 || baridx < 0) { 2351 printf("diow: unknown bar idx %d\n", baridx); 2352 } 2353 } 2354 2355 static uint64_t 2356 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2357 uint64_t offset, int size) 2358 { 2359 struct pci_emul_dsoftc *sc = pi->pi_arg; 2360 uint32_t value; 2361 int i; 2362 2363 if (baridx == 0) { 2364 if (offset + size > DIOSZ) { 2365 printf("dior: ior too large, offset %ld size %d\n", 2366 offset, size); 2367 return (0); 2368 } 2369 2370 value = 0; 2371 if (size == 1) { 2372 value = sc->ioregs[offset]; 2373 } else if (size == 2) { 2374 value = *(uint16_t *) &sc->ioregs[offset]; 2375 } else if (size == 4) { 2376 value = *(uint32_t *) &sc->ioregs[offset]; 2377 } else { 2378 printf("dior: ior unknown size %d\n", size); 2379 } 2380 } 2381 2382 if (baridx == 1 || baridx == 2) { 2383 if (offset + size > DMEMSZ) { 2384 printf("dior: memr too large, offset %ld size %d\n", 2385 offset, size); 2386 return (0); 2387 } 2388 2389 i = baridx - 1; /* 'memregs' index */ 2390 2391 if (size == 1) { 2392 
value = sc->memregs[i][offset]; 2393 } else if (size == 2) { 2394 value = *(uint16_t *) &sc->memregs[i][offset]; 2395 } else if (size == 4) { 2396 value = *(uint32_t *) &sc->memregs[i][offset]; 2397 } else if (size == 8) { 2398 value = *(uint64_t *) &sc->memregs[i][offset]; 2399 } else { 2400 printf("dior: ior unknown size %d\n", size); 2401 } 2402 } 2403 2404 2405 if (baridx > 2 || baridx < 0) { 2406 printf("dior: unknown bar idx %d\n", baridx); 2407 return (0); 2408 } 2409 2410 return (value); 2411 } 2412 2413 #ifdef BHYVE_SNAPSHOT 2414 int 2415 pci_emul_snapshot(struct vm_snapshot_meta *meta) 2416 { 2417 2418 return (0); 2419 } 2420 #endif 2421 2422 struct pci_devemu pci_dummy = { 2423 .pe_emu = "dummy", 2424 .pe_init = pci_emul_dinit, 2425 .pe_barwrite = pci_emul_diow, 2426 .pe_barread = pci_emul_dior, 2427 #ifdef BHYVE_SNAPSHOT 2428 .pe_snapshot = pci_emul_snapshot, 2429 #endif 2430 }; 2431 PCI_EMUL_SET(pci_dummy); 2432 2433 #endif /* PCI_EMUL_TEST */ 2434