/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>

#include <ctype.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <stdbool.h>

#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <vmmapi.h>

#include "acpi.h"
#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "inout.h"
#include "ioapic.h"
#include "mem.h"
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"

#define	CONF1_ADDR_PORT	0x0cf8
#define	CONF1_DATA_PORT	0x0cfc

#define	CONF1_ENABLE	0x80000000ul

#define	MAXBUSES	(PCI_BUSMAX + 1)
#define	MAXSLOTS	(PCI_SLOTMAX + 1)
#define	MAXFUNCS	(PCI_FUNCMAX + 1)

struct funcinfo {
	nvlist_t *fi_config;
	struct pci_devemu *fi_pde;
	struct pci_devinst *fi_devi;
};

struct intxinfo {
	int	ii_count;
	int	ii_pirq_pin;
	int	ii_ioapic_irq;
};

struct slotinfo {
	struct intxinfo si_intpins[4];
	struct funcinfo si_funcs[MAXFUNCS];
};

struct businfo {
	uint16_t iobase, iolimit;		/* I/O window */
	uint32_t membase32, memlimit32;		/* mmio window below 4GB */
	uint64_t membase64, memlimit64;		/* mmio window above 4GB */
	struct slotinfo slotinfo[MAXSLOTS];
};

static struct businfo *pci_businfo[MAXBUSES];

SET_DECLARE(pci_devemu_set, struct pci_devemu);

static uint64_t pci_emul_iobase;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;

#define	PCI_EMUL_IOBASE		0x2000
#define	PCI_EMUL_IOLIMIT	0x10000

#define	PCI_EMUL_ECFG_BASE	0xE0000000		    /* 3.5GB */
#define	PCI_EMUL_ECFG_SIZE	(MAXBUSES * 1024 * 1024)    /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);

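/* The 32-bit MMIO window ends where the extended config window begins. */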
#define	PCI_EMUL_MEMLIMIT32	PCI_EMUL_ECFG_BASE

static struct pci_devemu *pci_emul_finddev(const char *name);
static void pci_lintr_route(struct pci_devinst *pi);
static void pci_lintr_update(struct pci_devinst *pi);
static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot,
    int func, int coff, int bytes, uint32_t *val);

static __inline void
CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
{

	if (bytes == 1)
		pci_set_cfgdata8(pi, coff, val);
	else if (bytes == 2)
		pci_set_cfgdata16(pi, coff, val);
	else
		pci_set_cfgdata32(pi, coff, val);
}

static __inline uint32_t
CFGREAD(struct pci_devinst *pi, int coff, int bytes)
{

	if (bytes == 1)
		return (pci_get_cfgdata8(pi, coff));
	else if (bytes == 2)
		return (pci_get_cfgdata16(pi, coff));
	else
		return (pci_get_cfgdata32(pi, coff));
}

/*
 * I/O access
 */

/*
 * Slot options are in the form:
 *
 *  <bus>:<slot>:<func>,<emul>[,<config>]
 *  <slot>[:<func>],<emul>[,<config>]
 *
 *  slot is 0..31
 *  func is 0..7
 *  emul is a string describing the type of PCI device e.g. virtio-net
 *  config is an optional string, depending on the device, that can be
 *  used for configuration.
 *   Examples are:
 *     1,virtio-net,tap0
 *     3:0,dummy
 */
static void
pci_parse_slot_usage(char *aopt)
{

	EPRINTLN("Invalid PCI slot info field \"%s\"", aopt);
}

/*
 * Helper function to parse a list of comma-separated options where
 * each option is formatted as "name[=value]".  If no value is
 * provided, the option is treated as a boolean and is given a value
 * of true.
 */
int
pci_parse_legacy_config(nvlist_t *nvl, const char *opt)
{
	char *config, *name, *tofree, *value;

	if (opt == NULL)
		return (0);

	config = tofree = strdup(opt);
	while ((name = strsep(&config, ",")) != NULL) {
		value = strchr(name, '=');
		if (value != NULL) {
			*value = '\0';
			value++;
			set_config_value_node(nvl, name, value);
		} else
			set_config_bool_node(nvl, name, true);
	}
	free(tofree);
	return (0);
}

/*
 * PCI device configuration is stored in MIBs that encode the device's
 * location:
 *
 * pci.<bus>.<slot>.<func>
 *
 * Where "bus", "slot", and "func" are all decimal values without
 * leading zeroes.  Each valid device must have a "device" node which
 * identifies the driver model of the device.
 *
 * Device backends can provide a parser for the "config" string.  If
 * a custom parser is not provided, pci_parse_legacy_config() is used
 * to parse the string.
 */
int
pci_parse_slot(char *opt)
{
	char node_name[sizeof("pci.XXX.XX.X")];
	struct pci_devemu *pde;
	char *emul, *config, *str, *cp;
	int error, bnum, snum, fnum;
	nvlist_t *nvl;

	error = -1;
	str = strdup(opt);

	emul = config = NULL;
	if ((cp = strchr(str, ',')) != NULL) {
		*cp = '\0';
		emul = cp + 1;
		if ((cp = strchr(emul, ',')) != NULL) {
			*cp = '\0';
			config = cp + 1;
		}
	} else {
		pci_parse_slot_usage(opt);
		goto done;
	}

	/* <bus>:<slot>:<func> */
	if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) {
		bnum = 0;
		/* <slot>:<func> */
		if (sscanf(str, "%d:%d", &snum, &fnum) != 2) {
			fnum = 0;
			/* <slot> */
			if (sscanf(str, "%d", &snum) != 1) {
				snum = -1;
			}
		}
	}

	if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS ||
	    fnum < 0 || fnum >= MAXFUNCS) {
		pci_parse_slot_usage(opt);
		goto done;
	}

	pde = pci_emul_finddev(emul);
	if (pde == NULL) {
		EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum,
		    fnum, emul);
		goto done;
	}

	snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum,
	    fnum);
	nvl = find_config_node(node_name);
	if (nvl != NULL) {
		EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum,
		    fnum);
		goto done;
	}
	nvl = create_config_node(node_name);
	if (pde->pe_alias != NULL)
		set_config_value_node(nvl, "device", pde->pe_alias);
	else
		set_config_value_node(nvl, "device", pde->pe_emu);

	if (pde->pe_legacy_config != NULL)
		error = pde->pe_legacy_config(nvl, config);
	else
		error = pci_parse_legacy_config(nvl, config);
done:
	free(str);
	return (error);
}

void
pci_print_supported_devices(void)
{
	struct pci_devemu **pdpp, *pdp;

	SET_FOREACH(pdpp, pci_devemu_set) {
		pdp = *pdpp;
		printf("%s\n", pdp->pe_emu);
	}
}

static int
pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset)
{

	if (offset < pi->pi_msix.pba_offset)
		return (0);

	if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
		return (0);
	}

	return (1);
}

int
pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
    uint64_t value)
{
	int msix_entry_offset;
	int tab_index;
	char *dest;

	/* support only 4 or 8 byte writes */
	if (size != 4 && size != 8)
		return (-1);

	/*
	 * Return if table index is beyond what device supports
	 */
	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
	if (tab_index >= pi->pi_msix.table_count)
		return (-1);

	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

	/* support only aligned writes */
	if ((msix_entry_offset % size) != 0)
		return (-1);

	dest = (char *)(pi->pi_msix.table + tab_index);
	dest += msix_entry_offset;

	if (size == 4)
		*((uint32_t *)dest) = value;
	else
		*((uint64_t *)dest) = value;

	return (0);
}

uint64_t
pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size)
{
	char *dest;
	int msix_entry_offset;
	int tab_index;
	uint64_t retval = ~0;

	/*
	 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X
	 * table but we also allow 1 byte access to accommodate reads from
	 * ddb.
	 */
	if (size != 1 && size != 4 && size != 8)
		return (retval);

	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

	/* support only aligned reads */
	if ((msix_entry_offset % size) != 0) {
		return (retval);
	}

	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;

	if (tab_index < pi->pi_msix.table_count) {
		/* valid MSI-X Table access */
		dest = (char *)(pi->pi_msix.table + tab_index);
		dest += msix_entry_offset;

		if (size == 1)
			retval = *((uint8_t *)dest);
		else if (size == 4)
			retval = *((uint32_t *)dest);
		else
			retval = *((uint64_t *)dest);
	} else if (pci_valid_pba_offset(pi, offset)) {
		/* return 0 for PBA access */
		retval = 0;
	}

	return (retval);
}

int
pci_msix_table_bar(struct pci_devinst *pi)
{

	if (pi->pi_msix.table != NULL)
		return (pi->pi_msix.table_bar);
	else
		return (-1);
}

int
pci_msix_pba_bar(struct pci_devinst *pi)
{

	if (pi->pi_msix.table != NULL)
		return (pi->pi_msix.pba_bar);
	else
		return (-1);
}

static int
pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	struct pci_devinst *pdi = arg;
	struct pci_devemu *pe = pdi->pi_d;
	uint64_t offset;
	int i;

	for (i = 0; i <= PCI_BARMAX; i++) {
		if (pdi->pi_bar[i].type == PCIBAR_IO &&
		    port >= pdi->pi_bar[i].addr &&
		    port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
			offset = port - pdi->pi_bar[i].addr;
			if (in)
				*eax = (*pe->pe_barread)(ctx, vcpu, pdi, i,
				    offset, bytes);
			else
				(*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset,
				    bytes, *eax);
			return (0);
		}
	}
	return (-1);
}

static int
pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
	struct pci_devinst *pdi = arg1;
	struct pci_devemu *pe = pdi->pi_d;
	uint64_t offset;
	int bidx = (int) arg2;

	assert(bidx <= PCI_BARMAX);
	assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
	    pdi->pi_bar[bidx].type == PCIBAR_MEM64);
	assert(addr >= pdi->pi_bar[bidx].addr &&
	    addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);

	offset = addr - pdi->pi_bar[bidx].addr;

	if (dir == MEM_F_WRITE) {
		if (size == 8) {
			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset,
			    4, *val & 0xffffffff);
			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4,
			    4, *val >> 32);
		} else {
			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset,
			    size, *val);
		}
	} else {
		if (size == 8) {
			*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
			    offset, 4);
			*val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
			    offset + 4, 4) << 32;
		} else {
			*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
			    offset, size);
		}
	}

	return (0);
}


static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
    uint64_t *addr)
{
	uint64_t base;

	assert((size & (size - 1)) == 0);	/* must be a power of 2 */

	base = roundup2(*baseptr, size);

	if (base + size <= limit) {
		*addr = base;
		*baseptr = base + size;
		return (0);
	} else
		return (-1);
}

/*
 * Register (or unregister) the MMIO or I/O region associated with the BAR
 * register 'idx' of an emulated pci device.
 */
static void
modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
{
	struct pci_devemu *pe;
	int error;
	struct inout_port iop;
	struct mem_range mr;

	pe = pi->pi_d;
	switch (pi->pi_bar[idx].type) {
	case PCIBAR_IO:
		bzero(&iop, sizeof(struct inout_port));
		iop.name = pi->pi_name;
		iop.port = pi->pi_bar[idx].addr;
		iop.size = pi->pi_bar[idx].size;
		if (registration) {
			iop.flags = IOPORT_F_INOUT;
			iop.handler = pci_emul_io_handler;
			iop.arg = pi;
			error = register_inout(&iop);
		} else
			error = unregister_inout(&iop);
		if (pe->pe_baraddr != NULL)
			(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
			    pi->pi_bar[idx].addr);
		break;
	case PCIBAR_MEM32:
	case PCIBAR_MEM64:
		bzero(&mr, sizeof(struct mem_range));
		mr.name = pi->pi_name;
		mr.base = pi->pi_bar[idx].addr;
		mr.size = pi->pi_bar[idx].size;
		if (registration) {
			mr.flags = MEM_F_RW;
			mr.handler = pci_emul_mem_handler;
			mr.arg1 = pi;
			mr.arg2 = idx;
			error = register_mem(&mr);
		} else
			error = unregister_mem(&mr);
		if (pe->pe_baraddr != NULL)
			(*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
			    pi->pi_bar[idx].addr);
		break;
	default:
		error = EINVAL;
		break;
	}
	assert(error == 0);
}

static void
unregister_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 0);
}

static void
register_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 1);
}

/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

	return (cmd & PCIM_CMD_PORTEN);
}

/* Are we decoding memory accesses for the emulated pci device? */
static int
memen(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

	return (cmd & PCIM_CMD_MEMEN);
}

/*
 * Update the MMIO or I/O address that is decoded by the BAR register.
 *
 * If the pci device has enabled the address space decoding then intercept
 * the address range decoded by the BAR register.
 */
static void
update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
{
	int decode;

	if (pi->pi_bar[idx].type == PCIBAR_IO)
		decode = porten(pi);
	else
		decode = memen(pi);

	if (decode)
		unregister_bar(pi, idx);

	switch (type) {
	case PCIBAR_IO:
	case PCIBAR_MEM32:
		pi->pi_bar[idx].addr = addr;
		break;
	case PCIBAR_MEM64:
		pi->pi_bar[idx].addr &= ~0xffffffffUL;
		pi->pi_bar[idx].addr |= addr;
		break;
	case PCIBAR_MEMHI64:
		pi->pi_bar[idx].addr &= 0xffffffff;
		pi->pi_bar[idx].addr |= addr;
		break;
	default:
		assert(0);
	}

	if (decode)
		register_bar(pi, idx);
}

int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
    uint64_t size)
{
	int error;
	uint64_t *baseptr, limit, addr, mask, lobits, bar;
	uint16_t cmd, enbit;

	assert(idx >= 0 && idx <= PCI_BARMAX);

	if ((size & (size - 1)) != 0)
		size = 1UL << flsl(size);	/* round up to a power of 2 */

	/* Enforce minimum BAR sizes required by the PCI standard */
	if (type == PCIBAR_IO) {
		if (size < 4)
			size = 4;
	} else {
		if (size < 16)
			size = 16;
	}

	switch (type) {
	case PCIBAR_NONE:
		baseptr = NULL;
		addr = mask = lobits = enbit = 0;
		break;
	case PCIBAR_IO:
		baseptr = &pci_emul_iobase;
		limit = PCI_EMUL_IOLIMIT;
		mask = PCIM_BAR_IO_BASE;
		lobits = PCIM_BAR_IO_SPACE;
		enbit = PCIM_CMD_PORTEN;
		break;
	case PCIBAR_MEM64:
		/*
		 * XXX
		 * Some drivers do not work well if the 64-bit BAR is allocated
		 * above 4GB.  Allow for this by allocating small requests under
		 * 4GB unless the allocation size is larger than some arbitrary
		 * number (128MB currently).
		 */
		if (size > 128 * 1024 * 1024) {
			baseptr = &pci_emul_membase64;
			limit = pci_emul_memlim64;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
			    PCIM_BAR_MEM_PREFETCH;
		} else {
			baseptr = &pci_emul_membase32;
			limit = PCI_EMUL_MEMLIMIT32;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
		}
		enbit = PCIM_CMD_MEMEN;
		break;
	case PCIBAR_MEM32:
		baseptr = &pci_emul_membase32;
		limit = PCI_EMUL_MEMLIMIT32;
		mask = PCIM_BAR_MEM_BASE;
		lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
		enbit = PCIM_CMD_MEMEN;
		break;
	default:
		printf("pci_emul_alloc_bar: invalid bar type %d\n", type);
		assert(0);
	}

	if (baseptr != NULL) {
		error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
		if (error != 0)
			return (error);
	}

	pdi->pi_bar[idx].type = type;
	pdi->pi_bar[idx].addr = addr;
	pdi->pi_bar[idx].size = size;

	/* Initialize the BAR register in config space */
	bar = (addr & mask) | lobits;
	pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);

	if (type == PCIBAR_MEM64) {
		assert(idx + 1 <= PCI_BARMAX);
		pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
		pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
	}

	cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
	if ((cmd & enbit) != enbit)
		pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
	register_bar(pdi, idx);

	return (0);
}

#define	CAP_START_OFFSET	0x40
static int
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
{
	int i, capoff, reallen;
	uint16_t sts;

	assert(caplen > 0);

	reallen = roundup2(caplen, 4);		/* dword aligned */

	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0)
		capoff = CAP_START_OFFSET;
	else
		capoff = pi->pi_capend + 1;

	/* Check if we have enough space */
	if (capoff + reallen > PCI_REGMAX + 1)
		return (-1);

	/* Set the previous capability pointer */
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
		pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
		pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
	} else
		pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff);

	/* Copy the capability */
	for (i = 0; i < caplen; i++)
		pci_set_cfgdata8(pi, capoff + i, capdata[i]);

	/* Set the next capability pointer */
	pci_set_cfgdata8(pi, capoff + 1, 0);

	pi->pi_prevcap = capoff;
	pi->pi_capend = capoff + reallen - 1;
	return (0);
}

static struct pci_devemu *
pci_emul_finddev(const char *name)
{
	struct pci_devemu **pdpp, *pdp;

	SET_FOREACH(pdpp, pci_devemu_set) {
		pdp = *pdpp;
		if (!strcmp(pdp->pe_emu, name)) {
			return (pdp);
		}
	}

	return (NULL);
}

static int
pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot,
    int func, struct funcinfo *fi)
{
	struct pci_devinst *pdi;
	int err;

	pdi = calloc(1, sizeof(struct pci_devinst));

	pdi->pi_vmctx = ctx;
	pdi->pi_bus = bus;
	pdi->pi_slot = slot;
	pdi->pi_func = func;
	pthread_mutex_init(&pdi->pi_lintr.lock, NULL);
	pdi->pi_lintr.pin = 0;
	pdi->pi_lintr.state = IDLE;
	pdi->pi_lintr.pirq_pin = 0;
	pdi->pi_lintr.ioapic_irq = 0;
	pdi->pi_d = pde;
	snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot);

	/* Disable legacy interrupts */
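	/* (An INTLINE of 255 means "unknown/no connection"; INTPIN 0 disables INTx.) */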
	pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
	pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);

	pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN);

	err = (*pde->pe_init)(ctx, pdi, fi->fi_config);
	if (err == 0)
		fi->fi_devi = pdi;
	else
		free(pdi);

	return (err);
}

void
pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
{
	int mmc;

	/* Number of msi messages must be a power of 2 between 1 and 32 */
	assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
	mmc = ffs(msgnum) - 1;

	bzero(msicap, sizeof(struct msicap));
	msicap->capid = PCIY_MSI;
	msicap->nextptr = nextptr;
	msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
}

int
pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
{
	struct msicap msicap;

	pci_populate_msicap(&msicap, msgnum, 0);

	return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
}

static void
pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
    uint32_t msix_tab_size)
{

	assert(msix_tab_size % 4096 == 0);

	bzero(msixcap, sizeof(struct msixcap));
	msixcap->capid = PCIY_MSIX;

	/*
	 * Message Control Register, all fields set to
	 * zero except for the Table Size.
	 * Note: Table size N is encoded as N-1
	 */
	msixcap->msgctrl = msgnum - 1;

	/*
	 * MSI-X BAR setup:
	 * - MSI-X table start at offset 0
	 * - PBA table starts at a 4K aligned offset after the MSI-X table
	 */
	msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK;
	msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK);
}

static void
pci_msix_table_init(struct pci_devinst *pi, int table_entries)
{
	int i, table_size;

	assert(table_entries > 0);
	assert(table_entries <= MAX_MSIX_TABLE_ENTRIES);

	table_size = table_entries * MSIX_TABLE_ENTRY_SIZE;
	pi->pi_msix.table = calloc(1, table_size);

	/* set mask bit of vector control register */
	for (i = 0; i < table_entries; i++)
		pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK;
}

int
pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
{
	uint32_t tab_size;
	struct msixcap msixcap;

	assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES);
	assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0);

	tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE;

	/* Align table size to nearest 4K */
	tab_size = roundup2(tab_size, 4096);

	pi->pi_msix.table_bar = barnum;
	pi->pi_msix.pba_bar = barnum;
	pi->pi_msix.table_offset = 0;
	pi->pi_msix.table_count = msgnum;
	pi->pi_msix.pba_offset = tab_size;
	pi->pi_msix.pba_size = PBA_SIZE(msgnum);

	pci_msix_table_init(pi, msgnum);

	pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size);

	/* allocate memory for MSI-X Table and PBA */
	pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
	    tab_size + pi->pi_msix.pba_size);

	return (pci_emul_add_capability(pi, (u_char *)&msixcap,
	    sizeof(msixcap)));
}

static void
msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask;
	int off;

	off = offset - capoff;
	/* Message Control Register */
	if (off == 2 && bytes == 2) {
		rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
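		/* fold the guest-written enable/mask bits into the preserved read-only value */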
		msgctrl |= val & rwmask;
		val = msgctrl;

		pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
		pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
		pci_lintr_update(pi);
	}

	CFGWRITE(pi, offset, val, bytes);
}

static void
msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask, msgdata, mme;
	uint32_t addrlo;

	/*
	 * If guest is writing to the message control register make sure
	 * we do not overwrite read-only fields.
	 */
	if ((offset - capoff) == 2 && bytes == 2) {
		rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;
	}
	CFGWRITE(pi, offset, val, bytes);

	msgctrl = pci_get_cfgdata16(pi, capoff + 2);
	addrlo = pci_get_cfgdata32(pi, capoff + 4);
	if (msgctrl & PCIM_MSICTRL_64BIT)
		msgdata = pci_get_cfgdata16(pi, capoff + 12);
	else
		msgdata = pci_get_cfgdata16(pi, capoff + 8);

	mme = msgctrl & PCIM_MSICTRL_MME_MASK;
	pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
	if (pi->pi_msi.enabled) {
		pi->pi_msi.addr = addrlo;
		pi->pi_msi.msg_data = msgdata;
		pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
	} else {
		pi->pi_msi.maxmsgnum = 0;
	}
	pci_lintr_update(pi);
}

void
pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{

	/* XXX don't write to the readonly parts */
	CFGWRITE(pi, offset, val, bytes);
}

#define	PCIECAP_VERSION	0x2
int
pci_emul_add_pciecap(struct pci_devinst *pi, int type)
{
	int err;
	struct pciecap pciecap;

	bzero(&pciecap, sizeof(pciecap));

	/*
	 * Use the integrated endpoint type for endpoints on a root complex bus.
	 *
	 * NB: bhyve currently only supports a single PCI bus that is the root
	 * complex bus, so all endpoints are integrated.
	 */
	if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0))
		type = PCIEM_TYPE_ROOT_INT_EP;

	pciecap.capid = PCIY_EXPRESS;
	pciecap.pcie_capabilities = PCIECAP_VERSION | type;
	if (type != PCIEM_TYPE_ROOT_INT_EP) {
		pciecap.link_capabilities = 0x411;	/* gen1, x1 */
		pciecap.link_status = 0x11;		/* gen1, x1 */
	}

	err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap));
	return (err);
}

/*
 * This function assumes that 'coff' is in the capabilities region of the
 * config space.  A capoff parameter of zero will force a search for the
 * offset and type.
 */
void
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val,
    uint8_t capoff, int capid)
{
	uint8_t nextoff;

	/* Do not allow un-aligned writes */
	if ((offset & (bytes - 1)) != 0)
		return;

	if (capoff == 0) {
		/* Find the capability that we want to update */
		capoff = CAP_START_OFFSET;
		while (1) {
			nextoff = pci_get_cfgdata8(pi, capoff + 1);
			if (nextoff == 0)
				break;
			if (offset >= capoff && offset < nextoff)
				break;

			capoff = nextoff;
		}
		assert(offset >= capoff);
		capid = pci_get_cfgdata8(pi, capoff);
	}

	/*
	 * Capability ID and Next Capability Pointer are readonly.
	 * However, some o/s's do 4-byte writes that include these.
	 * For this case, trim the write back to 2 bytes and adjust
	 * the data.
	 */
	if (offset == capoff || offset == capoff + 1) {
		if (offset == capoff && bytes == 4) {
			bytes = 2;
			offset += 2;
			val >>= 16;
		} else
			return;
	}

	switch (capid) {
	case PCIY_MSI:
		msicap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_MSIX:
		msixcap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_EXPRESS:
		pciecap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	default:
		break;
	}
}

static int
pci_emul_iscap(struct pci_devinst *pi, int offset)
{
	uint16_t sts;

	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
		if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend)
			return (1);
	}
	return (0);
}

static int
pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
	/*
	 * Ignore writes; return 0xff's for reads.  The mem read code
	 * will take care of truncating to the correct size.
	 */
	if (dir == MEM_F_READ) {
		*val = 0xffffffffffffffff;
	}

	return (0);
}

static int
pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int bytes, uint64_t *val, void *arg1, long arg2)
{
	int bus, slot, func, coff, in;

	coff = addr & 0xfff;
	func = (addr >> 12) & 0x7;
	slot = (addr >> 15) & 0x1f;
	bus = (addr >> 20) & 0xff;
	in = (dir == MEM_F_READ);
	if (in)
		*val = ~0UL;
	pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val);
	return (0);
}

uint64_t
pci_ecfg_base(void)
{

	return (PCI_EMUL_ECFG_BASE);
}

#define	BUSIO_ROUNDUP		32
#define	BUSMEM_ROUNDUP		(1024 * 1024)

int
init_pci(struct vmctx *ctx)
{
	char node_name[sizeof("pci.XXX.XX.X")];
	struct mem_range mr;
	struct pci_devemu *pde;
	struct businfo *bi;
	struct slotinfo *si;
	struct funcinfo *fi;
	nvlist_t *nvl;
	const char *emul;
	size_t lowmem;
	uint64_t cpu_maxphysaddr, pci_emul_memresv64;
	u_int regs[4];
	int bus, slot, func, error;

	pci_emul_iobase = PCI_EMUL_IOBASE;
	pci_emul_membase32 = vm_get_lowmem_limit(ctx);

	do_cpuid(0x80000008, regs);
	cpu_maxphysaddr = 1ULL << (regs[0] & 0xff);
	if (cpu_maxphysaddr > VM_MAXUSER_ADDRESS_LA48)
		cpu_maxphysaddr = VM_MAXUSER_ADDRESS_LA48;
	pci_emul_memresv64 = cpu_maxphysaddr / 4;
	/*
	 * Max power of 2 that is less than
	 * cpu_maxphysaddr - pci_emul_memresv64.
	 */
	pci_emul_membase64 = 1ULL << (flsl(cpu_maxphysaddr -
	    pci_emul_memresv64) - 1);
	pci_emul_memlim64 = cpu_maxphysaddr;

	for (bus = 0; bus < MAXBUSES; bus++) {
		snprintf(node_name, sizeof(node_name), "pci.%d", bus);
		nvl = find_config_node(node_name);
		if (nvl == NULL)
			continue;
		pci_businfo[bus] = calloc(1, sizeof(struct businfo));
		bi = pci_businfo[bus];

		/*
		 * Keep track of the i/o and memory resources allocated to
		 * this bus.
		 */
		bi->iobase = pci_emul_iobase;
		bi->membase32 = pci_emul_membase32;
		bi->membase64 = pci_emul_membase64;

		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				snprintf(node_name, sizeof(node_name),
				    "pci.%d.%d.%d", bus, slot, func);
				nvl = find_config_node(node_name);
				if (nvl == NULL)
					continue;

				fi->fi_config = nvl;
				emul = get_config_value_node(nvl, "device");
				if (emul == NULL) {
					EPRINTLN("pci slot %d:%d:%d: missing "
					    "\"device\" value", bus, slot, func);
					return (EINVAL);
				}
				pde = pci_emul_finddev(emul);
				if (pde == NULL) {
					EPRINTLN("pci slot %d:%d:%d: unknown "
					    "device \"%s\"", bus, slot, func,
					    emul);
					return (EINVAL);
				}
				if (pde->pe_alias != NULL) {
					EPRINTLN("pci slot %d:%d:%d: legacy "
					    "device \"%s\", use \"%s\" instead",
					    bus, slot, func, emul,
					    pde->pe_alias);
					return (EINVAL);
				}
				fi->fi_pde = pde;
				error = pci_emul_init(ctx, pde, bus, slot,
				    func, fi);
				if (error)
					return (error);
			}
		}

		/*
		 * Add some slop to the I/O and memory resources decoded by
		 * this bus to give a guest some flexibility if it wants to
		 * reprogram the BARs.
		 */
		pci_emul_iobase += BUSIO_ROUNDUP;
		pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
		bi->iolimit = pci_emul_iobase;

		pci_emul_membase32 += BUSMEM_ROUNDUP;
		pci_emul_membase32 = roundup2(pci_emul_membase32,
		    BUSMEM_ROUNDUP);
		bi->memlimit32 = pci_emul_membase32;

		pci_emul_membase64 += BUSMEM_ROUNDUP;
		pci_emul_membase64 = roundup2(pci_emul_membase64,
		    BUSMEM_ROUNDUP);
		bi->memlimit64 = pci_emul_membase64;
	}

	/*
	 * PCI backends are initialized before routing INTx interrupts
	 * so that LPC devices are able to reserve ISA IRQs before
	 * routing PIRQ pins.
	 */
	for (bus = 0; bus < MAXBUSES; bus++) {
		if ((bi = pci_businfo[bus]) == NULL)
			continue;

		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				if (fi->fi_devi == NULL)
					continue;
				pci_lintr_route(fi->fi_devi);
			}
		}
	}
	lpc_pirq_routed();

	/*
	 * The guest physical memory map looks like the following:
	 * [0,		lowmem)		guest system memory
	 * [lowmem,	lowmem_limit)	memory hole (may be absent)
	 * [lowmem_limit, 0xE0000000)	PCI hole (32-bit BAR allocation)
	 * [0xE0000000,	0xF0000000)	PCI extended config window
	 * [0xF0000000, 4GB)		LAPIC, IOAPIC, HPET, firmware
	 * [4GB,	4GB + highmem)
	 */

	/*
	 * Accesses to memory addresses that are not allocated to system
	 * memory or PCI devices return 0xff's.
	 */
	lowmem = vm_get_lowmem_size(ctx);
	bzero(&mr, sizeof(struct mem_range));
	mr.name = "PCI hole";
	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
	mr.base = lowmem;
	mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem;
	mr.handler = pci_emul_fallback_handler;
	error = register_mem_fallback(&mr);
	assert(error == 0);

	/* PCI extended config space */
	bzero(&mr, sizeof(struct mem_range));
	mr.name = "PCI ECFG";
	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
	mr.base = PCI_EMUL_ECFG_BASE;
	mr.size = PCI_EMUL_ECFG_SIZE;
	mr.handler = pci_emul_ecfg_handler;
	error = register_mem(&mr);
	assert(error == 0);

	return (0);
}

static void
pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
    void *arg)
{

	dsdt_line("  Package ()");
	dsdt_line("  {");
	dsdt_line("    0x%X,", slot << 16 | 0xffff);
	dsdt_line("    0x%02X,", pin - 1);
	dsdt_line("    Zero,");
	dsdt_line("    0x%X", ioapic_irq);
	dsdt_line("  },");
}

static void
pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
    void *arg)
{
	char *name;

	name = lpc_pirq_name(pirq_pin);
	if (name == NULL)
		return;
	dsdt_line("  Package ()");
	dsdt_line("  {");
	dsdt_line("    0x%X,", slot << 16 | 0xffff);
	dsdt_line("    0x%02X,", pin - 1);
	dsdt_line("    %s,", name);
	dsdt_line("    0x00");
	dsdt_line("  },");
	free(name);
}

/*
 * A bhyve virtual machine has a flat PCI hierarchy with a root port
 * corresponding to each PCI bus.
 */
static void
pci_bus_write_dsdt(int bus)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	int count, func, slot;

	/*
	 * If there are no devices on this 'bus' then just return.
	 */
	if ((bi = pci_businfo[bus]) == NULL) {
		/*
		 * Bus 0 is special because it decodes the I/O ports used
		 * for PCI config space access even if there are no devices
		 * on it.
		 */
		if (bus != 0)
			return;
	}

	dsdt_line("  Device (PC%02X)", bus);
	dsdt_line("  {");
	dsdt_line("    Name (_HID, EisaId (\"PNP0A03\"))");

	dsdt_line("    Method (_BBN, 0, NotSerialized)");
	dsdt_line("    {");
	dsdt_line("        Return (0x%08X)", bus);
	dsdt_line("    }");
	dsdt_line("    Name (_CRS, ResourceTemplate ()");
	dsdt_line("    {");
	dsdt_line("      WordBusNumber (ResourceProducer, MinFixed, "
	    "MaxFixed, PosDecode,");
	dsdt_line("        0x0000,             // Granularity");
	dsdt_line("        0x%04X,             // Range Minimum", bus);
	dsdt_line("        0x%04X,             // Range Maximum", bus);
	dsdt_line("        0x0000,             // Translation Offset");
	dsdt_line("        0x0001,             // Length");
	dsdt_line("        ,, )");

	if (bus == 0) {
		dsdt_indent(3);
		dsdt_fixed_ioport(0xCF8, 8);
		dsdt_unindent(3);

		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line("        0x0000,             // Granularity");
		dsdt_line("        0x0000,             // Range Minimum");
		dsdt_line("        0x0CF7,             // Range Maximum");
		dsdt_line("        0x0000,             // Translation Offset");
		dsdt_line("        0x0CF8,             // Length");
		dsdt_line("        ,, , TypeStatic)");

		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line("        0x0000,             // Granularity");
		dsdt_line("        0x0D00,             // Range Minimum");
		dsdt_line("        0x%04X,             // Range Maximum",
		    PCI_EMUL_IOBASE - 1);
		dsdt_line("        0x0000,             // Translation Offset");
		dsdt_line("        0x%04X,             // Length",
		    PCI_EMUL_IOBASE - 0x0D00);
		dsdt_line("        ,, , TypeStatic)");

		if (bi == NULL) {
			dsdt_line("    })");
			goto done;
		}
	}
	assert(bi != NULL);

	/* i/o window */
	dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
	    "PosDecode, EntireRange,");
	dsdt_line("        0x0000,             // Granularity");
	dsdt_line("        0x%04X,             // Range Minimum", bi->iobase);
	dsdt_line("        0x%04X,             // Range Maximum",
	    bi->iolimit - 1);
	dsdt_line("        0x0000,             // Translation Offset");
	dsdt_line("        0x%04X,             // Length",
	    bi->iolimit - bi->iobase);
	dsdt_line("        ,, , TypeStatic)");

	/* mmio window (32-bit) */
	dsdt_line("      DWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line("        0x00000000,         // Granularity");
	dsdt_line("        0x%08X,         // Range Minimum\n", bi->membase32);
	dsdt_line("        0x%08X,         // Range Maximum\n",
	    bi->memlimit32 - 1);
	dsdt_line("        0x00000000,         // Translation Offset");
	dsdt_line("        0x%08X,         // Length\n",
	    bi->memlimit32 - bi->membase32);
	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");

	/* mmio window (64-bit) */
	dsdt_line("      QWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line("        0x0000000000000000, // Granularity");
	dsdt_line("        0x%016lX,           // Range Minimum\n",
	    bi->membase64);
	dsdt_line("        0x%016lX,           // Range Maximum\n",
	    bi->memlimit64 - 1);
	dsdt_line("        0x0000000000000000, // Translation Offset");
	dsdt_line("        0x%016lX,           // Length\n",
	    bi->memlimit64 - bi->membase64);
	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
	dsdt_line("    })");

	count = pci_count_lintr(bus);
	if (count != 0) {
		dsdt_indent(2);
		dsdt_line("Name (PPRT, Package ()");
		dsdt_line("{");
		pci_walk_lintr(bus, pci_pirq_prt_entry, NULL);
		dsdt_line("})");
		dsdt_line("Name (APRT, Package ()");
		dsdt_line("{");
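		/* Emit one APIC routing entry per intpin in use on this bus. */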
		pci_walk_lintr(bus, pci_apic_prt_entry, NULL);
		dsdt_line("})");
		dsdt_line("Method (_PRT, 0, NotSerialized)");
		dsdt_line("{");
		dsdt_line("  If (PICM)");
		dsdt_line("  {");
		dsdt_line("    Return (APRT)");
		dsdt_line("  }");
		dsdt_line("  Else");
		dsdt_line("  {");
		dsdt_line("    Return (PPRT)");
		dsdt_line("  }");
		dsdt_line("}");
		dsdt_unindent(2);
	}

	dsdt_indent(2);
	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (func = 0; func < MAXFUNCS; func++) {
			pi = si->si_funcs[func].fi_devi;
			if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
				pi->pi_d->pe_write_dsdt(pi);
		}
	}
	dsdt_unindent(2);
done:
	dsdt_line("  }");
}

void
pci_write_dsdt(void)
{
	int bus;

	dsdt_indent(1);
	dsdt_line("Name (PICM, 0x00)");
	dsdt_line("Method (_PIC, 1, NotSerialized)");
	dsdt_line("{");
	dsdt_line("  Store (Arg0, PICM)");
	dsdt_line("}");
	dsdt_line("");
	dsdt_line("Scope (_SB)");
	dsdt_line("{");
	for (bus = 0; bus < MAXBUSES; bus++)
		pci_bus_write_dsdt(bus);
	dsdt_line("}");
	dsdt_unindent(1);
}

int
pci_bus_configured(int bus)
{
	assert(bus >= 0 && bus < MAXBUSES);
	return (pci_businfo[bus] != NULL);
}

int
pci_msi_enabled(struct pci_devinst *pi)
{
	return (pi->pi_msi.enabled);
}

int
pci_msi_maxmsgnum(struct pci_devinst *pi)
{
	if (pi->pi_msi.enabled)
		return (pi->pi_msi.maxmsgnum);
	else
		return (0);
}

int
pci_msix_enabled(struct pci_devinst *pi)
{

	return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}

void
pci_generate_msix(struct pci_devinst *pi, int index)
{
	struct msix_table_entry *mte;

	if (!pci_msix_enabled(pi))
		return;

	if (pi->pi_msix.function_mask)
		return;

	if (index >= pi->pi_msix.table_count)
		return;

	mte = &pi->pi_msix.table[index];
	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		/* XXX Set PBA bit if interrupt is disabled */
		vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
	}
}

void
pci_generate_msi(struct pci_devinst *pi, int index)
{

	if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
		vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
		    pi->pi_msi.msg_data + index);
	}
}

static bool
pci_lintr_permitted(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
	return (!(pi->pi_msi.enabled || pi->pi_msix.enabled ||
	    (cmd & PCIM_CMD_INTxDIS)));
}

void
pci_lintr_request(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct slotinfo *si;
	int bestpin, bestcount, pin;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);

	/*
	 * Just allocate a pin from our slot.  The pin will be
	 * assigned IRQs later when interrupts are routed.
	 */
	si = &bi->slotinfo[pi->pi_slot];
	bestpin = 0;
	bestcount = si->si_intpins[0].ii_count;
	for (pin = 1; pin < 4; pin++) {
		if (si->si_intpins[pin].ii_count < bestcount) {
			bestpin = pin;
			bestcount = si->si_intpins[pin].ii_count;
		}
	}

	si->si_intpins[bestpin].ii_count++;
	pi->pi_lintr.pin = bestpin + 1;
	pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1);
}

static void
pci_lintr_route(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct intxinfo *ii;

	if (pi->pi_lintr.pin == 0)
		return;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);
	ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1];

	/*
	 * Attempt to allocate an I/O APIC pin for this intpin if one
	 * is not yet assigned.
	 */
	if (ii->ii_ioapic_irq == 0)
		ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi);
	assert(ii->ii_ioapic_irq > 0);

	/*
	 * Attempt to allocate a PIRQ pin for this intpin if one is
	 * not yet assigned.
	 */
	if (ii->ii_pirq_pin == 0)
		ii->ii_pirq_pin = pirq_alloc_pin(pi);
	assert(ii->ii_pirq_pin > 0);

	pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq;
	pi->pi_lintr.pirq_pin = ii->ii_pirq_pin;
	pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin));
}

void
pci_lintr_assert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == IDLE) {
		if (pci_lintr_permitted(pi)) {
			pi->pi_lintr.state = ASSERTED;
			pci_irq_assert(pi);
		} else
			pi->pi_lintr.state = PENDING;
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

void
pci_lintr_deassert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED) {
		pi->pi_lintr.state = IDLE;
		pci_irq_deassert(pi);
	} else if (pi->pi_lintr.state == PENDING)
		pi->pi_lintr.state = IDLE;
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

static void
pci_lintr_update(struct pci_devinst *pi)
{

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) {
		pci_irq_deassert(pi);
		pi->pi_lintr.state = PENDING;
	} else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) {
		pi->pi_lintr.state = ASSERTED;
		pci_irq_assert(pi);
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

int
pci_count_lintr(int bus)
{
	int count, slot, pin;
	struct slotinfo *slotinfo;

	count = 0;
	if (pci_businfo[bus] != NULL) {
		for (slot = 0; slot < MAXSLOTS; slot++) {
			slotinfo = &pci_businfo[bus]->slotinfo[slot];
			for (pin = 0; pin < 4; pin++) {
				if (slotinfo->si_intpins[pin].ii_count != 0)
					count++;
			}
		}
	}
	return (count);
}

void
pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct intxinfo *ii;
	int slot, pin;

	if ((bi = pci_businfo[bus]) == NULL)
		return;

	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (pin = 0; pin < 4; pin++) {
			ii = &si->si_intpins[pin];
			if (ii->ii_count != 0)
				cb(bus, slot, pin + 1, ii->ii_pirq_pin,
				    ii->ii_ioapic_irq, arg);
		}
	}
}

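/*
 * A minimal sketch of a pci_lintr_cb consumer (hypothetical, for
 * illustration only; count_cb is not part of this file).  It tallies
 * routed intpins the same way pci_count_lintr() does internally:
 *
 *	static void
 *	count_cb(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
 *	    void *arg)
 *	{
 *		(*(int *)arg)++;	// one callback per intpin in use
 *	}
 *
 *	int n = 0;
 *	pci_walk_lintr(0, count_cb, &n);
 */
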
/*
 * Return 1 if the emulated device in 'slot' is a multi-function device.
 * Return 0 otherwise.
 */
static int
pci_emul_is_mfdev(int bus, int slot)
{
	struct businfo *bi;
	struct slotinfo *si;
	int f, numfuncs;

	numfuncs = 0;
	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		for (f = 0; f < MAXFUNCS; f++) {
			if (si->si_funcs[f].fi_devi != NULL) {
				numfuncs++;
			}
		}
	}
	return (numfuncs > 1);
}

/*
 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
 * whether or not a multi-function device is being emulated in the pci
 * 'slot'.
 */
static void
pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
{
	int mfdev;

	if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
		mfdev = pci_emul_is_mfdev(bus, slot);
		switch (bytes) {
		case 1:
		case 2:
			*rv &= ~PCIM_MFDEV;
			if (mfdev) {
				*rv |= PCIM_MFDEV;
			}
			break;
		case 4:
			*rv &= ~(PCIM_MFDEV << 16);
			if (mfdev) {
				*rv |= (PCIM_MFDEV << 16);
			}
			break;
		}
	}
}

/*
 * Update device state in response to changes to the PCI command
 * register.
 */
void
pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old)
{
	int i;
	uint16_t changed, new;

	new = pci_get_cfgdata16(pi, PCIR_COMMAND);
	changed = old ^ new;

	/*
	 * If the MMIO or I/O address space decoding has changed then
	 * register/unregister all BARs that decode that address space.
	 */
	for (i = 0; i <= PCI_BARMAX; i++) {
		switch (pi->pi_bar[i].type) {
		case PCIBAR_NONE:
		case PCIBAR_MEMHI64:
			break;
		case PCIBAR_IO:
			/* I/O address space decoding changed? */
			if (changed & PCIM_CMD_PORTEN) {
				if (new & PCIM_CMD_PORTEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		case PCIBAR_MEM32:
		case PCIBAR_MEM64:
			/* MMIO address space decoding changed? */
			if (changed & PCIM_CMD_MEMEN) {
				if (new & PCIM_CMD_MEMEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		default:
			assert(0);
		}
	}

	/*
	 * If INTx has been unmasked and is pending, assert the
	 * interrupt.
	 */
	pci_lintr_update(pi);
}

static void
pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
{
	int rshift;
	uint32_t cmd, old, readonly;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* stash old value */

	/*
	 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
	 *
	 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
	 * 'write 1 to clear'.  However these bits are not set to '1' by
	 * any device emulation so it is simpler to treat them as readonly.
	 */
	rshift = (coff & 0x3) * 8;
	readonly = 0xFFFFF880 >> rshift;

	old = CFGREAD(pi, coff, bytes);
	new &= ~readonly;
	new |= (old & readonly);
	CFGWRITE(pi, coff, new, bytes);			/* update config */

	pci_emul_cmd_changed(pi, cmd);
}

static void
pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
    int coff, int bytes, uint32_t *eax)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	struct pci_devemu *pe;
	int idx, needcfg;
	uint64_t addr, bar, mask;

	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		pi = si->si_funcs[func].fi_devi;
	} else
		pi = NULL;

	/*
	 * Just return if there is no device at this slot:func or if the
	 * guest is doing an un-aligned access.
	 */
	if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) ||
	    (coff & (bytes - 1)) != 0) {
		if (in)
			*eax = 0xffffffff;
		return;
	}

	/*
	 * Ignore all writes beyond the standard config space and return all
	 * ones on reads.
	 */
	if (coff >= PCI_REGMAX + 1) {
		if (in) {
			*eax = 0xffffffff;
			/*
			 * Extended capabilities begin at offset 256 in config
			 * space.  Absence of extended capabilities is signaled
			 * with all 0s in the extended capability header at
			 * offset 256.
			 */
			if (coff <= PCI_REGMAX + 4)
				*eax = 0x00000000;
		}
		return;
	}

	pe = pi->pi_d;

	/*
	 * Config read
	 */
	if (in) {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgread != NULL) {
			needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes,
			    eax);
		} else {
			needcfg = 1;
		}

		if (needcfg)
			*eax = CFGREAD(pi, coff, bytes);

		pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax);
	} else {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgwrite != NULL &&
		    (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
			return;

		/*
		 * Special handling for write to BAR registers
		 */
		if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
			/*
			 * Ignore writes to BAR registers that are not
			 * 4-byte aligned.
			 */
			if (bytes != 4 || (coff & 0x3) != 0)
				return;
			idx = (coff - PCIR_BAR(0)) / 4;
			mask = ~(pi->pi_bar[idx].size - 1);
			switch (pi->pi_bar[idx].type) {
			case PCIBAR_NONE:
				pi->pi_bar[idx].addr = bar = 0;
				break;
			case PCIBAR_IO:
				addr = *eax & mask;
				addr &= 0xffff;
				bar = addr | PCIM_BAR_IO_SPACE;
				/*
				 * Register the new BAR value for interception
				 */
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_IO);
				}
				break;
			case PCIBAR_MEM32:
				addr = bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM32);
				}
				break;
			case PCIBAR_MEM64:
				addr = bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
				    PCIM_BAR_MEM_PREFETCH;
				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM64);
				}
				break;
			case PCIBAR_MEMHI64:
				mask = ~(pi->pi_bar[idx - 1].size - 1);
				addr = ((uint64_t)*eax << 32) & mask;
				bar = addr >> 32;
				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
					update_bar_address(pi, addr, idx - 1,
					    PCIBAR_MEMHI64);
				}
				break;
			default:
				assert(0);
			}
			pci_set_cfgdata32(pi, coff, bar);

		} else if (pci_emul_iscap(pi, coff)) {
			pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0);
		} else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
			pci_emul_cmdsts_write(pi, coff, *eax, bytes);
		} else {
			CFGWRITE(pi, coff, *eax, bytes);
		}
	}
}

static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;

static int
pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	uint32_t x;

	if (bytes != 4) {
		if (in)
			*eax = (bytes == 2) ? 0xffff : 0xff;
		return (0);
	}

	if (in) {
		x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff;
		if (cfgenable)
			x |= CONF1_ENABLE;
		*eax = x;
	} else {
		x = *eax;
		cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
		cfgoff = x & PCI_REGMAX;
		cfgfunc = (x >> 8) & PCI_FUNCMAX;
		cfgslot = (x >> 11) & PCI_SLOTMAX;
		cfgbus = (x >> 16) & PCI_BUSMAX;
	}

	return (0);
}
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);

static int
pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	int coff;

	assert(bytes == 1 || bytes == 2 || bytes == 4);

	coff = cfgoff + (port - CONF1_DATA_PORT);
	if (cfgenable) {
		pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes,
		    eax);
	} else {
		/* Ignore accesses to cfgdata if not enabled by cfgaddr */
		if (in)
			*eax = 0xffffffff;
	}
	return (0);
}

INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);

#ifdef BHYVE_SNAPSHOT
/*
 * Saves/restores PCI device emulated state.  Returns 0 on success.
 */
static int
pci_snapshot_pci_dev(struct vm_snapshot_meta *meta)
{
	struct pci_devinst *pi;
	int i;
	int ret;

	pi = meta->dev_data;

	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done);

	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_page_offset, meta, ret, done);

	SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata),
	    meta, ret, done);

	for (i = 0; i < nitems(pi->pi_bar); i++) {
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done);
	}

	/* Restore MSI-X table. */
	for (i = 0; i < pi->pi_msix.table_count; i++) {
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control,
		    meta, ret, done);
	}

done:
	return (ret);
}

static int
pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde,
    struct pci_devinst **pdi)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct funcinfo *fi;
	int bus, slot, func;

	assert(dev_name != NULL);
	assert(pde != NULL);
	assert(pdi != NULL);

	for (bus = 0; bus < MAXBUSES; bus++) {
		if ((bi = pci_businfo[bus]) == NULL)
			continue;

		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				if (fi->fi_pde == NULL)
					continue;
				if (strcmp(dev_name, fi->fi_pde->pe_emu) != 0)
					continue;

				*pde = fi->fi_pde;
				*pdi = fi->fi_devi;
				return (0);
			}
		}
	}

	return (EINVAL);
}

int
pci_snapshot(struct vm_snapshot_meta *meta)
{
	struct pci_devemu *pde;
	struct pci_devinst *pdi;
	int ret;

	assert(meta->dev_name != NULL);

	ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi);
	if (ret != 0) {
		fprintf(stderr, "%s: no such name: %s\r\n",
		    __func__, meta->dev_name);
		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
		return (0);
	}

	meta->dev_data = pdi;

	if (pde->pe_snapshot == NULL) {
		fprintf(stderr, "%s: not implemented yet for: %s\r\n",
		    __func__, meta->dev_name);
		return (-1);
	}

	ret = pci_snapshot_pci_dev(meta);
	if (ret != 0) {
		fprintf(stderr, "%s: failed to snapshot pci dev\r\n",
		    __func__);
		return (-1);
	}

	ret = (*pde->pe_snapshot)(meta);

	return (ret);
}

int
pci_pause(struct vmctx *ctx, const char *dev_name)
{
int
pci_pause(struct vmctx *ctx, const char *dev_name)
{
	struct pci_devemu *pde;
	struct pci_devinst *pdi;
	int ret;

	assert(dev_name != NULL);

	ret = pci_find_slotted_dev(dev_name, &pde, &pdi);
	if (ret != 0) {
		/*
		 * It is possible to call this function without
		 * checking that the device is inserted first.
		 */
		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
		return (0);
	}

	if (pde->pe_pause == NULL) {
		/* The pause/resume functionality is optional. */
		fprintf(stderr, "%s: not implemented for: %s\n",
		    __func__, dev_name);
		return (0);
	}

	return (*pde->pe_pause)(ctx, pdi);
}

int
pci_resume(struct vmctx *ctx, const char *dev_name)
{
	struct pci_devemu *pde;
	struct pci_devinst *pdi;
	int ret;

	assert(dev_name != NULL);

	ret = pci_find_slotted_dev(dev_name, &pde, &pdi);
	if (ret != 0) {
		/*
		 * It is possible to call this function without
		 * checking that the device is inserted first.
		 */
		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
		return (0);
	}

	if (pde->pe_resume == NULL) {
		/* The pause/resume functionality is optional. */
		fprintf(stderr, "%s: not implemented for: %s\n",
		    __func__, dev_name);
		return (0);
	}

	return (*pde->pe_resume)(ctx, pdi);
}
#endif	/* BHYVE_SNAPSHOT */

#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
 * Define a dummy test device
 */
#define DIOSZ	8
#define DMEMSZ	4096
struct pci_emul_dsoftc {
	uint8_t	ioregs[DIOSZ];
	uint8_t	memregs[2][DMEMSZ];
};

#define	PCI_EMUL_MSI_MSGS	 4
#define	PCI_EMUL_MSIX_MSGS	16

static int
pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
{
	int error;
	struct pci_emul_dsoftc *sc;

	sc = calloc(1, sizeof(struct pci_emul_dsoftc));

	pi->pi_arg = sc;

	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
	pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);

	error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ);
	assert(error == 0);

	return (0);
}
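/*
 * Usage note (exposition only): the test device registers under the name
 * "dummy" (see pci_dummy below), so it attaches with a slot option such as
 * "-s 3:0,dummy".  As a guest-side sketch, assuming the guest has enabled
 * MSI and BAR 0 was programmed at a port base "iobase" (a hypothetical
 * variable), a 4-byte write at offset 4 makes pci_emul_diow() below raise
 * MSI message (value % maxmsgnum), and a dword write of the magic value
 * 0xabcdef anywhere in BAR 0 raises every allocated message:
 *
 *	outl(iobase + 4, 2);		// raise MSI message 2
 *	outl(iobase + 0, 0xabcdef);	// raise all allocated messages
 */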
static void
pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size, uint64_t value)
{
	int i;
	struct pci_emul_dsoftc *sc = pi->pi_arg;

	if (baridx == 0) {
		if (offset + size > DIOSZ) {
			printf("diow: iow too large, offset %ju size %d\n",
			    (uintmax_t)offset, size);
			return;
		}

		if (size == 1) {
			sc->ioregs[offset] = value & 0xff;
		} else if (size == 2) {
			*(uint16_t *)&sc->ioregs[offset] = value & 0xffff;
		} else if (size == 4) {
			*(uint32_t *)&sc->ioregs[offset] = value;
		} else {
			printf("diow: iow unknown size %d\n", size);
		}

		/*
		 * Special magic value to generate an interrupt
		 */
		if (offset == 4 && size == 4 && pci_msi_enabled(pi))
			pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));

		if (value == 0xabcdef) {
			for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
				pci_generate_msi(pi, i);
		}
	}

	if (baridx == 1 || baridx == 2) {
		if (offset + size > DMEMSZ) {
			printf("diow: memw too large, offset %ju size %d\n",
			    (uintmax_t)offset, size);
			return;
		}

		i = baridx - 1;		/* 'memregs' index */

		if (size == 1) {
			sc->memregs[i][offset] = value;
		} else if (size == 2) {
			*(uint16_t *)&sc->memregs[i][offset] = value;
		} else if (size == 4) {
			*(uint32_t *)&sc->memregs[i][offset] = value;
		} else if (size == 8) {
			*(uint64_t *)&sc->memregs[i][offset] = value;
		} else {
			printf("diow: memw unknown size %d\n", size);
		}

		/*
		 * Memory BAR writes are stored but do not generate an
		 * interrupt.
		 */
	}

	if (baridx > 2 || baridx < 0) {
		printf("diow: unknown bar idx %d\n", baridx);
	}
}

static uint64_t
pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size)
{
	struct pci_emul_dsoftc *sc = pi->pi_arg;
	uint64_t value;
	int i;

	value = 0;
	if (baridx == 0) {
		if (offset + size > DIOSZ) {
			printf("dior: ior too large, offset %ju size %d\n",
			    (uintmax_t)offset, size);
			return (0);
		}

		if (size == 1) {
			value = sc->ioregs[offset];
		} else if (size == 2) {
			value = *(uint16_t *) &sc->ioregs[offset];
		} else if (size == 4) {
			value = *(uint32_t *) &sc->ioregs[offset];
		} else {
			printf("dior: ior unknown size %d\n", size);
		}
	}

	if (baridx == 1 || baridx == 2) {
		if (offset + size > DMEMSZ) {
			printf("dior: memr too large, offset %ju size %d\n",
			    (uintmax_t)offset, size);
			return (0);
		}

		i = baridx - 1;		/* 'memregs' index */

		if (size == 1) {
			value = sc->memregs[i][offset];
		} else if (size == 2) {
			value = *(uint16_t *) &sc->memregs[i][offset];
		} else if (size == 4) {
			value = *(uint32_t *) &sc->memregs[i][offset];
		} else if (size == 8) {
			value = *(uint64_t *) &sc->memregs[i][offset];
		} else {
			printf("dior: memr unknown size %d\n", size);
		}
	}

	if (baridx > 2 || baridx < 0) {
		printf("dior: unknown bar idx %d\n", baridx);
		return (0);
	}

	return (value);
}

#ifdef BHYVE_SNAPSHOT
int
pci_emul_snapshot(struct vm_snapshot_meta *meta)
{

	/* The dummy device has no state beyond the generic PCI data. */
	return (0);
}
#endif

struct pci_devemu pci_dummy = {
	.pe_emu = "dummy",
	.pe_init = pci_emul_dinit,
	.pe_barwrite = pci_emul_diow,
	.pe_barread = pci_emul_dior,
#ifdef BHYVE_SNAPSHOT
	.pe_snapshot = pci_emul_snapshot,
#endif
};
PCI_EMUL_SET(pci_dummy);

#endif /* PCI_EMUL_TEST */
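/*
 * For exposition: the registration above is all a device model needs in
 * order to be found by name.  PCI_EMUL_SET() places the pci_devemu in the
 * pci_devemu_set linker set that pci_emul_finddev() walks, so a
 * hypothetical new model ("mydev" and its callbacks are illustrative
 * names) follows pci_dummy's shape:
 *
 *	static struct pci_devemu pci_mydev = {
 *		.pe_emu		= "mydev",
 *		.pe_init	= pci_mydev_init,
 *		.pe_barwrite	= pci_mydev_barwrite,
 *		.pe_barread	= pci_mydev_barread,
 *	};
 *	PCI_EMUL_SET(pci_mydev);
 *
 * after which a slot option such as "-s <slot>,mydev" attaches an instance.
 */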