1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 /* 31 * This file and its contents are supplied under the terms of the 32 * Common Development and Distribution License ("CDDL"), version 1.0. 33 * You may only use this file in accordance with the terms of version 34 * 1.0 of the CDDL. 35 * 36 * A full copy of the text of the CDDL should have accompanied this 37 * source. A copy of the CDDL is also available via the Internet at 38 * http://www.illumos.org/license/CDDL. 39 * 40 * Copyright 2014 Pluribus Networks Inc. 41 * Copyright 2018 Joyent, Inc. 42 */ 43 44 #include <sys/cdefs.h> 45 __FBSDID("$FreeBSD$"); 46 47 #include <sys/param.h> 48 #include <sys/linker_set.h> 49 #include <sys/mman.h> 50 51 #include <ctype.h> 52 #include <err.h> 53 #include <errno.h> 54 #include <pthread.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <strings.h> 59 #include <assert.h> 60 #include <stdbool.h> 61 #include <sysexits.h> 62 63 #include <machine/vmm.h> 64 #include <vmmapi.h> 65 66 #include "acpi.h" 67 #include "bhyverun.h" 68 #include "config.h" 69 #include "debug.h" 70 #include "inout.h" 71 #include "ioapic.h" 72 #include "mem.h" 73 #include "pci_emul.h" 74 #include "pci_irq.h" 75 #include "pci_lpc.h" 76 77 #define CONF1_ADDR_PORT 0x0cf8 78 #define CONF1_DATA_PORT 0x0cfc 79 80 #define CONF1_ENABLE 0x80000000ul 81 82 #define MAXBUSES (PCI_BUSMAX + 1) 83 #define MAXSLOTS (PCI_SLOTMAX + 1) 84 #define MAXFUNCS (PCI_FUNCMAX + 1) 85 86 #define GB (1024 * 1024 * 1024UL) 87 88 struct funcinfo { 89 nvlist_t *fi_config; 90 struct pci_devemu *fi_pde; 91 struct pci_devinst *fi_devi; 92 }; 93 94 struct intxinfo { 95 int ii_count; 96 int ii_pirq_pin; 97 int ii_ioapic_irq; 98 }; 99 100 struct slotinfo { 101 struct intxinfo si_intpins[4]; 102 struct funcinfo si_funcs[MAXFUNCS]; 103 }; 104 105 struct businfo { 106 uint16_t iobase, iolimit; /* I/O window */ 107 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 108 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 109 struct slotinfo slotinfo[MAXSLOTS]; 110 }; 111 112 static struct businfo *pci_businfo[MAXBUSES]; 113 114 SET_DECLARE(pci_devemu_set, struct pci_devemu); 115 116 static uint64_t pci_emul_iobase; 117 static uint8_t *pci_emul_rombase; 118 static uint64_t pci_emul_romoffset; 119 static uint8_t *pci_emul_romlim; 120 static uint64_t pci_emul_membase32; 121 static uint64_t pci_emul_membase64; 122 static uint64_t pci_emul_memlim64; 123 124 struct pci_bar_allocation { 125 TAILQ_ENTRY(pci_bar_allocation) chain; 126 struct pci_devinst *pdi; 127 int idx; 128 enum pcibar_type type; 129 uint64_t size; 130 }; 131 TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars = TAILQ_HEAD_INITIALIZER( 132 pci_bars); 133 134 #define PCI_EMUL_IOBASE 0x2000 135 #define PCI_EMUL_IOLIMIT 0x10000 136 137 #define PCI_EMUL_ROMSIZE 0x10000000 138 139 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */ 140 #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ 141 SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); 142 143 /* 144 * OVMF always uses 0xC0000000 as base address for 32 bit PCI MMIO. Don't 145 * change this address without changing it in OVMF. 146 */ 147 #define PCI_EMUL_MEMBASE32 0xC0000000 148 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE 149 #define PCI_EMUL_MEMSIZE64 (32*GB) 150 151 static struct pci_devemu *pci_emul_finddev(const char *name); 152 static void pci_lintr_route(struct pci_devinst *pi); 153 static void pci_lintr_update(struct pci_devinst *pi); 154 static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, 155 int func, int coff, int bytes, uint32_t *val); 156 157 static __inline void 158 CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) 159 { 160 161 if (bytes == 1) 162 pci_set_cfgdata8(pi, coff, val); 163 else if (bytes == 2) 164 pci_set_cfgdata16(pi, coff, val); 165 else 166 pci_set_cfgdata32(pi, coff, val); 167 } 168 169 static __inline uint32_t 170 CFGREAD(struct pci_devinst *pi, int coff, int bytes) 171 { 172 173 if (bytes == 1) 174 return (pci_get_cfgdata8(pi, coff)); 175 else if (bytes == 2) 176 return (pci_get_cfgdata16(pi, coff)); 177 else 178 return (pci_get_cfgdata32(pi, coff)); 179 } 180 181 /* 182 * I/O access 183 */ 184 185 /* 186 * Slot options are in the form: 187 * 188 * <bus>:<slot>:<func>,<emul>[,<config>] 189 * <slot>[:<func>],<emul>[,<config>] 190 * 191 * slot is 0..31 192 * func is 0..7 193 * emul is a string describing the type of PCI device e.g. virtio-net 194 * config is an optional string, depending on the device, that can be 195 * used for configuration. 196 * Examples are: 197 * 1,virtio-net,tap0 198 * 3:0,dummy 199 */ 200 static void 201 pci_parse_slot_usage(char *aopt) 202 { 203 204 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 205 } 206 207 /* 208 * Helper function to parse a list of comma-separated options where 209 * each option is formatted as "name[=value]". If no value is 210 * provided, the option is treated as a boolean and is given a value 211 * of true. 212 */ 213 int 214 pci_parse_legacy_config(nvlist_t *nvl, const char *opt) 215 { 216 char *config, *name, *tofree, *value; 217 218 if (opt == NULL) 219 return (0); 220 221 config = tofree = strdup(opt); 222 while ((name = strsep(&config, ",")) != NULL) { 223 value = strchr(name, '='); 224 if (value != NULL) { 225 *value = '\0'; 226 value++; 227 set_config_value_node(nvl, name, value); 228 } else 229 set_config_bool_node(nvl, name, true); 230 } 231 free(tofree); 232 return (0); 233 } 234 235 /* 236 * PCI device configuration is stored in MIBs that encode the device's 237 * location: 238 * 239 * pci.<bus>.<slot>.<func> 240 * 241 * Where "bus", "slot", and "func" are all decimal values without 242 * leading zeroes. Each valid device must have a "device" node which 243 * identifies the driver model of the device. 244 * 245 * Device backends can provide a parser for the "config" string. If 246 * a custom parser is not provided, pci_parse_legacy_config() is used 247 * to parse the string. 248 */ 249 int 250 pci_parse_slot(char *opt) 251 { 252 char node_name[sizeof("pci.XXX.XX.X")]; 253 struct pci_devemu *pde; 254 char *emul, *config, *str, *cp; 255 int error, bnum, snum, fnum; 256 nvlist_t *nvl; 257 258 error = -1; 259 str = strdup(opt); 260 261 emul = config = NULL; 262 if ((cp = strchr(str, ',')) != NULL) { 263 *cp = '\0'; 264 emul = cp + 1; 265 if ((cp = strchr(emul, ',')) != NULL) { 266 *cp = '\0'; 267 config = cp + 1; 268 } 269 } else { 270 pci_parse_slot_usage(opt); 271 goto done; 272 } 273 274 /* <bus>:<slot>:<func> */ 275 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 276 bnum = 0; 277 /* <slot>:<func> */ 278 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 279 fnum = 0; 280 /* <slot> */ 281 if (sscanf(str, "%d", &snum) != 1) { 282 snum = -1; 283 } 284 } 285 } 286 287 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 288 fnum < 0 || fnum >= MAXFUNCS) { 289 pci_parse_slot_usage(opt); 290 goto done; 291 } 292 293 pde = pci_emul_finddev(emul); 294 if (pde == NULL) { 295 EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum, 296 fnum, emul); 297 goto done; 298 } 299 300 snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum, 301 fnum); 302 nvl = find_config_node(node_name); 303 if (nvl != NULL) { 304 EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum, 305 fnum); 306 goto done; 307 } 308 nvl = create_config_node(node_name); 309 if (pde->pe_alias != NULL) 310 set_config_value_node(nvl, "device", pde->pe_alias); 311 else 312 set_config_value_node(nvl, "device", pde->pe_emu); 313 314 if (pde->pe_legacy_config != NULL) 315 error = pde->pe_legacy_config(nvl, config); 316 else 317 error = pci_parse_legacy_config(nvl, config); 318 done: 319 free(str); 320 return (error); 321 } 322 323 void 324 pci_print_supported_devices() 325 { 326 struct pci_devemu **pdpp, *pdp; 327 328 SET_FOREACH(pdpp, pci_devemu_set) { 329 pdp = *pdpp; 330 printf("%s\n", pdp->pe_emu); 331 } 332 } 333 334 static int 335 pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 336 { 337 338 if (offset < pi->pi_msix.pba_offset) 339 return (0); 340 341 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 342 return (0); 343 } 344 345 return (1); 346 } 347 348 int 349 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 350 uint64_t value) 351 { 352 int msix_entry_offset; 353 int tab_index; 354 char *dest; 355 356 /* support only 4 or 8 byte writes */ 357 if (size != 4 && size != 8) 358 return (-1); 359 360 /* 361 * Return if table index is beyond what device supports 362 */ 363 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 364 if (tab_index >= pi->pi_msix.table_count) 365 return (-1); 366 367 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 368 369 /* support only aligned writes */ 370 if ((msix_entry_offset % size) != 0) 371 return (-1); 372 373 dest = (char *)(pi->pi_msix.table + tab_index); 374 dest += msix_entry_offset; 375 376 if (size == 4) 377 *((uint32_t *)dest) = value; 378 else 379 *((uint64_t *)dest) = value; 380 381 return (0); 382 } 383 384 uint64_t 385 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 386 { 387 char *dest; 388 int msix_entry_offset; 389 int tab_index; 390 uint64_t retval = ~0; 391 392 /* 393 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 394 * table but we also allow 1 byte access to accommodate reads from 395 * ddb. 396 */ 397 if (size != 1 && size != 4 && size != 8) 398 return (retval); 399 400 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 401 402 /* support only aligned reads */ 403 if ((msix_entry_offset % size) != 0) { 404 return (retval); 405 } 406 407 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 408 409 if (tab_index < pi->pi_msix.table_count) { 410 /* valid MSI-X Table access */ 411 dest = (char *)(pi->pi_msix.table + tab_index); 412 dest += msix_entry_offset; 413 414 if (size == 1) 415 retval = *((uint8_t *)dest); 416 else if (size == 4) 417 retval = *((uint32_t *)dest); 418 else 419 retval = *((uint64_t *)dest); 420 } else if (pci_valid_pba_offset(pi, offset)) { 421 /* return 0 for PBA access */ 422 retval = 0; 423 } 424 425 return (retval); 426 } 427 428 int 429 pci_msix_table_bar(struct pci_devinst *pi) 430 { 431 432 if (pi->pi_msix.table != NULL) 433 return (pi->pi_msix.table_bar); 434 else 435 return (-1); 436 } 437 438 int 439 pci_msix_pba_bar(struct pci_devinst *pi) 440 { 441 442 if (pi->pi_msix.table != NULL) 443 return (pi->pi_msix.pba_bar); 444 else 445 return (-1); 446 } 447 448 static int 449 pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 450 uint32_t *eax, void *arg) 451 { 452 struct pci_devinst *pdi = arg; 453 struct pci_devemu *pe = pdi->pi_d; 454 uint64_t offset; 455 int i; 456 457 for (i = 0; i <= PCI_BARMAX; i++) { 458 if (pdi->pi_bar[i].type == PCIBAR_IO && 459 port >= pdi->pi_bar[i].addr && 460 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 461 offset = port - pdi->pi_bar[i].addr; 462 if (in) 463 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 464 offset, bytes); 465 else 466 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 467 bytes, *eax); 468 return (0); 469 } 470 } 471 return (-1); 472 } 473 474 static int 475 pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 476 int size, uint64_t *val, void *arg1, long arg2) 477 { 478 struct pci_devinst *pdi = arg1; 479 struct pci_devemu *pe = pdi->pi_d; 480 uint64_t offset; 481 int bidx = (int) arg2; 482 483 assert(bidx <= PCI_BARMAX); 484 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 485 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 486 assert(addr >= pdi->pi_bar[bidx].addr && 487 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 488 489 offset = addr - pdi->pi_bar[bidx].addr; 490 491 if (dir == MEM_F_WRITE) { 492 if (size == 8) { 493 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 494 4, *val & 0xffffffff); 495 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 496 4, *val >> 32); 497 } else { 498 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 499 size, *val); 500 } 501 } else { 502 if (size == 8) { 503 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 504 offset, 4); 505 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 506 offset + 4, 4) << 32; 507 } else { 508 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 509 offset, size); 510 } 511 } 512 513 return (0); 514 } 515 516 517 static int 518 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 519 uint64_t *addr) 520 { 521 uint64_t base; 522 523 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 524 525 base = roundup2(*baseptr, size); 526 527 if (base + size <= limit) { 528 *addr = base; 529 *baseptr = base + size; 530 return (0); 531 } else 532 return (-1); 533 } 534 535 /* 536 * Register (or unregister) the MMIO or I/O region associated with the BAR 537 * register 'idx' of an emulated pci device. 538 */ 539 static void 540 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 541 { 542 struct pci_devemu *pe; 543 int error; 544 struct inout_port iop; 545 struct mem_range mr; 546 547 pe = pi->pi_d; 548 switch (pi->pi_bar[idx].type) { 549 case PCIBAR_IO: 550 bzero(&iop, sizeof(struct inout_port)); 551 iop.name = pi->pi_name; 552 iop.port = pi->pi_bar[idx].addr; 553 iop.size = pi->pi_bar[idx].size; 554 if (registration) { 555 iop.flags = IOPORT_F_INOUT; 556 iop.handler = pci_emul_io_handler; 557 iop.arg = pi; 558 error = register_inout(&iop); 559 } else 560 error = unregister_inout(&iop); 561 if (pe->pe_baraddr != NULL) 562 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 563 pi->pi_bar[idx].addr); 564 break; 565 case PCIBAR_MEM32: 566 case PCIBAR_MEM64: 567 bzero(&mr, sizeof(struct mem_range)); 568 mr.name = pi->pi_name; 569 mr.base = pi->pi_bar[idx].addr; 570 mr.size = pi->pi_bar[idx].size; 571 if (registration) { 572 mr.flags = MEM_F_RW; 573 mr.handler = pci_emul_mem_handler; 574 mr.arg1 = pi; 575 mr.arg2 = idx; 576 error = register_mem(&mr); 577 } else 578 error = unregister_mem(&mr); 579 if (pe->pe_baraddr != NULL) 580 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 581 pi->pi_bar[idx].addr); 582 break; 583 case PCIBAR_ROM: 584 error = 0; 585 if (pe->pe_baraddr != NULL) 586 (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, 587 pi->pi_bar[idx].addr); 588 break; 589 default: 590 error = EINVAL; 591 break; 592 } 593 assert(error == 0); 594 } 595 596 static void 597 unregister_bar(struct pci_devinst *pi, int idx) 598 { 599 600 modify_bar_registration(pi, idx, 0); 601 } 602 603 static void 604 register_bar(struct pci_devinst *pi, int idx) 605 { 606 607 modify_bar_registration(pi, idx, 1); 608 } 609 610 /* Is the ROM enabled for the emulated pci device? */ 611 static int 612 romen(struct pci_devinst *pi) 613 { 614 return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) == 615 PCIM_BIOS_ENABLE; 616 } 617 618 /* Are we decoding i/o port accesses for the emulated pci device? */ 619 static int 620 porten(struct pci_devinst *pi) 621 { 622 uint16_t cmd; 623 624 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 625 626 return (cmd & PCIM_CMD_PORTEN); 627 } 628 629 /* Are we decoding memory accesses for the emulated pci device? */ 630 static int 631 memen(struct pci_devinst *pi) 632 { 633 uint16_t cmd; 634 635 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 636 637 return (cmd & PCIM_CMD_MEMEN); 638 } 639 640 /* 641 * Update the MMIO or I/O address that is decoded by the BAR register. 642 * 643 * If the pci device has enabled the address space decoding then intercept 644 * the address range decoded by the BAR register. 645 */ 646 static void 647 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 648 { 649 int decode; 650 651 if (pi->pi_bar[idx].type == PCIBAR_IO) 652 decode = porten(pi); 653 else 654 decode = memen(pi); 655 656 if (decode) 657 unregister_bar(pi, idx); 658 659 switch (type) { 660 case PCIBAR_IO: 661 case PCIBAR_MEM32: 662 pi->pi_bar[idx].addr = addr; 663 break; 664 case PCIBAR_MEM64: 665 pi->pi_bar[idx].addr &= ~0xffffffffUL; 666 pi->pi_bar[idx].addr |= addr; 667 break; 668 case PCIBAR_MEMHI64: 669 pi->pi_bar[idx].addr &= 0xffffffff; 670 pi->pi_bar[idx].addr |= addr; 671 break; 672 default: 673 assert(0); 674 } 675 676 if (decode) 677 register_bar(pi, idx); 678 } 679 680 int 681 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 682 uint64_t size) 683 { 684 assert((type == PCIBAR_ROM) || (idx >= 0 && idx <= PCI_BARMAX)); 685 assert((type != PCIBAR_ROM) || (idx == PCI_ROM_IDX)); 686 687 if ((size & (size - 1)) != 0) 688 size = 1UL << flsl(size); /* round up to a power of 2 */ 689 690 /* Enforce minimum BAR sizes required by the PCI standard */ 691 if (type == PCIBAR_IO) { 692 if (size < 4) 693 size = 4; 694 } else if (type == PCIBAR_ROM) { 695 if (size < ~PCIM_BIOS_ADDR_MASK + 1) 696 size = ~PCIM_BIOS_ADDR_MASK + 1; 697 } else { 698 if (size < 16) 699 size = 16; 700 } 701 702 /* 703 * To reduce fragmentation of the MMIO space, we allocate the BARs by 704 * size. Therefore, don't allocate the BAR yet. We create a list of all 705 * BAR allocation which is sorted by BAR size. When all PCI devices are 706 * initialized, we will assign an address to the BARs. 707 */ 708 709 /* create a new list entry */ 710 struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar)); 711 memset(new_bar, 0, sizeof(*new_bar)); 712 new_bar->pdi = pdi; 713 new_bar->idx = idx; 714 new_bar->type = type; 715 new_bar->size = size; 716 717 /* 718 * Search for a BAR which size is lower than the size of our newly 719 * allocated BAR. 720 */ 721 struct pci_bar_allocation *bar = NULL; 722 TAILQ_FOREACH(bar, &pci_bars, chain) { 723 if (bar->size < size) { 724 break; 725 } 726 } 727 728 if (bar == NULL) { 729 /* 730 * Either the list is empty or new BAR is the smallest BAR of 731 * the list. Append it to the end of our list. 732 */ 733 TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain); 734 } else { 735 /* 736 * The found BAR is smaller than our new BAR. For that reason, 737 * insert our new BAR before the found BAR. 738 */ 739 TAILQ_INSERT_BEFORE(bar, new_bar, chain); 740 } 741 742 /* 743 * pci_passthru devices synchronize their physical and virtual command 744 * register on init. For that reason, the virtual cmd reg should be 745 * updated as early as possible. 746 */ 747 uint16_t enbit = 0; 748 switch (type) { 749 case PCIBAR_IO: 750 enbit = PCIM_CMD_PORTEN; 751 break; 752 case PCIBAR_MEM64: 753 case PCIBAR_MEM32: 754 enbit = PCIM_CMD_MEMEN; 755 break; 756 default: 757 enbit = 0; 758 break; 759 } 760 761 const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); 762 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); 763 764 return (0); 765 } 766 767 static int 768 pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx, 769 const enum pcibar_type type, const uint64_t size) 770 { 771 int error; 772 uint64_t *baseptr, limit, addr, mask, lobits, bar; 773 774 switch (type) { 775 case PCIBAR_NONE: 776 baseptr = NULL; 777 addr = mask = lobits = 0; 778 break; 779 case PCIBAR_IO: 780 baseptr = &pci_emul_iobase; 781 limit = PCI_EMUL_IOLIMIT; 782 mask = PCIM_BAR_IO_BASE; 783 lobits = PCIM_BAR_IO_SPACE; 784 break; 785 case PCIBAR_MEM64: 786 /* 787 * XXX 788 * Some drivers do not work well if the 64-bit BAR is allocated 789 * above 4GB. Allow for this by allocating small requests under 790 * 4GB unless then allocation size is larger than some arbitrary 791 * number (128MB currently). 792 */ 793 if (size > 128 * 1024 * 1024) { 794 baseptr = &pci_emul_membase64; 795 limit = pci_emul_memlim64; 796 mask = PCIM_BAR_MEM_BASE; 797 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 798 PCIM_BAR_MEM_PREFETCH; 799 } else { 800 baseptr = &pci_emul_membase32; 801 limit = PCI_EMUL_MEMLIMIT32; 802 mask = PCIM_BAR_MEM_BASE; 803 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 804 } 805 break; 806 case PCIBAR_MEM32: 807 baseptr = &pci_emul_membase32; 808 limit = PCI_EMUL_MEMLIMIT32; 809 mask = PCIM_BAR_MEM_BASE; 810 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 811 break; 812 case PCIBAR_ROM: 813 /* do not claim memory for ROM. OVMF will do it for us. */ 814 baseptr = NULL; 815 limit = 0; 816 mask = PCIM_BIOS_ADDR_MASK; 817 lobits = 0; 818 #ifndef __FreeBSD__ 819 addr = 0; 820 #endif 821 break; 822 default: 823 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 824 #ifdef FreeBSD 825 assert(0); 826 #else 827 abort(); 828 #endif 829 } 830 831 if (baseptr != NULL) { 832 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 833 if (error != 0) 834 return (error); 835 } 836 837 pdi->pi_bar[idx].type = type; 838 pdi->pi_bar[idx].addr = addr; 839 pdi->pi_bar[idx].size = size; 840 /* 841 * passthru devices are using same lobits as physical device they set 842 * this property 843 */ 844 if (pdi->pi_bar[idx].lobits != 0) { 845 lobits = pdi->pi_bar[idx].lobits; 846 } else { 847 pdi->pi_bar[idx].lobits = lobits; 848 } 849 850 /* Initialize the BAR register in config space */ 851 bar = (addr & mask) | lobits; 852 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 853 854 if (type == PCIBAR_MEM64) { 855 assert(idx + 1 <= PCI_BARMAX); 856 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 857 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 858 } 859 860 if (type != PCIBAR_ROM) { 861 register_bar(pdi, idx); 862 } 863 864 return (0); 865 } 866 867 int 868 pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size, 869 void **const addr) 870 { 871 /* allocate ROM space once on first call */ 872 if (pci_emul_rombase == 0) { 873 pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM, 874 "pcirom", PCI_EMUL_ROMSIZE); 875 if (pci_emul_rombase == MAP_FAILED) { 876 warnx("%s: failed to create rom segment", __func__); 877 return (-1); 878 } 879 pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE; 880 pci_emul_romoffset = 0; 881 } 882 883 /* ROM size should be a power of 2 and greater than 2 KB */ 884 const uint64_t rom_size = MAX(1UL << flsl(size), 885 ~PCIM_BIOS_ADDR_MASK + 1); 886 887 /* check if ROM fits into ROM space */ 888 if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE) { 889 warnx("%s: no space left in rom segment:", __func__); 890 warnx("%16lu bytes left", 891 PCI_EMUL_ROMSIZE - pci_emul_romoffset); 892 warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus, 893 pdi->pi_slot, pdi->pi_func); 894 return (-1); 895 } 896 897 /* allocate ROM BAR */ 898 const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM, 899 rom_size); 900 if (error) 901 return error; 902 903 /* return address */ 904 *addr = pci_emul_rombase + pci_emul_romoffset; 905 906 /* save offset into ROM Space */ 907 pdi->pi_romoffset = pci_emul_romoffset; 908 909 /* increase offset for next ROM */ 910 pci_emul_romoffset += rom_size; 911 912 return (0); 913 } 914 915 #define CAP_START_OFFSET 0x40 916 static int 917 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 918 { 919 int i, capoff, reallen; 920 uint16_t sts; 921 922 assert(caplen > 0); 923 924 reallen = roundup2(caplen, 4); /* dword aligned */ 925 926 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 927 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 928 capoff = CAP_START_OFFSET; 929 else 930 capoff = pi->pi_capend + 1; 931 932 /* Check if we have enough space */ 933 if (capoff + reallen > PCI_REGMAX + 1) 934 return (-1); 935 936 /* Set the previous capability pointer */ 937 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 938 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 939 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 940 } else 941 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 942 943 /* Copy the capability */ 944 for (i = 0; i < caplen; i++) 945 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 946 947 /* Set the next capability pointer */ 948 pci_set_cfgdata8(pi, capoff + 1, 0); 949 950 pi->pi_prevcap = capoff; 951 pi->pi_capend = capoff + reallen - 1; 952 return (0); 953 } 954 955 static struct pci_devemu * 956 pci_emul_finddev(const char *name) 957 { 958 struct pci_devemu **pdpp, *pdp; 959 960 SET_FOREACH(pdpp, pci_devemu_set) { 961 pdp = *pdpp; 962 if (!strcmp(pdp->pe_emu, name)) { 963 return (pdp); 964 } 965 } 966 967 return (NULL); 968 } 969 970 static int 971 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 972 int func, struct funcinfo *fi) 973 { 974 struct pci_devinst *pdi; 975 int err; 976 977 pdi = calloc(1, sizeof(struct pci_devinst)); 978 979 pdi->pi_vmctx = ctx; 980 pdi->pi_bus = bus; 981 pdi->pi_slot = slot; 982 pdi->pi_func = func; 983 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 984 pdi->pi_lintr.pin = 0; 985 pdi->pi_lintr.state = IDLE; 986 pdi->pi_lintr.pirq_pin = 0; 987 pdi->pi_lintr.ioapic_irq = 0; 988 pdi->pi_d = pde; 989 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 990 991 /* Disable legacy interrupts */ 992 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 993 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 994 995 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); 996 997 err = (*pde->pe_init)(ctx, pdi, fi->fi_config); 998 if (err == 0) 999 fi->fi_devi = pdi; 1000 else 1001 free(pdi); 1002 1003 return (err); 1004 } 1005 1006 void 1007 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 1008 { 1009 int mmc; 1010 1011 /* Number of msi messages must be a power of 2 between 1 and 32 */ 1012 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 1013 mmc = ffs(msgnum) - 1; 1014 1015 bzero(msicap, sizeof(struct msicap)); 1016 msicap->capid = PCIY_MSI; 1017 msicap->nextptr = nextptr; 1018 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 1019 } 1020 1021 int 1022 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 1023 { 1024 struct msicap msicap; 1025 1026 pci_populate_msicap(&msicap, msgnum, 0); 1027 1028 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 1029 } 1030 1031 static void 1032 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 1033 uint32_t msix_tab_size) 1034 { 1035 1036 assert(msix_tab_size % 4096 == 0); 1037 1038 bzero(msixcap, sizeof(struct msixcap)); 1039 msixcap->capid = PCIY_MSIX; 1040 1041 /* 1042 * Message Control Register, all fields set to 1043 * zero except for the Table Size. 1044 * Note: Table size N is encoded as N-1 1045 */ 1046 msixcap->msgctrl = msgnum - 1; 1047 1048 /* 1049 * MSI-X BAR setup: 1050 * - MSI-X table start at offset 0 1051 * - PBA table starts at a 4K aligned offset after the MSI-X table 1052 */ 1053 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 1054 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 1055 } 1056 1057 static void 1058 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 1059 { 1060 int i, table_size; 1061 1062 assert(table_entries > 0); 1063 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 1064 1065 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 1066 pi->pi_msix.table = calloc(1, table_size); 1067 1068 /* set mask bit of vector control register */ 1069 for (i = 0; i < table_entries; i++) 1070 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 1071 } 1072 1073 int 1074 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 1075 { 1076 uint32_t tab_size; 1077 struct msixcap msixcap; 1078 1079 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 1080 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 1081 1082 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 1083 1084 /* Align table size to nearest 4K */ 1085 tab_size = roundup2(tab_size, 4096); 1086 1087 pi->pi_msix.table_bar = barnum; 1088 pi->pi_msix.pba_bar = barnum; 1089 pi->pi_msix.table_offset = 0; 1090 pi->pi_msix.table_count = msgnum; 1091 pi->pi_msix.pba_offset = tab_size; 1092 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 1093 1094 pci_msix_table_init(pi, msgnum); 1095 1096 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 1097 1098 /* allocate memory for MSI-X Table and PBA */ 1099 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 1100 tab_size + pi->pi_msix.pba_size); 1101 1102 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 1103 sizeof(msixcap))); 1104 } 1105 1106 static void 1107 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1108 int bytes, uint32_t val) 1109 { 1110 uint16_t msgctrl, rwmask; 1111 int off; 1112 1113 off = offset - capoff; 1114 /* Message Control Register */ 1115 if (off == 2 && bytes == 2) { 1116 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 1117 msgctrl = pci_get_cfgdata16(pi, offset); 1118 msgctrl &= ~rwmask; 1119 msgctrl |= val & rwmask; 1120 val = msgctrl; 1121 1122 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 1123 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 1124 pci_lintr_update(pi); 1125 } 1126 1127 CFGWRITE(pi, offset, val, bytes); 1128 } 1129 1130 static void 1131 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1132 int bytes, uint32_t val) 1133 { 1134 uint16_t msgctrl, rwmask, msgdata, mme; 1135 uint32_t addrlo; 1136 1137 /* 1138 * If guest is writing to the message control register make sure 1139 * we do not overwrite read-only fields. 1140 */ 1141 if ((offset - capoff) == 2 && bytes == 2) { 1142 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 1143 msgctrl = pci_get_cfgdata16(pi, offset); 1144 msgctrl &= ~rwmask; 1145 msgctrl |= val & rwmask; 1146 val = msgctrl; 1147 } 1148 CFGWRITE(pi, offset, val, bytes); 1149 1150 msgctrl = pci_get_cfgdata16(pi, capoff + 2); 1151 addrlo = pci_get_cfgdata32(pi, capoff + 4); 1152 if (msgctrl & PCIM_MSICTRL_64BIT) 1153 msgdata = pci_get_cfgdata16(pi, capoff + 12); 1154 else 1155 msgdata = pci_get_cfgdata16(pi, capoff + 8); 1156 1157 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 1158 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 1159 if (pi->pi_msi.enabled) { 1160 pi->pi_msi.addr = addrlo; 1161 pi->pi_msi.msg_data = msgdata; 1162 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 1163 } else { 1164 pi->pi_msi.maxmsgnum = 0; 1165 } 1166 pci_lintr_update(pi); 1167 } 1168 1169 void 1170 pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 1171 int bytes, uint32_t val) 1172 { 1173 1174 /* XXX don't write to the readonly parts */ 1175 CFGWRITE(pi, offset, val, bytes); 1176 } 1177 1178 #define PCIECAP_VERSION 0x2 1179 int 1180 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 1181 { 1182 int err; 1183 struct pciecap pciecap; 1184 1185 bzero(&pciecap, sizeof(pciecap)); 1186 1187 /* 1188 * Use the integrated endpoint type for endpoints on a root complex bus. 1189 * 1190 * NB: bhyve currently only supports a single PCI bus that is the root 1191 * complex bus, so all endpoints are integrated. 1192 */ 1193 if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0)) 1194 type = PCIEM_TYPE_ROOT_INT_EP; 1195 1196 pciecap.capid = PCIY_EXPRESS; 1197 pciecap.pcie_capabilities = PCIECAP_VERSION | type; 1198 if (type != PCIEM_TYPE_ROOT_INT_EP) { 1199 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 1200 pciecap.link_status = 0x11; /* gen1, x1 */ 1201 } 1202 1203 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 1204 return (err); 1205 } 1206 1207 /* 1208 * This function assumes that 'coff' is in the capabilities region of the 1209 * config space. A capoff parameter of zero will force a search for the 1210 * offset and type. 1211 */ 1212 void 1213 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, 1214 uint8_t capoff, int capid) 1215 { 1216 uint8_t nextoff; 1217 1218 /* Do not allow un-aligned writes */ 1219 if ((offset & (bytes - 1)) != 0) 1220 return; 1221 1222 if (capoff == 0) { 1223 /* Find the capability that we want to update */ 1224 capoff = CAP_START_OFFSET; 1225 while (1) { 1226 nextoff = pci_get_cfgdata8(pi, capoff + 1); 1227 if (nextoff == 0) 1228 break; 1229 if (offset >= capoff && offset < nextoff) 1230 break; 1231 1232 capoff = nextoff; 1233 } 1234 assert(offset >= capoff); 1235 capid = pci_get_cfgdata8(pi, capoff); 1236 } 1237 1238 /* 1239 * Capability ID and Next Capability Pointer are readonly. 1240 * However, some o/s's do 4-byte writes that include these. 1241 * For this case, trim the write back to 2 bytes and adjust 1242 * the data. 1243 */ 1244 if (offset == capoff || offset == capoff + 1) { 1245 if (offset == capoff && bytes == 4) { 1246 bytes = 2; 1247 offset += 2; 1248 val >>= 16; 1249 } else 1250 return; 1251 } 1252 1253 switch (capid) { 1254 case PCIY_MSI: 1255 msicap_cfgwrite(pi, capoff, offset, bytes, val); 1256 break; 1257 case PCIY_MSIX: 1258 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 1259 break; 1260 case PCIY_EXPRESS: 1261 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 1262 break; 1263 default: 1264 break; 1265 } 1266 } 1267 1268 static int 1269 pci_emul_iscap(struct pci_devinst *pi, int offset) 1270 { 1271 uint16_t sts; 1272 1273 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1274 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1275 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1276 return (1); 1277 } 1278 return (0); 1279 } 1280 1281 static int 1282 pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1283 int size, uint64_t *val, void *arg1, long arg2) 1284 { 1285 /* 1286 * Ignore writes; return 0xff's for reads. The mem read code 1287 * will take care of truncating to the correct size. 1288 */ 1289 if (dir == MEM_F_READ) { 1290 *val = 0xffffffffffffffff; 1291 } 1292 1293 return (0); 1294 } 1295 1296 static int 1297 pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1298 int bytes, uint64_t *val, void *arg1, long arg2) 1299 { 1300 int bus, slot, func, coff, in; 1301 1302 coff = addr & 0xfff; 1303 func = (addr >> 12) & 0x7; 1304 slot = (addr >> 15) & 0x1f; 1305 bus = (addr >> 20) & 0xff; 1306 in = (dir == MEM_F_READ); 1307 if (in) 1308 *val = ~0UL; 1309 pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); 1310 return (0); 1311 } 1312 1313 uint64_t 1314 pci_ecfg_base(void) 1315 { 1316 1317 return (PCI_EMUL_ECFG_BASE); 1318 } 1319 1320 #define BUSIO_ROUNDUP 32 1321 #define BUSMEM32_ROUNDUP (1024 * 1024) 1322 #define BUSMEM64_ROUNDUP (512 * 1024 * 1024) 1323 1324 int 1325 init_pci(struct vmctx *ctx) 1326 { 1327 char node_name[sizeof("pci.XXX.XX.X")]; 1328 struct mem_range mr; 1329 struct pci_devemu *pde; 1330 struct businfo *bi; 1331 struct slotinfo *si; 1332 struct funcinfo *fi; 1333 nvlist_t *nvl; 1334 const char *emul; 1335 size_t lowmem; 1336 int bus, slot, func; 1337 int error; 1338 1339 if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32) 1340 errx(EX_OSERR, "Invalid lowmem limit"); 1341 1342 pci_emul_iobase = PCI_EMUL_IOBASE; 1343 pci_emul_membase32 = PCI_EMUL_MEMBASE32; 1344 1345 pci_emul_membase64 = 4*GB + vm_get_highmem_size(ctx); 1346 pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64); 1347 pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64; 1348 1349 for (bus = 0; bus < MAXBUSES; bus++) { 1350 snprintf(node_name, sizeof(node_name), "pci.%d", bus); 1351 nvl = find_config_node(node_name); 1352 if (nvl == NULL) 1353 continue; 1354 pci_businfo[bus] = calloc(1, sizeof(struct businfo)); 1355 bi = pci_businfo[bus]; 1356 1357 /* 1358 * Keep track of the i/o and memory resources allocated to 1359 * this bus. 1360 */ 1361 bi->iobase = pci_emul_iobase; 1362 bi->membase32 = pci_emul_membase32; 1363 bi->membase64 = pci_emul_membase64; 1364 1365 /* first run: init devices */ 1366 for (slot = 0; slot < MAXSLOTS; slot++) { 1367 si = &bi->slotinfo[slot]; 1368 for (func = 0; func < MAXFUNCS; func++) { 1369 fi = &si->si_funcs[func]; 1370 snprintf(node_name, sizeof(node_name), 1371 "pci.%d.%d.%d", bus, slot, func); 1372 nvl = find_config_node(node_name); 1373 if (nvl == NULL) 1374 continue; 1375 1376 fi->fi_config = nvl; 1377 emul = get_config_value_node(nvl, "device"); 1378 if (emul == NULL) { 1379 EPRINTLN("pci slot %d:%d:%d: missing " 1380 "\"device\" value", bus, slot, func); 1381 return (EINVAL); 1382 } 1383 pde = pci_emul_finddev(emul); 1384 if (pde == NULL) { 1385 EPRINTLN("pci slot %d:%d:%d: unknown " 1386 "device \"%s\"", bus, slot, func, 1387 emul); 1388 return (EINVAL); 1389 } 1390 if (pde->pe_alias != NULL) { 1391 EPRINTLN("pci slot %d:%d:%d: legacy " 1392 "device \"%s\", use \"%s\" instead", 1393 bus, slot, func, emul, 1394 pde->pe_alias); 1395 return (EINVAL); 1396 } 1397 fi->fi_pde = pde; 1398 error = pci_emul_init(ctx, pde, bus, slot, 1399 func, fi); 1400 if (error) 1401 return (error); 1402 } 1403 } 1404 1405 /* second run: assign BARs and free list */ 1406 struct pci_bar_allocation *bar; 1407 struct pci_bar_allocation *bar_tmp; 1408 TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) { 1409 pci_emul_assign_bar(bar->pdi, bar->idx, bar->type, 1410 bar->size); 1411 free(bar); 1412 } 1413 TAILQ_INIT(&pci_bars); 1414 1415 /* 1416 * Add some slop to the I/O and memory resources decoded by 1417 * this bus to give a guest some flexibility if it wants to 1418 * reprogram the BARs. 1419 */ 1420 pci_emul_iobase += BUSIO_ROUNDUP; 1421 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1422 bi->iolimit = pci_emul_iobase; 1423 1424 pci_emul_membase32 += BUSMEM32_ROUNDUP; 1425 pci_emul_membase32 = roundup2(pci_emul_membase32, 1426 BUSMEM32_ROUNDUP); 1427 bi->memlimit32 = pci_emul_membase32; 1428 1429 pci_emul_membase64 += BUSMEM64_ROUNDUP; 1430 pci_emul_membase64 = roundup2(pci_emul_membase64, 1431 BUSMEM64_ROUNDUP); 1432 bi->memlimit64 = pci_emul_membase64; 1433 } 1434 1435 /* 1436 * PCI backends are initialized before routing INTx interrupts 1437 * so that LPC devices are able to reserve ISA IRQs before 1438 * routing PIRQ pins. 1439 */ 1440 for (bus = 0; bus < MAXBUSES; bus++) { 1441 if ((bi = pci_businfo[bus]) == NULL) 1442 continue; 1443 1444 for (slot = 0; slot < MAXSLOTS; slot++) { 1445 si = &bi->slotinfo[slot]; 1446 for (func = 0; func < MAXFUNCS; func++) { 1447 fi = &si->si_funcs[func]; 1448 if (fi->fi_devi == NULL) 1449 continue; 1450 pci_lintr_route(fi->fi_devi); 1451 } 1452 } 1453 } 1454 lpc_pirq_routed(); 1455 1456 /* 1457 * The guest physical memory map looks like the following: 1458 * [0, lowmem) guest system memory 1459 * [lowmem, 0xC0000000) memory hole (may be absent) 1460 * [0xC0000000, 0xE0000000) PCI hole (32-bit BAR allocation) 1461 * [0xE0000000, 0xF0000000) PCI extended config window 1462 * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware 1463 * [4GB, 4GB + highmem) 1464 */ 1465 1466 /* 1467 * Accesses to memory addresses that are not allocated to system 1468 * memory or PCI devices return 0xff's. 1469 */ 1470 lowmem = vm_get_lowmem_size(ctx); 1471 bzero(&mr, sizeof(struct mem_range)); 1472 mr.name = "PCI hole"; 1473 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1474 mr.base = lowmem; 1475 mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1476 mr.handler = pci_emul_fallback_handler; 1477 error = register_mem_fallback(&mr); 1478 assert(error == 0); 1479 1480 /* PCI extended config space */ 1481 bzero(&mr, sizeof(struct mem_range)); 1482 mr.name = "PCI ECFG"; 1483 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1484 mr.base = PCI_EMUL_ECFG_BASE; 1485 mr.size = PCI_EMUL_ECFG_SIZE; 1486 mr.handler = pci_emul_ecfg_handler; 1487 error = register_mem(&mr); 1488 assert(error == 0); 1489 1490 return (0); 1491 } 1492 1493 static void 1494 pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1495 void *arg) 1496 { 1497 1498 dsdt_line(" Package ()"); 1499 dsdt_line(" {"); 1500 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1501 dsdt_line(" 0x%02X,", pin - 1); 1502 dsdt_line(" Zero,"); 1503 dsdt_line(" 0x%X", ioapic_irq); 1504 dsdt_line(" },"); 1505 } 1506 1507 static void 1508 pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1509 void *arg) 1510 { 1511 char *name; 1512 1513 name = lpc_pirq_name(pirq_pin); 1514 if (name == NULL) 1515 return; 1516 dsdt_line(" Package ()"); 1517 dsdt_line(" {"); 1518 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1519 dsdt_line(" 0x%02X,", pin - 1); 1520 dsdt_line(" %s,", name); 1521 dsdt_line(" 0x00"); 1522 dsdt_line(" },"); 1523 free(name); 1524 } 1525 1526 /* 1527 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1528 * corresponding to each PCI bus. 1529 */ 1530 static void 1531 pci_bus_write_dsdt(int bus) 1532 { 1533 struct businfo *bi; 1534 struct slotinfo *si; 1535 struct pci_devinst *pi; 1536 int count, func, slot; 1537 1538 /* 1539 * If there are no devices on this 'bus' then just return. 1540 */ 1541 if ((bi = pci_businfo[bus]) == NULL) { 1542 /* 1543 * Bus 0 is special because it decodes the I/O ports used 1544 * for PCI config space access even if there are no devices 1545 * on it. 1546 */ 1547 if (bus != 0) 1548 return; 1549 } 1550 1551 dsdt_line(" Device (PC%02X)", bus); 1552 dsdt_line(" {"); 1553 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1554 1555 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1556 dsdt_line(" {"); 1557 dsdt_line(" Return (0x%08X)", bus); 1558 dsdt_line(" }"); 1559 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1560 dsdt_line(" {"); 1561 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1562 "MaxFixed, PosDecode,"); 1563 dsdt_line(" 0x0000, // Granularity"); 1564 dsdt_line(" 0x%04X, // Range Minimum", bus); 1565 dsdt_line(" 0x%04X, // Range Maximum", bus); 1566 dsdt_line(" 0x0000, // Translation Offset"); 1567 dsdt_line(" 0x0001, // Length"); 1568 dsdt_line(" ,, )"); 1569 1570 if (bus == 0) { 1571 dsdt_indent(3); 1572 dsdt_fixed_ioport(0xCF8, 8); 1573 dsdt_unindent(3); 1574 1575 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1576 "PosDecode, EntireRange,"); 1577 dsdt_line(" 0x0000, // Granularity"); 1578 dsdt_line(" 0x0000, // Range Minimum"); 1579 dsdt_line(" 0x0CF7, // Range Maximum"); 1580 dsdt_line(" 0x0000, // Translation Offset"); 1581 dsdt_line(" 0x0CF8, // Length"); 1582 dsdt_line(" ,, , TypeStatic)"); 1583 1584 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1585 "PosDecode, EntireRange,"); 1586 dsdt_line(" 0x0000, // Granularity"); 1587 dsdt_line(" 0x0D00, // Range Minimum"); 1588 dsdt_line(" 0x%04X, // Range Maximum", 1589 PCI_EMUL_IOBASE - 1); 1590 dsdt_line(" 0x0000, // Translation Offset"); 1591 dsdt_line(" 0x%04X, // Length", 1592 PCI_EMUL_IOBASE - 0x0D00); 1593 dsdt_line(" ,, , TypeStatic)"); 1594 1595 if (bi == NULL) { 1596 dsdt_line(" })"); 1597 goto done; 1598 } 1599 } 1600 assert(bi != NULL); 1601 1602 /* i/o window */ 1603 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1604 "PosDecode, EntireRange,"); 1605 dsdt_line(" 0x0000, // Granularity"); 1606 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1607 dsdt_line(" 0x%04X, // Range Maximum", 1608 bi->iolimit - 1); 1609 dsdt_line(" 0x0000, // Translation Offset"); 1610 dsdt_line(" 0x%04X, // Length", 1611 bi->iolimit - bi->iobase); 1612 dsdt_line(" ,, , TypeStatic)"); 1613 1614 /* mmio window (32-bit) */ 1615 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1616 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1617 dsdt_line(" 0x00000000, // Granularity"); 1618 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1619 dsdt_line(" 0x%08X, // Range Maximum\n", 1620 bi->memlimit32 - 1); 1621 dsdt_line(" 0x00000000, // Translation Offset"); 1622 dsdt_line(" 0x%08X, // Length\n", 1623 bi->memlimit32 - bi->membase32); 1624 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1625 1626 /* mmio window (64-bit) */ 1627 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1628 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1629 dsdt_line(" 0x0000000000000000, // Granularity"); 1630 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1631 dsdt_line(" 0x%016lX, // Range Maximum\n", 1632 bi->memlimit64 - 1); 1633 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1634 dsdt_line(" 0x%016lX, // Length\n", 1635 bi->memlimit64 - bi->membase64); 1636 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1637 dsdt_line(" })"); 1638 1639 count = pci_count_lintr(bus); 1640 if (count != 0) { 1641 dsdt_indent(2); 1642 dsdt_line("Name (PPRT, Package ()"); 1643 dsdt_line("{"); 1644 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1645 dsdt_line("})"); 1646 dsdt_line("Name (APRT, Package ()"); 1647 dsdt_line("{"); 1648 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1649 dsdt_line("})"); 1650 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1651 dsdt_line("{"); 1652 dsdt_line(" If (PICM)"); 1653 dsdt_line(" {"); 1654 dsdt_line(" Return (APRT)"); 1655 dsdt_line(" }"); 1656 dsdt_line(" Else"); 1657 dsdt_line(" {"); 1658 dsdt_line(" Return (PPRT)"); 1659 dsdt_line(" }"); 1660 dsdt_line("}"); 1661 dsdt_unindent(2); 1662 } 1663 1664 dsdt_indent(2); 1665 for (slot = 0; slot < MAXSLOTS; slot++) { 1666 si = &bi->slotinfo[slot]; 1667 for (func = 0; func < MAXFUNCS; func++) { 1668 pi = si->si_funcs[func].fi_devi; 1669 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1670 pi->pi_d->pe_write_dsdt(pi); 1671 } 1672 } 1673 dsdt_unindent(2); 1674 done: 1675 dsdt_line(" }"); 1676 } 1677 1678 void 1679 pci_write_dsdt(void) 1680 { 1681 int bus; 1682 1683 dsdt_indent(1); 1684 dsdt_line("Name (PICM, 0x00)"); 1685 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1686 dsdt_line("{"); 1687 dsdt_line(" Store (Arg0, PICM)"); 1688 dsdt_line("}"); 1689 dsdt_line(""); 1690 dsdt_line("Scope (_SB)"); 1691 dsdt_line("{"); 1692 for (bus = 0; bus < MAXBUSES; bus++) 1693 pci_bus_write_dsdt(bus); 1694 dsdt_line("}"); 1695 dsdt_unindent(1); 1696 } 1697 1698 int 1699 pci_bus_configured(int bus) 1700 { 1701 assert(bus >= 0 && bus < MAXBUSES); 1702 return (pci_businfo[bus] != NULL); 1703 } 1704 1705 int 1706 pci_msi_enabled(struct pci_devinst *pi) 1707 { 1708 return (pi->pi_msi.enabled); 1709 } 1710 1711 int 1712 pci_msi_maxmsgnum(struct pci_devinst *pi) 1713 { 1714 if (pi->pi_msi.enabled) 1715 return (pi->pi_msi.maxmsgnum); 1716 else 1717 return (0); 1718 } 1719 1720 int 1721 pci_msix_enabled(struct pci_devinst *pi) 1722 { 1723 1724 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1725 } 1726 1727 void 1728 pci_generate_msix(struct pci_devinst *pi, int index) 1729 { 1730 struct msix_table_entry *mte; 1731 1732 if (!pci_msix_enabled(pi)) 1733 return; 1734 1735 if (pi->pi_msix.function_mask) 1736 return; 1737 1738 if (index >= pi->pi_msix.table_count) 1739 return; 1740 1741 mte = &pi->pi_msix.table[index]; 1742 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1743 /* XXX Set PBA bit if interrupt is disabled */ 1744 vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); 1745 } 1746 } 1747 1748 void 1749 pci_generate_msi(struct pci_devinst *pi, int index) 1750 { 1751 1752 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1753 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1754 pi->pi_msi.msg_data + index); 1755 } 1756 } 1757 1758 static bool 1759 pci_lintr_permitted(struct pci_devinst *pi) 1760 { 1761 uint16_t cmd; 1762 1763 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1764 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1765 (cmd & PCIM_CMD_INTxDIS))); 1766 } 1767 1768 void 1769 pci_lintr_request(struct pci_devinst *pi) 1770 { 1771 struct businfo *bi; 1772 struct slotinfo *si; 1773 int bestpin, bestcount, pin; 1774 1775 bi = pci_businfo[pi->pi_bus]; 1776 assert(bi != NULL); 1777 1778 /* 1779 * Just allocate a pin from our slot. The pin will be 1780 * assigned IRQs later when interrupts are routed. 1781 */ 1782 si = &bi->slotinfo[pi->pi_slot]; 1783 bestpin = 0; 1784 bestcount = si->si_intpins[0].ii_count; 1785 for (pin = 1; pin < 4; pin++) { 1786 if (si->si_intpins[pin].ii_count < bestcount) { 1787 bestpin = pin; 1788 bestcount = si->si_intpins[pin].ii_count; 1789 } 1790 } 1791 1792 si->si_intpins[bestpin].ii_count++; 1793 pi->pi_lintr.pin = bestpin + 1; 1794 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1795 } 1796 1797 static void 1798 pci_lintr_route(struct pci_devinst *pi) 1799 { 1800 struct businfo *bi; 1801 struct intxinfo *ii; 1802 1803 if (pi->pi_lintr.pin == 0) 1804 return; 1805 1806 bi = pci_businfo[pi->pi_bus]; 1807 assert(bi != NULL); 1808 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1809 1810 /* 1811 * Attempt to allocate an I/O APIC pin for this intpin if one 1812 * is not yet assigned. 1813 */ 1814 if (ii->ii_ioapic_irq == 0) 1815 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); 1816 assert(ii->ii_ioapic_irq > 0); 1817 1818 /* 1819 * Attempt to allocate a PIRQ pin for this intpin if one is 1820 * not yet assigned. 1821 */ 1822 if (ii->ii_pirq_pin == 0) 1823 ii->ii_pirq_pin = pirq_alloc_pin(pi); 1824 assert(ii->ii_pirq_pin > 0); 1825 1826 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1827 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1828 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1829 } 1830 1831 void 1832 pci_lintr_assert(struct pci_devinst *pi) 1833 { 1834 1835 assert(pi->pi_lintr.pin > 0); 1836 1837 pthread_mutex_lock(&pi->pi_lintr.lock); 1838 if (pi->pi_lintr.state == IDLE) { 1839 if (pci_lintr_permitted(pi)) { 1840 pi->pi_lintr.state = ASSERTED; 1841 pci_irq_assert(pi); 1842 } else 1843 pi->pi_lintr.state = PENDING; 1844 } 1845 pthread_mutex_unlock(&pi->pi_lintr.lock); 1846 } 1847 1848 void 1849 pci_lintr_deassert(struct pci_devinst *pi) 1850 { 1851 1852 assert(pi->pi_lintr.pin > 0); 1853 1854 pthread_mutex_lock(&pi->pi_lintr.lock); 1855 if (pi->pi_lintr.state == ASSERTED) { 1856 pi->pi_lintr.state = IDLE; 1857 pci_irq_deassert(pi); 1858 } else if (pi->pi_lintr.state == PENDING) 1859 pi->pi_lintr.state = IDLE; 1860 pthread_mutex_unlock(&pi->pi_lintr.lock); 1861 } 1862 1863 static void 1864 pci_lintr_update(struct pci_devinst *pi) 1865 { 1866 1867 pthread_mutex_lock(&pi->pi_lintr.lock); 1868 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1869 pci_irq_deassert(pi); 1870 pi->pi_lintr.state = PENDING; 1871 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1872 pi->pi_lintr.state = ASSERTED; 1873 pci_irq_assert(pi); 1874 } 1875 pthread_mutex_unlock(&pi->pi_lintr.lock); 1876 #ifndef __FreeBSD__ 1877 if (pi->pi_d->pe_lintrupdate != NULL) { 1878 pi->pi_d->pe_lintrupdate(pi); 1879 } 1880 #endif /* __FreeBSD__ */ 1881 } 1882 1883 int 1884 pci_count_lintr(int bus) 1885 { 1886 int count, slot, pin; 1887 struct slotinfo *slotinfo; 1888 1889 count = 0; 1890 if (pci_businfo[bus] != NULL) { 1891 for (slot = 0; slot < MAXSLOTS; slot++) { 1892 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1893 for (pin = 0; pin < 4; pin++) { 1894 if (slotinfo->si_intpins[pin].ii_count != 0) 1895 count++; 1896 } 1897 } 1898 } 1899 return (count); 1900 } 1901 1902 void 1903 pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) 1904 { 1905 struct businfo *bi; 1906 struct slotinfo *si; 1907 struct intxinfo *ii; 1908 int slot, pin; 1909 1910 if ((bi = pci_businfo[bus]) == NULL) 1911 return; 1912 1913 for (slot = 0; slot < MAXSLOTS; slot++) { 1914 si = &bi->slotinfo[slot]; 1915 for (pin = 0; pin < 4; pin++) { 1916 ii = &si->si_intpins[pin]; 1917 if (ii->ii_count != 0) 1918 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 1919 ii->ii_ioapic_irq, arg); 1920 } 1921 } 1922 } 1923 1924 /* 1925 * Return 1 if the emulated device in 'slot' is a multi-function device. 1926 * Return 0 otherwise. 1927 */ 1928 static int 1929 pci_emul_is_mfdev(int bus, int slot) 1930 { 1931 struct businfo *bi; 1932 struct slotinfo *si; 1933 int f, numfuncs; 1934 1935 numfuncs = 0; 1936 if ((bi = pci_businfo[bus]) != NULL) { 1937 si = &bi->slotinfo[slot]; 1938 for (f = 0; f < MAXFUNCS; f++) { 1939 if (si->si_funcs[f].fi_devi != NULL) { 1940 numfuncs++; 1941 } 1942 } 1943 } 1944 return (numfuncs > 1); 1945 } 1946 1947 /* 1948 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1949 * whether or not is a multi-function being emulated in the pci 'slot'. 1950 */ 1951 static void 1952 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1953 { 1954 int mfdev; 1955 1956 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1957 mfdev = pci_emul_is_mfdev(bus, slot); 1958 switch (bytes) { 1959 case 1: 1960 case 2: 1961 *rv &= ~PCIM_MFDEV; 1962 if (mfdev) { 1963 *rv |= PCIM_MFDEV; 1964 } 1965 break; 1966 case 4: 1967 *rv &= ~(PCIM_MFDEV << 16); 1968 if (mfdev) { 1969 *rv |= (PCIM_MFDEV << 16); 1970 } 1971 break; 1972 } 1973 } 1974 } 1975 1976 /* 1977 * Update device state in response to changes to the PCI command 1978 * register. 1979 */ 1980 void 1981 pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) 1982 { 1983 int i; 1984 uint16_t changed, new; 1985 1986 new = pci_get_cfgdata16(pi, PCIR_COMMAND); 1987 changed = old ^ new; 1988 1989 /* 1990 * If the MMIO or I/O address space decoding has changed then 1991 * register/unregister all BARs that decode that address space. 1992 */ 1993 for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) { 1994 switch (pi->pi_bar[i].type) { 1995 case PCIBAR_NONE: 1996 case PCIBAR_MEMHI64: 1997 break; 1998 case PCIBAR_IO: 1999 /* I/O address space decoding changed? */ 2000 if (changed & PCIM_CMD_PORTEN) { 2001 if (new & PCIM_CMD_PORTEN) 2002 register_bar(pi, i); 2003 else 2004 unregister_bar(pi, i); 2005 } 2006 break; 2007 case PCIBAR_ROM: 2008 /* skip (un-)register of ROM if it disabled */ 2009 if (!romen(pi)) 2010 break; 2011 /* fallthrough */ 2012 case PCIBAR_MEM32: 2013 case PCIBAR_MEM64: 2014 /* MMIO address space decoding changed? */ 2015 if (changed & PCIM_CMD_MEMEN) { 2016 if (new & PCIM_CMD_MEMEN) 2017 register_bar(pi, i); 2018 else 2019 unregister_bar(pi, i); 2020 } 2021 break; 2022 default: 2023 assert(0); 2024 } 2025 } 2026 2027 /* 2028 * If INTx has been unmasked and is pending, assert the 2029 * interrupt. 2030 */ 2031 pci_lintr_update(pi); 2032 } 2033 2034 static void 2035 pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) 2036 { 2037 int rshift; 2038 uint32_t cmd, old, readonly; 2039 2040 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 2041 2042 /* 2043 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. 2044 * 2045 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are 2046 * 'write 1 to clear'. However these bits are not set to '1' by 2047 * any device emulation so it is simpler to treat them as readonly. 2048 */ 2049 rshift = (coff & 0x3) * 8; 2050 readonly = 0xFFFFF880 >> rshift; 2051 2052 old = CFGREAD(pi, coff, bytes); 2053 new &= ~readonly; 2054 new |= (old & readonly); 2055 CFGWRITE(pi, coff, new, bytes); /* update config */ 2056 2057 pci_emul_cmd_changed(pi, cmd); 2058 } 2059 2060 static void 2061 pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, 2062 int coff, int bytes, uint32_t *eax) 2063 { 2064 struct businfo *bi; 2065 struct slotinfo *si; 2066 struct pci_devinst *pi; 2067 struct pci_devemu *pe; 2068 int idx, needcfg; 2069 uint64_t addr, mask; 2070 uint64_t bar = 0; 2071 2072 if ((bi = pci_businfo[bus]) != NULL) { 2073 si = &bi->slotinfo[slot]; 2074 pi = si->si_funcs[func].fi_devi; 2075 } else 2076 pi = NULL; 2077 2078 /* 2079 * Just return if there is no device at this slot:func or if the 2080 * the guest is doing an un-aligned access. 2081 */ 2082 if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || 2083 (coff & (bytes - 1)) != 0) { 2084 if (in) 2085 *eax = 0xffffffff; 2086 return; 2087 } 2088 2089 /* 2090 * Ignore all writes beyond the standard config space and return all 2091 * ones on reads. 2092 */ 2093 if (coff >= PCI_REGMAX + 1) { 2094 if (in) { 2095 *eax = 0xffffffff; 2096 /* 2097 * Extended capabilities begin at offset 256 in config 2098 * space. Absence of extended capabilities is signaled 2099 * with all 0s in the extended capability header at 2100 * offset 256. 2101 */ 2102 if (coff <= PCI_REGMAX + 4) 2103 *eax = 0x00000000; 2104 } 2105 return; 2106 } 2107 2108 pe = pi->pi_d; 2109 2110 /* 2111 * Config read 2112 */ 2113 if (in) { 2114 /* Let the device emulation override the default handler */ 2115 if (pe->pe_cfgread != NULL) { 2116 needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, 2117 eax); 2118 } else { 2119 needcfg = 1; 2120 } 2121 2122 if (needcfg) 2123 *eax = CFGREAD(pi, coff, bytes); 2124 2125 pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); 2126 } else { 2127 /* Let the device emulation override the default handler */ 2128 if (pe->pe_cfgwrite != NULL && 2129 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 2130 return; 2131 2132 /* 2133 * Special handling for write to BAR and ROM registers 2134 */ 2135 if ((coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) || 2136 (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4)) { 2137 /* 2138 * Ignore writes to BAR registers that are not 2139 * 4-byte aligned. 2140 */ 2141 if (bytes != 4 || (coff & 0x3) != 0) 2142 return; 2143 #ifndef __FreeBSD__ 2144 if (coff < PCIR_BIOS) { 2145 idx = (coff - PCIR_BAR(0)) / 4; 2146 } else { 2147 idx = PCI_ROM_IDX; 2148 } 2149 #else 2150 if (coff != PCIR_BIOS) { 2151 idx = (coff - PCIR_BAR(0)) / 4; 2152 } else { 2153 idx = PCI_ROM_IDX; 2154 } 2155 #endif 2156 mask = ~(pi->pi_bar[idx].size - 1); 2157 switch (pi->pi_bar[idx].type) { 2158 case PCIBAR_NONE: 2159 pi->pi_bar[idx].addr = bar = 0; 2160 break; 2161 case PCIBAR_IO: 2162 addr = *eax & mask; 2163 addr &= 0xffff; 2164 bar = addr | pi->pi_bar[idx].lobits; 2165 /* 2166 * Register the new BAR value for interception 2167 */ 2168 if (addr != pi->pi_bar[idx].addr) { 2169 update_bar_address(pi, addr, idx, 2170 PCIBAR_IO); 2171 } 2172 break; 2173 case PCIBAR_MEM32: 2174 addr = bar = *eax & mask; 2175 bar |= pi->pi_bar[idx].lobits; 2176 if (addr != pi->pi_bar[idx].addr) { 2177 update_bar_address(pi, addr, idx, 2178 PCIBAR_MEM32); 2179 } 2180 break; 2181 case PCIBAR_MEM64: 2182 addr = bar = *eax & mask; 2183 bar |= pi->pi_bar[idx].lobits; 2184 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 2185 update_bar_address(pi, addr, idx, 2186 PCIBAR_MEM64); 2187 } 2188 break; 2189 case PCIBAR_MEMHI64: 2190 mask = ~(pi->pi_bar[idx - 1].size - 1); 2191 addr = ((uint64_t)*eax << 32) & mask; 2192 bar = addr >> 32; 2193 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 2194 update_bar_address(pi, addr, idx - 1, 2195 PCIBAR_MEMHI64); 2196 } 2197 break; 2198 case PCIBAR_ROM: 2199 addr = bar = *eax & mask; 2200 if (memen(pi) && romen(pi)) { 2201 unregister_bar(pi, idx); 2202 } 2203 pi->pi_bar[idx].addr = addr; 2204 pi->pi_bar[idx].lobits = *eax & 2205 PCIM_BIOS_ENABLE; 2206 /* romen could have changed it value */ 2207 if (memen(pi) && romen(pi)) { 2208 register_bar(pi, idx); 2209 } 2210 bar |= pi->pi_bar[idx].lobits; 2211 break; 2212 default: 2213 assert(0); 2214 } 2215 pci_set_cfgdata32(pi, coff, bar); 2216 2217 } else if (pci_emul_iscap(pi, coff)) { 2218 pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); 2219 } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { 2220 pci_emul_cmdsts_write(pi, coff, *eax, bytes); 2221 } else { 2222 CFGWRITE(pi, coff, *eax, bytes); 2223 } 2224 } 2225 } 2226 2227 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 2228 2229 static int 2230 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2231 uint32_t *eax, void *arg) 2232 { 2233 uint32_t x; 2234 2235 if (bytes != 4) { 2236 if (in) 2237 *eax = (bytes == 2) ? 0xffff : 0xff; 2238 return (0); 2239 } 2240 2241 if (in) { 2242 x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; 2243 if (cfgenable) 2244 x |= CONF1_ENABLE; 2245 *eax = x; 2246 } else { 2247 x = *eax; 2248 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 2249 cfgoff = (x & PCI_REGMAX) & ~0x03; 2250 cfgfunc = (x >> 8) & PCI_FUNCMAX; 2251 cfgslot = (x >> 11) & PCI_SLOTMAX; 2252 cfgbus = (x >> 16) & PCI_BUSMAX; 2253 } 2254 2255 return (0); 2256 } 2257 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 2258 2259 static int 2260 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 2261 uint32_t *eax, void *arg) 2262 { 2263 int coff; 2264 2265 assert(bytes == 1 || bytes == 2 || bytes == 4); 2266 2267 coff = cfgoff + (port - CONF1_DATA_PORT); 2268 if (cfgenable) { 2269 pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, 2270 eax); 2271 } else { 2272 /* Ignore accesses to cfgdata if not enabled by cfgaddr */ 2273 if (in) 2274 *eax = 0xffffffff; 2275 } 2276 return (0); 2277 } 2278 2279 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 2280 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 2281 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 2282 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 2283 2284 #define PCI_EMUL_TEST 2285 #ifdef PCI_EMUL_TEST 2286 /* 2287 * Define a dummy test device 2288 */ 2289 #define DIOSZ 8 2290 #define DMEMSZ 4096 2291 struct pci_emul_dsoftc { 2292 uint8_t ioregs[DIOSZ]; 2293 uint8_t memregs[2][DMEMSZ]; 2294 }; 2295 2296 #define PCI_EMUL_MSI_MSGS 4 2297 #define PCI_EMUL_MSIX_MSGS 16 2298 2299 static int 2300 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) 2301 { 2302 int error; 2303 struct pci_emul_dsoftc *sc; 2304 2305 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 2306 2307 pi->pi_arg = sc; 2308 2309 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 2310 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 2311 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 2312 2313 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 2314 assert(error == 0); 2315 2316 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 2317 assert(error == 0); 2318 2319 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 2320 assert(error == 0); 2321 2322 error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); 2323 assert(error == 0); 2324 2325 return (0); 2326 } 2327 2328 static void 2329 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2330 uint64_t offset, int size, uint64_t value) 2331 { 2332 int i; 2333 struct pci_emul_dsoftc *sc = pi->pi_arg; 2334 2335 if (baridx == 0) { 2336 if (offset + size > DIOSZ) { 2337 printf("diow: iow too large, offset %ld size %d\n", 2338 offset, size); 2339 return; 2340 } 2341 2342 if (size == 1) { 2343 sc->ioregs[offset] = value & 0xff; 2344 } else if (size == 2) { 2345 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 2346 } else if (size == 4) { 2347 *(uint32_t *)&sc->ioregs[offset] = value; 2348 } else { 2349 printf("diow: iow unknown size %d\n", size); 2350 } 2351 2352 /* 2353 * Special magic value to generate an interrupt 2354 */ 2355 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 2356 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 2357 2358 if (value == 0xabcdef) { 2359 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 2360 pci_generate_msi(pi, i); 2361 } 2362 } 2363 2364 if (baridx == 1 || baridx == 2) { 2365 if (offset + size > DMEMSZ) { 2366 printf("diow: memw too large, offset %ld size %d\n", 2367 offset, size); 2368 return; 2369 } 2370 2371 i = baridx - 1; /* 'memregs' index */ 2372 2373 if (size == 1) { 2374 sc->memregs[i][offset] = value; 2375 } else if (size == 2) { 2376 *(uint16_t *)&sc->memregs[i][offset] = value; 2377 } else if (size == 4) { 2378 *(uint32_t *)&sc->memregs[i][offset] = value; 2379 } else if (size == 8) { 2380 *(uint64_t *)&sc->memregs[i][offset] = value; 2381 } else { 2382 printf("diow: memw unknown size %d\n", size); 2383 } 2384 2385 /* 2386 * magic interrupt ?? 2387 */ 2388 } 2389 2390 if (baridx > 2 || baridx < 0) { 2391 printf("diow: unknown bar idx %d\n", baridx); 2392 } 2393 } 2394 2395 static uint64_t 2396 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2397 uint64_t offset, int size) 2398 { 2399 struct pci_emul_dsoftc *sc = pi->pi_arg; 2400 uint32_t value; 2401 int i; 2402 2403 value = 0; 2404 if (baridx == 0) { 2405 if (offset + size > DIOSZ) { 2406 printf("dior: ior too large, offset %ld size %d\n", 2407 offset, size); 2408 return (0); 2409 } 2410 2411 value = 0; 2412 if (size == 1) { 2413 value = sc->ioregs[offset]; 2414 } else if (size == 2) { 2415 value = *(uint16_t *) &sc->ioregs[offset]; 2416 } else if (size == 4) { 2417 value = *(uint32_t *) &sc->ioregs[offset]; 2418 } else { 2419 printf("dior: ior unknown size %d\n", size); 2420 } 2421 } 2422 2423 if (baridx == 1 || baridx == 2) { 2424 if (offset + size > DMEMSZ) { 2425 printf("dior: memr too large, offset %ld size %d\n", 2426 offset, size); 2427 return (0); 2428 } 2429 2430 i = baridx - 1; /* 'memregs' index */ 2431 2432 if (size == 1) { 2433 value = sc->memregs[i][offset]; 2434 } else if (size == 2) { 2435 value = *(uint16_t *) &sc->memregs[i][offset]; 2436 } else if (size == 4) { 2437 value = *(uint32_t *) &sc->memregs[i][offset]; 2438 } else if (size == 8) { 2439 value = *(uint64_t *) &sc->memregs[i][offset]; 2440 } else { 2441 printf("dior: ior unknown size %d\n", size); 2442 } 2443 } 2444 2445 2446 if (baridx > 2 || baridx < 0) { 2447 printf("dior: unknown bar idx %d\n", baridx); 2448 return (0); 2449 } 2450 2451 return (value); 2452 } 2453 2454 struct pci_devemu pci_dummy = { 2455 .pe_emu = "dummy", 2456 .pe_init = pci_emul_dinit, 2457 .pe_barwrite = pci_emul_diow, 2458 .pe_barread = pci_emul_dior, 2459 }; 2460 PCI_EMUL_SET(pci_dummy); 2461 2462 #endif /* PCI_EMUL_TEST */ 2463