1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/linker_set.h> 36 37 #include <ctype.h> 38 #include <errno.h> 39 #include <pthread.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <strings.h> 44 #include <assert.h> 45 #include <stdbool.h> 46 47 #include <machine/vmm.h> 48 #include <machine/vmm_snapshot.h> 49 #include <vmmapi.h> 50 51 #include "acpi.h" 52 #include "bhyverun.h" 53 #include "debug.h" 54 #include "inout.h" 55 #include "ioapic.h" 56 #include "mem.h" 57 #include "pci_emul.h" 58 #include "pci_irq.h" 59 #include "pci_lpc.h" 60 61 #define CONF1_ADDR_PORT 0x0cf8 62 #define CONF1_DATA_PORT 0x0cfc 63 64 #define CONF1_ENABLE 0x80000000ul 65 66 #define MAXBUSES (PCI_BUSMAX + 1) 67 #define MAXSLOTS (PCI_SLOTMAX + 1) 68 #define MAXFUNCS (PCI_FUNCMAX + 1) 69 70 struct funcinfo { 71 char *fi_name; 72 char *fi_param; 73 struct pci_devinst *fi_devi; 74 }; 75 76 struct intxinfo { 77 int ii_count; 78 int ii_pirq_pin; 79 int ii_ioapic_irq; 80 }; 81 82 struct slotinfo { 83 struct intxinfo si_intpins[4]; 84 struct funcinfo si_funcs[MAXFUNCS]; 85 }; 86 87 struct businfo { 88 uint16_t iobase, iolimit; /* I/O window */ 89 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 90 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 91 struct slotinfo slotinfo[MAXSLOTS]; 92 }; 93 94 static struct businfo *pci_businfo[MAXBUSES]; 95 96 SET_DECLARE(pci_devemu_set, struct pci_devemu); 97 98 static uint64_t pci_emul_iobase; 99 static uint64_t pci_emul_membase32; 100 static uint64_t pci_emul_membase64; 101 102 #define PCI_EMUL_IOBASE 0x2000 103 #define PCI_EMUL_IOLIMIT 0x10000 104 105 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */ 106 #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ 107 SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); 108 109 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE 110 111 #define PCI_EMUL_MEMBASE64 0xD000000000UL 112 #define PCI_EMUL_MEMLIMIT64 0xFD00000000UL 113 114 static struct pci_devemu *pci_emul_finddev(char *name); 115 static void pci_lintr_route(struct pci_devinst *pi); 116 static void pci_lintr_update(struct pci_devinst *pi); 117 static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, 118 int func, int coff, int bytes, uint32_t *val); 119 120 static __inline void 121 CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) 122 { 123 124 if (bytes == 1) 125 pci_set_cfgdata8(pi, coff, val); 126 else if (bytes == 2) 127 pci_set_cfgdata16(pi, coff, val); 128 else 129 pci_set_cfgdata32(pi, coff, val); 130 } 131 132 static __inline uint32_t 133 CFGREAD(struct pci_devinst *pi, int coff, int bytes) 134 { 135 136 if (bytes == 1) 137 return (pci_get_cfgdata8(pi, coff)); 138 else if (bytes == 2) 139 return (pci_get_cfgdata16(pi, coff)); 140 else 141 return (pci_get_cfgdata32(pi, coff)); 142 } 143 144 /* 145 * I/O access 146 */ 147 148 /* 149 * Slot options are in the form: 150 * 151 * <bus>:<slot>:<func>,<emul>[,<config>] 152 * <slot>[:<func>],<emul>[,<config>] 153 * 154 * slot is 0..31 155 * func is 0..7 156 * emul is a string describing the type of PCI device e.g. virtio-net 157 * config is an optional string, depending on the device, that can be 158 * used for configuration. 159 * Examples are: 160 * 1,virtio-net,tap0 161 * 3:0,dummy 162 */ 163 static void 164 pci_parse_slot_usage(char *aopt) 165 { 166 167 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 168 } 169 170 int 171 pci_parse_slot(char *opt) 172 { 173 struct businfo *bi; 174 struct slotinfo *si; 175 char *emul, *config, *str, *cp; 176 int error, bnum, snum, fnum; 177 178 error = -1; 179 str = strdup(opt); 180 181 emul = config = NULL; 182 if ((cp = strchr(str, ',')) != NULL) { 183 *cp = '\0'; 184 emul = cp + 1; 185 if ((cp = strchr(emul, ',')) != NULL) { 186 *cp = '\0'; 187 config = cp + 1; 188 } 189 } else { 190 pci_parse_slot_usage(opt); 191 goto done; 192 } 193 194 /* <bus>:<slot>:<func> */ 195 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 196 bnum = 0; 197 /* <slot>:<func> */ 198 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 199 fnum = 0; 200 /* <slot> */ 201 if (sscanf(str, "%d", &snum) != 1) { 202 snum = -1; 203 } 204 } 205 } 206 207 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 208 fnum < 0 || fnum >= MAXFUNCS) { 209 pci_parse_slot_usage(opt); 210 goto done; 211 } 212 213 if (pci_businfo[bnum] == NULL) 214 pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); 215 216 bi = pci_businfo[bnum]; 217 si = &bi->slotinfo[snum]; 218 219 if (si->si_funcs[fnum].fi_name != NULL) { 220 EPRINTLN("pci slot %d:%d already occupied!", 221 snum, fnum); 222 goto done; 223 } 224 225 if (pci_emul_finddev(emul) == NULL) { 226 EPRINTLN("pci slot %d:%d: unknown device \"%s\"", 227 snum, fnum, emul); 228 goto done; 229 } 230 231 error = 0; 232 si->si_funcs[fnum].fi_name = emul; 233 si->si_funcs[fnum].fi_param = config; 234 235 done: 236 if (error) 237 free(str); 238 239 return (error); 240 } 241 242 void 243 pci_print_supported_devices() 244 { 245 struct pci_devemu **pdpp, *pdp; 246 247 SET_FOREACH(pdpp, pci_devemu_set) { 248 pdp = *pdpp; 249 printf("%s\n", pdp->pe_emu); 250 } 251 } 252 253 static int 254 pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 255 { 256 257 if (offset < pi->pi_msix.pba_offset) 258 return (0); 259 260 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 261 return (0); 262 } 263 264 return (1); 265 } 266 267 int 268 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 269 uint64_t value) 270 { 271 int msix_entry_offset; 272 int tab_index; 273 char *dest; 274 275 /* support only 4 or 8 byte writes */ 276 if (size != 4 && size != 8) 277 return (-1); 278 279 /* 280 * Return if table index is beyond what device supports 281 */ 282 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 283 if (tab_index >= pi->pi_msix.table_count) 284 return (-1); 285 286 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 287 288 /* support only aligned writes */ 289 if ((msix_entry_offset % size) != 0) 290 return (-1); 291 292 dest = (char *)(pi->pi_msix.table + tab_index); 293 dest += msix_entry_offset; 294 295 if (size == 4) 296 *((uint32_t *)dest) = value; 297 else 298 *((uint64_t *)dest) = value; 299 300 return (0); 301 } 302 303 uint64_t 304 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 305 { 306 char *dest; 307 int msix_entry_offset; 308 int tab_index; 309 uint64_t retval = ~0; 310 311 /* 312 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 313 * table but we also allow 1 byte access to accommodate reads from 314 * ddb. 315 */ 316 if (size != 1 && size != 4 && size != 8) 317 return (retval); 318 319 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 320 321 /* support only aligned reads */ 322 if ((msix_entry_offset % size) != 0) { 323 return (retval); 324 } 325 326 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 327 328 if (tab_index < pi->pi_msix.table_count) { 329 /* valid MSI-X Table access */ 330 dest = (char *)(pi->pi_msix.table + tab_index); 331 dest += msix_entry_offset; 332 333 if (size == 1) 334 retval = *((uint8_t *)dest); 335 else if (size == 4) 336 retval = *((uint32_t *)dest); 337 else 338 retval = *((uint64_t *)dest); 339 } else if (pci_valid_pba_offset(pi, offset)) { 340 /* return 0 for PBA access */ 341 retval = 0; 342 } 343 344 return (retval); 345 } 346 347 int 348 pci_msix_table_bar(struct pci_devinst *pi) 349 { 350 351 if (pi->pi_msix.table != NULL) 352 return (pi->pi_msix.table_bar); 353 else 354 return (-1); 355 } 356 357 int 358 pci_msix_pba_bar(struct pci_devinst *pi) 359 { 360 361 if (pi->pi_msix.table != NULL) 362 return (pi->pi_msix.pba_bar); 363 else 364 return (-1); 365 } 366 367 static int 368 pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 369 uint32_t *eax, void *arg) 370 { 371 struct pci_devinst *pdi = arg; 372 struct pci_devemu *pe = pdi->pi_d; 373 uint64_t offset; 374 int i; 375 376 for (i = 0; i <= PCI_BARMAX; i++) { 377 if (pdi->pi_bar[i].type == PCIBAR_IO && 378 port >= pdi->pi_bar[i].addr && 379 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 380 offset = port - pdi->pi_bar[i].addr; 381 if (in) 382 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 383 offset, bytes); 384 else 385 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 386 bytes, *eax); 387 return (0); 388 } 389 } 390 return (-1); 391 } 392 393 static int 394 pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 395 int size, uint64_t *val, void *arg1, long arg2) 396 { 397 struct pci_devinst *pdi = arg1; 398 struct pci_devemu *pe = pdi->pi_d; 399 uint64_t offset; 400 int bidx = (int) arg2; 401 402 assert(bidx <= PCI_BARMAX); 403 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 404 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 405 assert(addr >= pdi->pi_bar[bidx].addr && 406 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 407 408 offset = addr - pdi->pi_bar[bidx].addr; 409 410 if (dir == MEM_F_WRITE) { 411 if (size == 8) { 412 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 413 4, *val & 0xffffffff); 414 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 415 4, *val >> 32); 416 } else { 417 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 418 size, *val); 419 } 420 } else { 421 if (size == 8) { 422 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 423 offset, 4); 424 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 425 offset + 4, 4) << 32; 426 } else { 427 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 428 offset, size); 429 } 430 } 431 432 return (0); 433 } 434 435 436 static int 437 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 438 uint64_t *addr) 439 { 440 uint64_t base; 441 442 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 443 444 base = roundup2(*baseptr, size); 445 446 if (base + size <= limit) { 447 *addr = base; 448 *baseptr = base + size; 449 return (0); 450 } else 451 return (-1); 452 } 453 454 int 455 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 456 uint64_t size) 457 { 458 459 return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); 460 } 461 462 /* 463 * Register (or unregister) the MMIO or I/O region associated with the BAR 464 * register 'idx' of an emulated pci device. 465 */ 466 static void 467 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 468 { 469 int error; 470 struct inout_port iop; 471 struct mem_range mr; 472 473 switch (pi->pi_bar[idx].type) { 474 case PCIBAR_IO: 475 bzero(&iop, sizeof(struct inout_port)); 476 iop.name = pi->pi_name; 477 iop.port = pi->pi_bar[idx].addr; 478 iop.size = pi->pi_bar[idx].size; 479 if (registration) { 480 iop.flags = IOPORT_F_INOUT; 481 iop.handler = pci_emul_io_handler; 482 iop.arg = pi; 483 error = register_inout(&iop); 484 } else 485 error = unregister_inout(&iop); 486 break; 487 case PCIBAR_MEM32: 488 case PCIBAR_MEM64: 489 bzero(&mr, sizeof(struct mem_range)); 490 mr.name = pi->pi_name; 491 mr.base = pi->pi_bar[idx].addr; 492 mr.size = pi->pi_bar[idx].size; 493 if (registration) { 494 mr.flags = MEM_F_RW; 495 mr.handler = pci_emul_mem_handler; 496 mr.arg1 = pi; 497 mr.arg2 = idx; 498 error = register_mem(&mr); 499 } else 500 error = unregister_mem(&mr); 501 break; 502 default: 503 error = EINVAL; 504 break; 505 } 506 assert(error == 0); 507 } 508 509 static void 510 unregister_bar(struct pci_devinst *pi, int idx) 511 { 512 513 modify_bar_registration(pi, idx, 0); 514 } 515 516 static void 517 register_bar(struct pci_devinst *pi, int idx) 518 { 519 520 modify_bar_registration(pi, idx, 1); 521 } 522 523 /* Are we decoding i/o port accesses for the emulated pci device? */ 524 static int 525 porten(struct pci_devinst *pi) 526 { 527 uint16_t cmd; 528 529 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 530 531 return (cmd & PCIM_CMD_PORTEN); 532 } 533 534 /* Are we decoding memory accesses for the emulated pci device? */ 535 static int 536 memen(struct pci_devinst *pi) 537 { 538 uint16_t cmd; 539 540 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 541 542 return (cmd & PCIM_CMD_MEMEN); 543 } 544 545 /* 546 * Update the MMIO or I/O address that is decoded by the BAR register. 547 * 548 * If the pci device has enabled the address space decoding then intercept 549 * the address range decoded by the BAR register. 550 */ 551 static void 552 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 553 { 554 int decode; 555 556 if (pi->pi_bar[idx].type == PCIBAR_IO) 557 decode = porten(pi); 558 else 559 decode = memen(pi); 560 561 if (decode) 562 unregister_bar(pi, idx); 563 564 switch (type) { 565 case PCIBAR_IO: 566 case PCIBAR_MEM32: 567 pi->pi_bar[idx].addr = addr; 568 break; 569 case PCIBAR_MEM64: 570 pi->pi_bar[idx].addr &= ~0xffffffffUL; 571 pi->pi_bar[idx].addr |= addr; 572 break; 573 case PCIBAR_MEMHI64: 574 pi->pi_bar[idx].addr &= 0xffffffff; 575 pi->pi_bar[idx].addr |= addr; 576 break; 577 default: 578 assert(0); 579 } 580 581 if (decode) 582 register_bar(pi, idx); 583 } 584 585 int 586 pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, 587 enum pcibar_type type, uint64_t size) 588 { 589 int error; 590 uint64_t *baseptr, limit, addr, mask, lobits, bar; 591 uint16_t cmd, enbit; 592 593 assert(idx >= 0 && idx <= PCI_BARMAX); 594 595 if ((size & (size - 1)) != 0) 596 size = 1UL << flsl(size); /* round up to a power of 2 */ 597 598 /* Enforce minimum BAR sizes required by the PCI standard */ 599 if (type == PCIBAR_IO) { 600 if (size < 4) 601 size = 4; 602 } else { 603 if (size < 16) 604 size = 16; 605 } 606 607 switch (type) { 608 case PCIBAR_NONE: 609 baseptr = NULL; 610 addr = mask = lobits = enbit = 0; 611 break; 612 case PCIBAR_IO: 613 baseptr = &pci_emul_iobase; 614 limit = PCI_EMUL_IOLIMIT; 615 mask = PCIM_BAR_IO_BASE; 616 lobits = PCIM_BAR_IO_SPACE; 617 enbit = PCIM_CMD_PORTEN; 618 break; 619 case PCIBAR_MEM64: 620 /* 621 * XXX 622 * Some drivers do not work well if the 64-bit BAR is allocated 623 * above 4GB. Allow for this by allocating small requests under 624 * 4GB unless then allocation size is larger than some arbitrary 625 * number (32MB currently). 626 */ 627 if (size > 32 * 1024 * 1024) { 628 /* 629 * XXX special case for device requiring peer-peer DMA 630 */ 631 if (size == 0x100000000UL) 632 baseptr = &hostbase; 633 else 634 baseptr = &pci_emul_membase64; 635 limit = PCI_EMUL_MEMLIMIT64; 636 mask = PCIM_BAR_MEM_BASE; 637 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 638 PCIM_BAR_MEM_PREFETCH; 639 } else { 640 baseptr = &pci_emul_membase32; 641 limit = PCI_EMUL_MEMLIMIT32; 642 mask = PCIM_BAR_MEM_BASE; 643 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 644 } 645 enbit = PCIM_CMD_MEMEN; 646 break; 647 case PCIBAR_MEM32: 648 baseptr = &pci_emul_membase32; 649 limit = PCI_EMUL_MEMLIMIT32; 650 mask = PCIM_BAR_MEM_BASE; 651 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 652 enbit = PCIM_CMD_MEMEN; 653 break; 654 default: 655 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 656 assert(0); 657 } 658 659 if (baseptr != NULL) { 660 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 661 if (error != 0) 662 return (error); 663 } 664 665 pdi->pi_bar[idx].type = type; 666 pdi->pi_bar[idx].addr = addr; 667 pdi->pi_bar[idx].size = size; 668 669 /* Initialize the BAR register in config space */ 670 bar = (addr & mask) | lobits; 671 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 672 673 if (type == PCIBAR_MEM64) { 674 assert(idx + 1 <= PCI_BARMAX); 675 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 676 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 677 } 678 679 cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); 680 if ((cmd & enbit) != enbit) 681 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); 682 register_bar(pdi, idx); 683 684 return (0); 685 } 686 687 #define CAP_START_OFFSET 0x40 688 static int 689 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 690 { 691 int i, capoff, reallen; 692 uint16_t sts; 693 694 assert(caplen > 0); 695 696 reallen = roundup2(caplen, 4); /* dword aligned */ 697 698 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 699 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 700 capoff = CAP_START_OFFSET; 701 else 702 capoff = pi->pi_capend + 1; 703 704 /* Check if we have enough space */ 705 if (capoff + reallen > PCI_REGMAX + 1) 706 return (-1); 707 708 /* Set the previous capability pointer */ 709 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 710 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 711 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 712 } else 713 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 714 715 /* Copy the capability */ 716 for (i = 0; i < caplen; i++) 717 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 718 719 /* Set the next capability pointer */ 720 pci_set_cfgdata8(pi, capoff + 1, 0); 721 722 pi->pi_prevcap = capoff; 723 pi->pi_capend = capoff + reallen - 1; 724 return (0); 725 } 726 727 static struct pci_devemu * 728 pci_emul_finddev(char *name) 729 { 730 struct pci_devemu **pdpp, *pdp; 731 732 SET_FOREACH(pdpp, pci_devemu_set) { 733 pdp = *pdpp; 734 if (!strcmp(pdp->pe_emu, name)) { 735 return (pdp); 736 } 737 } 738 739 return (NULL); 740 } 741 742 static int 743 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 744 int func, struct funcinfo *fi) 745 { 746 struct pci_devinst *pdi; 747 int err; 748 749 pdi = calloc(1, sizeof(struct pci_devinst)); 750 751 pdi->pi_vmctx = ctx; 752 pdi->pi_bus = bus; 753 pdi->pi_slot = slot; 754 pdi->pi_func = func; 755 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 756 pdi->pi_lintr.pin = 0; 757 pdi->pi_lintr.state = IDLE; 758 pdi->pi_lintr.pirq_pin = 0; 759 pdi->pi_lintr.ioapic_irq = 0; 760 pdi->pi_d = pde; 761 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 762 763 /* Disable legacy interrupts */ 764 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 765 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 766 767 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); 768 769 err = (*pde->pe_init)(ctx, pdi, fi->fi_param); 770 if (err == 0) 771 fi->fi_devi = pdi; 772 else 773 free(pdi); 774 775 return (err); 776 } 777 778 void 779 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 780 { 781 int mmc; 782 783 /* Number of msi messages must be a power of 2 between 1 and 32 */ 784 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 785 mmc = ffs(msgnum) - 1; 786 787 bzero(msicap, sizeof(struct msicap)); 788 msicap->capid = PCIY_MSI; 789 msicap->nextptr = nextptr; 790 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 791 } 792 793 int 794 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 795 { 796 struct msicap msicap; 797 798 pci_populate_msicap(&msicap, msgnum, 0); 799 800 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 801 } 802 803 static void 804 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 805 uint32_t msix_tab_size) 806 { 807 808 assert(msix_tab_size % 4096 == 0); 809 810 bzero(msixcap, sizeof(struct msixcap)); 811 msixcap->capid = PCIY_MSIX; 812 813 /* 814 * Message Control Register, all fields set to 815 * zero except for the Table Size. 816 * Note: Table size N is encoded as N-1 817 */ 818 msixcap->msgctrl = msgnum - 1; 819 820 /* 821 * MSI-X BAR setup: 822 * - MSI-X table start at offset 0 823 * - PBA table starts at a 4K aligned offset after the MSI-X table 824 */ 825 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 826 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 827 } 828 829 static void 830 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 831 { 832 int i, table_size; 833 834 assert(table_entries > 0); 835 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 836 837 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 838 pi->pi_msix.table = calloc(1, table_size); 839 840 /* set mask bit of vector control register */ 841 for (i = 0; i < table_entries; i++) 842 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 843 } 844 845 int 846 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 847 { 848 uint32_t tab_size; 849 struct msixcap msixcap; 850 851 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 852 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 853 854 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 855 856 /* Align table size to nearest 4K */ 857 tab_size = roundup2(tab_size, 4096); 858 859 pi->pi_msix.table_bar = barnum; 860 pi->pi_msix.pba_bar = barnum; 861 pi->pi_msix.table_offset = 0; 862 pi->pi_msix.table_count = msgnum; 863 pi->pi_msix.pba_offset = tab_size; 864 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 865 866 pci_msix_table_init(pi, msgnum); 867 868 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 869 870 /* allocate memory for MSI-X Table and PBA */ 871 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 872 tab_size + pi->pi_msix.pba_size); 873 874 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 875 sizeof(msixcap))); 876 } 877 878 static void 879 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 880 int bytes, uint32_t val) 881 { 882 uint16_t msgctrl, rwmask; 883 int off; 884 885 off = offset - capoff; 886 /* Message Control Register */ 887 if (off == 2 && bytes == 2) { 888 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 889 msgctrl = pci_get_cfgdata16(pi, offset); 890 msgctrl &= ~rwmask; 891 msgctrl |= val & rwmask; 892 val = msgctrl; 893 894 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 895 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 896 pci_lintr_update(pi); 897 } 898 899 CFGWRITE(pi, offset, val, bytes); 900 } 901 902 static void 903 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 904 int bytes, uint32_t val) 905 { 906 uint16_t msgctrl, rwmask, msgdata, mme; 907 uint32_t addrlo; 908 909 /* 910 * If guest is writing to the message control register make sure 911 * we do not overwrite read-only fields. 912 */ 913 if ((offset - capoff) == 2 && bytes == 2) { 914 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 915 msgctrl = pci_get_cfgdata16(pi, offset); 916 msgctrl &= ~rwmask; 917 msgctrl |= val & rwmask; 918 val = msgctrl; 919 } 920 CFGWRITE(pi, offset, val, bytes); 921 922 msgctrl = pci_get_cfgdata16(pi, capoff + 2); 923 addrlo = pci_get_cfgdata32(pi, capoff + 4); 924 if (msgctrl & PCIM_MSICTRL_64BIT) 925 msgdata = pci_get_cfgdata16(pi, capoff + 12); 926 else 927 msgdata = pci_get_cfgdata16(pi, capoff + 8); 928 929 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 930 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 931 if (pi->pi_msi.enabled) { 932 pi->pi_msi.addr = addrlo; 933 pi->pi_msi.msg_data = msgdata; 934 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 935 } else { 936 pi->pi_msi.maxmsgnum = 0; 937 } 938 pci_lintr_update(pi); 939 } 940 941 void 942 pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 943 int bytes, uint32_t val) 944 { 945 946 /* XXX don't write to the readonly parts */ 947 CFGWRITE(pi, offset, val, bytes); 948 } 949 950 #define PCIECAP_VERSION 0x2 951 int 952 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 953 { 954 int err; 955 struct pciecap pciecap; 956 957 bzero(&pciecap, sizeof(pciecap)); 958 959 /* 960 * Use the integrated endpoint type for endpoints on a root complex bus. 961 * 962 * NB: bhyve currently only supports a single PCI bus that is the root 963 * complex bus, so all endpoints are integrated. 964 */ 965 if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0)) 966 type = PCIEM_TYPE_ROOT_INT_EP; 967 968 pciecap.capid = PCIY_EXPRESS; 969 pciecap.pcie_capabilities = PCIECAP_VERSION | type; 970 if (type != PCIEM_TYPE_ROOT_INT_EP) { 971 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 972 pciecap.link_status = 0x11; /* gen1, x1 */ 973 } 974 975 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 976 return (err); 977 } 978 979 /* 980 * This function assumes that 'coff' is in the capabilities region of the 981 * config space. A capoff parameter of zero will force a search for the 982 * offset and type. 983 */ 984 void 985 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, 986 uint8_t capoff, int capid) 987 { 988 uint8_t nextoff; 989 990 /* Do not allow un-aligned writes */ 991 if ((offset & (bytes - 1)) != 0) 992 return; 993 994 if (capoff == 0) { 995 /* Find the capability that we want to update */ 996 capoff = CAP_START_OFFSET; 997 while (1) { 998 nextoff = pci_get_cfgdata8(pi, capoff + 1); 999 if (nextoff == 0) 1000 break; 1001 if (offset >= capoff && offset < nextoff) 1002 break; 1003 1004 capoff = nextoff; 1005 } 1006 assert(offset >= capoff); 1007 capid = pci_get_cfgdata8(pi, capoff); 1008 } 1009 1010 /* 1011 * Capability ID and Next Capability Pointer are readonly. 1012 * However, some o/s's do 4-byte writes that include these. 1013 * For this case, trim the write back to 2 bytes and adjust 1014 * the data. 1015 */ 1016 if (offset == capoff || offset == capoff + 1) { 1017 if (offset == capoff && bytes == 4) { 1018 bytes = 2; 1019 offset += 2; 1020 val >>= 16; 1021 } else 1022 return; 1023 } 1024 1025 switch (capid) { 1026 case PCIY_MSI: 1027 msicap_cfgwrite(pi, capoff, offset, bytes, val); 1028 break; 1029 case PCIY_MSIX: 1030 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 1031 break; 1032 case PCIY_EXPRESS: 1033 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 1034 break; 1035 default: 1036 break; 1037 } 1038 } 1039 1040 static int 1041 pci_emul_iscap(struct pci_devinst *pi, int offset) 1042 { 1043 uint16_t sts; 1044 1045 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1046 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1047 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1048 return (1); 1049 } 1050 return (0); 1051 } 1052 1053 static int 1054 pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1055 int size, uint64_t *val, void *arg1, long arg2) 1056 { 1057 /* 1058 * Ignore writes; return 0xff's for reads. The mem read code 1059 * will take care of truncating to the correct size. 1060 */ 1061 if (dir == MEM_F_READ) { 1062 *val = 0xffffffffffffffff; 1063 } 1064 1065 return (0); 1066 } 1067 1068 static int 1069 pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1070 int bytes, uint64_t *val, void *arg1, long arg2) 1071 { 1072 int bus, slot, func, coff, in; 1073 1074 coff = addr & 0xfff; 1075 func = (addr >> 12) & 0x7; 1076 slot = (addr >> 15) & 0x1f; 1077 bus = (addr >> 20) & 0xff; 1078 in = (dir == MEM_F_READ); 1079 if (in) 1080 *val = ~0UL; 1081 pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); 1082 return (0); 1083 } 1084 1085 uint64_t 1086 pci_ecfg_base(void) 1087 { 1088 1089 return (PCI_EMUL_ECFG_BASE); 1090 } 1091 1092 #define BUSIO_ROUNDUP 32 1093 #define BUSMEM_ROUNDUP (1024 * 1024) 1094 1095 int 1096 init_pci(struct vmctx *ctx) 1097 { 1098 struct mem_range mr; 1099 struct pci_devemu *pde; 1100 struct businfo *bi; 1101 struct slotinfo *si; 1102 struct funcinfo *fi; 1103 size_t lowmem; 1104 int bus, slot, func; 1105 int error; 1106 1107 pci_emul_iobase = PCI_EMUL_IOBASE; 1108 pci_emul_membase32 = vm_get_lowmem_limit(ctx); 1109 pci_emul_membase64 = PCI_EMUL_MEMBASE64; 1110 1111 for (bus = 0; bus < MAXBUSES; bus++) { 1112 if ((bi = pci_businfo[bus]) == NULL) 1113 continue; 1114 /* 1115 * Keep track of the i/o and memory resources allocated to 1116 * this bus. 1117 */ 1118 bi->iobase = pci_emul_iobase; 1119 bi->membase32 = pci_emul_membase32; 1120 bi->membase64 = pci_emul_membase64; 1121 1122 for (slot = 0; slot < MAXSLOTS; slot++) { 1123 si = &bi->slotinfo[slot]; 1124 for (func = 0; func < MAXFUNCS; func++) { 1125 fi = &si->si_funcs[func]; 1126 if (fi->fi_name == NULL) 1127 continue; 1128 pde = pci_emul_finddev(fi->fi_name); 1129 assert(pde != NULL); 1130 error = pci_emul_init(ctx, pde, bus, slot, 1131 func, fi); 1132 if (error) 1133 return (error); 1134 } 1135 } 1136 1137 /* 1138 * Add some slop to the I/O and memory resources decoded by 1139 * this bus to give a guest some flexibility if it wants to 1140 * reprogram the BARs. 1141 */ 1142 pci_emul_iobase += BUSIO_ROUNDUP; 1143 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1144 bi->iolimit = pci_emul_iobase; 1145 1146 pci_emul_membase32 += BUSMEM_ROUNDUP; 1147 pci_emul_membase32 = roundup2(pci_emul_membase32, 1148 BUSMEM_ROUNDUP); 1149 bi->memlimit32 = pci_emul_membase32; 1150 1151 pci_emul_membase64 += BUSMEM_ROUNDUP; 1152 pci_emul_membase64 = roundup2(pci_emul_membase64, 1153 BUSMEM_ROUNDUP); 1154 bi->memlimit64 = pci_emul_membase64; 1155 } 1156 1157 /* 1158 * PCI backends are initialized before routing INTx interrupts 1159 * so that LPC devices are able to reserve ISA IRQs before 1160 * routing PIRQ pins. 1161 */ 1162 for (bus = 0; bus < MAXBUSES; bus++) { 1163 if ((bi = pci_businfo[bus]) == NULL) 1164 continue; 1165 1166 for (slot = 0; slot < MAXSLOTS; slot++) { 1167 si = &bi->slotinfo[slot]; 1168 for (func = 0; func < MAXFUNCS; func++) { 1169 fi = &si->si_funcs[func]; 1170 if (fi->fi_devi == NULL) 1171 continue; 1172 pci_lintr_route(fi->fi_devi); 1173 } 1174 } 1175 } 1176 lpc_pirq_routed(); 1177 1178 /* 1179 * The guest physical memory map looks like the following: 1180 * [0, lowmem) guest system memory 1181 * [lowmem, lowmem_limit) memory hole (may be absent) 1182 * [lowmem_limit, 0xE0000000) PCI hole (32-bit BAR allocation) 1183 * [0xE0000000, 0xF0000000) PCI extended config window 1184 * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware 1185 * [4GB, 4GB + highmem) 1186 */ 1187 1188 /* 1189 * Accesses to memory addresses that are not allocated to system 1190 * memory or PCI devices return 0xff's. 1191 */ 1192 lowmem = vm_get_lowmem_size(ctx); 1193 bzero(&mr, sizeof(struct mem_range)); 1194 mr.name = "PCI hole"; 1195 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1196 mr.base = lowmem; 1197 mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1198 mr.handler = pci_emul_fallback_handler; 1199 error = register_mem_fallback(&mr); 1200 assert(error == 0); 1201 1202 /* PCI extended config space */ 1203 bzero(&mr, sizeof(struct mem_range)); 1204 mr.name = "PCI ECFG"; 1205 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1206 mr.base = PCI_EMUL_ECFG_BASE; 1207 mr.size = PCI_EMUL_ECFG_SIZE; 1208 mr.handler = pci_emul_ecfg_handler; 1209 error = register_mem(&mr); 1210 assert(error == 0); 1211 1212 return (0); 1213 } 1214 1215 static void 1216 pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1217 void *arg) 1218 { 1219 1220 dsdt_line(" Package ()"); 1221 dsdt_line(" {"); 1222 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1223 dsdt_line(" 0x%02X,", pin - 1); 1224 dsdt_line(" Zero,"); 1225 dsdt_line(" 0x%X", ioapic_irq); 1226 dsdt_line(" },"); 1227 } 1228 1229 static void 1230 pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1231 void *arg) 1232 { 1233 char *name; 1234 1235 name = lpc_pirq_name(pirq_pin); 1236 if (name == NULL) 1237 return; 1238 dsdt_line(" Package ()"); 1239 dsdt_line(" {"); 1240 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1241 dsdt_line(" 0x%02X,", pin - 1); 1242 dsdt_line(" %s,", name); 1243 dsdt_line(" 0x00"); 1244 dsdt_line(" },"); 1245 free(name); 1246 } 1247 1248 /* 1249 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1250 * corresponding to each PCI bus. 1251 */ 1252 static void 1253 pci_bus_write_dsdt(int bus) 1254 { 1255 struct businfo *bi; 1256 struct slotinfo *si; 1257 struct pci_devinst *pi; 1258 int count, func, slot; 1259 1260 /* 1261 * If there are no devices on this 'bus' then just return. 1262 */ 1263 if ((bi = pci_businfo[bus]) == NULL) { 1264 /* 1265 * Bus 0 is special because it decodes the I/O ports used 1266 * for PCI config space access even if there are no devices 1267 * on it. 1268 */ 1269 if (bus != 0) 1270 return; 1271 } 1272 1273 dsdt_line(" Device (PC%02X)", bus); 1274 dsdt_line(" {"); 1275 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1276 1277 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1278 dsdt_line(" {"); 1279 dsdt_line(" Return (0x%08X)", bus); 1280 dsdt_line(" }"); 1281 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1282 dsdt_line(" {"); 1283 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1284 "MaxFixed, PosDecode,"); 1285 dsdt_line(" 0x0000, // Granularity"); 1286 dsdt_line(" 0x%04X, // Range Minimum", bus); 1287 dsdt_line(" 0x%04X, // Range Maximum", bus); 1288 dsdt_line(" 0x0000, // Translation Offset"); 1289 dsdt_line(" 0x0001, // Length"); 1290 dsdt_line(" ,, )"); 1291 1292 if (bus == 0) { 1293 dsdt_indent(3); 1294 dsdt_fixed_ioport(0xCF8, 8); 1295 dsdt_unindent(3); 1296 1297 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1298 "PosDecode, EntireRange,"); 1299 dsdt_line(" 0x0000, // Granularity"); 1300 dsdt_line(" 0x0000, // Range Minimum"); 1301 dsdt_line(" 0x0CF7, // Range Maximum"); 1302 dsdt_line(" 0x0000, // Translation Offset"); 1303 dsdt_line(" 0x0CF8, // Length"); 1304 dsdt_line(" ,, , TypeStatic)"); 1305 1306 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1307 "PosDecode, EntireRange,"); 1308 dsdt_line(" 0x0000, // Granularity"); 1309 dsdt_line(" 0x0D00, // Range Minimum"); 1310 dsdt_line(" 0x%04X, // Range Maximum", 1311 PCI_EMUL_IOBASE - 1); 1312 dsdt_line(" 0x0000, // Translation Offset"); 1313 dsdt_line(" 0x%04X, // Length", 1314 PCI_EMUL_IOBASE - 0x0D00); 1315 dsdt_line(" ,, , TypeStatic)"); 1316 1317 if (bi == NULL) { 1318 dsdt_line(" })"); 1319 goto done; 1320 } 1321 } 1322 assert(bi != NULL); 1323 1324 /* i/o window */ 1325 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1326 "PosDecode, EntireRange,"); 1327 dsdt_line(" 0x0000, // Granularity"); 1328 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1329 dsdt_line(" 0x%04X, // Range Maximum", 1330 bi->iolimit - 1); 1331 dsdt_line(" 0x0000, // Translation Offset"); 1332 dsdt_line(" 0x%04X, // Length", 1333 bi->iolimit - bi->iobase); 1334 dsdt_line(" ,, , TypeStatic)"); 1335 1336 /* mmio window (32-bit) */ 1337 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1338 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1339 dsdt_line(" 0x00000000, // Granularity"); 1340 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1341 dsdt_line(" 0x%08X, // Range Maximum\n", 1342 bi->memlimit32 - 1); 1343 dsdt_line(" 0x00000000, // Translation Offset"); 1344 dsdt_line(" 0x%08X, // Length\n", 1345 bi->memlimit32 - bi->membase32); 1346 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1347 1348 /* mmio window (64-bit) */ 1349 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1350 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1351 dsdt_line(" 0x0000000000000000, // Granularity"); 1352 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1353 dsdt_line(" 0x%016lX, // Range Maximum\n", 1354 bi->memlimit64 - 1); 1355 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1356 dsdt_line(" 0x%016lX, // Length\n", 1357 bi->memlimit64 - bi->membase64); 1358 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1359 dsdt_line(" })"); 1360 1361 count = pci_count_lintr(bus); 1362 if (count != 0) { 1363 dsdt_indent(2); 1364 dsdt_line("Name (PPRT, Package ()"); 1365 dsdt_line("{"); 1366 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1367 dsdt_line("})"); 1368 dsdt_line("Name (APRT, Package ()"); 1369 dsdt_line("{"); 1370 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1371 dsdt_line("})"); 1372 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1373 dsdt_line("{"); 1374 dsdt_line(" If (PICM)"); 1375 dsdt_line(" {"); 1376 dsdt_line(" Return (APRT)"); 1377 dsdt_line(" }"); 1378 dsdt_line(" Else"); 1379 dsdt_line(" {"); 1380 dsdt_line(" Return (PPRT)"); 1381 dsdt_line(" }"); 1382 dsdt_line("}"); 1383 dsdt_unindent(2); 1384 } 1385 1386 dsdt_indent(2); 1387 for (slot = 0; slot < MAXSLOTS; slot++) { 1388 si = &bi->slotinfo[slot]; 1389 for (func = 0; func < MAXFUNCS; func++) { 1390 pi = si->si_funcs[func].fi_devi; 1391 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1392 pi->pi_d->pe_write_dsdt(pi); 1393 } 1394 } 1395 dsdt_unindent(2); 1396 done: 1397 dsdt_line(" }"); 1398 } 1399 1400 void 1401 pci_write_dsdt(void) 1402 { 1403 int bus; 1404 1405 dsdt_indent(1); 1406 dsdt_line("Name (PICM, 0x00)"); 1407 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1408 dsdt_line("{"); 1409 dsdt_line(" Store (Arg0, PICM)"); 1410 dsdt_line("}"); 1411 dsdt_line(""); 1412 dsdt_line("Scope (_SB)"); 1413 dsdt_line("{"); 1414 for (bus = 0; bus < MAXBUSES; bus++) 1415 pci_bus_write_dsdt(bus); 1416 dsdt_line("}"); 1417 dsdt_unindent(1); 1418 } 1419 1420 int 1421 pci_bus_configured(int bus) 1422 { 1423 assert(bus >= 0 && bus < MAXBUSES); 1424 return (pci_businfo[bus] != NULL); 1425 } 1426 1427 int 1428 pci_msi_enabled(struct pci_devinst *pi) 1429 { 1430 return (pi->pi_msi.enabled); 1431 } 1432 1433 int 1434 pci_msi_maxmsgnum(struct pci_devinst *pi) 1435 { 1436 if (pi->pi_msi.enabled) 1437 return (pi->pi_msi.maxmsgnum); 1438 else 1439 return (0); 1440 } 1441 1442 int 1443 pci_msix_enabled(struct pci_devinst *pi) 1444 { 1445 1446 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1447 } 1448 1449 void 1450 pci_generate_msix(struct pci_devinst *pi, int index) 1451 { 1452 struct msix_table_entry *mte; 1453 1454 if (!pci_msix_enabled(pi)) 1455 return; 1456 1457 if (pi->pi_msix.function_mask) 1458 return; 1459 1460 if (index >= pi->pi_msix.table_count) 1461 return; 1462 1463 mte = &pi->pi_msix.table[index]; 1464 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1465 /* XXX Set PBA bit if interrupt is disabled */ 1466 vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); 1467 } 1468 } 1469 1470 void 1471 pci_generate_msi(struct pci_devinst *pi, int index) 1472 { 1473 1474 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1475 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1476 pi->pi_msi.msg_data + index); 1477 } 1478 } 1479 1480 static bool 1481 pci_lintr_permitted(struct pci_devinst *pi) 1482 { 1483 uint16_t cmd; 1484 1485 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1486 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1487 (cmd & PCIM_CMD_INTxDIS))); 1488 } 1489 1490 void 1491 pci_lintr_request(struct pci_devinst *pi) 1492 { 1493 struct businfo *bi; 1494 struct slotinfo *si; 1495 int bestpin, bestcount, pin; 1496 1497 bi = pci_businfo[pi->pi_bus]; 1498 assert(bi != NULL); 1499 1500 /* 1501 * Just allocate a pin from our slot. The pin will be 1502 * assigned IRQs later when interrupts are routed. 1503 */ 1504 si = &bi->slotinfo[pi->pi_slot]; 1505 bestpin = 0; 1506 bestcount = si->si_intpins[0].ii_count; 1507 for (pin = 1; pin < 4; pin++) { 1508 if (si->si_intpins[pin].ii_count < bestcount) { 1509 bestpin = pin; 1510 bestcount = si->si_intpins[pin].ii_count; 1511 } 1512 } 1513 1514 si->si_intpins[bestpin].ii_count++; 1515 pi->pi_lintr.pin = bestpin + 1; 1516 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1517 } 1518 1519 static void 1520 pci_lintr_route(struct pci_devinst *pi) 1521 { 1522 struct businfo *bi; 1523 struct intxinfo *ii; 1524 1525 if (pi->pi_lintr.pin == 0) 1526 return; 1527 1528 bi = pci_businfo[pi->pi_bus]; 1529 assert(bi != NULL); 1530 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1531 1532 /* 1533 * Attempt to allocate an I/O APIC pin for this intpin if one 1534 * is not yet assigned. 1535 */ 1536 if (ii->ii_ioapic_irq == 0) 1537 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); 1538 assert(ii->ii_ioapic_irq > 0); 1539 1540 /* 1541 * Attempt to allocate a PIRQ pin for this intpin if one is 1542 * not yet assigned. 1543 */ 1544 if (ii->ii_pirq_pin == 0) 1545 ii->ii_pirq_pin = pirq_alloc_pin(pi); 1546 assert(ii->ii_pirq_pin > 0); 1547 1548 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1549 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1550 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1551 } 1552 1553 void 1554 pci_lintr_assert(struct pci_devinst *pi) 1555 { 1556 1557 assert(pi->pi_lintr.pin > 0); 1558 1559 pthread_mutex_lock(&pi->pi_lintr.lock); 1560 if (pi->pi_lintr.state == IDLE) { 1561 if (pci_lintr_permitted(pi)) { 1562 pi->pi_lintr.state = ASSERTED; 1563 pci_irq_assert(pi); 1564 } else 1565 pi->pi_lintr.state = PENDING; 1566 } 1567 pthread_mutex_unlock(&pi->pi_lintr.lock); 1568 } 1569 1570 void 1571 pci_lintr_deassert(struct pci_devinst *pi) 1572 { 1573 1574 assert(pi->pi_lintr.pin > 0); 1575 1576 pthread_mutex_lock(&pi->pi_lintr.lock); 1577 if (pi->pi_lintr.state == ASSERTED) { 1578 pi->pi_lintr.state = IDLE; 1579 pci_irq_deassert(pi); 1580 } else if (pi->pi_lintr.state == PENDING) 1581 pi->pi_lintr.state = IDLE; 1582 pthread_mutex_unlock(&pi->pi_lintr.lock); 1583 } 1584 1585 static void 1586 pci_lintr_update(struct pci_devinst *pi) 1587 { 1588 1589 pthread_mutex_lock(&pi->pi_lintr.lock); 1590 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1591 pci_irq_deassert(pi); 1592 pi->pi_lintr.state = PENDING; 1593 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1594 pi->pi_lintr.state = ASSERTED; 1595 pci_irq_assert(pi); 1596 } 1597 pthread_mutex_unlock(&pi->pi_lintr.lock); 1598 } 1599 1600 int 1601 pci_count_lintr(int bus) 1602 { 1603 int count, slot, pin; 1604 struct slotinfo *slotinfo; 1605 1606 count = 0; 1607 if (pci_businfo[bus] != NULL) { 1608 for (slot = 0; slot < MAXSLOTS; slot++) { 1609 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1610 for (pin = 0; pin < 4; pin++) { 1611 if (slotinfo->si_intpins[pin].ii_count != 0) 1612 count++; 1613 } 1614 } 1615 } 1616 return (count); 1617 } 1618 1619 void 1620 pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) 1621 { 1622 struct businfo *bi; 1623 struct slotinfo *si; 1624 struct intxinfo *ii; 1625 int slot, pin; 1626 1627 if ((bi = pci_businfo[bus]) == NULL) 1628 return; 1629 1630 for (slot = 0; slot < MAXSLOTS; slot++) { 1631 si = &bi->slotinfo[slot]; 1632 for (pin = 0; pin < 4; pin++) { 1633 ii = &si->si_intpins[pin]; 1634 if (ii->ii_count != 0) 1635 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 1636 ii->ii_ioapic_irq, arg); 1637 } 1638 } 1639 } 1640 1641 /* 1642 * Return 1 if the emulated device in 'slot' is a multi-function device. 1643 * Return 0 otherwise. 1644 */ 1645 static int 1646 pci_emul_is_mfdev(int bus, int slot) 1647 { 1648 struct businfo *bi; 1649 struct slotinfo *si; 1650 int f, numfuncs; 1651 1652 numfuncs = 0; 1653 if ((bi = pci_businfo[bus]) != NULL) { 1654 si = &bi->slotinfo[slot]; 1655 for (f = 0; f < MAXFUNCS; f++) { 1656 if (si->si_funcs[f].fi_devi != NULL) { 1657 numfuncs++; 1658 } 1659 } 1660 } 1661 return (numfuncs > 1); 1662 } 1663 1664 /* 1665 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1666 * whether or not is a multi-function being emulated in the pci 'slot'. 1667 */ 1668 static void 1669 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1670 { 1671 int mfdev; 1672 1673 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1674 mfdev = pci_emul_is_mfdev(bus, slot); 1675 switch (bytes) { 1676 case 1: 1677 case 2: 1678 *rv &= ~PCIM_MFDEV; 1679 if (mfdev) { 1680 *rv |= PCIM_MFDEV; 1681 } 1682 break; 1683 case 4: 1684 *rv &= ~(PCIM_MFDEV << 16); 1685 if (mfdev) { 1686 *rv |= (PCIM_MFDEV << 16); 1687 } 1688 break; 1689 } 1690 } 1691 } 1692 1693 /* 1694 * Update device state in response to changes to the PCI command 1695 * register. 1696 */ 1697 void 1698 pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) 1699 { 1700 int i; 1701 uint16_t changed, new; 1702 1703 new = pci_get_cfgdata16(pi, PCIR_COMMAND); 1704 changed = old ^ new; 1705 1706 /* 1707 * If the MMIO or I/O address space decoding has changed then 1708 * register/unregister all BARs that decode that address space. 1709 */ 1710 for (i = 0; i <= PCI_BARMAX; i++) { 1711 switch (pi->pi_bar[i].type) { 1712 case PCIBAR_NONE: 1713 case PCIBAR_MEMHI64: 1714 break; 1715 case PCIBAR_IO: 1716 /* I/O address space decoding changed? */ 1717 if (changed & PCIM_CMD_PORTEN) { 1718 if (new & PCIM_CMD_PORTEN) 1719 register_bar(pi, i); 1720 else 1721 unregister_bar(pi, i); 1722 } 1723 break; 1724 case PCIBAR_MEM32: 1725 case PCIBAR_MEM64: 1726 /* MMIO address space decoding changed? */ 1727 if (changed & PCIM_CMD_MEMEN) { 1728 if (new & PCIM_CMD_MEMEN) 1729 register_bar(pi, i); 1730 else 1731 unregister_bar(pi, i); 1732 } 1733 break; 1734 default: 1735 assert(0); 1736 } 1737 } 1738 1739 /* 1740 * If INTx has been unmasked and is pending, assert the 1741 * interrupt. 1742 */ 1743 pci_lintr_update(pi); 1744 } 1745 1746 static void 1747 pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) 1748 { 1749 int rshift; 1750 uint32_t cmd, old, readonly; 1751 1752 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 1753 1754 /* 1755 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. 1756 * 1757 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are 1758 * 'write 1 to clear'. However these bits are not set to '1' by 1759 * any device emulation so it is simpler to treat them as readonly. 1760 */ 1761 rshift = (coff & 0x3) * 8; 1762 readonly = 0xFFFFF880 >> rshift; 1763 1764 old = CFGREAD(pi, coff, bytes); 1765 new &= ~readonly; 1766 new |= (old & readonly); 1767 CFGWRITE(pi, coff, new, bytes); /* update config */ 1768 1769 pci_emul_cmd_changed(pi, cmd); 1770 } 1771 1772 static void 1773 pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, 1774 int coff, int bytes, uint32_t *eax) 1775 { 1776 struct businfo *bi; 1777 struct slotinfo *si; 1778 struct pci_devinst *pi; 1779 struct pci_devemu *pe; 1780 int idx, needcfg; 1781 uint64_t addr, bar, mask; 1782 1783 if ((bi = pci_businfo[bus]) != NULL) { 1784 si = &bi->slotinfo[slot]; 1785 pi = si->si_funcs[func].fi_devi; 1786 } else 1787 pi = NULL; 1788 1789 /* 1790 * Just return if there is no device at this slot:func or if the 1791 * the guest is doing an un-aligned access. 1792 */ 1793 if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || 1794 (coff & (bytes - 1)) != 0) { 1795 if (in) 1796 *eax = 0xffffffff; 1797 return; 1798 } 1799 1800 /* 1801 * Ignore all writes beyond the standard config space and return all 1802 * ones on reads. 1803 */ 1804 if (coff >= PCI_REGMAX + 1) { 1805 if (in) { 1806 *eax = 0xffffffff; 1807 /* 1808 * Extended capabilities begin at offset 256 in config 1809 * space. Absence of extended capabilities is signaled 1810 * with all 0s in the extended capability header at 1811 * offset 256. 1812 */ 1813 if (coff <= PCI_REGMAX + 4) 1814 *eax = 0x00000000; 1815 } 1816 return; 1817 } 1818 1819 pe = pi->pi_d; 1820 1821 /* 1822 * Config read 1823 */ 1824 if (in) { 1825 /* Let the device emulation override the default handler */ 1826 if (pe->pe_cfgread != NULL) { 1827 needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, 1828 eax); 1829 } else { 1830 needcfg = 1; 1831 } 1832 1833 if (needcfg) 1834 *eax = CFGREAD(pi, coff, bytes); 1835 1836 pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); 1837 } else { 1838 /* Let the device emulation override the default handler */ 1839 if (pe->pe_cfgwrite != NULL && 1840 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 1841 return; 1842 1843 /* 1844 * Special handling for write to BAR registers 1845 */ 1846 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { 1847 /* 1848 * Ignore writes to BAR registers that are not 1849 * 4-byte aligned. 1850 */ 1851 if (bytes != 4 || (coff & 0x3) != 0) 1852 return; 1853 idx = (coff - PCIR_BAR(0)) / 4; 1854 mask = ~(pi->pi_bar[idx].size - 1); 1855 switch (pi->pi_bar[idx].type) { 1856 case PCIBAR_NONE: 1857 pi->pi_bar[idx].addr = bar = 0; 1858 break; 1859 case PCIBAR_IO: 1860 addr = *eax & mask; 1861 addr &= 0xffff; 1862 bar = addr | PCIM_BAR_IO_SPACE; 1863 /* 1864 * Register the new BAR value for interception 1865 */ 1866 if (addr != pi->pi_bar[idx].addr) { 1867 update_bar_address(pi, addr, idx, 1868 PCIBAR_IO); 1869 } 1870 break; 1871 case PCIBAR_MEM32: 1872 addr = bar = *eax & mask; 1873 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 1874 if (addr != pi->pi_bar[idx].addr) { 1875 update_bar_address(pi, addr, idx, 1876 PCIBAR_MEM32); 1877 } 1878 break; 1879 case PCIBAR_MEM64: 1880 addr = bar = *eax & mask; 1881 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 1882 PCIM_BAR_MEM_PREFETCH; 1883 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 1884 update_bar_address(pi, addr, idx, 1885 PCIBAR_MEM64); 1886 } 1887 break; 1888 case PCIBAR_MEMHI64: 1889 mask = ~(pi->pi_bar[idx - 1].size - 1); 1890 addr = ((uint64_t)*eax << 32) & mask; 1891 bar = addr >> 32; 1892 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 1893 update_bar_address(pi, addr, idx - 1, 1894 PCIBAR_MEMHI64); 1895 } 1896 break; 1897 default: 1898 assert(0); 1899 } 1900 pci_set_cfgdata32(pi, coff, bar); 1901 1902 } else if (pci_emul_iscap(pi, coff)) { 1903 pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); 1904 } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { 1905 pci_emul_cmdsts_write(pi, coff, *eax, bytes); 1906 } else { 1907 CFGWRITE(pi, coff, *eax, bytes); 1908 } 1909 } 1910 } 1911 1912 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 1913 1914 static int 1915 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1916 uint32_t *eax, void *arg) 1917 { 1918 uint32_t x; 1919 1920 if (bytes != 4) { 1921 if (in) 1922 *eax = (bytes == 2) ? 0xffff : 0xff; 1923 return (0); 1924 } 1925 1926 if (in) { 1927 x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; 1928 if (cfgenable) 1929 x |= CONF1_ENABLE; 1930 *eax = x; 1931 } else { 1932 x = *eax; 1933 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 1934 cfgoff = x & PCI_REGMAX; 1935 cfgfunc = (x >> 8) & PCI_FUNCMAX; 1936 cfgslot = (x >> 11) & PCI_SLOTMAX; 1937 cfgbus = (x >> 16) & PCI_BUSMAX; 1938 } 1939 1940 return (0); 1941 } 1942 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 1943 1944 static int 1945 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1946 uint32_t *eax, void *arg) 1947 { 1948 int coff; 1949 1950 assert(bytes == 1 || bytes == 2 || bytes == 4); 1951 1952 coff = cfgoff + (port - CONF1_DATA_PORT); 1953 if (cfgenable) { 1954 pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, 1955 eax); 1956 } else { 1957 /* Ignore accesses to cfgdata if not enabled by cfgaddr */ 1958 if (in) 1959 *eax = 0xffffffff; 1960 } 1961 return (0); 1962 } 1963 1964 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 1965 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 1966 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 1967 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 1968 1969 #ifdef BHYVE_SNAPSHOT 1970 /* 1971 * Saves/restores PCI device emulated state. Returns 0 on success. 1972 */ 1973 static int 1974 pci_snapshot_pci_dev(struct vm_snapshot_meta *meta) 1975 { 1976 struct pci_devinst *pi; 1977 int i; 1978 int ret; 1979 1980 pi = meta->dev_data; 1981 1982 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done); 1983 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done); 1984 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done); 1985 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done); 1986 1987 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done); 1988 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done); 1989 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done); 1990 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done); 1991 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done); 1992 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done); 1993 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done); 1994 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done); 1995 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_page_offset, meta, ret, done); 1996 1997 SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata), 1998 meta, ret, done); 1999 2000 for (i = 0; i < nitems(pi->pi_bar); i++) { 2001 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done); 2002 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done); 2003 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done); 2004 } 2005 2006 /* Restore MSI-X table. */ 2007 for (i = 0; i < pi->pi_msix.table_count; i++) { 2008 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr, 2009 meta, ret, done); 2010 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data, 2011 meta, ret, done); 2012 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control, 2013 meta, ret, done); 2014 } 2015 2016 done: 2017 return (ret); 2018 } 2019 2020 static int 2021 pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde, 2022 struct pci_devinst **pdi) 2023 { 2024 struct businfo *bi; 2025 struct slotinfo *si; 2026 struct funcinfo *fi; 2027 int bus, slot, func; 2028 2029 assert(dev_name != NULL); 2030 assert(pde != NULL); 2031 assert(pdi != NULL); 2032 2033 for (bus = 0; bus < MAXBUSES; bus++) { 2034 if ((bi = pci_businfo[bus]) == NULL) 2035 continue; 2036 2037 for (slot = 0; slot < MAXSLOTS; slot++) { 2038 si = &bi->slotinfo[slot]; 2039 for (func = 0; func < MAXFUNCS; func++) { 2040 fi = &si->si_funcs[func]; 2041 if (fi->fi_name == NULL) 2042 continue; 2043 if (strcmp(dev_name, fi->fi_name)) 2044 continue; 2045 2046 *pde = pci_emul_finddev(fi->fi_name); 2047 assert(*pde != NULL); 2048 2049 *pdi = fi->fi_devi; 2050 return (0); 2051 } 2052 } 2053 } 2054 2055 return (EINVAL); 2056 } 2057 2058 int 2059 pci_snapshot(struct vm_snapshot_meta *meta) 2060 { 2061 struct pci_devemu *pde; 2062 struct pci_devinst *pdi; 2063 int ret; 2064 2065 assert(meta->dev_name != NULL); 2066 2067 ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi); 2068 if (ret != 0) { 2069 fprintf(stderr, "%s: no such name: %s\r\n", 2070 __func__, meta->dev_name); 2071 memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); 2072 return (0); 2073 } 2074 2075 meta->dev_data = pdi; 2076 2077 if (pde->pe_snapshot == NULL) { 2078 fprintf(stderr, "%s: not implemented yet for: %s\r\n", 2079 __func__, meta->dev_name); 2080 return (-1); 2081 } 2082 2083 ret = pci_snapshot_pci_dev(meta); 2084 if (ret != 0) { 2085 fprintf(stderr, "%s: failed to snapshot pci dev\r\n", 2086 __func__); 2087 return (-1); 2088 } 2089 2090 ret = (*pde->pe_snapshot)(meta); 2091 2092 return (ret); 2093 } 2094 2095 int 2096 pci_pause(struct vmctx *ctx, const char *dev_name) 2097 { 2098 struct pci_devemu *pde; 2099 struct pci_devinst *pdi; 2100 int ret; 2101 2102 assert(dev_name != NULL); 2103 2104 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2105 if (ret != 0) { 2106 /* 2107 * It is possible to call this function without 2108 * checking that the device is inserted first. 2109 */ 2110 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2111 return (0); 2112 } 2113 2114 if (pde->pe_pause == NULL) { 2115 /* The pause/resume functionality is optional. */ 2116 fprintf(stderr, "%s: not implemented for: %s\n", 2117 __func__, dev_name); 2118 return (0); 2119 } 2120 2121 return (*pde->pe_pause)(ctx, pdi); 2122 } 2123 2124 int 2125 pci_resume(struct vmctx *ctx, const char *dev_name) 2126 { 2127 struct pci_devemu *pde; 2128 struct pci_devinst *pdi; 2129 int ret; 2130 2131 assert(dev_name != NULL); 2132 2133 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2134 if (ret != 0) { 2135 /* 2136 * It is possible to call this function without 2137 * checking that the device is inserted first. 2138 */ 2139 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2140 return (0); 2141 } 2142 2143 if (pde->pe_resume == NULL) { 2144 /* The pause/resume functionality is optional. */ 2145 fprintf(stderr, "%s: not implemented for: %s\n", 2146 __func__, dev_name); 2147 return (0); 2148 } 2149 2150 return (*pde->pe_resume)(ctx, pdi); 2151 } 2152 #endif 2153 2154 #define PCI_EMUL_TEST 2155 #ifdef PCI_EMUL_TEST 2156 /* 2157 * Define a dummy test device 2158 */ 2159 #define DIOSZ 8 2160 #define DMEMSZ 4096 2161 struct pci_emul_dsoftc { 2162 uint8_t ioregs[DIOSZ]; 2163 uint8_t memregs[2][DMEMSZ]; 2164 }; 2165 2166 #define PCI_EMUL_MSI_MSGS 4 2167 #define PCI_EMUL_MSIX_MSGS 16 2168 2169 static int 2170 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 2171 { 2172 int error; 2173 struct pci_emul_dsoftc *sc; 2174 2175 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 2176 2177 pi->pi_arg = sc; 2178 2179 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 2180 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 2181 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 2182 2183 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 2184 assert(error == 0); 2185 2186 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 2187 assert(error == 0); 2188 2189 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 2190 assert(error == 0); 2191 2192 error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); 2193 assert(error == 0); 2194 2195 return (0); 2196 } 2197 2198 static void 2199 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2200 uint64_t offset, int size, uint64_t value) 2201 { 2202 int i; 2203 struct pci_emul_dsoftc *sc = pi->pi_arg; 2204 2205 if (baridx == 0) { 2206 if (offset + size > DIOSZ) { 2207 printf("diow: iow too large, offset %ld size %d\n", 2208 offset, size); 2209 return; 2210 } 2211 2212 if (size == 1) { 2213 sc->ioregs[offset] = value & 0xff; 2214 } else if (size == 2) { 2215 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 2216 } else if (size == 4) { 2217 *(uint32_t *)&sc->ioregs[offset] = value; 2218 } else { 2219 printf("diow: iow unknown size %d\n", size); 2220 } 2221 2222 /* 2223 * Special magic value to generate an interrupt 2224 */ 2225 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 2226 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 2227 2228 if (value == 0xabcdef) { 2229 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 2230 pci_generate_msi(pi, i); 2231 } 2232 } 2233 2234 if (baridx == 1 || baridx == 2) { 2235 if (offset + size > DMEMSZ) { 2236 printf("diow: memw too large, offset %ld size %d\n", 2237 offset, size); 2238 return; 2239 } 2240 2241 i = baridx - 1; /* 'memregs' index */ 2242 2243 if (size == 1) { 2244 sc->memregs[i][offset] = value; 2245 } else if (size == 2) { 2246 *(uint16_t *)&sc->memregs[i][offset] = value; 2247 } else if (size == 4) { 2248 *(uint32_t *)&sc->memregs[i][offset] = value; 2249 } else if (size == 8) { 2250 *(uint64_t *)&sc->memregs[i][offset] = value; 2251 } else { 2252 printf("diow: memw unknown size %d\n", size); 2253 } 2254 2255 /* 2256 * magic interrupt ?? 2257 */ 2258 } 2259 2260 if (baridx > 2 || baridx < 0) { 2261 printf("diow: unknown bar idx %d\n", baridx); 2262 } 2263 } 2264 2265 static uint64_t 2266 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2267 uint64_t offset, int size) 2268 { 2269 struct pci_emul_dsoftc *sc = pi->pi_arg; 2270 uint32_t value; 2271 int i; 2272 2273 if (baridx == 0) { 2274 if (offset + size > DIOSZ) { 2275 printf("dior: ior too large, offset %ld size %d\n", 2276 offset, size); 2277 return (0); 2278 } 2279 2280 value = 0; 2281 if (size == 1) { 2282 value = sc->ioregs[offset]; 2283 } else if (size == 2) { 2284 value = *(uint16_t *) &sc->ioregs[offset]; 2285 } else if (size == 4) { 2286 value = *(uint32_t *) &sc->ioregs[offset]; 2287 } else { 2288 printf("dior: ior unknown size %d\n", size); 2289 } 2290 } 2291 2292 if (baridx == 1 || baridx == 2) { 2293 if (offset + size > DMEMSZ) { 2294 printf("dior: memr too large, offset %ld size %d\n", 2295 offset, size); 2296 return (0); 2297 } 2298 2299 i = baridx - 1; /* 'memregs' index */ 2300 2301 if (size == 1) { 2302 value = sc->memregs[i][offset]; 2303 } else if (size == 2) { 2304 value = *(uint16_t *) &sc->memregs[i][offset]; 2305 } else if (size == 4) { 2306 value = *(uint32_t *) &sc->memregs[i][offset]; 2307 } else if (size == 8) { 2308 value = *(uint64_t *) &sc->memregs[i][offset]; 2309 } else { 2310 printf("dior: ior unknown size %d\n", size); 2311 } 2312 } 2313 2314 2315 if (baridx > 2 || baridx < 0) { 2316 printf("dior: unknown bar idx %d\n", baridx); 2317 return (0); 2318 } 2319 2320 return (value); 2321 } 2322 2323 #ifdef BHYVE_SNAPSHOT 2324 int 2325 pci_emul_snapshot(struct vm_snapshot_meta *meta) 2326 { 2327 2328 return (0); 2329 } 2330 #endif 2331 2332 struct pci_devemu pci_dummy = { 2333 .pe_emu = "dummy", 2334 .pe_init = pci_emul_dinit, 2335 .pe_barwrite = pci_emul_diow, 2336 .pe_barread = pci_emul_dior, 2337 #ifdef BHYVE_SNAPSHOT 2338 .pe_snapshot = pci_emul_snapshot, 2339 #endif 2340 }; 2341 PCI_EMUL_SET(pci_dummy); 2342 2343 #endif /* PCI_EMUL_TEST */ 2344