1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>

#include <ctype.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <stdbool.h>

#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <vmmapi.h>

#include "acpi.h"
#include "bhyverun.h"
#include "debug.h"
#include "inout.h"
#include "ioapic.h"
#include "mem.h"
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"

/* Legacy PCI configuration-mechanism #1 i/o ports. */
#define	CONF1_ADDR_PORT	   0x0cf8
#define	CONF1_DATA_PORT	   0x0cfc

#define	CONF1_ENABLE	   0x80000000ul

#define	MAXBUSES	(PCI_BUSMAX + 1)
#define	MAXSLOTS	(PCI_SLOTMAX + 1)
#define	MAXFUNCS	(PCI_FUNCMAX + 1)

/* Per-function emulation state, filled in from the -s slot options. */
struct funcinfo {
	char	*fi_name;	/* device emulation name, e.g. "virtio-net" */
	char	*fi_param;	/* optional device configuration string */
	struct pci_devinst *fi_devi;	/* set once the device is initialized */
};

/* Legacy INTx routing state for one slot interrupt pin (INTA#..INTD#). */
struct intxinfo {
	int	ii_count;	/* number of functions using this pin */
	int	ii_pirq_pin;
	int	ii_ioapic_irq;
};

struct slotinfo {
	struct intxinfo si_intpins[4];
	struct funcinfo si_funcs[MAXFUNCS];
};

/* Resource windows decoded by one emulated PCI bus. */
struct businfo {
	uint16_t iobase, iolimit;	/* I/O window */
	uint32_t membase32, memlimit32;	/* mmio window below 4GB */
	uint64_t membase64, memlimit64;	/* mmio window above 4GB */
	struct slotinfo slotinfo[MAXSLOTS];
};

static struct businfo *pci_businfo[MAXBUSES];

SET_DECLARE(pci_devemu_set, struct pci_devemu);

/* Next free address in each BAR allocation window. */
static uint64_t pci_emul_iobase;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;

#define	PCI_EMUL_IOBASE		0x2000
#define	PCI_EMUL_IOLIMIT	0x10000

#define	PCI_EMUL_ECFG_BASE	0xE0000000		  /* 3.5GB */
#define	PCI_EMUL_ECFG_SIZE	(MAXBUSES * 1024 * 1024)  /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);

/* 32-bit BAR allocation ends where the extended config window begins. */
#define	PCI_EMUL_MEMLIMIT32	PCI_EMUL_ECFG_BASE

#define	PCI_EMUL_MEMBASE64	0xD000000000UL
#define	PCI_EMUL_MEMLIMIT64	0xFD00000000UL

static struct pci_devemu *pci_emul_finddev(char *name);
static void pci_lintr_route(struct pci_devinst *pi);
static void pci_lintr_update(struct pci_devinst *pi);
static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot,
    int func, int coff, int bytes, uint32_t *val);

/*
 * Store 'val' into config space register 'coff' using an access width
 * of 'bytes' (1, 2 or any other value is treated as 4).
 */
static __inline void
CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
{

	if (bytes == 1)
		pci_set_cfgdata8(pi, coff, val);
	else if (bytes == 2)
		pci_set_cfgdata16(pi, coff, val);
	else
		pci_set_cfgdata32(pi, coff, val);
}

/* Read a 1, 2 or 4 byte value from config space register 'coff'. */
static __inline uint32_t
CFGREAD(struct pci_devinst *pi, int coff, int bytes)
{

	if (bytes == 1)
		return (pci_get_cfgdata8(pi, coff));
	else if (bytes == 2)
		return (pci_get_cfgdata16(pi, coff));
	else
		return (pci_get_cfgdata32(pi, coff));
}

/*
 * I/O access
 */

/*
 * Slot options are in the form:
 *
 * <bus>:<slot>:<func>,<emul>[,<config>]
 * <slot>[:<func>],<emul>[,<config>]
 *
 * slot is 0..31
 * func is 0..7
 * emul is a string describing the type of PCI device e.g. virtio-net
 * config is an optional string, depending on the device, that can be
 * used for configuration.
159 * Examples are: 160 * 1,virtio-net,tap0 161 * 3:0,dummy 162 */ 163 static void 164 pci_parse_slot_usage(char *aopt) 165 { 166 167 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 168 } 169 170 int 171 pci_parse_slot(char *opt) 172 { 173 struct businfo *bi; 174 struct slotinfo *si; 175 char *emul, *config, *str, *cp; 176 int error, bnum, snum, fnum; 177 178 error = -1; 179 str = strdup(opt); 180 181 emul = config = NULL; 182 if ((cp = strchr(str, ',')) != NULL) { 183 *cp = '\0'; 184 emul = cp + 1; 185 if ((cp = strchr(emul, ',')) != NULL) { 186 *cp = '\0'; 187 config = cp + 1; 188 } 189 } else { 190 pci_parse_slot_usage(opt); 191 goto done; 192 } 193 194 /* <bus>:<slot>:<func> */ 195 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 196 bnum = 0; 197 /* <slot>:<func> */ 198 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 199 fnum = 0; 200 /* <slot> */ 201 if (sscanf(str, "%d", &snum) != 1) { 202 snum = -1; 203 } 204 } 205 } 206 207 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 208 fnum < 0 || fnum >= MAXFUNCS) { 209 pci_parse_slot_usage(opt); 210 goto done; 211 } 212 213 if (pci_businfo[bnum] == NULL) 214 pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); 215 216 bi = pci_businfo[bnum]; 217 si = &bi->slotinfo[snum]; 218 219 if (si->si_funcs[fnum].fi_name != NULL) { 220 EPRINTLN("pci slot %d:%d already occupied!", 221 snum, fnum); 222 goto done; 223 } 224 225 if (pci_emul_finddev(emul) == NULL) { 226 EPRINTLN("pci slot %d:%d: unknown device \"%s\"", 227 snum, fnum, emul); 228 goto done; 229 } 230 231 error = 0; 232 si->si_funcs[fnum].fi_name = emul; 233 si->si_funcs[fnum].fi_param = config; 234 235 done: 236 if (error) 237 free(str); 238 239 return (error); 240 } 241 242 void 243 pci_print_supported_devices() 244 { 245 struct pci_devemu **pdpp, *pdp; 246 247 SET_FOREACH(pdpp, pci_devemu_set) { 248 pdp = *pdpp; 249 printf("%s\n", pdp->pe_emu); 250 } 251 } 252 253 static int 254 pci_valid_pba_offset(struct pci_devinst *pi, 
uint64_t offset) 255 { 256 257 if (offset < pi->pi_msix.pba_offset) 258 return (0); 259 260 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 261 return (0); 262 } 263 264 return (1); 265 } 266 267 int 268 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 269 uint64_t value) 270 { 271 int msix_entry_offset; 272 int tab_index; 273 char *dest; 274 275 /* support only 4 or 8 byte writes */ 276 if (size != 4 && size != 8) 277 return (-1); 278 279 /* 280 * Return if table index is beyond what device supports 281 */ 282 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 283 if (tab_index >= pi->pi_msix.table_count) 284 return (-1); 285 286 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 287 288 /* support only aligned writes */ 289 if ((msix_entry_offset % size) != 0) 290 return (-1); 291 292 dest = (char *)(pi->pi_msix.table + tab_index); 293 dest += msix_entry_offset; 294 295 if (size == 4) 296 *((uint32_t *)dest) = value; 297 else 298 *((uint64_t *)dest) = value; 299 300 return (0); 301 } 302 303 uint64_t 304 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 305 { 306 char *dest; 307 int msix_entry_offset; 308 int tab_index; 309 uint64_t retval = ~0; 310 311 /* 312 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 313 * table but we also allow 1 byte access to accommodate reads from 314 * ddb. 
	 */
	if (size != 1 && size != 4 && size != 8)
		return (retval);

	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

	/* support only aligned reads */
	if ((msix_entry_offset % size) != 0) {
		return (retval);
	}

	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;

	if (tab_index < pi->pi_msix.table_count) {
		/* valid MSI-X Table access */
		dest = (char *)(pi->pi_msix.table + tab_index);
		dest += msix_entry_offset;

		if (size == 1)
			retval = *((uint8_t *)dest);
		else if (size == 4)
			retval = *((uint32_t *)dest);
		else
			retval = *((uint64_t *)dest);
	} else if (pci_valid_pba_offset(pi, offset)) {
		/* return 0 for PBA access */
		retval = 0;
	}

	return (retval);
}

/* Return the BAR index holding the MSI-X table, or -1 if none. */
int
pci_msix_table_bar(struct pci_devinst *pi)
{

	if (pi->pi_msix.table != NULL)
		return (pi->pi_msix.table_bar);
	else
		return (-1);
}

/* Return the BAR index holding the MSI-X PBA, or -1 if none. */
int
pci_msix_pba_bar(struct pci_devinst *pi)
{

	if (pi->pi_msix.table != NULL)
		return (pi->pi_msix.pba_bar);
	else
		return (-1);
}

/*
 * I/O port handler shared by all i/o BARs of a device: find the BAR
 * that decodes 'port' and forward the access to the device model.
 * Returns -1 if no BAR claims the port range.
 */
static int
pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	struct pci_devinst *pdi = arg;
	struct pci_devemu *pe = pdi->pi_d;
	uint64_t offset;
	int i;

	for (i = 0; i <= PCI_BARMAX; i++) {
		if (pdi->pi_bar[i].type == PCIBAR_IO &&
		    port >= pdi->pi_bar[i].addr &&
		    port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
			offset = port - pdi->pi_bar[i].addr;
			if (in)
				*eax = (*pe->pe_barread)(ctx, vcpu, pdi, i,
				    offset, bytes);
			else
				(*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset,
				    bytes, *eax);
			return (0);
		}
	}
	return (-1);
}

/*
 * Memory handler for MEM32/MEM64 BARs.  'arg2' carries the BAR index.
 * 8-byte accesses are split into two 4-byte device-model calls since
 * the pe_barread/pe_barwrite interface is at most 4 bytes wide.
 */
static int
pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
	struct pci_devinst *pdi = arg1;
	struct pci_devemu *pe = pdi->pi_d;
	uint64_t offset;
	int bidx = (int) arg2;

	assert(bidx <= PCI_BARMAX);
	assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
	    pdi->pi_bar[bidx].type == PCIBAR_MEM64);
	assert(addr >= pdi->pi_bar[bidx].addr &&
	    addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);

	offset = addr - pdi->pi_bar[bidx].addr;

	if (dir == MEM_F_WRITE) {
		if (size == 8) {
			/* low dword first, then high dword */
			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset,
			    4, *val & 0xffffffff);
			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4,
			    4, *val >> 32);
		} else {
			(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset,
			    size, *val);
		}
	} else {
		if (size == 8) {
			*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
			    offset, 4);
			*val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
			    offset + 4, 4) << 32;
		} else {
			*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx,
			    offset, size);
		}
	}

	return (0);
}


/*
 * Carve 'size' bytes (a power of 2) out of the window [*baseptr, limit)
 * with natural alignment.  On success the allocated address is returned
 * in '*addr' and '*baseptr' is advanced past the allocation; returns -1
 * if the window is exhausted.
 */
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
    uint64_t *addr)
{
	uint64_t base;

	assert((size & (size - 1)) == 0);	/* must be a power of 2 */

	base = roundup2(*baseptr, size);

	if (base + size <= limit) {
		*addr = base;
		*baseptr = base + size;
		return (0);
	} else
		return (-1);
}

/* Convenience wrapper for pci_emul_alloc_pbar() with no host base. */
int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
    uint64_t size)
{

	return (pci_emul_alloc_pbar(pdi, idx, 0, type, size));
}

/*
 * Register (or unregister) the MMIO or I/O region associated with the BAR
 * register 'idx' of an emulated pci device.
 */
static void
modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
{
	int error;
	struct inout_port iop;
	struct mem_range mr;

	switch (pi->pi_bar[idx].type) {
	case PCIBAR_IO:
		bzero(&iop, sizeof(struct inout_port));
		iop.name = pi->pi_name;
		iop.port = pi->pi_bar[idx].addr;
		iop.size = pi->pi_bar[idx].size;
		if (registration) {
			iop.flags = IOPORT_F_INOUT;
			iop.handler = pci_emul_io_handler;
			iop.arg = pi;
			error = register_inout(&iop);
		} else
			error = unregister_inout(&iop);
		break;
	case PCIBAR_MEM32:
	case PCIBAR_MEM64:
		bzero(&mr, sizeof(struct mem_range));
		mr.name = pi->pi_name;
		mr.base = pi->pi_bar[idx].addr;
		mr.size = pi->pi_bar[idx].size;
		if (registration) {
			mr.flags = MEM_F_RW;
			mr.handler = pci_emul_mem_handler;
			mr.arg1 = pi;
			mr.arg2 = idx;
			error = register_mem(&mr);
		} else
			error = unregister_mem(&mr);
		break;
	default:
		error = EINVAL;
		break;
	}
	assert(error == 0);
}

static void
unregister_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 0);
}

static void
register_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 1);
}

/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

	return (cmd & PCIM_CMD_PORTEN);
}

/* Are we decoding memory accesses for the emulated pci device? */
static int
memen(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

	return (cmd & PCIM_CMD_MEMEN);
}

/*
 * Update the MMIO or I/O address that is decoded by the BAR register.
 *
 * If the pci device has enabled the address space decoding then intercept
 * the address range decoded by the BAR register.
 */
static void
update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
{
	int decode;

	if (pi->pi_bar[idx].type == PCIBAR_IO)
		decode = porten(pi);
	else
		decode = memen(pi);

	if (decode)
		unregister_bar(pi, idx);

	switch (type) {
	case PCIBAR_IO:
	case PCIBAR_MEM32:
		pi->pi_bar[idx].addr = addr;
		break;
	case PCIBAR_MEM64:
		/* 'addr' supplies the new low 32 bits of the 64-bit BAR */
		pi->pi_bar[idx].addr &= ~0xffffffffUL;
		pi->pi_bar[idx].addr |= addr;
		break;
	case PCIBAR_MEMHI64:
		/* 'addr' supplies the new high 32 bits (pre-shifted) */
		pi->pi_bar[idx].addr &= 0xffffffff;
		pi->pi_bar[idx].addr |= addr;
		break;
	default:
		assert(0);
	}

	if (decode)
		register_bar(pi, idx);
}

/*
 * Allocate a BAR of the given type and size for 'pdi', initialize the
 * BAR register(s) in config space and register the decoded region.
 */
int
pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
    enum pcibar_type type, uint64_t size)
{
	int error;
	uint64_t *baseptr, limit, addr, mask, lobits, bar;
	uint16_t cmd, enbit;

	assert(idx >= 0 && idx <= PCI_BARMAX);

	if ((size & (size - 1)) != 0)
		size = 1UL << flsl(size);	/* round up to a power of 2 */

	/* Enforce minimum BAR sizes required by the PCI standard */
	if (type == PCIBAR_IO) {
		if (size < 4)
			size = 4;
	} else {
		if (size < 16)
			size = 16;
	}

	switch (type) {
	case PCIBAR_NONE:
		baseptr = NULL;
		addr = mask = lobits = enbit = 0;
		break;
	case PCIBAR_IO:
		baseptr = &pci_emul_iobase;
		limit = PCI_EMUL_IOLIMIT;
		mask = PCIM_BAR_IO_BASE;
		lobits = PCIM_BAR_IO_SPACE;
		enbit = PCIM_CMD_PORTEN;
		break;
	case PCIBAR_MEM64:
		/*
		 * XXX
		 * Some drivers do not work well if the 64-bit BAR is allocated
		 * above 4GB. Allow for this by allocating small requests under
		 * 4GB unless the allocation size is larger than some arbitrary
		 * number (32MB currently).
626 */ 627 if (size > 32 * 1024 * 1024) { 628 /* 629 * XXX special case for device requiring peer-peer DMA 630 */ 631 if (size == 0x100000000UL) 632 baseptr = &hostbase; 633 else 634 baseptr = &pci_emul_membase64; 635 limit = PCI_EMUL_MEMLIMIT64; 636 mask = PCIM_BAR_MEM_BASE; 637 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 638 PCIM_BAR_MEM_PREFETCH; 639 } else { 640 baseptr = &pci_emul_membase32; 641 limit = PCI_EMUL_MEMLIMIT32; 642 mask = PCIM_BAR_MEM_BASE; 643 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 644 } 645 enbit = PCIM_CMD_MEMEN; 646 break; 647 case PCIBAR_MEM32: 648 baseptr = &pci_emul_membase32; 649 limit = PCI_EMUL_MEMLIMIT32; 650 mask = PCIM_BAR_MEM_BASE; 651 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 652 enbit = PCIM_CMD_MEMEN; 653 break; 654 default: 655 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 656 assert(0); 657 } 658 659 if (baseptr != NULL) { 660 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 661 if (error != 0) 662 return (error); 663 } 664 665 pdi->pi_bar[idx].type = type; 666 pdi->pi_bar[idx].addr = addr; 667 pdi->pi_bar[idx].size = size; 668 669 /* Initialize the BAR register in config space */ 670 bar = (addr & mask) | lobits; 671 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 672 673 if (type == PCIBAR_MEM64) { 674 assert(idx + 1 <= PCI_BARMAX); 675 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 676 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 677 } 678 679 cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); 680 if ((cmd & enbit) != enbit) 681 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); 682 register_bar(pdi, idx); 683 684 return (0); 685 } 686 687 #define CAP_START_OFFSET 0x40 688 static int 689 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 690 { 691 int i, capoff, reallen; 692 uint16_t sts; 693 694 assert(caplen > 0); 695 696 reallen = roundup2(caplen, 4); /* dword aligned */ 697 698 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 699 if ((sts & PCIM_STATUS_CAPPRESENT) 
== 0) 700 capoff = CAP_START_OFFSET; 701 else 702 capoff = pi->pi_capend + 1; 703 704 /* Check if we have enough space */ 705 if (capoff + reallen > PCI_REGMAX + 1) 706 return (-1); 707 708 /* Set the previous capability pointer */ 709 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 710 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 711 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 712 } else 713 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 714 715 /* Copy the capability */ 716 for (i = 0; i < caplen; i++) 717 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 718 719 /* Set the next capability pointer */ 720 pci_set_cfgdata8(pi, capoff + 1, 0); 721 722 pi->pi_prevcap = capoff; 723 pi->pi_capend = capoff + reallen - 1; 724 return (0); 725 } 726 727 static struct pci_devemu * 728 pci_emul_finddev(char *name) 729 { 730 struct pci_devemu **pdpp, *pdp; 731 732 SET_FOREACH(pdpp, pci_devemu_set) { 733 pdp = *pdpp; 734 if (!strcmp(pdp->pe_emu, name)) { 735 return (pdp); 736 } 737 } 738 739 return (NULL); 740 } 741 742 static int 743 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 744 int func, struct funcinfo *fi) 745 { 746 struct pci_devinst *pdi; 747 int err; 748 749 pdi = calloc(1, sizeof(struct pci_devinst)); 750 751 pdi->pi_vmctx = ctx; 752 pdi->pi_bus = bus; 753 pdi->pi_slot = slot; 754 pdi->pi_func = func; 755 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 756 pdi->pi_lintr.pin = 0; 757 pdi->pi_lintr.state = IDLE; 758 pdi->pi_lintr.pirq_pin = 0; 759 pdi->pi_lintr.ioapic_irq = 0; 760 pdi->pi_d = pde; 761 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 762 763 /* Disable legacy interrupts */ 764 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 765 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 766 767 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); 768 769 err = (*pde->pe_init)(ctx, pdi, fi->fi_param); 770 if (err == 0) 771 fi->fi_devi = pdi; 772 else 773 free(pdi); 774 775 return (err); 776 } 777 778 void 779 
pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
{
	int mmc;

	/* Number of msi messages must be a power of 2 between 1 and 32 */
	assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
	mmc = ffs(msgnum) - 1;

	bzero(msicap, sizeof(struct msicap));
	msicap->capid = PCIY_MSI;
	msicap->nextptr = nextptr;
	/* advertise 64-bit address capability and the message count */
	msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
}

/* Add an MSI capability supporting 'msgnum' messages to 'pi'. */
int
pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
{
	struct msicap msicap;

	pci_populate_msicap(&msicap, msgnum, 0);

	return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
}

/* Fill in an MSI-X capability structure for 'msgnum' vectors in 'barnum'. */
static void
pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
    uint32_t msix_tab_size)
{

	assert(msix_tab_size % 4096 == 0);

	bzero(msixcap, sizeof(struct msixcap));
	msixcap->capid = PCIY_MSIX;

	/*
	 * Message Control Register, all fields set to
	 * zero except for the Table Size.
	 * Note: Table size N is encoded as N-1
	 */
	msixcap->msgctrl = msgnum - 1;

	/*
	 * MSI-X BAR setup:
	 * - MSI-X table start at offset 0
	 * - PBA table starts at a 4K aligned offset after the MSI-X table
	 */
	msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK;
	msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK);
}

/* Allocate the in-memory MSI-X table and mask every vector. */
static void
pci_msix_table_init(struct pci_devinst *pi, int table_entries)
{
	int i, table_size;

	assert(table_entries > 0);
	assert(table_entries <= MAX_MSIX_TABLE_ENTRIES);

	table_size = table_entries * MSIX_TABLE_ENTRY_SIZE;
	/* NOTE(review): calloc return is not checked; OOM would fault below */
	pi->pi_msix.table = calloc(1, table_size);

	/* set mask bit of vector control register */
	for (i = 0; i < table_entries; i++)
		pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK;
}

/*
 * Add an MSI-X capability with 'msgnum' vectors to 'pi', placing the
 * table and PBA in BAR 'barnum'.
 */
int
pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
{
	uint32_t tab_size;
	struct msixcap msixcap;

	assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES);
	assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0);

	tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE;

	/* Align table size to nearest 4K */
	tab_size = roundup2(tab_size, 4096);

	pi->pi_msix.table_bar = barnum;
	pi->pi_msix.pba_bar = barnum;
	pi->pi_msix.table_offset = 0;
	pi->pi_msix.table_count = msgnum;
	pi->pi_msix.pba_offset = tab_size;
	pi->pi_msix.pba_size = PBA_SIZE(msgnum);

	pci_msix_table_init(pi, msgnum);

	pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size);

	/* allocate memory for MSI-X Table and PBA */
	pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
	    tab_size + pi->pi_msix.pba_size);

	return (pci_emul_add_capability(pi, (u_char *)&msixcap,
	    sizeof(msixcap)));
}

/*
 * Guest config-space write into the MSI-X capability: filter the
 * Message Control register down to its writable bits and track the
 * enable/function-mask state.
 */
void
msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask;
	int off;

	off = offset - capoff;
	/* Message Control Register */
	if (off == 2 && bytes == 2) {
		rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;

		pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
		pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
		pci_lintr_update(pi);
	}

	CFGWRITE(pi, offset, val, bytes);
}

/*
 * Guest config-space write into the MSI capability: filter read-only
 * bits, then refresh the cached address/data/enable state from config
 * space.
 */
void
msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask, msgdata, mme;
	uint32_t addrlo;

	/*
	 * If guest is writing to the message control register make sure
	 * we do not overwrite read-only fields.
	 */
	if ((offset - capoff) == 2 && bytes == 2) {
		rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;
	}
	CFGWRITE(pi, offset, val, bytes);

	msgctrl = pci_get_cfgdata16(pi, capoff + 2);
	addrlo = pci_get_cfgdata32(pi, capoff + 4);
	/* message data location depends on 64-bit address capability */
	if (msgctrl & PCIM_MSICTRL_64BIT)
		msgdata = pci_get_cfgdata16(pi, capoff + 12);
	else
		msgdata = pci_get_cfgdata16(pi, capoff + 8);

	mme = msgctrl & PCIM_MSICTRL_MME_MASK;
	pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ?
	    1 : 0;
	if (pi->pi_msi.enabled) {
		pi->pi_msi.addr = addrlo;
		pi->pi_msi.msg_data = msgdata;
		pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
	} else {
		pi->pi_msi.maxmsgnum = 0;
	}
	pci_lintr_update(pi);
}

/* Guest config-space write into the PCI Express capability. */
void
pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{

	/* XXX don't write to the readonly parts */
	CFGWRITE(pi, offset, val, bytes);
}

#define	PCIECAP_VERSION	0x2
/* Add a PCI Express capability of the given device/port 'type' to 'pi'. */
int
pci_emul_add_pciecap(struct pci_devinst *pi, int type)
{
	int err;
	struct pciecap pciecap;

	bzero(&pciecap, sizeof(pciecap));

	/*
	 * Use the integrated endpoint type for endpoints on a root complex bus.
	 *
	 * NB: bhyve currently only supports a single PCI bus that is the root
	 * complex bus, so all endpoints are integrated.
	 */
	if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0))
		type = PCIEM_TYPE_ROOT_INT_EP;

	pciecap.capid = PCIY_EXPRESS;
	pciecap.pcie_capabilities = PCIECAP_VERSION | type;
	if (type != PCIEM_TYPE_ROOT_INT_EP) {
		pciecap.link_capabilities = 0x411;	/* gen1, x1 */
		pciecap.link_status = 0x11;		/* gen1, x1 */
	}

	err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap));
	return (err);
}

/*
 * This function assumes that 'coff' is in the capabilities region of the
 * config space.
 */
static void
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val)
{
	int capid;
	uint8_t capoff, nextoff;

	/* Do not allow un-aligned writes */
	if ((offset & (bytes - 1)) != 0)
		return;

	/* Find the capability that we want to update */
	capoff = CAP_START_OFFSET;
	while (1) {
		nextoff = pci_get_cfgdata8(pi, capoff + 1);
		if (nextoff == 0)
			break;
		if (offset >= capoff && offset < nextoff)
			break;

		capoff = nextoff;
	}
	assert(offset >= capoff);

	/*
	 * Capability ID and Next Capability Pointer are readonly.
	 * However, some o/s's do 4-byte writes that include these.
	 * For this case, trim the write back to 2 bytes and adjust
	 * the data.
	 */
	if (offset == capoff || offset == capoff + 1) {
		if (offset == capoff && bytes == 4) {
			bytes = 2;
			offset += 2;
			val >>= 16;
		} else
			return;
	}

	/* dispatch to the handler for this capability type */
	capid = pci_get_cfgdata8(pi, capoff);
	switch (capid) {
	case PCIY_MSI:
		msicap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_MSIX:
		msixcap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_EXPRESS:
		pciecap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	default:
		break;
	}
}

/* Does config-space offset 'offset' fall within the capability region? */
static int
pci_emul_iscap(struct pci_devinst *pi, int offset)
{
	uint16_t sts;

	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
		if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend)
			return (1);
	}
	return (0);
}

/* Catch-all handler for guest accesses to unclaimed physical memory. */
static int
pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
	/*
	 * Ignore writes; return 0xff's for reads. The mem read code
	 * will take care of truncating to the correct size.
	 */
	if (dir == MEM_F_READ) {
		*val = 0xffffffffffffffff;
	}

	return (0);
}

/*
 * Handler for the PCI extended config (ECAM) window: decode the access
 * address into bus/slot/func/offset and forward it to pci_cfgrw().
 */
static int
pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int bytes, uint64_t *val, void *arg1, long arg2)
{
	int bus, slot, func, coff, in;

	coff = addr & 0xfff;
	func = (addr >> 12) & 0x7;
	slot = (addr >> 15) & 0x1f;
	bus = (addr >> 20) & 0xff;
	in = (dir == MEM_F_READ);
	if (in)
		*val = ~0UL;
	/* NOTE(review): (uint32_t *) cast presumes a little-endian host */
	pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val);
	return (0);
}

uint64_t
pci_ecfg_base(void)
{

	return (PCI_EMUL_ECFG_BASE);
}

#define	BUSIO_ROUNDUP		32
#define	BUSMEM_ROUNDUP		(1024 * 1024)

/*
 * Initialize all configured PCI device emulations, route their INTx
 * interrupts and register the fallback and extended-config memory
 * ranges.  Returns 0 on success or the first device init error.
 */
int
init_pci(struct vmctx *ctx)
{
	struct mem_range mr;
	struct pci_devemu *pde;
	struct businfo *bi;
	struct slotinfo *si;
	struct funcinfo *fi;
	size_t lowmem;
	int bus, slot, func;
	int error;

	pci_emul_iobase = PCI_EMUL_IOBASE;
	pci_emul_membase32 = vm_get_lowmem_limit(ctx);
	pci_emul_membase64 = PCI_EMUL_MEMBASE64;

	for (bus = 0; bus < MAXBUSES; bus++) {
		if ((bi = pci_businfo[bus]) == NULL)
			continue;
		/*
		 * Keep track of the i/o and memory resources allocated to
		 * this bus.
		 */
		bi->iobase = pci_emul_iobase;
		bi->membase32 = pci_emul_membase32;
		bi->membase64 = pci_emul_membase64;

		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				if (fi->fi_name == NULL)
					continue;
				pde = pci_emul_finddev(fi->fi_name);
				assert(pde != NULL);
				error = pci_emul_init(ctx, pde, bus, slot,
				    func, fi);
				if (error)
					return (error);
			}
		}

		/*
		 * Add some slop to the I/O and memory resources decoded by
		 * this bus to give a guest some flexibility if it wants to
		 * reprogram the BARs.
		 */
		pci_emul_iobase += BUSIO_ROUNDUP;
		pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
		bi->iolimit = pci_emul_iobase;

		pci_emul_membase32 += BUSMEM_ROUNDUP;
		pci_emul_membase32 = roundup2(pci_emul_membase32,
		    BUSMEM_ROUNDUP);
		bi->memlimit32 = pci_emul_membase32;

		pci_emul_membase64 += BUSMEM_ROUNDUP;
		pci_emul_membase64 = roundup2(pci_emul_membase64,
		    BUSMEM_ROUNDUP);
		bi->memlimit64 = pci_emul_membase64;
	}

	/*
	 * PCI backends are initialized before routing INTx interrupts
	 * so that LPC devices are able to reserve ISA IRQs before
	 * routing PIRQ pins.
	 */
	for (bus = 0; bus < MAXBUSES; bus++) {
		if ((bi = pci_businfo[bus]) == NULL)
			continue;

		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				if (fi->fi_devi == NULL)
					continue;
				pci_lintr_route(fi->fi_devi);
			}
		}
	}
	lpc_pirq_routed();

	/*
	 * The guest physical memory map looks like the following:
	 * [0, lowmem)			guest system memory
	 * [lowmem, lowmem_limit)	memory hole (may be absent)
	 * [lowmem_limit, 0xE0000000)	PCI hole (32-bit BAR allocation)
	 * [0xE0000000, 0xF0000000)	PCI extended config window
	 * [0xF0000000, 4GB)		LAPIC, IOAPIC, HPET, firmware
	 * [4GB, 4GB + highmem)
	 */

	/*
	 * Accesses to memory addresses that are not allocated to system
	 * memory or PCI devices return 0xff's.
	 */
	lowmem = vm_get_lowmem_size(ctx);
	bzero(&mr, sizeof(struct mem_range));
	mr.name = "PCI hole";
	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
	mr.base = lowmem;
	mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem;
	mr.handler = pci_emul_fallback_handler;
	error = register_mem_fallback(&mr);
	assert(error == 0);

	/* PCI extended config space */
	bzero(&mr, sizeof(struct mem_range));
	mr.name = "PCI ECFG";
	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
	mr.base = PCI_EMUL_ECFG_BASE;
	mr.size = PCI_EMUL_ECFG_SIZE;
	mr.handler = pci_emul_ecfg_handler;
	error = register_mem(&mr);
	assert(error == 0);

	return (0);
}

/* Emit one ACPI _PRT entry routing a slot pin to an ioapic interrupt. */
static void
pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
    void *arg)
{

	dsdt_line(" Package ()");
	dsdt_line(" {");
	dsdt_line(" 0x%X,", slot << 16 | 0xffff);
	dsdt_line(" 0x%02X,", pin - 1);
	dsdt_line(" Zero,");
	dsdt_line(" 0x%X", ioapic_irq);
	dsdt_line(" },");
}

static void
pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq,
    void *arg)
{
	char *name;

	/* Emit one _PRT entry routing this intpin through a PIRQ link device. */
	name = lpc_pirq_name(pirq_pin);
	if (name == NULL)
		return;
	dsdt_line(" Package ()");
	dsdt_line(" {");
	dsdt_line(" 0x%X,", slot << 16 | 0xffff);
	dsdt_line(" 0x%02X,", pin - 1);
	dsdt_line(" %s,", name);
	dsdt_line(" 0x00");
	dsdt_line(" },");
	free(name);	/* lpc_pirq_name() returns an allocated string */
}

/*
 * A bhyve virtual machine has a flat PCI hierarchy with a root port
 * corresponding to each PCI bus.
 */
static void
pci_bus_write_dsdt(int bus)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	int count, func, slot;

	/*
	 * If there are no devices on this 'bus' then just return.
	 */
	if ((bi = pci_businfo[bus]) == NULL) {
		/*
		 * Bus 0 is special because it decodes the I/O ports used
		 * for PCI config space access even if there are no devices
		 * on it.
		 */
		if (bus != 0)
			return;
	}

	dsdt_line(" Device (PC%02X)", bus);
	dsdt_line(" {");
	dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))");

	dsdt_line(" Method (_BBN, 0, NotSerialized)");
	dsdt_line(" {");
	dsdt_line(" Return (0x%08X)", bus);
	dsdt_line(" }");
	dsdt_line(" Name (_CRS, ResourceTemplate ()");
	dsdt_line(" {");
	dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, "
	    "MaxFixed, PosDecode,");
	dsdt_line(" 0x0000, // Granularity");
	dsdt_line(" 0x%04X, // Range Minimum", bus);
	dsdt_line(" 0x%04X, // Range Maximum", bus);
	dsdt_line(" 0x0000, // Translation Offset");
	dsdt_line(" 0x0001, // Length");
	dsdt_line(" ,, )");

	if (bus == 0) {
		dsdt_indent(3);
		dsdt_fixed_ioport(0xCF8, 8);
		dsdt_unindent(3);

		dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line(" 0x0000, // Granularity");
		dsdt_line(" 0x0000, // Range Minimum");
		dsdt_line(" 0x0CF7, // Range Maximum");
		dsdt_line(" 0x0000, // Translation Offset");
		dsdt_line(" 0x0CF8, // Length");
		dsdt_line(" ,, , TypeStatic)");

		dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line(" 0x0000, // Granularity");
		dsdt_line(" 0x0D00, // Range Minimum");
		dsdt_line(" 0x%04X, // Range Maximum",
		    PCI_EMUL_IOBASE - 1);
		dsdt_line(" 0x0000, // Translation Offset");
		dsdt_line(" 0x%04X, // Length",
		    PCI_EMUL_IOBASE - 0x0D00);
		dsdt_line(" ,, , TypeStatic)");

		/* Bus 0 with no devices only decodes the config ports. */
		if (bi == NULL) {
			dsdt_line(" })");
			goto done;
		}
	}
	assert(bi != NULL);

	/* i/o window */
	dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
	    "PosDecode, EntireRange,");
	dsdt_line(" 0x0000, // Granularity");
	dsdt_line(" 0x%04X, // Range Minimum", bi->iobase);
	dsdt_line(" 0x%04X, // Range Maximum",
	    bi->iolimit - 1);
	dsdt_line(" 0x0000, // Translation Offset");
	dsdt_line(" 0x%04X, // Length",
	    bi->iolimit - bi->iobase);
	dsdt_line(" ,, , TypeStatic)");

	/* mmio window (32-bit) */
	dsdt_line(" DWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line(" 0x00000000, // Granularity");
	dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32);
	dsdt_line(" 0x%08X, // Range Maximum\n",
	    bi->memlimit32 - 1);
	dsdt_line(" 0x00000000, // Translation Offset");
	dsdt_line(" 0x%08X, // Length\n",
	    bi->memlimit32 - bi->membase32);
	dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");

	/* mmio window (64-bit) */
	dsdt_line(" QWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line(" 0x0000000000000000, // Granularity");
	dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64);
	dsdt_line(" 0x%016lX, // Range Maximum\n",
	    bi->memlimit64 - 1);
	dsdt_line(" 0x0000000000000000, // Translation Offset");
	dsdt_line(" 0x%016lX, // Length\n",
	    bi->memlimit64 - bi->membase64);
	dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");
	dsdt_line(" })");

	/*
	 * Emit both a PIRQ-based (PIC mode) and an I/O APIC-based routing
	 * table; _PRT selects between them via the PICM state set by _PIC.
	 */
	count = pci_count_lintr(bus);
	if (count != 0) {
		dsdt_indent(2);
		dsdt_line("Name (PPRT, Package ()");
		dsdt_line("{");
		pci_walk_lintr(bus, pci_pirq_prt_entry, NULL);
		dsdt_line("})");
		dsdt_line("Name (APRT, Package ()");
		dsdt_line("{");
		pci_walk_lintr(bus, pci_apic_prt_entry, NULL);
		dsdt_line("})");
		dsdt_line("Method (_PRT, 0, NotSerialized)");
		dsdt_line("{");
		dsdt_line(" If (PICM)");
		dsdt_line(" {");
		dsdt_line(" Return (APRT)");
		dsdt_line(" }");
		dsdt_line(" Else");
		dsdt_line(" {");
		dsdt_line(" Return (PPRT)");
		dsdt_line(" }");
		dsdt_line("}");
		dsdt_unindent(2);
	}

	/* Give each device emulation a chance to add its own DSDT entries. */
	dsdt_indent(2);
	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (func = 0; func < MAXFUNCS; func++) {
			pi = si->si_funcs[func].fi_devi;
			if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
				pi->pi_d->pe_write_dsdt(pi);
		}
	}
	dsdt_unindent(2);
done:
	dsdt_line(" }");
}

/*
 * Emit the PCI-related portion of the DSDT: the _PIC method plus one
 * root-port device per configured bus.
 */
void
pci_write_dsdt(void)
{
	int bus;

	dsdt_indent(1);
	dsdt_line("Name (PICM, 0x00)");
	dsdt_line("Method (_PIC, 1, NotSerialized)");
	dsdt_line("{");
	dsdt_line(" Store (Arg0, PICM)");
	dsdt_line("}");
	dsdt_line("");
	dsdt_line("Scope (_SB)");
	dsdt_line("{");
	for (bus = 0; bus < MAXBUSES; bus++)
		pci_bus_write_dsdt(bus);
	dsdt_line("}");
	dsdt_unindent(1);
}

/* Return non-zero if at least one device was configured on 'bus'. */
int
pci_bus_configured(int bus)
{
	assert(bus >= 0 && bus < MAXBUSES);
	return (pci_businfo[bus] != NULL);
}

int
pci_msi_enabled(struct pci_devinst *pi)
{
	return (pi->pi_msi.enabled);
}

/* Number of MSI messages the guest has enabled, or 0 if MSI is off. */
int
pci_msi_maxmsgnum(struct pci_devinst *pi)
{
	if (pi->pi_msi.enabled)
		return (pi->pi_msi.maxmsgnum);
	else
		return (0);
}

/* MSI takes precedence over MSI-X if the guest enables both. */
int
pci_msix_enabled(struct pci_devinst *pi)
{

	return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}

/*
 * Deliver MSI-X message 'index' for this device unless MSI-X is
 * disabled, the function is masked, or the entry itself is masked.
 */
void
pci_generate_msix(struct pci_devinst *pi, int index)
{
	struct msix_table_entry *mte;

	if (!pci_msix_enabled(pi))
		return;

	if (pi->pi_msix.function_mask)
		return;

	if (index >= pi->pi_msix.table_count)
		return;

	mte = &pi->pi_msix.table[index];
	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		/* XXX Set PBA bit if interrupt is disabled */
		vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
	}
}

/* Deliver MSI message 'index' if MSI is enabled and the index is valid. */
void
pci_generate_msi(struct pci_devinst *pi, int index)
{

	if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
		vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
		    pi->pi_msi.msg_data + index);
	}
}

/*
 * INTx may only be asserted when MSI and MSI-X are disabled and the
 * guest has not set the INTx-disable bit in the command register.
 */
static bool
pci_lintr_permitted(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
	return (!(pi->pi_msi.enabled || pi->pi_msix.enabled ||
	    (cmd & PCIM_CMD_INTxDIS)));
}

void
pci_lintr_request(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct slotinfo *si;
	int bestpin, bestcount, pin;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);

	/*
	 * Just allocate a pin from our slot. The pin will be
	 * assigned IRQs later when interrupts are routed.
	 */
	si = &bi->slotinfo[pi->pi_slot];
	/* Pick the least-used of the four INTA-INTD pins on this slot. */
	bestpin = 0;
	bestcount = si->si_intpins[0].ii_count;
	for (pin = 1; pin < 4; pin++) {
		if (si->si_intpins[pin].ii_count < bestcount) {
			bestpin = pin;
			bestcount = si->si_intpins[pin].ii_count;
		}
	}

	si->si_intpins[bestpin].ii_count++;
	pi->pi_lintr.pin = bestpin + 1;
	pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1);
}

/*
 * Bind a device's allocated intpin to an I/O APIC input and a PIRQ pin,
 * and publish the routed IRQ in the interrupt-line config register.
 */
static void
pci_lintr_route(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct intxinfo *ii;

	if (pi->pi_lintr.pin == 0)
		return;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);
	ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1];

	/*
	 * Attempt to allocate an I/O APIC pin for this intpin if one
	 * is not yet assigned.
	 */
	if (ii->ii_ioapic_irq == 0)
		ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi);
	assert(ii->ii_ioapic_irq > 0);

	/*
	 * Attempt to allocate a PIRQ pin for this intpin if one is
	 * not yet assigned.
	 */
	if (ii->ii_pirq_pin == 0)
		ii->ii_pirq_pin = pirq_alloc_pin(pi);
	assert(ii->ii_pirq_pin > 0);

	pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq;
	pi->pi_lintr.pirq_pin = ii->ii_pirq_pin;
	pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin));
}

/*
 * Assert this device's INTx pin; if INTx is currently not permitted
 * (MSI/MSI-X enabled or INTx disabled) the assertion is left pending.
 */
void
pci_lintr_assert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == IDLE) {
		if (pci_lintr_permitted(pi)) {
			pi->pi_lintr.state = ASSERTED;
			pci_irq_assert(pi);
		} else
			pi->pi_lintr.state = PENDING;
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/* Deassert this device's INTx pin; a pending assertion is dropped. */
void
pci_lintr_deassert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED) {
		pi->pi_lintr.state = IDLE;
		pci_irq_deassert(pi);
	} else if (pi->pi_lintr.state == PENDING)
		pi->pi_lintr.state = IDLE;
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/*
 * Re-evaluate the INTx state after a change in INTx permission:
 * demote an active assertion to pending, or fire a pending one.
 */
static void
pci_lintr_update(struct pci_devinst *pi)
{

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) {
		pci_irq_deassert(pi);
		pi->pi_lintr.state = PENDING;
	} else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) {
		pi->pi_lintr.state = ASSERTED;
		pci_irq_assert(pi);
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/* Count the slot intpins on 'bus' that have at least one user. */
int
pci_count_lintr(int bus)
{
	int count, slot, pin;
	struct slotinfo *slotinfo;

	count = 0;
	if (pci_businfo[bus] != NULL) {
		for (slot = 0; slot < MAXSLOTS; slot++) {
			slotinfo = &pci_businfo[bus]->slotinfo[slot];
			for (pin = 0; pin < 4; pin++) {
				if (slotinfo->si_intpins[pin].ii_count != 0)
					count++;
			}
		}
	}
	return (count);
}

/* Invoke 'cb' for every in-use intpin on 'bus' (pin is 1-based). */
void
pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct intxinfo *ii;
	int slot, pin;

	if ((bi = pci_businfo[bus]) == NULL)
		return;

	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (pin = 0; pin < 4; pin++) {
			ii = &si->si_intpins[pin];
			if (ii->ii_count != 0)
				cb(bus, slot, pin + 1, ii->ii_pirq_pin,
				    ii->ii_ioapic_irq, arg);
		}
	}
}

/*
 * Return 1 if the emulated device in 'slot' is a multi-function device.
 * Return 0 otherwise.
 */
static int
pci_emul_is_mfdev(int bus, int slot)
{
	struct businfo *bi;
	struct slotinfo *si;
	int f, numfuncs;

	numfuncs = 0;
	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		for (f = 0; f < MAXFUNCS; f++) {
			if (si->si_funcs[f].fi_devi != NULL) {
				numfuncs++;
			}
		}
	}
	return (numfuncs > 1);
}

/*
 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
 * whether or not is a multi-function being emulated in the pci 'slot'.
 */
static void
pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
{
	int mfdev;

	/* Only fix up reads whose byte range covers the header-type register. */
	if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
		mfdev = pci_emul_is_mfdev(bus, slot);
		switch (bytes) {
		case 1:
		case 2:
			*rv &= ~PCIM_MFDEV;
			if (mfdev) {
				*rv |= PCIM_MFDEV;
			}
			break;
		case 4:
			/* 4-byte read at offset 0xc: HDRTYPE sits in bits 16-23. */
			*rv &= ~(PCIM_MFDEV << 16);
			if (mfdev) {
				*rv |= (PCIM_MFDEV << 16);
			}
			break;
		}
	}
}

/*
 * Update device state in response to changes to the PCI command
 * register.
 */
void
pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old)
{
	int i;
	uint16_t changed, new;

	new = pci_get_cfgdata16(pi, PCIR_COMMAND);
	changed = old ^ new;

	/*
	 * If the MMIO or I/O address space decoding has changed then
	 * register/unregister all BARs that decode that address space.
	 */
	for (i = 0; i <= PCI_BARMAX; i++) {
		switch (pi->pi_bar[i].type) {
		case PCIBAR_NONE:
		case PCIBAR_MEMHI64:
			break;
		case PCIBAR_IO:
			/* I/O address space decoding changed? */
			if (changed & PCIM_CMD_PORTEN) {
				if (new & PCIM_CMD_PORTEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		case PCIBAR_MEM32:
		case PCIBAR_MEM64:
			/* MMIO address space decoding changed? */
			if (changed & PCIM_CMD_MEMEN) {
				if (new & PCIM_CMD_MEMEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		default:
			assert(0);
		}
	}

	/*
	 * If INTx has been unmasked and is pending, assert the
	 * interrupt.
	 */
	pci_lintr_update(pi);
}

/* Handle a guest write that touches the command/status registers. */
static void
pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
{
	int rshift;
	uint32_t cmd, old, readonly;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* stash old value */

	/*
	 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
	 *
	 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
	 * 'write 1 to clear'. However these bits are not set to '1' by
	 * any device emulation so it is simpler to treat them as readonly.
	 */
	rshift = (coff & 0x3) * 8;
	readonly = 0xFFFFF880 >> rshift;

	old = CFGREAD(pi, coff, bytes);
	new &= ~readonly;
	new |= (old & readonly);
	CFGWRITE(pi, coff, new, bytes);		/* update config */

	pci_emul_cmd_changed(pi, cmd);
}

/*
 * Common PCI config space read/write dispatcher used by both the
 * legacy 0xcf8/0xcfc mechanism and the memory-mapped ECFG window.
 */
static void
pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func,
    int coff, int bytes, uint32_t *eax)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	struct pci_devemu *pe;
	int idx, needcfg;
	uint64_t addr, bar, mask;

	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		pi = si->si_funcs[func].fi_devi;
	} else
		pi = NULL;

	/*
	 * Just return if there is no device at this slot:func or if the
	 * the guest is doing an un-aligned access.
	 */
	if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) ||
	    (coff & (bytes - 1)) != 0) {
		if (in)
			*eax = 0xffffffff;
		return;
	}

	/*
	 * Ignore all writes beyond the standard config space and return all
	 * ones on reads.
	 */
	if (coff >= PCI_REGMAX + 1) {
		if (in) {
			*eax = 0xffffffff;
			/*
			 * Extended capabilities begin at offset 256 in config
			 * space. Absence of extended capabilities is signaled
			 * with all 0s in the extended capability header at
			 * offset 256.
			 */
			if (coff <= PCI_REGMAX + 4)
				*eax = 0x00000000;
		}
		return;
	}

	pe = pi->pi_d;

	/*
	 * Config read
	 */
	if (in) {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgread != NULL) {
			needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes,
			    eax);
		} else {
			needcfg = 1;
		}

		if (needcfg)
			*eax = CFGREAD(pi, coff, bytes);

		pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax);
	} else {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgwrite != NULL &&
		    (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
			return;

		/*
		 * Special handling for write to BAR registers
		 */
		if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
			/*
			 * Ignore writes to BAR registers that are not
			 * 4-byte aligned.
			 */
			if (bytes != 4 || (coff & 0x3) != 0)
				return;
			idx = (coff - PCIR_BAR(0)) / 4;
			mask = ~(pi->pi_bar[idx].size - 1);
			switch (pi->pi_bar[idx].type) {
			case PCIBAR_NONE:
				pi->pi_bar[idx].addr = bar = 0;
				break;
			case PCIBAR_IO:
				addr = *eax & mask;
				addr &= 0xffff;
				bar = addr | PCIM_BAR_IO_SPACE;
				/*
				 * Register the new BAR value for interception
				 */
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_IO);
				}
				break;
			case PCIBAR_MEM32:
				addr = bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM32);
				}
				break;
			case PCIBAR_MEM64:
				/* This write updates only the low 32 bits. */
				addr = bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
				    PCIM_BAR_MEM_PREFETCH;
				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM64);
				}
				break;
			case PCIBAR_MEMHI64:
				/* High half of the previous 64-bit BAR. */
				mask = ~(pi->pi_bar[idx - 1].size - 1);
				addr = ((uint64_t)*eax << 32) & mask;
				bar = addr >> 32;
				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
					update_bar_address(pi, addr, idx - 1,
					    PCIBAR_MEMHI64);
				}
				break;
			default:
				assert(0);
			}
			pci_set_cfgdata32(pi, coff, bar);

		} else if (pci_emul_iscap(pi, coff)) {
			pci_emul_capwrite(pi, coff, bytes, *eax);
		} else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
			pci_emul_cmdsts_write(pi, coff, *eax, bytes);
		} else {
			CFGWRITE(pi, coff, *eax, bytes);
		}
	}
}

/* Latched state of the legacy config-address register (port 0xcf8). */
static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;

/* I/O handler for the legacy config-address port (0xcf8). */
static int
pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	uint32_t x;

	/* Sub-dword accesses to 0xcf8 are not config-address accesses. */
	if (bytes != 4) {
		if (in)
			*eax = (bytes == 2) ? 0xffff : 0xff;
		return (0);
	}

	if (in) {
		x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff;
		if (cfgenable)
			x |= CONF1_ENABLE;
		*eax = x;
	} else {
		x = *eax;
		cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
		cfgoff = x & PCI_REGMAX;
		cfgfunc = (x >> 8) & PCI_FUNCMAX;
		cfgslot = (x >> 11) & PCI_SLOTMAX;
		cfgbus = (x >> 16) & PCI_BUSMAX;
	}

	return (0);
}
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);

/* I/O handler for the legacy config-data ports (0xcfc-0xcff). */
static int
pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	int coff;

	assert(bytes == 1 || bytes == 2 || bytes == 4);

	coff = cfgoff + (port - CONF1_DATA_PORT);
	if (cfgenable) {
		pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes,
		    eax);
	} else {
		/* Ignore accesses to cfgdata if not enabled by cfgaddr */
		if (in)
			*eax = 0xffffffff;
	}
	return (0);
}

INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);

#ifdef BHYVE_SNAPSHOT
/*
 * Saves/restores PCI device emulated state. Returns 0 on success.
 */
static int
pci_snapshot_pci_dev(struct vm_snapshot_meta *meta)
{
	struct pci_devinst *pi;
	int i;
	int ret;

	pi = meta->dev_data;

	/* MSI state */
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done);

	/* MSI-X state */
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_page_offset, meta, ret, done);

	/* Raw config space image */
	SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata),
	    meta, ret, done);

	/* BAR layout */
	for (i = 0; i < nitems(pi->pi_bar); i++) {
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done);
	}

	/* Restore MSI-X table.
	 */
	for (i = 0; i < pi->pi_msix.table_count; i++) {
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control,
		    meta, ret, done);
	}

done:
	return (ret);
}

/*
 * Find the emulation and device instance of the configured device named
 * 'dev_name'. Returns 0 and fills '*pde'/'*pdi' on success, EINVAL if no
 * slot is configured with that emulation name.
 */
static int
pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde,
    struct pci_devinst **pdi)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct funcinfo *fi;
	int bus, slot, func;

	assert(dev_name != NULL);
	assert(pde != NULL);
	assert(pdi != NULL);

	for (bus = 0; bus < MAXBUSES; bus++) {
		if ((bi = pci_businfo[bus]) == NULL)
			continue;

		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				if (fi->fi_name == NULL)
					continue;
				if (strcmp(dev_name, fi->fi_name))
					continue;

				*pde = pci_emul_finddev(fi->fi_name);
				assert(*pde != NULL);

				*pdi = fi->fi_devi;
				return (0);
			}
		}
	}

	return (EINVAL);
}

/*
 * Snapshot the generic PCI state of the device named in 'meta' and then
 * invoke the emulation-specific snapshot callback.
 */
int
pci_snapshot(struct vm_snapshot_meta *meta)
{
	struct pci_devemu *pde;
	struct pci_devinst *pdi;
	int ret;

	assert(meta->dev_name != NULL);

	ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi);
	if (ret != 0) {
		/* Device not configured: emit an empty buffer, not an error. */
		fprintf(stderr, "%s: no such name: %s\r\n",
		    __func__, meta->dev_name);
		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
		return (0);
	}

	meta->dev_data = pdi;

	if (pde->pe_snapshot == NULL) {
		fprintf(stderr, "%s: not implemented yet for: %s\r\n",
		    __func__, meta->dev_name);
		return (-1);
	}

	ret = pci_snapshot_pci_dev(meta);
	if (ret != 0) {
		fprintf(stderr, "%s: failed to snapshot pci dev\r\n",
		    __func__);
		return (-1);
	}

	ret = (*pde->pe_snapshot)(meta);

	return (ret);
}

/* Pause the named device via its optional pe_pause callback. */
int
pci_pause(struct vmctx *ctx, const char *dev_name)
{
	struct pci_devemu *pde;
	struct pci_devinst *pdi;
	int ret;

	assert(dev_name != NULL);

	ret = pci_find_slotted_dev(dev_name, &pde, &pdi);
	if (ret != 0) {
		/*
		 * It is possible to call this function without
		 * checking that the device is inserted first.
		 */
		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
		return (0);
	}

	if (pde->pe_pause == NULL) {
		/* The pause/resume functionality is optional. */
		fprintf(stderr, "%s: not implemented for: %s\n",
		    __func__, dev_name);
		return (0);
	}

	return (*pde->pe_pause)(ctx, pdi);
}

/* Resume the named device via its optional pe_resume callback. */
int
pci_resume(struct vmctx *ctx, const char *dev_name)
{
	struct pci_devemu *pde;
	struct pci_devinst *pdi;
	int ret;

	assert(dev_name != NULL);

	ret = pci_find_slotted_dev(dev_name, &pde, &pdi);
	if (ret != 0) {
		/*
		 * It is possible to call this function without
		 * checking that the device is inserted first.
		 */
		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
		return (0);
	}

	if (pde->pe_resume == NULL) {
		/* The pause/resume functionality is optional.
		 */
		fprintf(stderr, "%s: not implemented for: %s\n",
		    __func__, dev_name);
		return (0);
	}

	return (*pde->pe_resume)(ctx, pdi);
}
#endif

#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
 * Define a dummy test device
 */
#define DIOSZ	8		/* size of the I/O BAR register file */
#define DMEMSZ	4096		/* size of each memory BAR region */
struct pci_emul_dsoftc {
	uint8_t ioregs[DIOSZ];
	uint8_t memregs[2][DMEMSZ];
};

#define PCI_EMUL_MSI_MSGS	4
#define PCI_EMUL_MSIX_MSGS	16

/* Instantiate the dummy device: one I/O BAR and two 32-bit memory BARs. */
static int
pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	int error;
	struct pci_emul_dsoftc *sc;

	sc = calloc(1, sizeof(struct pci_emul_dsoftc));

	pi->pi_arg = sc;

	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
	pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);

	error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ);
	assert(error == 0);

	return (0);
}

/* BAR write handler for the dummy device. */
static void
pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size, uint64_t value)
{
	int i;
	struct pci_emul_dsoftc *sc = pi->pi_arg;

	if (baridx == 0) {
		if (offset + size > DIOSZ) {
			printf("diow: iow too large, offset %ld size %d\n",
			    offset, size);
			return;
		}

		if (size == 1) {
			sc->ioregs[offset] = value & 0xff;
		} else if (size == 2) {
			*(uint16_t *)&sc->ioregs[offset] = value & 0xffff;
		} else if (size == 4) {
			*(uint32_t *)&sc->ioregs[offset] = value;
		} else {
			printf("diow: iow unknown size %d\n", size);
		}

		/*
		 * Special magic value to generate an interrupt
		 */
		if (offset == 4 && size == 4 && pci_msi_enabled(pi))
			pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));

		if (value == 0xabcdef) {
			for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
				pci_generate_msi(pi, i);
		}
	}

	if (baridx == 1 || baridx == 2) {
		if (offset + size > DMEMSZ) {
			printf("diow: memw too large, offset %ld size %d\n",
			    offset, size);
			return;
		}

		i = baridx - 1;		/* 'memregs' index */

		if (size == 1) {
			sc->memregs[i][offset] = value;
		} else if (size == 2) {
			*(uint16_t *)&sc->memregs[i][offset] = value;
		} else if (size == 4) {
			*(uint32_t *)&sc->memregs[i][offset] = value;
		} else if (size == 8) {
			*(uint64_t *)&sc->memregs[i][offset] = value;
		} else {
			printf("diow: memw unknown size %d\n", size);
		}

		/*
		 * magic interrupt ??
		 */
	}

	if (baridx > 2 || baridx < 0) {
		printf("diow: unknown bar idx %d\n", baridx);
	}
}

/* BAR read handler for the dummy device. */
static uint64_t
pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size)
{
	struct pci_emul_dsoftc *sc = pi->pi_arg;
	uint32_t value;
	int i;

	if (baridx == 0) {
		if (offset + size > DIOSZ) {
			printf("dior: ior too large, offset %ld size %d\n",
			    offset, size);
			return (0);
		}

		value = 0;
		if (size == 1) {
			value = sc->ioregs[offset];
		} else if (size == 2) {
			value = *(uint16_t *) &sc->ioregs[offset];
		} else if (size == 4) {
			value = *(uint32_t *) &sc->ioregs[offset];
		} else {
			printf("dior: ior unknown size %d\n", size);
		}
	}

	if (baridx == 1 || baridx == 2) {
		if (offset + size > DMEMSZ) {
			printf("dior: memr too large, offset %ld size %d\n",
			    offset, size);
			return (0);
		}

		i = baridx - 1;		/* 'memregs' index */

		if (size == 1) {
			value = sc->memregs[i][offset];
		} else if (size == 2) {
			value = *(uint16_t *) &sc->memregs[i][offset];
		} else if (size == 4) {
			value = *(uint32_t *) &sc->memregs[i][offset];
		} else if (size == 8) {
			value = *(uint64_t *) &sc->memregs[i][offset];
		} else {
			printf("dior: ior unknown size %d\n", size);
		}
	}


	if (baridx > 2 || baridx < 0) {
		printf("dior: unknown bar idx %d\n", baridx);
		return (0);
	}

	return (value);
}

#ifdef BHYVE_SNAPSHOT
/* The dummy device has no state beyond the generic PCI snapshot. */
int
pci_emul_snapshot(struct vm_snapshot_meta *meta)
{

	return (0);
}
#endif

struct pci_devemu pci_dummy = {
	.pe_emu = "dummy",
	.pe_init = pci_emul_dinit,
	.pe_barwrite = pci_emul_diow,
	.pe_barread = pci_emul_dior,
#ifdef BHYVE_SNAPSHOT
	.pe_snapshot = pci_emul_snapshot,
#endif
};
PCI_EMUL_SET(pci_dummy);

#endif /* PCI_EMUL_TEST */