1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/linker_set.h> 36 #include <vm/vm.h> 37 #include <vm/vm_param.h> 38 #include <vm/pmap.h> 39 40 #include <ctype.h> 41 #include <errno.h> 42 #include <pthread.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <string.h> 46 #include <strings.h> 47 #include <assert.h> 48 #include <stdbool.h> 49 50 #include <machine/vmm.h> 51 #include <machine/vmm_snapshot.h> 52 #include <machine/cpufunc.h> 53 #include <machine/specialreg.h> 54 #include <vmmapi.h> 55 56 #include "acpi.h" 57 #include "bhyverun.h" 58 #include "debug.h" 59 #include "inout.h" 60 #include "ioapic.h" 61 #include "mem.h" 62 #include "pci_emul.h" 63 #include "pci_irq.h" 64 #include "pci_lpc.h" 65 66 #define CONF1_ADDR_PORT 0x0cf8 67 #define CONF1_DATA_PORT 0x0cfc 68 69 #define CONF1_ENABLE 0x80000000ul 70 71 #define MAXBUSES (PCI_BUSMAX + 1) 72 #define MAXSLOTS (PCI_SLOTMAX + 1) 73 #define MAXFUNCS (PCI_FUNCMAX + 1) 74 75 struct funcinfo { 76 char *fi_name; 77 char *fi_param; 78 struct pci_devinst *fi_devi; 79 }; 80 81 struct intxinfo { 82 int ii_count; 83 int ii_pirq_pin; 84 int ii_ioapic_irq; 85 }; 86 87 struct slotinfo { 88 struct intxinfo si_intpins[4]; 89 struct funcinfo si_funcs[MAXFUNCS]; 90 }; 91 92 struct businfo { 93 uint16_t iobase, iolimit; /* I/O window */ 94 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 95 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 96 struct slotinfo slotinfo[MAXSLOTS]; 97 }; 98 99 static struct businfo *pci_businfo[MAXBUSES]; 100 101 SET_DECLARE(pci_devemu_set, struct pci_devemu); 102 103 static uint64_t pci_emul_iobase; 104 static uint64_t pci_emul_membase32; 105 static uint64_t pci_emul_membase64; 106 static uint64_t pci_emul_memlim64; 107 108 #define PCI_EMUL_IOBASE 0x2000 109 #define PCI_EMUL_IOLIMIT 0x10000 110 111 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */ 112 #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ 113 SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); 114 115 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE 116 117 static struct pci_devemu *pci_emul_finddev(char *name); 118 static void pci_lintr_route(struct pci_devinst *pi); 119 static void pci_lintr_update(struct pci_devinst *pi); 120 static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, 121 int func, int coff, int bytes, uint32_t *val); 122 123 static __inline void 124 CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) 125 { 126 127 if (bytes == 1) 128 pci_set_cfgdata8(pi, coff, val); 129 else if (bytes == 2) 130 pci_set_cfgdata16(pi, coff, val); 131 else 132 pci_set_cfgdata32(pi, coff, val); 133 } 134 135 static __inline uint32_t 136 CFGREAD(struct pci_devinst *pi, int coff, int bytes) 137 { 138 139 if (bytes == 1) 140 return (pci_get_cfgdata8(pi, coff)); 141 else if (bytes == 2) 142 return (pci_get_cfgdata16(pi, coff)); 143 else 144 return (pci_get_cfgdata32(pi, coff)); 145 } 146 147 /* 148 * I/O access 149 */ 150 151 /* 152 * Slot options are in the form: 153 * 154 * <bus>:<slot>:<func>,<emul>[,<config>] 155 * <slot>[:<func>],<emul>[,<config>] 156 * 157 * slot is 0..31 158 * func is 0..7 159 * emul is a string describing the type of PCI device e.g. virtio-net 160 * config is an optional string, depending on the device, that can be 161 * used for configuration. 162 * Examples are: 163 * 1,virtio-net,tap0 164 * 3:0,dummy 165 */ 166 static void 167 pci_parse_slot_usage(char *aopt) 168 { 169 170 EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); 171 } 172 173 int 174 pci_parse_slot(char *opt) 175 { 176 struct businfo *bi; 177 struct slotinfo *si; 178 char *emul, *config, *str, *cp; 179 int error, bnum, snum, fnum; 180 181 error = -1; 182 str = strdup(opt); 183 184 emul = config = NULL; 185 if ((cp = strchr(str, ',')) != NULL) { 186 *cp = '\0'; 187 emul = cp + 1; 188 if ((cp = strchr(emul, ',')) != NULL) { 189 *cp = '\0'; 190 config = cp + 1; 191 } 192 } else { 193 pci_parse_slot_usage(opt); 194 goto done; 195 } 196 197 /* <bus>:<slot>:<func> */ 198 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 199 bnum = 0; 200 /* <slot>:<func> */ 201 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 202 fnum = 0; 203 /* <slot> */ 204 if (sscanf(str, "%d", &snum) != 1) { 205 snum = -1; 206 } 207 } 208 } 209 210 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 211 fnum < 0 || fnum >= MAXFUNCS) { 212 pci_parse_slot_usage(opt); 213 goto done; 214 } 215 216 if (pci_businfo[bnum] == NULL) 217 pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); 218 219 bi = pci_businfo[bnum]; 220 si = &bi->slotinfo[snum]; 221 222 if (si->si_funcs[fnum].fi_name != NULL) { 223 EPRINTLN("pci slot %d:%d already occupied!", 224 snum, fnum); 225 goto done; 226 } 227 228 if (pci_emul_finddev(emul) == NULL) { 229 EPRINTLN("pci slot %d:%d: unknown device \"%s\"", 230 snum, fnum, emul); 231 goto done; 232 } 233 234 error = 0; 235 si->si_funcs[fnum].fi_name = emul; 236 si->si_funcs[fnum].fi_param = config; 237 238 done: 239 if (error) 240 free(str); 241 242 return (error); 243 } 244 245 void 246 pci_print_supported_devices() 247 { 248 struct pci_devemu **pdpp, *pdp; 249 250 SET_FOREACH(pdpp, pci_devemu_set) { 251 pdp = *pdpp; 252 printf("%s\n", pdp->pe_emu); 253 } 254 } 255 256 static int 257 pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 258 { 259 260 if (offset < pi->pi_msix.pba_offset) 261 return (0); 262 263 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 264 return (0); 265 } 266 267 return (1); 268 } 269 270 int 271 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 272 uint64_t value) 273 { 274 int msix_entry_offset; 275 int tab_index; 276 char *dest; 277 278 /* support only 4 or 8 byte writes */ 279 if (size != 4 && size != 8) 280 return (-1); 281 282 /* 283 * Return if table index is beyond what device supports 284 */ 285 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 286 if (tab_index >= pi->pi_msix.table_count) 287 return (-1); 288 289 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 290 291 /* support only aligned writes */ 292 if ((msix_entry_offset % size) != 0) 293 return (-1); 294 295 dest = (char *)(pi->pi_msix.table + tab_index); 296 dest += msix_entry_offset; 297 298 if (size == 4) 299 *((uint32_t *)dest) = value; 300 else 301 *((uint64_t *)dest) = value; 302 303 return (0); 304 } 305 306 uint64_t 307 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 308 { 309 char *dest; 310 int msix_entry_offset; 311 int tab_index; 312 uint64_t retval = ~0; 313 314 /* 315 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 316 * table but we also allow 1 byte access to accommodate reads from 317 * ddb. 318 */ 319 if (size != 1 && size != 4 && size != 8) 320 return (retval); 321 322 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 323 324 /* support only aligned reads */ 325 if ((msix_entry_offset % size) != 0) { 326 return (retval); 327 } 328 329 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 330 331 if (tab_index < pi->pi_msix.table_count) { 332 /* valid MSI-X Table access */ 333 dest = (char *)(pi->pi_msix.table + tab_index); 334 dest += msix_entry_offset; 335 336 if (size == 1) 337 retval = *((uint8_t *)dest); 338 else if (size == 4) 339 retval = *((uint32_t *)dest); 340 else 341 retval = *((uint64_t *)dest); 342 } else if (pci_valid_pba_offset(pi, offset)) { 343 /* return 0 for PBA access */ 344 retval = 0; 345 } 346 347 return (retval); 348 } 349 350 int 351 pci_msix_table_bar(struct pci_devinst *pi) 352 { 353 354 if (pi->pi_msix.table != NULL) 355 return (pi->pi_msix.table_bar); 356 else 357 return (-1); 358 } 359 360 int 361 pci_msix_pba_bar(struct pci_devinst *pi) 362 { 363 364 if (pi->pi_msix.table != NULL) 365 return (pi->pi_msix.pba_bar); 366 else 367 return (-1); 368 } 369 370 static int 371 pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 372 uint32_t *eax, void *arg) 373 { 374 struct pci_devinst *pdi = arg; 375 struct pci_devemu *pe = pdi->pi_d; 376 uint64_t offset; 377 int i; 378 379 for (i = 0; i <= PCI_BARMAX; i++) { 380 if (pdi->pi_bar[i].type == PCIBAR_IO && 381 port >= pdi->pi_bar[i].addr && 382 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 383 offset = port - pdi->pi_bar[i].addr; 384 if (in) 385 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 386 offset, bytes); 387 else 388 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 389 bytes, *eax); 390 return (0); 391 } 392 } 393 return (-1); 394 } 395 396 static int 397 pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 398 int size, uint64_t *val, void *arg1, long arg2) 399 { 400 struct pci_devinst *pdi = arg1; 401 struct pci_devemu *pe = pdi->pi_d; 402 uint64_t offset; 403 int bidx = (int) arg2; 404 405 assert(bidx <= PCI_BARMAX); 406 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 407 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 408 assert(addr >= pdi->pi_bar[bidx].addr && 409 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 410 411 offset = addr - pdi->pi_bar[bidx].addr; 412 413 if (dir == MEM_F_WRITE) { 414 if (size == 8) { 415 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 416 4, *val & 0xffffffff); 417 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 418 4, *val >> 32); 419 } else { 420 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 421 size, *val); 422 } 423 } else { 424 if (size == 8) { 425 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 426 offset, 4); 427 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 428 offset + 4, 4) << 32; 429 } else { 430 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 431 offset, size); 432 } 433 } 434 435 return (0); 436 } 437 438 439 static int 440 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 441 uint64_t *addr) 442 { 443 uint64_t base; 444 445 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 446 447 base = roundup2(*baseptr, size); 448 449 if (base + size <= limit) { 450 *addr = base; 451 *baseptr = base + size; 452 return (0); 453 } else 454 return (-1); 455 } 456 457 /* 458 * Register (or unregister) the MMIO or I/O region associated with the BAR 459 * register 'idx' of an emulated pci device. 460 */ 461 static void 462 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 463 { 464 int error; 465 struct inout_port iop; 466 struct mem_range mr; 467 468 switch (pi->pi_bar[idx].type) { 469 case PCIBAR_IO: 470 bzero(&iop, sizeof(struct inout_port)); 471 iop.name = pi->pi_name; 472 iop.port = pi->pi_bar[idx].addr; 473 iop.size = pi->pi_bar[idx].size; 474 if (registration) { 475 iop.flags = IOPORT_F_INOUT; 476 iop.handler = pci_emul_io_handler; 477 iop.arg = pi; 478 error = register_inout(&iop); 479 } else 480 error = unregister_inout(&iop); 481 break; 482 case PCIBAR_MEM32: 483 case PCIBAR_MEM64: 484 bzero(&mr, sizeof(struct mem_range)); 485 mr.name = pi->pi_name; 486 mr.base = pi->pi_bar[idx].addr; 487 mr.size = pi->pi_bar[idx].size; 488 if (registration) { 489 mr.flags = MEM_F_RW; 490 mr.handler = pci_emul_mem_handler; 491 mr.arg1 = pi; 492 mr.arg2 = idx; 493 error = register_mem(&mr); 494 } else 495 error = unregister_mem(&mr); 496 break; 497 default: 498 error = EINVAL; 499 break; 500 } 501 assert(error == 0); 502 } 503 504 static void 505 unregister_bar(struct pci_devinst *pi, int idx) 506 { 507 508 modify_bar_registration(pi, idx, 0); 509 } 510 511 static void 512 register_bar(struct pci_devinst *pi, int idx) 513 { 514 515 modify_bar_registration(pi, idx, 1); 516 } 517 518 /* Are we decoding i/o port accesses for the emulated pci device? */ 519 static int 520 porten(struct pci_devinst *pi) 521 { 522 uint16_t cmd; 523 524 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 525 526 return (cmd & PCIM_CMD_PORTEN); 527 } 528 529 /* Are we decoding memory accesses for the emulated pci device? */ 530 static int 531 memen(struct pci_devinst *pi) 532 { 533 uint16_t cmd; 534 535 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 536 537 return (cmd & PCIM_CMD_MEMEN); 538 } 539 540 /* 541 * Update the MMIO or I/O address that is decoded by the BAR register. 542 * 543 * If the pci device has enabled the address space decoding then intercept 544 * the address range decoded by the BAR register. 545 */ 546 static void 547 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 548 { 549 int decode; 550 551 if (pi->pi_bar[idx].type == PCIBAR_IO) 552 decode = porten(pi); 553 else 554 decode = memen(pi); 555 556 if (decode) 557 unregister_bar(pi, idx); 558 559 switch (type) { 560 case PCIBAR_IO: 561 case PCIBAR_MEM32: 562 pi->pi_bar[idx].addr = addr; 563 break; 564 case PCIBAR_MEM64: 565 pi->pi_bar[idx].addr &= ~0xffffffffUL; 566 pi->pi_bar[idx].addr |= addr; 567 break; 568 case PCIBAR_MEMHI64: 569 pi->pi_bar[idx].addr &= 0xffffffff; 570 pi->pi_bar[idx].addr |= addr; 571 break; 572 default: 573 assert(0); 574 } 575 576 if (decode) 577 register_bar(pi, idx); 578 } 579 580 int 581 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 582 uint64_t size) 583 { 584 int error; 585 uint64_t *baseptr, limit, addr, mask, lobits, bar; 586 uint16_t cmd, enbit; 587 588 assert(idx >= 0 && idx <= PCI_BARMAX); 589 590 if ((size & (size - 1)) != 0) 591 size = 1UL << flsl(size); /* round up to a power of 2 */ 592 593 /* Enforce minimum BAR sizes required by the PCI standard */ 594 if (type == PCIBAR_IO) { 595 if (size < 4) 596 size = 4; 597 } else { 598 if (size < 16) 599 size = 16; 600 } 601 602 switch (type) { 603 case PCIBAR_NONE: 604 baseptr = NULL; 605 addr = mask = lobits = enbit = 0; 606 break; 607 case PCIBAR_IO: 608 baseptr = &pci_emul_iobase; 609 limit = PCI_EMUL_IOLIMIT; 610 mask = PCIM_BAR_IO_BASE; 611 lobits = PCIM_BAR_IO_SPACE; 612 enbit = PCIM_CMD_PORTEN; 613 break; 614 case PCIBAR_MEM64: 615 /* 616 * XXX 617 * Some drivers do not work well if the 64-bit BAR is allocated 618 * above 4GB. Allow for this by allocating small requests under 619 * 4GB unless then allocation size is larger than some arbitrary 620 * number (128MB currently). 621 */ 622 if (size > 128 * 1024 * 1024) { 623 baseptr = &pci_emul_membase64; 624 limit = pci_emul_memlim64; 625 mask = PCIM_BAR_MEM_BASE; 626 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 627 PCIM_BAR_MEM_PREFETCH; 628 } else { 629 baseptr = &pci_emul_membase32; 630 limit = PCI_EMUL_MEMLIMIT32; 631 mask = PCIM_BAR_MEM_BASE; 632 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 633 } 634 enbit = PCIM_CMD_MEMEN; 635 break; 636 case PCIBAR_MEM32: 637 baseptr = &pci_emul_membase32; 638 limit = PCI_EMUL_MEMLIMIT32; 639 mask = PCIM_BAR_MEM_BASE; 640 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 641 enbit = PCIM_CMD_MEMEN; 642 break; 643 default: 644 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 645 assert(0); 646 } 647 648 if (baseptr != NULL) { 649 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 650 if (error != 0) 651 return (error); 652 } 653 654 pdi->pi_bar[idx].type = type; 655 pdi->pi_bar[idx].addr = addr; 656 pdi->pi_bar[idx].size = size; 657 658 /* Initialize the BAR register in config space */ 659 bar = (addr & mask) | lobits; 660 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 661 662 if (type == PCIBAR_MEM64) { 663 assert(idx + 1 <= PCI_BARMAX); 664 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 665 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 666 } 667 668 cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); 669 if ((cmd & enbit) != enbit) 670 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); 671 register_bar(pdi, idx); 672 673 return (0); 674 } 675 676 #define CAP_START_OFFSET 0x40 677 static int 678 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 679 { 680 int i, capoff, reallen; 681 uint16_t sts; 682 683 assert(caplen > 0); 684 685 reallen = roundup2(caplen, 4); /* dword aligned */ 686 687 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 688 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 689 capoff = CAP_START_OFFSET; 690 else 691 capoff = pi->pi_capend + 1; 692 693 /* Check if we have enough space */ 694 if (capoff + reallen > PCI_REGMAX + 1) 695 return (-1); 696 697 /* Set the previous capability pointer */ 698 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 699 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 700 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 701 } else 702 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 703 704 /* Copy the capability */ 705 for (i = 0; i < caplen; i++) 706 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 707 708 /* Set the next capability pointer */ 709 pci_set_cfgdata8(pi, capoff + 1, 0); 710 711 pi->pi_prevcap = capoff; 712 pi->pi_capend = capoff + reallen - 1; 713 return (0); 714 } 715 716 static struct pci_devemu * 717 pci_emul_finddev(char *name) 718 { 719 struct pci_devemu **pdpp, *pdp; 720 721 SET_FOREACH(pdpp, pci_devemu_set) { 722 pdp = *pdpp; 723 if (!strcmp(pdp->pe_emu, name)) { 724 return (pdp); 725 } 726 } 727 728 return (NULL); 729 } 730 731 static int 732 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 733 int func, struct funcinfo *fi) 734 { 735 struct pci_devinst *pdi; 736 int err; 737 738 pdi = calloc(1, sizeof(struct pci_devinst)); 739 740 pdi->pi_vmctx = ctx; 741 pdi->pi_bus = bus; 742 pdi->pi_slot = slot; 743 pdi->pi_func = func; 744 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 745 pdi->pi_lintr.pin = 0; 746 pdi->pi_lintr.state = IDLE; 747 pdi->pi_lintr.pirq_pin = 0; 748 pdi->pi_lintr.ioapic_irq = 0; 749 pdi->pi_d = pde; 750 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 751 752 /* Disable legacy interrupts */ 753 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 754 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 755 756 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); 757 758 err = (*pde->pe_init)(ctx, pdi, fi->fi_param); 759 if (err == 0) 760 fi->fi_devi = pdi; 761 else 762 free(pdi); 763 764 return (err); 765 } 766 767 void 768 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 769 { 770 int mmc; 771 772 /* Number of msi messages must be a power of 2 between 1 and 32 */ 773 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 774 mmc = ffs(msgnum) - 1; 775 776 bzero(msicap, sizeof(struct msicap)); 777 msicap->capid = PCIY_MSI; 778 msicap->nextptr = nextptr; 779 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 780 } 781 782 int 783 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 784 { 785 struct msicap msicap; 786 787 pci_populate_msicap(&msicap, msgnum, 0); 788 789 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 790 } 791 792 static void 793 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 794 uint32_t msix_tab_size) 795 { 796 797 assert(msix_tab_size % 4096 == 0); 798 799 bzero(msixcap, sizeof(struct msixcap)); 800 msixcap->capid = PCIY_MSIX; 801 802 /* 803 * Message Control Register, all fields set to 804 * zero except for the Table Size. 805 * Note: Table size N is encoded as N-1 806 */ 807 msixcap->msgctrl = msgnum - 1; 808 809 /* 810 * MSI-X BAR setup: 811 * - MSI-X table start at offset 0 812 * - PBA table starts at a 4K aligned offset after the MSI-X table 813 */ 814 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 815 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 816 } 817 818 static void 819 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 820 { 821 int i, table_size; 822 823 assert(table_entries > 0); 824 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 825 826 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 827 pi->pi_msix.table = calloc(1, table_size); 828 829 /* set mask bit of vector control register */ 830 for (i = 0; i < table_entries; i++) 831 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 832 } 833 834 int 835 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 836 { 837 uint32_t tab_size; 838 struct msixcap msixcap; 839 840 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 841 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 842 843 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 844 845 /* Align table size to nearest 4K */ 846 tab_size = roundup2(tab_size, 4096); 847 848 pi->pi_msix.table_bar = barnum; 849 pi->pi_msix.pba_bar = barnum; 850 pi->pi_msix.table_offset = 0; 851 pi->pi_msix.table_count = msgnum; 852 pi->pi_msix.pba_offset = tab_size; 853 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 854 855 pci_msix_table_init(pi, msgnum); 856 857 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 858 859 /* allocate memory for MSI-X Table and PBA */ 860 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 861 tab_size + pi->pi_msix.pba_size); 862 863 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 864 sizeof(msixcap))); 865 } 866 867 static void 868 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 869 int bytes, uint32_t val) 870 { 871 uint16_t msgctrl, rwmask; 872 int off; 873 874 off = offset - capoff; 875 /* Message Control Register */ 876 if (off == 2 && bytes == 2) { 877 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 878 msgctrl = pci_get_cfgdata16(pi, offset); 879 msgctrl &= ~rwmask; 880 msgctrl |= val & rwmask; 881 val = msgctrl; 882 883 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 884 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 885 pci_lintr_update(pi); 886 } 887 888 CFGWRITE(pi, offset, val, bytes); 889 } 890 891 static void 892 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 893 int bytes, uint32_t val) 894 { 895 uint16_t msgctrl, rwmask, msgdata, mme; 896 uint32_t addrlo; 897 898 /* 899 * If guest is writing to the message control register make sure 900 * we do not overwrite read-only fields. 901 */ 902 if ((offset - capoff) == 2 && bytes == 2) { 903 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 904 msgctrl = pci_get_cfgdata16(pi, offset); 905 msgctrl &= ~rwmask; 906 msgctrl |= val & rwmask; 907 val = msgctrl; 908 } 909 CFGWRITE(pi, offset, val, bytes); 910 911 msgctrl = pci_get_cfgdata16(pi, capoff + 2); 912 addrlo = pci_get_cfgdata32(pi, capoff + 4); 913 if (msgctrl & PCIM_MSICTRL_64BIT) 914 msgdata = pci_get_cfgdata16(pi, capoff + 12); 915 else 916 msgdata = pci_get_cfgdata16(pi, capoff + 8); 917 918 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 919 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 920 if (pi->pi_msi.enabled) { 921 pi->pi_msi.addr = addrlo; 922 pi->pi_msi.msg_data = msgdata; 923 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 924 } else { 925 pi->pi_msi.maxmsgnum = 0; 926 } 927 pci_lintr_update(pi); 928 } 929 930 void 931 pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 932 int bytes, uint32_t val) 933 { 934 935 /* XXX don't write to the readonly parts */ 936 CFGWRITE(pi, offset, val, bytes); 937 } 938 939 #define PCIECAP_VERSION 0x2 940 int 941 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 942 { 943 int err; 944 struct pciecap pciecap; 945 946 bzero(&pciecap, sizeof(pciecap)); 947 948 /* 949 * Use the integrated endpoint type for endpoints on a root complex bus. 950 * 951 * NB: bhyve currently only supports a single PCI bus that is the root 952 * complex bus, so all endpoints are integrated. 953 */ 954 if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0)) 955 type = PCIEM_TYPE_ROOT_INT_EP; 956 957 pciecap.capid = PCIY_EXPRESS; 958 pciecap.pcie_capabilities = PCIECAP_VERSION | type; 959 if (type != PCIEM_TYPE_ROOT_INT_EP) { 960 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 961 pciecap.link_status = 0x11; /* gen1, x1 */ 962 } 963 964 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 965 return (err); 966 } 967 968 /* 969 * This function assumes that 'coff' is in the capabilities region of the 970 * config space. A capoff parameter of zero will force a search for the 971 * offset and type. 972 */ 973 void 974 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, 975 uint8_t capoff, int capid) 976 { 977 uint8_t nextoff; 978 979 /* Do not allow un-aligned writes */ 980 if ((offset & (bytes - 1)) != 0) 981 return; 982 983 if (capoff == 0) { 984 /* Find the capability that we want to update */ 985 capoff = CAP_START_OFFSET; 986 while (1) { 987 nextoff = pci_get_cfgdata8(pi, capoff + 1); 988 if (nextoff == 0) 989 break; 990 if (offset >= capoff && offset < nextoff) 991 break; 992 993 capoff = nextoff; 994 } 995 assert(offset >= capoff); 996 capid = pci_get_cfgdata8(pi, capoff); 997 } 998 999 /* 1000 * Capability ID and Next Capability Pointer are readonly. 1001 * However, some o/s's do 4-byte writes that include these. 1002 * For this case, trim the write back to 2 bytes and adjust 1003 * the data. 1004 */ 1005 if (offset == capoff || offset == capoff + 1) { 1006 if (offset == capoff && bytes == 4) { 1007 bytes = 2; 1008 offset += 2; 1009 val >>= 16; 1010 } else 1011 return; 1012 } 1013 1014 switch (capid) { 1015 case PCIY_MSI: 1016 msicap_cfgwrite(pi, capoff, offset, bytes, val); 1017 break; 1018 case PCIY_MSIX: 1019 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 1020 break; 1021 case PCIY_EXPRESS: 1022 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 1023 break; 1024 default: 1025 break; 1026 } 1027 } 1028 1029 static int 1030 pci_emul_iscap(struct pci_devinst *pi, int offset) 1031 { 1032 uint16_t sts; 1033 1034 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1035 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1036 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1037 return (1); 1038 } 1039 return (0); 1040 } 1041 1042 static int 1043 pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1044 int size, uint64_t *val, void *arg1, long arg2) 1045 { 1046 /* 1047 * Ignore writes; return 0xff's for reads. The mem read code 1048 * will take care of truncating to the correct size. 1049 */ 1050 if (dir == MEM_F_READ) { 1051 *val = 0xffffffffffffffff; 1052 } 1053 1054 return (0); 1055 } 1056 1057 static int 1058 pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1059 int bytes, uint64_t *val, void *arg1, long arg2) 1060 { 1061 int bus, slot, func, coff, in; 1062 1063 coff = addr & 0xfff; 1064 func = (addr >> 12) & 0x7; 1065 slot = (addr >> 15) & 0x1f; 1066 bus = (addr >> 20) & 0xff; 1067 in = (dir == MEM_F_READ); 1068 if (in) 1069 *val = ~0UL; 1070 pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); 1071 return (0); 1072 } 1073 1074 uint64_t 1075 pci_ecfg_base(void) 1076 { 1077 1078 return (PCI_EMUL_ECFG_BASE); 1079 } 1080 1081 #define BUSIO_ROUNDUP 32 1082 #define BUSMEM_ROUNDUP (1024 * 1024) 1083 1084 int 1085 init_pci(struct vmctx *ctx) 1086 { 1087 struct mem_range mr; 1088 struct pci_devemu *pde; 1089 struct businfo *bi; 1090 struct slotinfo *si; 1091 struct funcinfo *fi; 1092 size_t lowmem; 1093 uint64_t cpu_maxphysaddr, pci_emul_memresv64; 1094 u_int regs[4]; 1095 int bus, slot, func, error; 1096 1097 pci_emul_iobase = PCI_EMUL_IOBASE; 1098 pci_emul_membase32 = vm_get_lowmem_limit(ctx); 1099 1100 do_cpuid(0x80000008, regs); 1101 cpu_maxphysaddr = 1ULL << (regs[0] & 0xff); 1102 if (cpu_maxphysaddr > VM_MAXUSER_ADDRESS_LA48) 1103 cpu_maxphysaddr = VM_MAXUSER_ADDRESS_LA48; 1104 pci_emul_memresv64 = cpu_maxphysaddr / 4; 1105 /* 1106 * Max power of 2 that is less then 1107 * cpu_maxphysaddr - pci_emul_memresv64. 1108 */ 1109 pci_emul_membase64 = 1ULL << (flsl(cpu_maxphysaddr - 1110 pci_emul_memresv64) - 1); 1111 pci_emul_memlim64 = cpu_maxphysaddr; 1112 1113 for (bus = 0; bus < MAXBUSES; bus++) { 1114 if ((bi = pci_businfo[bus]) == NULL) 1115 continue; 1116 /* 1117 * Keep track of the i/o and memory resources allocated to 1118 * this bus. 1119 */ 1120 bi->iobase = pci_emul_iobase; 1121 bi->membase32 = pci_emul_membase32; 1122 bi->membase64 = pci_emul_membase64; 1123 1124 for (slot = 0; slot < MAXSLOTS; slot++) { 1125 si = &bi->slotinfo[slot]; 1126 for (func = 0; func < MAXFUNCS; func++) { 1127 fi = &si->si_funcs[func]; 1128 if (fi->fi_name == NULL) 1129 continue; 1130 pde = pci_emul_finddev(fi->fi_name); 1131 assert(pde != NULL); 1132 error = pci_emul_init(ctx, pde, bus, slot, 1133 func, fi); 1134 if (error) 1135 return (error); 1136 } 1137 } 1138 1139 /* 1140 * Add some slop to the I/O and memory resources decoded by 1141 * this bus to give a guest some flexibility if it wants to 1142 * reprogram the BARs. 1143 */ 1144 pci_emul_iobase += BUSIO_ROUNDUP; 1145 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1146 bi->iolimit = pci_emul_iobase; 1147 1148 pci_emul_membase32 += BUSMEM_ROUNDUP; 1149 pci_emul_membase32 = roundup2(pci_emul_membase32, 1150 BUSMEM_ROUNDUP); 1151 bi->memlimit32 = pci_emul_membase32; 1152 1153 pci_emul_membase64 += BUSMEM_ROUNDUP; 1154 pci_emul_membase64 = roundup2(pci_emul_membase64, 1155 BUSMEM_ROUNDUP); 1156 bi->memlimit64 = pci_emul_membase64; 1157 } 1158 1159 /* 1160 * PCI backends are initialized before routing INTx interrupts 1161 * so that LPC devices are able to reserve ISA IRQs before 1162 * routing PIRQ pins. 1163 */ 1164 for (bus = 0; bus < MAXBUSES; bus++) { 1165 if ((bi = pci_businfo[bus]) == NULL) 1166 continue; 1167 1168 for (slot = 0; slot < MAXSLOTS; slot++) { 1169 si = &bi->slotinfo[slot]; 1170 for (func = 0; func < MAXFUNCS; func++) { 1171 fi = &si->si_funcs[func]; 1172 if (fi->fi_devi == NULL) 1173 continue; 1174 pci_lintr_route(fi->fi_devi); 1175 } 1176 } 1177 } 1178 lpc_pirq_routed(); 1179 1180 /* 1181 * The guest physical memory map looks like the following: 1182 * [0, lowmem) guest system memory 1183 * [lowmem, lowmem_limit) memory hole (may be absent) 1184 * [lowmem_limit, 0xE0000000) PCI hole (32-bit BAR allocation) 1185 * [0xE0000000, 0xF0000000) PCI extended config window 1186 * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware 1187 * [4GB, 4GB + highmem) 1188 */ 1189 1190 /* 1191 * Accesses to memory addresses that are not allocated to system 1192 * memory or PCI devices return 0xff's. 1193 */ 1194 lowmem = vm_get_lowmem_size(ctx); 1195 bzero(&mr, sizeof(struct mem_range)); 1196 mr.name = "PCI hole"; 1197 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1198 mr.base = lowmem; 1199 mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1200 mr.handler = pci_emul_fallback_handler; 1201 error = register_mem_fallback(&mr); 1202 assert(error == 0); 1203 1204 /* PCI extended config space */ 1205 bzero(&mr, sizeof(struct mem_range)); 1206 mr.name = "PCI ECFG"; 1207 mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; 1208 mr.base = PCI_EMUL_ECFG_BASE; 1209 mr.size = PCI_EMUL_ECFG_SIZE; 1210 mr.handler = pci_emul_ecfg_handler; 1211 error = register_mem(&mr); 1212 assert(error == 0); 1213 1214 return (0); 1215 } 1216 1217 static void 1218 pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1219 void *arg) 1220 { 1221 1222 dsdt_line(" Package ()"); 1223 dsdt_line(" {"); 1224 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1225 dsdt_line(" 0x%02X,", pin - 1); 1226 dsdt_line(" Zero,"); 1227 dsdt_line(" 0x%X", ioapic_irq); 1228 dsdt_line(" },"); 1229 } 1230 1231 static void 1232 pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1233 void *arg) 1234 { 1235 char *name; 1236 1237 name = lpc_pirq_name(pirq_pin); 1238 if (name == NULL) 1239 return; 1240 dsdt_line(" Package ()"); 1241 dsdt_line(" {"); 1242 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1243 dsdt_line(" 0x%02X,", pin - 1); 1244 dsdt_line(" %s,", name); 1245 dsdt_line(" 0x00"); 1246 dsdt_line(" },"); 1247 free(name); 1248 } 1249 1250 /* 1251 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1252 * corresponding to each PCI bus. 1253 */ 1254 static void 1255 pci_bus_write_dsdt(int bus) 1256 { 1257 struct businfo *bi; 1258 struct slotinfo *si; 1259 struct pci_devinst *pi; 1260 int count, func, slot; 1261 1262 /* 1263 * If there are no devices on this 'bus' then just return. 1264 */ 1265 if ((bi = pci_businfo[bus]) == NULL) { 1266 /* 1267 * Bus 0 is special because it decodes the I/O ports used 1268 * for PCI config space access even if there are no devices 1269 * on it. 1270 */ 1271 if (bus != 0) 1272 return; 1273 } 1274 1275 dsdt_line(" Device (PC%02X)", bus); 1276 dsdt_line(" {"); 1277 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1278 1279 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1280 dsdt_line(" {"); 1281 dsdt_line(" Return (0x%08X)", bus); 1282 dsdt_line(" }"); 1283 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1284 dsdt_line(" {"); 1285 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1286 "MaxFixed, PosDecode,"); 1287 dsdt_line(" 0x0000, // Granularity"); 1288 dsdt_line(" 0x%04X, // Range Minimum", bus); 1289 dsdt_line(" 0x%04X, // Range Maximum", bus); 1290 dsdt_line(" 0x0000, // Translation Offset"); 1291 dsdt_line(" 0x0001, // Length"); 1292 dsdt_line(" ,, )"); 1293 1294 if (bus == 0) { 1295 dsdt_indent(3); 1296 dsdt_fixed_ioport(0xCF8, 8); 1297 dsdt_unindent(3); 1298 1299 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1300 "PosDecode, EntireRange,"); 1301 dsdt_line(" 0x0000, // Granularity"); 1302 dsdt_line(" 0x0000, // Range Minimum"); 1303 dsdt_line(" 0x0CF7, // Range Maximum"); 1304 dsdt_line(" 0x0000, // Translation Offset"); 1305 dsdt_line(" 0x0CF8, // Length"); 1306 dsdt_line(" ,, , TypeStatic)"); 1307 1308 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1309 "PosDecode, EntireRange,"); 1310 dsdt_line(" 0x0000, // Granularity"); 1311 dsdt_line(" 0x0D00, // Range Minimum"); 1312 dsdt_line(" 0x%04X, // Range Maximum", 1313 PCI_EMUL_IOBASE - 1); 1314 dsdt_line(" 0x0000, // Translation Offset"); 1315 dsdt_line(" 0x%04X, // Length", 1316 PCI_EMUL_IOBASE - 0x0D00); 1317 dsdt_line(" ,, , TypeStatic)"); 1318 1319 if (bi == NULL) { 1320 dsdt_line(" })"); 1321 goto done; 1322 } 1323 } 1324 assert(bi != NULL); 1325 1326 /* i/o window */ 1327 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1328 "PosDecode, EntireRange,"); 1329 dsdt_line(" 0x0000, // Granularity"); 1330 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1331 dsdt_line(" 0x%04X, // Range Maximum", 1332 bi->iolimit - 1); 1333 dsdt_line(" 0x0000, // Translation Offset"); 1334 dsdt_line(" 0x%04X, // Length", 1335 bi->iolimit - bi->iobase); 1336 dsdt_line(" ,, , TypeStatic)"); 1337 1338 /* mmio window (32-bit) */ 1339 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1340 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1341 dsdt_line(" 0x00000000, // Granularity"); 1342 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1343 dsdt_line(" 0x%08X, // Range Maximum\n", 1344 bi->memlimit32 - 1); 1345 dsdt_line(" 0x00000000, // Translation Offset"); 1346 dsdt_line(" 0x%08X, // Length\n", 1347 bi->memlimit32 - bi->membase32); 1348 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1349 1350 /* mmio window (64-bit) */ 1351 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1352 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1353 dsdt_line(" 0x0000000000000000, // Granularity"); 1354 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1355 dsdt_line(" 0x%016lX, // Range Maximum\n", 1356 bi->memlimit64 - 1); 1357 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1358 dsdt_line(" 0x%016lX, // Length\n", 1359 bi->memlimit64 - bi->membase64); 1360 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1361 dsdt_line(" })"); 1362 1363 count = pci_count_lintr(bus); 1364 if (count != 0) { 1365 dsdt_indent(2); 1366 dsdt_line("Name (PPRT, Package ()"); 1367 dsdt_line("{"); 1368 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1369 dsdt_line("})"); 1370 dsdt_line("Name (APRT, Package ()"); 1371 dsdt_line("{"); 1372 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1373 dsdt_line("})"); 1374 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1375 dsdt_line("{"); 1376 dsdt_line(" If (PICM)"); 1377 dsdt_line(" {"); 1378 dsdt_line(" Return (APRT)"); 1379 dsdt_line(" }"); 1380 dsdt_line(" Else"); 1381 dsdt_line(" {"); 1382 dsdt_line(" Return (PPRT)"); 1383 dsdt_line(" }"); 1384 dsdt_line("}"); 1385 dsdt_unindent(2); 1386 } 1387 1388 dsdt_indent(2); 1389 for (slot = 0; slot < MAXSLOTS; slot++) { 1390 si = &bi->slotinfo[slot]; 1391 for (func = 0; func < MAXFUNCS; func++) { 1392 pi = si->si_funcs[func].fi_devi; 1393 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1394 pi->pi_d->pe_write_dsdt(pi); 1395 } 1396 } 1397 dsdt_unindent(2); 1398 done: 1399 dsdt_line(" }"); 1400 } 1401 1402 void 1403 pci_write_dsdt(void) 1404 { 1405 int bus; 1406 1407 dsdt_indent(1); 1408 dsdt_line("Name (PICM, 0x00)"); 1409 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1410 dsdt_line("{"); 1411 dsdt_line(" Store (Arg0, PICM)"); 1412 dsdt_line("}"); 1413 dsdt_line(""); 1414 dsdt_line("Scope (_SB)"); 1415 dsdt_line("{"); 1416 for (bus = 0; bus < MAXBUSES; bus++) 1417 pci_bus_write_dsdt(bus); 1418 dsdt_line("}"); 1419 dsdt_unindent(1); 1420 } 1421 1422 int 1423 pci_bus_configured(int bus) 1424 { 1425 assert(bus >= 0 && bus < MAXBUSES); 1426 return (pci_businfo[bus] != NULL); 1427 } 1428 1429 int 1430 pci_msi_enabled(struct pci_devinst *pi) 1431 { 1432 return (pi->pi_msi.enabled); 1433 } 1434 1435 int 1436 pci_msi_maxmsgnum(struct pci_devinst *pi) 1437 { 1438 if (pi->pi_msi.enabled) 1439 return (pi->pi_msi.maxmsgnum); 1440 else 1441 return (0); 1442 } 1443 1444 int 1445 pci_msix_enabled(struct pci_devinst *pi) 1446 { 1447 1448 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1449 } 1450 1451 void 1452 pci_generate_msix(struct pci_devinst *pi, int index) 1453 { 1454 struct msix_table_entry *mte; 1455 1456 if (!pci_msix_enabled(pi)) 1457 return; 1458 1459 if (pi->pi_msix.function_mask) 1460 return; 1461 1462 if (index >= pi->pi_msix.table_count) 1463 return; 1464 1465 mte = &pi->pi_msix.table[index]; 1466 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1467 /* XXX Set PBA bit if interrupt is disabled */ 1468 vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); 1469 } 1470 } 1471 1472 void 1473 pci_generate_msi(struct pci_devinst *pi, int index) 1474 { 1475 1476 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1477 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1478 pi->pi_msi.msg_data + index); 1479 } 1480 } 1481 1482 static bool 1483 pci_lintr_permitted(struct pci_devinst *pi) 1484 { 1485 uint16_t cmd; 1486 1487 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1488 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1489 (cmd & PCIM_CMD_INTxDIS))); 1490 } 1491 1492 void 1493 pci_lintr_request(struct pci_devinst *pi) 1494 { 1495 struct businfo *bi; 1496 struct slotinfo *si; 1497 int bestpin, bestcount, pin; 1498 1499 bi = pci_businfo[pi->pi_bus]; 1500 assert(bi != NULL); 1501 1502 /* 1503 * Just allocate a pin from our slot. The pin will be 1504 * assigned IRQs later when interrupts are routed. 1505 */ 1506 si = &bi->slotinfo[pi->pi_slot]; 1507 bestpin = 0; 1508 bestcount = si->si_intpins[0].ii_count; 1509 for (pin = 1; pin < 4; pin++) { 1510 if (si->si_intpins[pin].ii_count < bestcount) { 1511 bestpin = pin; 1512 bestcount = si->si_intpins[pin].ii_count; 1513 } 1514 } 1515 1516 si->si_intpins[bestpin].ii_count++; 1517 pi->pi_lintr.pin = bestpin + 1; 1518 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1519 } 1520 1521 static void 1522 pci_lintr_route(struct pci_devinst *pi) 1523 { 1524 struct businfo *bi; 1525 struct intxinfo *ii; 1526 1527 if (pi->pi_lintr.pin == 0) 1528 return; 1529 1530 bi = pci_businfo[pi->pi_bus]; 1531 assert(bi != NULL); 1532 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1533 1534 /* 1535 * Attempt to allocate an I/O APIC pin for this intpin if one 1536 * is not yet assigned. 1537 */ 1538 if (ii->ii_ioapic_irq == 0) 1539 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); 1540 assert(ii->ii_ioapic_irq > 0); 1541 1542 /* 1543 * Attempt to allocate a PIRQ pin for this intpin if one is 1544 * not yet assigned. 1545 */ 1546 if (ii->ii_pirq_pin == 0) 1547 ii->ii_pirq_pin = pirq_alloc_pin(pi); 1548 assert(ii->ii_pirq_pin > 0); 1549 1550 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1551 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1552 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1553 } 1554 1555 void 1556 pci_lintr_assert(struct pci_devinst *pi) 1557 { 1558 1559 assert(pi->pi_lintr.pin > 0); 1560 1561 pthread_mutex_lock(&pi->pi_lintr.lock); 1562 if (pi->pi_lintr.state == IDLE) { 1563 if (pci_lintr_permitted(pi)) { 1564 pi->pi_lintr.state = ASSERTED; 1565 pci_irq_assert(pi); 1566 } else 1567 pi->pi_lintr.state = PENDING; 1568 } 1569 pthread_mutex_unlock(&pi->pi_lintr.lock); 1570 } 1571 1572 void 1573 pci_lintr_deassert(struct pci_devinst *pi) 1574 { 1575 1576 assert(pi->pi_lintr.pin > 0); 1577 1578 pthread_mutex_lock(&pi->pi_lintr.lock); 1579 if (pi->pi_lintr.state == ASSERTED) { 1580 pi->pi_lintr.state = IDLE; 1581 pci_irq_deassert(pi); 1582 } else if (pi->pi_lintr.state == PENDING) 1583 pi->pi_lintr.state = IDLE; 1584 pthread_mutex_unlock(&pi->pi_lintr.lock); 1585 } 1586 1587 static void 1588 pci_lintr_update(struct pci_devinst *pi) 1589 { 1590 1591 pthread_mutex_lock(&pi->pi_lintr.lock); 1592 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1593 pci_irq_deassert(pi); 1594 pi->pi_lintr.state = PENDING; 1595 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1596 pi->pi_lintr.state = ASSERTED; 1597 pci_irq_assert(pi); 1598 } 1599 pthread_mutex_unlock(&pi->pi_lintr.lock); 1600 } 1601 1602 int 1603 pci_count_lintr(int bus) 1604 { 1605 int count, slot, pin; 1606 struct slotinfo *slotinfo; 1607 1608 count = 0; 1609 if (pci_businfo[bus] != NULL) { 1610 for (slot = 0; slot < MAXSLOTS; slot++) { 1611 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1612 for (pin = 0; pin < 4; pin++) { 1613 if (slotinfo->si_intpins[pin].ii_count != 0) 1614 count++; 1615 } 1616 } 1617 } 1618 return (count); 1619 } 1620 1621 void 1622 pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) 1623 { 1624 struct businfo *bi; 1625 struct slotinfo *si; 1626 struct intxinfo *ii; 1627 int slot, pin; 1628 1629 if ((bi = pci_businfo[bus]) == NULL) 1630 return; 1631 1632 for (slot = 0; slot < MAXSLOTS; slot++) { 1633 si = &bi->slotinfo[slot]; 1634 for (pin = 0; pin < 4; pin++) { 1635 ii = &si->si_intpins[pin]; 1636 if (ii->ii_count != 0) 1637 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 1638 ii->ii_ioapic_irq, arg); 1639 } 1640 } 1641 } 1642 1643 /* 1644 * Return 1 if the emulated device in 'slot' is a multi-function device. 1645 * Return 0 otherwise. 1646 */ 1647 static int 1648 pci_emul_is_mfdev(int bus, int slot) 1649 { 1650 struct businfo *bi; 1651 struct slotinfo *si; 1652 int f, numfuncs; 1653 1654 numfuncs = 0; 1655 if ((bi = pci_businfo[bus]) != NULL) { 1656 si = &bi->slotinfo[slot]; 1657 for (f = 0; f < MAXFUNCS; f++) { 1658 if (si->si_funcs[f].fi_devi != NULL) { 1659 numfuncs++; 1660 } 1661 } 1662 } 1663 return (numfuncs > 1); 1664 } 1665 1666 /* 1667 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1668 * whether or not is a multi-function being emulated in the pci 'slot'. 1669 */ 1670 static void 1671 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1672 { 1673 int mfdev; 1674 1675 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1676 mfdev = pci_emul_is_mfdev(bus, slot); 1677 switch (bytes) { 1678 case 1: 1679 case 2: 1680 *rv &= ~PCIM_MFDEV; 1681 if (mfdev) { 1682 *rv |= PCIM_MFDEV; 1683 } 1684 break; 1685 case 4: 1686 *rv &= ~(PCIM_MFDEV << 16); 1687 if (mfdev) { 1688 *rv |= (PCIM_MFDEV << 16); 1689 } 1690 break; 1691 } 1692 } 1693 } 1694 1695 /* 1696 * Update device state in response to changes to the PCI command 1697 * register. 1698 */ 1699 void 1700 pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) 1701 { 1702 int i; 1703 uint16_t changed, new; 1704 1705 new = pci_get_cfgdata16(pi, PCIR_COMMAND); 1706 changed = old ^ new; 1707 1708 /* 1709 * If the MMIO or I/O address space decoding has changed then 1710 * register/unregister all BARs that decode that address space. 1711 */ 1712 for (i = 0; i <= PCI_BARMAX; i++) { 1713 switch (pi->pi_bar[i].type) { 1714 case PCIBAR_NONE: 1715 case PCIBAR_MEMHI64: 1716 break; 1717 case PCIBAR_IO: 1718 /* I/O address space decoding changed? */ 1719 if (changed & PCIM_CMD_PORTEN) { 1720 if (new & PCIM_CMD_PORTEN) 1721 register_bar(pi, i); 1722 else 1723 unregister_bar(pi, i); 1724 } 1725 break; 1726 case PCIBAR_MEM32: 1727 case PCIBAR_MEM64: 1728 /* MMIO address space decoding changed? */ 1729 if (changed & PCIM_CMD_MEMEN) { 1730 if (new & PCIM_CMD_MEMEN) 1731 register_bar(pi, i); 1732 else 1733 unregister_bar(pi, i); 1734 } 1735 break; 1736 default: 1737 assert(0); 1738 } 1739 } 1740 1741 /* 1742 * If INTx has been unmasked and is pending, assert the 1743 * interrupt. 1744 */ 1745 pci_lintr_update(pi); 1746 } 1747 1748 static void 1749 pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) 1750 { 1751 int rshift; 1752 uint32_t cmd, old, readonly; 1753 1754 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 1755 1756 /* 1757 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. 1758 * 1759 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are 1760 * 'write 1 to clear'. However these bits are not set to '1' by 1761 * any device emulation so it is simpler to treat them as readonly. 1762 */ 1763 rshift = (coff & 0x3) * 8; 1764 readonly = 0xFFFFF880 >> rshift; 1765 1766 old = CFGREAD(pi, coff, bytes); 1767 new &= ~readonly; 1768 new |= (old & readonly); 1769 CFGWRITE(pi, coff, new, bytes); /* update config */ 1770 1771 pci_emul_cmd_changed(pi, cmd); 1772 } 1773 1774 static void 1775 pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, 1776 int coff, int bytes, uint32_t *eax) 1777 { 1778 struct businfo *bi; 1779 struct slotinfo *si; 1780 struct pci_devinst *pi; 1781 struct pci_devemu *pe; 1782 int idx, needcfg; 1783 uint64_t addr, bar, mask; 1784 1785 if ((bi = pci_businfo[bus]) != NULL) { 1786 si = &bi->slotinfo[slot]; 1787 pi = si->si_funcs[func].fi_devi; 1788 } else 1789 pi = NULL; 1790 1791 /* 1792 * Just return if there is no device at this slot:func or if the 1793 * the guest is doing an un-aligned access. 1794 */ 1795 if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || 1796 (coff & (bytes - 1)) != 0) { 1797 if (in) 1798 *eax = 0xffffffff; 1799 return; 1800 } 1801 1802 /* 1803 * Ignore all writes beyond the standard config space and return all 1804 * ones on reads. 1805 */ 1806 if (coff >= PCI_REGMAX + 1) { 1807 if (in) { 1808 *eax = 0xffffffff; 1809 /* 1810 * Extended capabilities begin at offset 256 in config 1811 * space. Absence of extended capabilities is signaled 1812 * with all 0s in the extended capability header at 1813 * offset 256. 1814 */ 1815 if (coff <= PCI_REGMAX + 4) 1816 *eax = 0x00000000; 1817 } 1818 return; 1819 } 1820 1821 pe = pi->pi_d; 1822 1823 /* 1824 * Config read 1825 */ 1826 if (in) { 1827 /* Let the device emulation override the default handler */ 1828 if (pe->pe_cfgread != NULL) { 1829 needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, 1830 eax); 1831 } else { 1832 needcfg = 1; 1833 } 1834 1835 if (needcfg) 1836 *eax = CFGREAD(pi, coff, bytes); 1837 1838 pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); 1839 } else { 1840 /* Let the device emulation override the default handler */ 1841 if (pe->pe_cfgwrite != NULL && 1842 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 1843 return; 1844 1845 /* 1846 * Special handling for write to BAR registers 1847 */ 1848 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { 1849 /* 1850 * Ignore writes to BAR registers that are not 1851 * 4-byte aligned. 1852 */ 1853 if (bytes != 4 || (coff & 0x3) != 0) 1854 return; 1855 idx = (coff - PCIR_BAR(0)) / 4; 1856 mask = ~(pi->pi_bar[idx].size - 1); 1857 switch (pi->pi_bar[idx].type) { 1858 case PCIBAR_NONE: 1859 pi->pi_bar[idx].addr = bar = 0; 1860 break; 1861 case PCIBAR_IO: 1862 addr = *eax & mask; 1863 addr &= 0xffff; 1864 bar = addr | PCIM_BAR_IO_SPACE; 1865 /* 1866 * Register the new BAR value for interception 1867 */ 1868 if (addr != pi->pi_bar[idx].addr) { 1869 update_bar_address(pi, addr, idx, 1870 PCIBAR_IO); 1871 } 1872 break; 1873 case PCIBAR_MEM32: 1874 addr = bar = *eax & mask; 1875 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 1876 if (addr != pi->pi_bar[idx].addr) { 1877 update_bar_address(pi, addr, idx, 1878 PCIBAR_MEM32); 1879 } 1880 break; 1881 case PCIBAR_MEM64: 1882 addr = bar = *eax & mask; 1883 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 1884 PCIM_BAR_MEM_PREFETCH; 1885 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 1886 update_bar_address(pi, addr, idx, 1887 PCIBAR_MEM64); 1888 } 1889 break; 1890 case PCIBAR_MEMHI64: 1891 mask = ~(pi->pi_bar[idx - 1].size - 1); 1892 addr = ((uint64_t)*eax << 32) & mask; 1893 bar = addr >> 32; 1894 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 1895 update_bar_address(pi, addr, idx - 1, 1896 PCIBAR_MEMHI64); 1897 } 1898 break; 1899 default: 1900 assert(0); 1901 } 1902 pci_set_cfgdata32(pi, coff, bar); 1903 1904 } else if (pci_emul_iscap(pi, coff)) { 1905 pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); 1906 } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { 1907 pci_emul_cmdsts_write(pi, coff, *eax, bytes); 1908 } else { 1909 CFGWRITE(pi, coff, *eax, bytes); 1910 } 1911 } 1912 } 1913 1914 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 1915 1916 static int 1917 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1918 uint32_t *eax, void *arg) 1919 { 1920 uint32_t x; 1921 1922 if (bytes != 4) { 1923 if (in) 1924 *eax = (bytes == 2) ? 0xffff : 0xff; 1925 return (0); 1926 } 1927 1928 if (in) { 1929 x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; 1930 if (cfgenable) 1931 x |= CONF1_ENABLE; 1932 *eax = x; 1933 } else { 1934 x = *eax; 1935 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 1936 cfgoff = x & PCI_REGMAX; 1937 cfgfunc = (x >> 8) & PCI_FUNCMAX; 1938 cfgslot = (x >> 11) & PCI_SLOTMAX; 1939 cfgbus = (x >> 16) & PCI_BUSMAX; 1940 } 1941 1942 return (0); 1943 } 1944 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 1945 1946 static int 1947 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1948 uint32_t *eax, void *arg) 1949 { 1950 int coff; 1951 1952 assert(bytes == 1 || bytes == 2 || bytes == 4); 1953 1954 coff = cfgoff + (port - CONF1_DATA_PORT); 1955 if (cfgenable) { 1956 pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, 1957 eax); 1958 } else { 1959 /* Ignore accesses to cfgdata if not enabled by cfgaddr */ 1960 if (in) 1961 *eax = 0xffffffff; 1962 } 1963 return (0); 1964 } 1965 1966 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 1967 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 1968 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 1969 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 1970 1971 #ifdef BHYVE_SNAPSHOT 1972 /* 1973 * Saves/restores PCI device emulated state. Returns 0 on success. 1974 */ 1975 static int 1976 pci_snapshot_pci_dev(struct vm_snapshot_meta *meta) 1977 { 1978 struct pci_devinst *pi; 1979 int i; 1980 int ret; 1981 1982 pi = meta->dev_data; 1983 1984 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done); 1985 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done); 1986 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done); 1987 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done); 1988 1989 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done); 1990 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done); 1991 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done); 1992 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done); 1993 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done); 1994 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done); 1995 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done); 1996 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done); 1997 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_page_offset, meta, ret, done); 1998 1999 SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata), 2000 meta, ret, done); 2001 2002 for (i = 0; i < nitems(pi->pi_bar); i++) { 2003 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done); 2004 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done); 2005 SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done); 2006 } 2007 2008 /* Restore MSI-X table. */ 2009 for (i = 0; i < pi->pi_msix.table_count; i++) { 2010 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr, 2011 meta, ret, done); 2012 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data, 2013 meta, ret, done); 2014 SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control, 2015 meta, ret, done); 2016 } 2017 2018 done: 2019 return (ret); 2020 } 2021 2022 static int 2023 pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde, 2024 struct pci_devinst **pdi) 2025 { 2026 struct businfo *bi; 2027 struct slotinfo *si; 2028 struct funcinfo *fi; 2029 int bus, slot, func; 2030 2031 assert(dev_name != NULL); 2032 assert(pde != NULL); 2033 assert(pdi != NULL); 2034 2035 for (bus = 0; bus < MAXBUSES; bus++) { 2036 if ((bi = pci_businfo[bus]) == NULL) 2037 continue; 2038 2039 for (slot = 0; slot < MAXSLOTS; slot++) { 2040 si = &bi->slotinfo[slot]; 2041 for (func = 0; func < MAXFUNCS; func++) { 2042 fi = &si->si_funcs[func]; 2043 if (fi->fi_name == NULL) 2044 continue; 2045 if (strcmp(dev_name, fi->fi_name)) 2046 continue; 2047 2048 *pde = pci_emul_finddev(fi->fi_name); 2049 assert(*pde != NULL); 2050 2051 *pdi = fi->fi_devi; 2052 return (0); 2053 } 2054 } 2055 } 2056 2057 return (EINVAL); 2058 } 2059 2060 int 2061 pci_snapshot(struct vm_snapshot_meta *meta) 2062 { 2063 struct pci_devemu *pde; 2064 struct pci_devinst *pdi; 2065 int ret; 2066 2067 assert(meta->dev_name != NULL); 2068 2069 ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi); 2070 if (ret != 0) { 2071 fprintf(stderr, "%s: no such name: %s\r\n", 2072 __func__, meta->dev_name); 2073 memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); 2074 return (0); 2075 } 2076 2077 meta->dev_data = pdi; 2078 2079 if (pde->pe_snapshot == NULL) { 2080 fprintf(stderr, "%s: not implemented yet for: %s\r\n", 2081 __func__, meta->dev_name); 2082 return (-1); 2083 } 2084 2085 ret = pci_snapshot_pci_dev(meta); 2086 if (ret != 0) { 2087 fprintf(stderr, "%s: failed to snapshot pci dev\r\n", 2088 __func__); 2089 return (-1); 2090 } 2091 2092 ret = (*pde->pe_snapshot)(meta); 2093 2094 return (ret); 2095 } 2096 2097 int 2098 pci_pause(struct vmctx *ctx, const char *dev_name) 2099 { 2100 struct pci_devemu *pde; 2101 struct pci_devinst *pdi; 2102 int ret; 2103 2104 assert(dev_name != NULL); 2105 2106 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2107 if (ret != 0) { 2108 /* 2109 * It is possible to call this function without 2110 * checking that the device is inserted first. 2111 */ 2112 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2113 return (0); 2114 } 2115 2116 if (pde->pe_pause == NULL) { 2117 /* The pause/resume functionality is optional. */ 2118 fprintf(stderr, "%s: not implemented for: %s\n", 2119 __func__, dev_name); 2120 return (0); 2121 } 2122 2123 return (*pde->pe_pause)(ctx, pdi); 2124 } 2125 2126 int 2127 pci_resume(struct vmctx *ctx, const char *dev_name) 2128 { 2129 struct pci_devemu *pde; 2130 struct pci_devinst *pdi; 2131 int ret; 2132 2133 assert(dev_name != NULL); 2134 2135 ret = pci_find_slotted_dev(dev_name, &pde, &pdi); 2136 if (ret != 0) { 2137 /* 2138 * It is possible to call this function without 2139 * checking that the device is inserted first. 2140 */ 2141 fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); 2142 return (0); 2143 } 2144 2145 if (pde->pe_resume == NULL) { 2146 /* The pause/resume functionality is optional. */ 2147 fprintf(stderr, "%s: not implemented for: %s\n", 2148 __func__, dev_name); 2149 return (0); 2150 } 2151 2152 return (*pde->pe_resume)(ctx, pdi); 2153 } 2154 #endif 2155 2156 #define PCI_EMUL_TEST 2157 #ifdef PCI_EMUL_TEST 2158 /* 2159 * Define a dummy test device 2160 */ 2161 #define DIOSZ 8 2162 #define DMEMSZ 4096 2163 struct pci_emul_dsoftc { 2164 uint8_t ioregs[DIOSZ]; 2165 uint8_t memregs[2][DMEMSZ]; 2166 }; 2167 2168 #define PCI_EMUL_MSI_MSGS 4 2169 #define PCI_EMUL_MSIX_MSGS 16 2170 2171 static int 2172 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 2173 { 2174 int error; 2175 struct pci_emul_dsoftc *sc; 2176 2177 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 2178 2179 pi->pi_arg = sc; 2180 2181 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 2182 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 2183 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 2184 2185 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 2186 assert(error == 0); 2187 2188 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 2189 assert(error == 0); 2190 2191 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 2192 assert(error == 0); 2193 2194 error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); 2195 assert(error == 0); 2196 2197 return (0); 2198 } 2199 2200 static void 2201 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2202 uint64_t offset, int size, uint64_t value) 2203 { 2204 int i; 2205 struct pci_emul_dsoftc *sc = pi->pi_arg; 2206 2207 if (baridx == 0) { 2208 if (offset + size > DIOSZ) { 2209 printf("diow: iow too large, offset %ld size %d\n", 2210 offset, size); 2211 return; 2212 } 2213 2214 if (size == 1) { 2215 sc->ioregs[offset] = value & 0xff; 2216 } else if (size == 2) { 2217 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 2218 } else if (size == 4) { 2219 *(uint32_t *)&sc->ioregs[offset] = value; 2220 } else { 2221 printf("diow: iow unknown size %d\n", size); 2222 } 2223 2224 /* 2225 * Special magic value to generate an interrupt 2226 */ 2227 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 2228 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 2229 2230 if (value == 0xabcdef) { 2231 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 2232 pci_generate_msi(pi, i); 2233 } 2234 } 2235 2236 if (baridx == 1 || baridx == 2) { 2237 if (offset + size > DMEMSZ) { 2238 printf("diow: memw too large, offset %ld size %d\n", 2239 offset, size); 2240 return; 2241 } 2242 2243 i = baridx - 1; /* 'memregs' index */ 2244 2245 if (size == 1) { 2246 sc->memregs[i][offset] = value; 2247 } else if (size == 2) { 2248 *(uint16_t *)&sc->memregs[i][offset] = value; 2249 } else if (size == 4) { 2250 *(uint32_t *)&sc->memregs[i][offset] = value; 2251 } else if (size == 8) { 2252 *(uint64_t *)&sc->memregs[i][offset] = value; 2253 } else { 2254 printf("diow: memw unknown size %d\n", size); 2255 } 2256 2257 /* 2258 * magic interrupt ?? 2259 */ 2260 } 2261 2262 if (baridx > 2 || baridx < 0) { 2263 printf("diow: unknown bar idx %d\n", baridx); 2264 } 2265 } 2266 2267 static uint64_t 2268 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2269 uint64_t offset, int size) 2270 { 2271 struct pci_emul_dsoftc *sc = pi->pi_arg; 2272 uint32_t value; 2273 int i; 2274 2275 if (baridx == 0) { 2276 if (offset + size > DIOSZ) { 2277 printf("dior: ior too large, offset %ld size %d\n", 2278 offset, size); 2279 return (0); 2280 } 2281 2282 value = 0; 2283 if (size == 1) { 2284 value = sc->ioregs[offset]; 2285 } else if (size == 2) { 2286 value = *(uint16_t *) &sc->ioregs[offset]; 2287 } else if (size == 4) { 2288 value = *(uint32_t *) &sc->ioregs[offset]; 2289 } else { 2290 printf("dior: ior unknown size %d\n", size); 2291 } 2292 } 2293 2294 if (baridx == 1 || baridx == 2) { 2295 if (offset + size > DMEMSZ) { 2296 printf("dior: memr too large, offset %ld size %d\n", 2297 offset, size); 2298 return (0); 2299 } 2300 2301 i = baridx - 1; /* 'memregs' index */ 2302 2303 if (size == 1) { 2304 value = sc->memregs[i][offset]; 2305 } else if (size == 2) { 2306 value = *(uint16_t *) &sc->memregs[i][offset]; 2307 } else if (size == 4) { 2308 value = *(uint32_t *) &sc->memregs[i][offset]; 2309 } else if (size == 8) { 2310 value = *(uint64_t *) &sc->memregs[i][offset]; 2311 } else { 2312 printf("dior: ior unknown size %d\n", size); 2313 } 2314 } 2315 2316 2317 if (baridx > 2 || baridx < 0) { 2318 printf("dior: unknown bar idx %d\n", baridx); 2319 return (0); 2320 } 2321 2322 return (value); 2323 } 2324 2325 #ifdef BHYVE_SNAPSHOT 2326 int 2327 pci_emul_snapshot(struct vm_snapshot_meta *meta) 2328 { 2329 2330 return (0); 2331 } 2332 #endif 2333 2334 struct pci_devemu pci_dummy = { 2335 .pe_emu = "dummy", 2336 .pe_init = pci_emul_dinit, 2337 .pe_barwrite = pci_emul_diow, 2338 .pe_barread = pci_emul_dior, 2339 #ifdef BHYVE_SNAPSHOT 2340 .pe_snapshot = pci_emul_snapshot, 2341 #endif 2342 }; 2343 PCI_EMUL_SET(pci_dummy); 2344 2345 #endif /* PCI_EMUL_TEST */ 2346