1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/linker_set.h> 34 #include <sys/errno.h> 35 36 #include <ctype.h> 37 #include <pthread.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <strings.h> 42 #include <assert.h> 43 #include <stdbool.h> 44 45 #include <machine/vmm.h> 46 #include <vmmapi.h> 47 48 #include "acpi.h" 49 #include "bhyverun.h" 50 #include "inout.h" 51 #include "ioapic.h" 52 #include "mem.h" 53 #include "pci_emul.h" 54 #include "pci_irq.h" 55 #include "pci_lpc.h" 56 57 #define CONF1_ADDR_PORT 0x0cf8 58 #define CONF1_DATA_PORT 0x0cfc 59 60 #define CONF1_ENABLE 0x80000000ul 61 62 #define CFGWRITE(pi,off,val,b) \ 63 do { \ 64 if ((b) == 1) { \ 65 pci_set_cfgdata8((pi),(off),(val)); \ 66 } else if ((b) == 2) { \ 67 pci_set_cfgdata16((pi),(off),(val)); \ 68 } else { \ 69 pci_set_cfgdata32((pi),(off),(val)); \ 70 } \ 71 } while (0) 72 73 #define MAXBUSES (PCI_BUSMAX + 1) 74 #define MAXSLOTS (PCI_SLOTMAX + 1) 75 #define MAXFUNCS (PCI_FUNCMAX + 1) 76 77 struct funcinfo { 78 char *fi_name; 79 char *fi_param; 80 struct pci_devinst *fi_devi; 81 }; 82 83 struct intxinfo { 84 int ii_count; 85 int ii_pirq_pin; 86 int ii_ioapic_irq; 87 }; 88 89 struct slotinfo { 90 struct intxinfo si_intpins[4]; 91 struct funcinfo si_funcs[MAXFUNCS]; 92 }; 93 94 struct businfo { 95 uint16_t iobase, iolimit; /* I/O window */ 96 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 97 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 98 struct slotinfo slotinfo[MAXSLOTS]; 99 }; 100 101 static struct businfo *pci_businfo[MAXBUSES]; 102 103 SET_DECLARE(pci_devemu_set, struct pci_devemu); 104 105 static uint64_t pci_emul_iobase; 106 static uint64_t pci_emul_membase32; 107 static uint64_t pci_emul_membase64; 108 109 #define PCI_EMUL_IOBASE 0x2000 110 #define PCI_EMUL_IOLIMIT 0x10000 111 112 #define PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ 113 114 #define PCI_EMUL_MEMBASE64 0xD000000000UL 115 #define PCI_EMUL_MEMLIMIT64 0xFD00000000UL 116 117 static struct pci_devemu *pci_emul_finddev(char *name); 118 static void pci_lintr_route(struct pci_devinst *pi); 119 static void pci_lintr_update(struct pci_devinst *pi); 120 121 static struct mem_range pci_mem_hole; 122 123 /* 124 * I/O access 125 */ 126 127 /* 128 * Slot options are in the form: 129 * 130 * <bus>:<slot>:<func>,<emul>[,<config>] 131 * <slot>[:<func>],<emul>[,<config>] 132 * 133 * slot is 0..31 134 * func is 0..7 135 * emul is a string describing the type of PCI device e.g. virtio-net 136 * config is an optional string, depending on the device, that can be 137 * used for configuration. 138 * Examples are: 139 * 1,virtio-net,tap0 140 * 3:0,dummy 141 */ 142 static void 143 pci_parse_slot_usage(char *aopt) 144 { 145 146 fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt); 147 } 148 149 int 150 pci_parse_slot(char *opt) 151 { 152 struct businfo *bi; 153 struct slotinfo *si; 154 char *emul, *config, *str, *cp; 155 int error, bnum, snum, fnum; 156 157 error = -1; 158 str = strdup(opt); 159 160 emul = config = NULL; 161 if ((cp = strchr(str, ',')) != NULL) { 162 *cp = '\0'; 163 emul = cp + 1; 164 if ((cp = strchr(emul, ',')) != NULL) { 165 *cp = '\0'; 166 config = cp + 1; 167 } 168 } else { 169 pci_parse_slot_usage(opt); 170 goto done; 171 } 172 173 /* <bus>:<slot>:<func> */ 174 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 175 bnum = 0; 176 /* <slot>:<func> */ 177 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 178 fnum = 0; 179 /* <slot> */ 180 if (sscanf(str, "%d", &snum) != 1) { 181 snum = -1; 182 } 183 } 184 } 185 186 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 187 fnum < 0 || fnum >= MAXFUNCS) { 188 pci_parse_slot_usage(opt); 189 goto done; 190 } 191 192 if (pci_businfo[bnum] == NULL) 193 pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); 194 195 bi = pci_businfo[bnum]; 196 si = &bi->slotinfo[snum]; 197 198 if (si->si_funcs[fnum].fi_name != NULL) { 199 fprintf(stderr, "pci slot %d:%d already occupied!\n", 200 snum, fnum); 201 goto done; 202 } 203 204 if (pci_emul_finddev(emul) == NULL) { 205 fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n", 206 snum, fnum, emul); 207 goto done; 208 } 209 210 error = 0; 211 si->si_funcs[fnum].fi_name = emul; 212 si->si_funcs[fnum].fi_param = config; 213 214 done: 215 if (error) 216 free(str); 217 218 return (error); 219 } 220 221 static int 222 pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 223 { 224 225 if (offset < pi->pi_msix.pba_offset) 226 return (0); 227 228 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 229 return (0); 230 } 231 232 return (1); 233 } 234 235 int 236 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 237 uint64_t value) 238 { 239 int msix_entry_offset; 240 int tab_index; 241 char *dest; 242 243 /* support only 4 or 8 byte writes */ 244 if (size != 4 && size != 8) 245 return (-1); 246 247 /* 248 * Return if table index is beyond what device supports 249 */ 250 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 251 if (tab_index >= pi->pi_msix.table_count) 252 return (-1); 253 254 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 255 256 /* support only aligned writes */ 257 if ((msix_entry_offset % size) != 0) 258 return (-1); 259 260 dest = (char *)(pi->pi_msix.table + tab_index); 261 dest += msix_entry_offset; 262 263 if (size == 4) 264 *((uint32_t *)dest) = value; 265 else 266 *((uint64_t *)dest) = value; 267 268 return (0); 269 } 270 271 uint64_t 272 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 273 { 274 char *dest; 275 int msix_entry_offset; 276 int tab_index; 277 uint64_t retval = ~0; 278 279 /* 280 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 281 * table but we also allow 1 byte access to accomodate reads from 282 * ddb. 283 */ 284 if (size != 1 && size != 4 && size != 8) 285 return (retval); 286 287 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 288 289 /* support only aligned reads */ 290 if ((msix_entry_offset % size) != 0) { 291 return (retval); 292 } 293 294 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 295 296 if (tab_index < pi->pi_msix.table_count) { 297 /* valid MSI-X Table access */ 298 dest = (char *)(pi->pi_msix.table + tab_index); 299 dest += msix_entry_offset; 300 301 if (size == 1) 302 retval = *((uint8_t *)dest); 303 else if (size == 4) 304 retval = *((uint32_t *)dest); 305 else 306 retval = *((uint64_t *)dest); 307 } else if (pci_valid_pba_offset(pi, offset)) { 308 /* return 0 for PBA access */ 309 retval = 0; 310 } 311 312 return (retval); 313 } 314 315 int 316 pci_msix_table_bar(struct pci_devinst *pi) 317 { 318 319 if (pi->pi_msix.table != NULL) 320 return (pi->pi_msix.table_bar); 321 else 322 return (-1); 323 } 324 325 int 326 pci_msix_pba_bar(struct pci_devinst *pi) 327 { 328 329 if (pi->pi_msix.table != NULL) 330 return (pi->pi_msix.pba_bar); 331 else 332 return (-1); 333 } 334 335 static int 336 pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 337 uint32_t *eax, void *arg) 338 { 339 struct pci_devinst *pdi = arg; 340 struct pci_devemu *pe = pdi->pi_d; 341 uint64_t offset; 342 int i; 343 344 for (i = 0; i <= PCI_BARMAX; i++) { 345 if (pdi->pi_bar[i].type == PCIBAR_IO && 346 port >= pdi->pi_bar[i].addr && 347 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 348 offset = port - pdi->pi_bar[i].addr; 349 if (in) 350 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 351 offset, bytes); 352 else 353 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 354 bytes, *eax); 355 return (0); 356 } 357 } 358 return (-1); 359 } 360 361 static int 362 pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 363 int size, uint64_t *val, void *arg1, long arg2) 364 { 365 struct pci_devinst *pdi = arg1; 366 struct pci_devemu *pe = pdi->pi_d; 367 uint64_t offset; 368 int bidx = (int) arg2; 369 370 assert(bidx <= PCI_BARMAX); 371 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 372 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 373 assert(addr >= pdi->pi_bar[bidx].addr && 374 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 375 376 offset = addr - pdi->pi_bar[bidx].addr; 377 378 if (dir == MEM_F_WRITE) { 379 if (size == 8) { 380 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 381 4, *val & 0xffffffff); 382 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 383 4, *val >> 32); 384 } else { 385 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 386 size, *val); 387 } 388 } else { 389 if (size == 8) { 390 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 391 offset, 4); 392 *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 393 offset + 4, 4) << 32; 394 } else { 395 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, 396 offset, size); 397 } 398 } 399 400 return (0); 401 } 402 403 404 static int 405 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 406 uint64_t *addr) 407 { 408 uint64_t base; 409 410 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 411 412 base = roundup2(*baseptr, size); 413 414 if (base + size <= limit) { 415 *addr = base; 416 *baseptr = base + size; 417 return (0); 418 } else 419 return (-1); 420 } 421 422 int 423 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 424 uint64_t size) 425 { 426 427 return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); 428 } 429 430 /* 431 * Register (or unregister) the MMIO or I/O region associated with the BAR 432 * register 'idx' of an emulated pci device. 433 */ 434 static void 435 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 436 { 437 int error; 438 struct inout_port iop; 439 struct mem_range mr; 440 441 switch (pi->pi_bar[idx].type) { 442 case PCIBAR_IO: 443 bzero(&iop, sizeof(struct inout_port)); 444 iop.name = pi->pi_name; 445 iop.port = pi->pi_bar[idx].addr; 446 iop.size = pi->pi_bar[idx].size; 447 if (registration) { 448 iop.flags = IOPORT_F_INOUT; 449 iop.handler = pci_emul_io_handler; 450 iop.arg = pi; 451 error = register_inout(&iop); 452 } else 453 error = unregister_inout(&iop); 454 break; 455 case PCIBAR_MEM32: 456 case PCIBAR_MEM64: 457 bzero(&mr, sizeof(struct mem_range)); 458 mr.name = pi->pi_name; 459 mr.base = pi->pi_bar[idx].addr; 460 mr.size = pi->pi_bar[idx].size; 461 if (registration) { 462 mr.flags = MEM_F_RW; 463 mr.handler = pci_emul_mem_handler; 464 mr.arg1 = pi; 465 mr.arg2 = idx; 466 error = register_mem(&mr); 467 } else 468 error = unregister_mem(&mr); 469 break; 470 default: 471 error = EINVAL; 472 break; 473 } 474 assert(error == 0); 475 } 476 477 static void 478 unregister_bar(struct pci_devinst *pi, int idx) 479 { 480 481 modify_bar_registration(pi, idx, 0); 482 } 483 484 static void 485 register_bar(struct pci_devinst *pi, int idx) 486 { 487 488 modify_bar_registration(pi, idx, 1); 489 } 490 491 /* Are we decoding i/o port accesses for the emulated pci device? */ 492 static int 493 porten(struct pci_devinst *pi) 494 { 495 uint16_t cmd; 496 497 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 498 499 return (cmd & PCIM_CMD_PORTEN); 500 } 501 502 /* Are we decoding memory accesses for the emulated pci device? */ 503 static int 504 memen(struct pci_devinst *pi) 505 { 506 uint16_t cmd; 507 508 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 509 510 return (cmd & PCIM_CMD_MEMEN); 511 } 512 513 /* 514 * Update the MMIO or I/O address that is decoded by the BAR register. 515 * 516 * If the pci device has enabled the address space decoding then intercept 517 * the address range decoded by the BAR register. 518 */ 519 static void 520 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 521 { 522 int decode; 523 524 if (pi->pi_bar[idx].type == PCIBAR_IO) 525 decode = porten(pi); 526 else 527 decode = memen(pi); 528 529 if (decode) 530 unregister_bar(pi, idx); 531 532 switch (type) { 533 case PCIBAR_IO: 534 case PCIBAR_MEM32: 535 pi->pi_bar[idx].addr = addr; 536 break; 537 case PCIBAR_MEM64: 538 pi->pi_bar[idx].addr &= ~0xffffffffUL; 539 pi->pi_bar[idx].addr |= addr; 540 break; 541 case PCIBAR_MEMHI64: 542 pi->pi_bar[idx].addr &= 0xffffffff; 543 pi->pi_bar[idx].addr |= addr; 544 break; 545 default: 546 assert(0); 547 } 548 549 if (decode) 550 register_bar(pi, idx); 551 } 552 553 int 554 pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, 555 enum pcibar_type type, uint64_t size) 556 { 557 int error; 558 uint64_t *baseptr, limit, addr, mask, lobits, bar; 559 560 assert(idx >= 0 && idx <= PCI_BARMAX); 561 562 if ((size & (size - 1)) != 0) 563 size = 1UL << flsl(size); /* round up to a power of 2 */ 564 565 /* Enforce minimum BAR sizes required by the PCI standard */ 566 if (type == PCIBAR_IO) { 567 if (size < 4) 568 size = 4; 569 } else { 570 if (size < 16) 571 size = 16; 572 } 573 574 switch (type) { 575 case PCIBAR_NONE: 576 baseptr = NULL; 577 addr = mask = lobits = 0; 578 break; 579 case PCIBAR_IO: 580 baseptr = &pci_emul_iobase; 581 limit = PCI_EMUL_IOLIMIT; 582 mask = PCIM_BAR_IO_BASE; 583 lobits = PCIM_BAR_IO_SPACE; 584 break; 585 case PCIBAR_MEM64: 586 /* 587 * XXX 588 * Some drivers do not work well if the 64-bit BAR is allocated 589 * above 4GB. Allow for this by allocating small requests under 590 * 4GB unless then allocation size is larger than some arbitrary 591 * number (32MB currently). 592 */ 593 if (size > 32 * 1024 * 1024) { 594 /* 595 * XXX special case for device requiring peer-peer DMA 596 */ 597 if (size == 0x100000000UL) 598 baseptr = &hostbase; 599 else 600 baseptr = &pci_emul_membase64; 601 limit = PCI_EMUL_MEMLIMIT64; 602 mask = PCIM_BAR_MEM_BASE; 603 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 604 PCIM_BAR_MEM_PREFETCH; 605 break; 606 } else { 607 baseptr = &pci_emul_membase32; 608 limit = PCI_EMUL_MEMLIMIT32; 609 mask = PCIM_BAR_MEM_BASE; 610 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 611 } 612 break; 613 case PCIBAR_MEM32: 614 baseptr = &pci_emul_membase32; 615 limit = PCI_EMUL_MEMLIMIT32; 616 mask = PCIM_BAR_MEM_BASE; 617 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 618 break; 619 default: 620 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 621 assert(0); 622 } 623 624 if (baseptr != NULL) { 625 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 626 if (error != 0) 627 return (error); 628 } 629 630 pdi->pi_bar[idx].type = type; 631 pdi->pi_bar[idx].addr = addr; 632 pdi->pi_bar[idx].size = size; 633 634 /* Initialize the BAR register in config space */ 635 bar = (addr & mask) | lobits; 636 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 637 638 if (type == PCIBAR_MEM64) { 639 assert(idx + 1 <= PCI_BARMAX); 640 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 641 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 642 } 643 644 register_bar(pdi, idx); 645 646 return (0); 647 } 648 649 #define CAP_START_OFFSET 0x40 650 static int 651 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 652 { 653 int i, capoff, reallen; 654 uint16_t sts; 655 656 assert(caplen > 0); 657 658 reallen = roundup2(caplen, 4); /* dword aligned */ 659 660 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 661 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 662 capoff = CAP_START_OFFSET; 663 else 664 capoff = pi->pi_capend + 1; 665 666 /* Check if we have enough space */ 667 if (capoff + reallen > PCI_REGMAX + 1) 668 return (-1); 669 670 /* Set the previous capability pointer */ 671 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 672 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 673 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 674 } else 675 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 676 677 /* Copy the capability */ 678 for (i = 0; i < caplen; i++) 679 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 680 681 /* Set the next capability pointer */ 682 pci_set_cfgdata8(pi, capoff + 1, 0); 683 684 pi->pi_prevcap = capoff; 685 pi->pi_capend = capoff + reallen - 1; 686 return (0); 687 } 688 689 static struct pci_devemu * 690 pci_emul_finddev(char *name) 691 { 692 struct pci_devemu **pdpp, *pdp; 693 694 SET_FOREACH(pdpp, pci_devemu_set) { 695 pdp = *pdpp; 696 if (!strcmp(pdp->pe_emu, name)) { 697 return (pdp); 698 } 699 } 700 701 return (NULL); 702 } 703 704 static int 705 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 706 int func, struct funcinfo *fi) 707 { 708 struct pci_devinst *pdi; 709 int err; 710 711 pdi = calloc(1, sizeof(struct pci_devinst)); 712 713 pdi->pi_vmctx = ctx; 714 pdi->pi_bus = bus; 715 pdi->pi_slot = slot; 716 pdi->pi_func = func; 717 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 718 pdi->pi_lintr.pin = 0; 719 pdi->pi_lintr.state = IDLE; 720 pdi->pi_lintr.pirq_pin = 0; 721 pdi->pi_lintr.ioapic_irq = 0; 722 pdi->pi_d = pde; 723 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 724 725 /* Disable legacy interrupts */ 726 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 727 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 728 729 pci_set_cfgdata8(pdi, PCIR_COMMAND, 730 PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); 731 732 err = (*pde->pe_init)(ctx, pdi, fi->fi_param); 733 if (err == 0) 734 fi->fi_devi = pdi; 735 else 736 free(pdi); 737 738 return (err); 739 } 740 741 void 742 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 743 { 744 int mmc; 745 746 CTASSERT(sizeof(struct msicap) == 14); 747 748 /* Number of msi messages must be a power of 2 between 1 and 32 */ 749 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 750 mmc = ffs(msgnum) - 1; 751 752 bzero(msicap, sizeof(struct msicap)); 753 msicap->capid = PCIY_MSI; 754 msicap->nextptr = nextptr; 755 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 756 } 757 758 int 759 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 760 { 761 struct msicap msicap; 762 763 pci_populate_msicap(&msicap, msgnum, 0); 764 765 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 766 } 767 768 static void 769 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 770 uint32_t msix_tab_size) 771 { 772 CTASSERT(sizeof(struct msixcap) == 12); 773 774 assert(msix_tab_size % 4096 == 0); 775 776 bzero(msixcap, sizeof(struct msixcap)); 777 msixcap->capid = PCIY_MSIX; 778 779 /* 780 * Message Control Register, all fields set to 781 * zero except for the Table Size. 782 * Note: Table size N is encoded as N-1 783 */ 784 msixcap->msgctrl = msgnum - 1; 785 786 /* 787 * MSI-X BAR setup: 788 * - MSI-X table start at offset 0 789 * - PBA table starts at a 4K aligned offset after the MSI-X table 790 */ 791 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 792 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 793 } 794 795 static void 796 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 797 { 798 int i, table_size; 799 800 assert(table_entries > 0); 801 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 802 803 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 804 pi->pi_msix.table = calloc(1, table_size); 805 806 /* set mask bit of vector control register */ 807 for (i = 0; i < table_entries; i++) 808 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 809 } 810 811 int 812 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 813 { 814 uint32_t tab_size; 815 struct msixcap msixcap; 816 817 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 818 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 819 820 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 821 822 /* Align table size to nearest 4K */ 823 tab_size = roundup2(tab_size, 4096); 824 825 pi->pi_msix.table_bar = barnum; 826 pi->pi_msix.pba_bar = barnum; 827 pi->pi_msix.table_offset = 0; 828 pi->pi_msix.table_count = msgnum; 829 pi->pi_msix.pba_offset = tab_size; 830 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 831 832 pci_msix_table_init(pi, msgnum); 833 834 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 835 836 /* allocate memory for MSI-X Table and PBA */ 837 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 838 tab_size + pi->pi_msix.pba_size); 839 840 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 841 sizeof(msixcap))); 842 } 843 844 void 845 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 846 int bytes, uint32_t val) 847 { 848 uint16_t msgctrl, rwmask; 849 int off, table_bar; 850 851 off = offset - capoff; 852 table_bar = pi->pi_msix.table_bar; 853 /* Message Control Register */ 854 if (off == 2 && bytes == 2) { 855 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 856 msgctrl = pci_get_cfgdata16(pi, offset); 857 msgctrl &= ~rwmask; 858 msgctrl |= val & rwmask; 859 val = msgctrl; 860 861 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 862 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 863 pci_lintr_update(pi); 864 } 865 866 CFGWRITE(pi, offset, val, bytes); 867 } 868 869 void 870 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 871 int bytes, uint32_t val) 872 { 873 uint16_t msgctrl, rwmask, msgdata, mme; 874 uint32_t addrlo; 875 876 /* 877 * If guest is writing to the message control register make sure 878 * we do not overwrite read-only fields. 879 */ 880 if ((offset - capoff) == 2 && bytes == 2) { 881 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 882 msgctrl = pci_get_cfgdata16(pi, offset); 883 msgctrl &= ~rwmask; 884 msgctrl |= val & rwmask; 885 val = msgctrl; 886 887 addrlo = pci_get_cfgdata32(pi, capoff + 4); 888 if (msgctrl & PCIM_MSICTRL_64BIT) 889 msgdata = pci_get_cfgdata16(pi, capoff + 12); 890 else 891 msgdata = pci_get_cfgdata16(pi, capoff + 8); 892 893 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 894 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 895 if (pi->pi_msi.enabled) { 896 pi->pi_msi.addr = addrlo; 897 pi->pi_msi.msg_data = msgdata; 898 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 899 } else { 900 pi->pi_msi.maxmsgnum = 0; 901 } 902 pci_lintr_update(pi); 903 } 904 905 CFGWRITE(pi, offset, val, bytes); 906 } 907 908 void 909 pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 910 int bytes, uint32_t val) 911 { 912 913 /* XXX don't write to the readonly parts */ 914 CFGWRITE(pi, offset, val, bytes); 915 } 916 917 #define PCIECAP_VERSION 0x2 918 int 919 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 920 { 921 int err; 922 struct pciecap pciecap; 923 924 CTASSERT(sizeof(struct pciecap) == 60); 925 926 if (type != PCIEM_TYPE_ROOT_PORT) 927 return (-1); 928 929 bzero(&pciecap, sizeof(pciecap)); 930 931 pciecap.capid = PCIY_EXPRESS; 932 pciecap.pcie_capabilities = PCIECAP_VERSION | PCIEM_TYPE_ROOT_PORT; 933 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 934 pciecap.link_status = 0x11; /* gen1, x1 */ 935 936 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 937 return (err); 938 } 939 940 /* 941 * This function assumes that 'coff' is in the capabilities region of the 942 * config space. 943 */ 944 static void 945 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) 946 { 947 int capid; 948 uint8_t capoff, nextoff; 949 950 /* Do not allow un-aligned writes */ 951 if ((offset & (bytes - 1)) != 0) 952 return; 953 954 /* Find the capability that we want to update */ 955 capoff = CAP_START_OFFSET; 956 while (1) { 957 nextoff = pci_get_cfgdata8(pi, capoff + 1); 958 if (nextoff == 0) 959 break; 960 if (offset >= capoff && offset < nextoff) 961 break; 962 963 capoff = nextoff; 964 } 965 assert(offset >= capoff); 966 967 /* 968 * Capability ID and Next Capability Pointer are readonly. 969 * However, some o/s's do 4-byte writes that include these. 970 * For this case, trim the write back to 2 bytes and adjust 971 * the data. 972 */ 973 if (offset == capoff || offset == capoff + 1) { 974 if (offset == capoff && bytes == 4) { 975 bytes = 2; 976 offset += 2; 977 val >>= 16; 978 } else 979 return; 980 } 981 982 capid = pci_get_cfgdata8(pi, capoff); 983 switch (capid) { 984 case PCIY_MSI: 985 msicap_cfgwrite(pi, capoff, offset, bytes, val); 986 break; 987 case PCIY_MSIX: 988 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 989 break; 990 case PCIY_EXPRESS: 991 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 992 break; 993 default: 994 break; 995 } 996 } 997 998 static int 999 pci_emul_iscap(struct pci_devinst *pi, int offset) 1000 { 1001 uint16_t sts; 1002 1003 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 1004 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 1005 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 1006 return (1); 1007 } 1008 return (0); 1009 } 1010 1011 static int 1012 pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 1013 int size, uint64_t *val, void *arg1, long arg2) 1014 { 1015 /* 1016 * Ignore writes; return 0xff's for reads. The mem read code 1017 * will take care of truncating to the correct size. 1018 */ 1019 if (dir == MEM_F_READ) { 1020 *val = 0xffffffffffffffff; 1021 } 1022 1023 return (0); 1024 } 1025 1026 #define BUSIO_ROUNDUP 32 1027 #define BUSMEM_ROUNDUP (1024 * 1024) 1028 1029 int 1030 init_pci(struct vmctx *ctx) 1031 { 1032 struct pci_devemu *pde; 1033 struct businfo *bi; 1034 struct slotinfo *si; 1035 struct funcinfo *fi; 1036 size_t lowmem; 1037 int bus, slot, func; 1038 int error; 1039 1040 pci_emul_iobase = PCI_EMUL_IOBASE; 1041 pci_emul_membase32 = vm_get_lowmem_limit(ctx); 1042 pci_emul_membase64 = PCI_EMUL_MEMBASE64; 1043 1044 for (bus = 0; bus < MAXBUSES; bus++) { 1045 if ((bi = pci_businfo[bus]) == NULL) 1046 continue; 1047 /* 1048 * Keep track of the i/o and memory resources allocated to 1049 * this bus. 1050 */ 1051 bi->iobase = pci_emul_iobase; 1052 bi->membase32 = pci_emul_membase32; 1053 bi->membase64 = pci_emul_membase64; 1054 1055 for (slot = 0; slot < MAXSLOTS; slot++) { 1056 si = &bi->slotinfo[slot]; 1057 for (func = 0; func < MAXFUNCS; func++) { 1058 fi = &si->si_funcs[func]; 1059 if (fi->fi_name == NULL) 1060 continue; 1061 pde = pci_emul_finddev(fi->fi_name); 1062 assert(pde != NULL); 1063 error = pci_emul_init(ctx, pde, bus, slot, 1064 func, fi); 1065 if (error) 1066 return (error); 1067 } 1068 } 1069 1070 /* 1071 * Add some slop to the I/O and memory resources decoded by 1072 * this bus to give a guest some flexibility if it wants to 1073 * reprogram the BARs. 1074 */ 1075 pci_emul_iobase += BUSIO_ROUNDUP; 1076 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1077 bi->iolimit = pci_emul_iobase; 1078 1079 pci_emul_membase32 += BUSMEM_ROUNDUP; 1080 pci_emul_membase32 = roundup2(pci_emul_membase32, 1081 BUSMEM_ROUNDUP); 1082 bi->memlimit32 = pci_emul_membase32; 1083 1084 pci_emul_membase64 += BUSMEM_ROUNDUP; 1085 pci_emul_membase64 = roundup2(pci_emul_membase64, 1086 BUSMEM_ROUNDUP); 1087 bi->memlimit64 = pci_emul_membase64; 1088 } 1089 1090 /* 1091 * PCI backends are initialized before routing INTx interrupts 1092 * so that LPC devices are able to reserve ISA IRQs before 1093 * routing PIRQ pins. 1094 */ 1095 for (bus = 0; bus < MAXBUSES; bus++) { 1096 if ((bi = pci_businfo[bus]) == NULL) 1097 continue; 1098 1099 for (slot = 0; slot < MAXSLOTS; slot++) { 1100 si = &bi->slotinfo[slot]; 1101 for (func = 0; func < MAXFUNCS; func++) { 1102 fi = &si->si_funcs[func]; 1103 if (fi->fi_devi == NULL) 1104 continue; 1105 pci_lintr_route(fi->fi_devi); 1106 } 1107 } 1108 } 1109 lpc_pirq_routed(); 1110 1111 /* 1112 * The guest physical memory map looks like the following: 1113 * [0, lowmem) guest system memory 1114 * [lowmem, lowmem_limit) memory hole (may be absent) 1115 * [lowmem_limit, 4GB) PCI hole (32-bit BAR allocation) 1116 * [4GB, 4GB + highmem) 1117 * 1118 * Accesses to memory addresses that are not allocated to system 1119 * memory or PCI devices return 0xff's. 1120 */ 1121 lowmem = vm_get_lowmem_size(ctx); 1122 1123 memset(&pci_mem_hole, 0, sizeof(struct mem_range)); 1124 pci_mem_hole.name = "PCI hole"; 1125 pci_mem_hole.flags = MEM_F_RW; 1126 pci_mem_hole.base = lowmem; 1127 pci_mem_hole.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1128 pci_mem_hole.handler = pci_emul_fallback_handler; 1129 1130 error = register_mem_fallback(&pci_mem_hole); 1131 assert(error == 0); 1132 1133 return (0); 1134 } 1135 1136 static void 1137 pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1138 void *arg) 1139 { 1140 1141 dsdt_line(" Package ()"); 1142 dsdt_line(" {"); 1143 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1144 dsdt_line(" 0x%02X,", pin - 1); 1145 dsdt_line(" Zero,"); 1146 dsdt_line(" 0x%X", ioapic_irq); 1147 dsdt_line(" },"); 1148 } 1149 1150 static void 1151 pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, 1152 void *arg) 1153 { 1154 char *name; 1155 1156 name = lpc_pirq_name(pirq_pin); 1157 if (name == NULL) 1158 return; 1159 dsdt_line(" Package ()"); 1160 dsdt_line(" {"); 1161 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1162 dsdt_line(" 0x%02X,", pin - 1); 1163 dsdt_line(" %s,", name); 1164 dsdt_line(" 0x00"); 1165 dsdt_line(" },"); 1166 free(name); 1167 } 1168 1169 /* 1170 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1171 * corresponding to each PCI bus. 1172 */ 1173 static void 1174 pci_bus_write_dsdt(int bus) 1175 { 1176 struct businfo *bi; 1177 struct slotinfo *si; 1178 struct pci_devinst *pi; 1179 int count, func, slot; 1180 1181 /* 1182 * If there are no devices on this 'bus' then just return. 1183 */ 1184 if ((bi = pci_businfo[bus]) == NULL) { 1185 /* 1186 * Bus 0 is special because it decodes the I/O ports used 1187 * for PCI config space access even if there are no devices 1188 * on it. 1189 */ 1190 if (bus != 0) 1191 return; 1192 } 1193 1194 dsdt_line(" Device (PC%02X)", bus); 1195 dsdt_line(" {"); 1196 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1197 dsdt_line(" Name (_ADR, Zero)"); 1198 1199 dsdt_line(" Method (_BBN, 0, NotSerialized)"); 1200 dsdt_line(" {"); 1201 dsdt_line(" Return (0x%08X)", bus); 1202 dsdt_line(" }"); 1203 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1204 dsdt_line(" {"); 1205 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1206 "MaxFixed, PosDecode,"); 1207 dsdt_line(" 0x0000, // Granularity"); 1208 dsdt_line(" 0x%04X, // Range Minimum", bus); 1209 dsdt_line(" 0x%04X, // Range Maximum", bus); 1210 dsdt_line(" 0x0000, // Translation Offset"); 1211 dsdt_line(" 0x0001, // Length"); 1212 dsdt_line(" ,, )"); 1213 1214 if (bus == 0) { 1215 dsdt_indent(3); 1216 dsdt_fixed_ioport(0xCF8, 8); 1217 dsdt_unindent(3); 1218 1219 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1220 "PosDecode, EntireRange,"); 1221 dsdt_line(" 0x0000, // Granularity"); 1222 dsdt_line(" 0x0000, // Range Minimum"); 1223 dsdt_line(" 0x0CF7, // Range Maximum"); 1224 dsdt_line(" 0x0000, // Translation Offset"); 1225 dsdt_line(" 0x0CF8, // Length"); 1226 dsdt_line(" ,, , TypeStatic)"); 1227 1228 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1229 "PosDecode, EntireRange,"); 1230 dsdt_line(" 0x0000, // Granularity"); 1231 dsdt_line(" 0x0D00, // Range Minimum"); 1232 dsdt_line(" 0x%04X, // Range Maximum", 1233 PCI_EMUL_IOBASE - 1); 1234 dsdt_line(" 0x0000, // Translation Offset"); 1235 dsdt_line(" 0x%04X, // Length", 1236 PCI_EMUL_IOBASE - 0x0D00); 1237 dsdt_line(" ,, , TypeStatic)"); 1238 1239 if (bi == NULL) { 1240 dsdt_line(" })"); 1241 goto done; 1242 } 1243 } 1244 assert(bi != NULL); 1245 1246 /* i/o window */ 1247 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1248 "PosDecode, EntireRange,"); 1249 dsdt_line(" 0x0000, // Granularity"); 1250 dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); 1251 dsdt_line(" 0x%04X, // Range Maximum", 1252 bi->iolimit - 1); 1253 dsdt_line(" 0x0000, // Translation Offset"); 1254 dsdt_line(" 0x%04X, // Length", 1255 bi->iolimit - bi->iobase); 1256 dsdt_line(" ,, , TypeStatic)"); 1257 1258 /* mmio window (32-bit) */ 1259 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1260 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1261 dsdt_line(" 0x00000000, // Granularity"); 1262 dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); 1263 dsdt_line(" 0x%08X, // Range Maximum\n", 1264 bi->memlimit32 - 1); 1265 dsdt_line(" 0x00000000, // Translation Offset"); 1266 dsdt_line(" 0x%08X, // Length\n", 1267 bi->memlimit32 - bi->membase32); 1268 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1269 1270 /* mmio window (64-bit) */ 1271 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1272 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1273 dsdt_line(" 0x0000000000000000, // Granularity"); 1274 dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); 1275 dsdt_line(" 0x%016lX, // Range Maximum\n", 1276 bi->memlimit64 - 1); 1277 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1278 dsdt_line(" 0x%016lX, // Length\n", 1279 bi->memlimit64 - bi->membase64); 1280 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1281 dsdt_line(" })"); 1282 1283 count = pci_count_lintr(bus); 1284 if (count != 0) { 1285 dsdt_indent(2); 1286 dsdt_line("Name (PPRT, Package ()"); 1287 dsdt_line("{"); 1288 pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); 1289 dsdt_line("})"); 1290 dsdt_line("Name (APRT, Package ()"); 1291 dsdt_line("{"); 1292 pci_walk_lintr(bus, pci_apic_prt_entry, NULL); 1293 dsdt_line("})"); 1294 dsdt_line("Method (_PRT, 0, NotSerialized)"); 1295 dsdt_line("{"); 1296 dsdt_line(" If (PICM)"); 1297 dsdt_line(" {"); 1298 dsdt_line(" Return (APRT)"); 1299 dsdt_line(" }"); 1300 dsdt_line(" Else"); 1301 dsdt_line(" {"); 1302 dsdt_line(" Return (PPRT)"); 1303 dsdt_line(" }"); 1304 dsdt_line("}"); 1305 dsdt_unindent(2); 1306 } 1307 1308 dsdt_indent(2); 1309 for (slot = 0; slot < MAXSLOTS; slot++) { 1310 si = &bi->slotinfo[slot]; 1311 for (func = 0; func < MAXFUNCS; func++) { 1312 pi = si->si_funcs[func].fi_devi; 1313 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1314 pi->pi_d->pe_write_dsdt(pi); 1315 } 1316 } 1317 dsdt_unindent(2); 1318 done: 1319 dsdt_line(" }"); 1320 } 1321 1322 void 1323 pci_write_dsdt(void) 1324 { 1325 int bus; 1326 1327 dsdt_indent(1); 1328 dsdt_line("Name (PICM, 0x00)"); 1329 dsdt_line("Method (_PIC, 1, NotSerialized)"); 1330 dsdt_line("{"); 1331 dsdt_line(" Store (Arg0, PICM)"); 1332 dsdt_line("}"); 1333 dsdt_line(""); 1334 dsdt_line("Scope (_SB)"); 1335 dsdt_line("{"); 1336 for (bus = 0; bus < MAXBUSES; bus++) 1337 pci_bus_write_dsdt(bus); 1338 dsdt_line("}"); 1339 dsdt_unindent(1); 1340 } 1341 1342 int 1343 pci_bus_configured(int bus) 1344 { 1345 assert(bus >= 0 && bus < MAXBUSES); 1346 return (pci_businfo[bus] != NULL); 1347 } 1348 1349 int 1350 pci_msi_enabled(struct pci_devinst *pi) 1351 { 1352 return (pi->pi_msi.enabled); 1353 } 1354 1355 int 1356 pci_msi_maxmsgnum(struct pci_devinst *pi) 1357 { 1358 if (pi->pi_msi.enabled) 1359 return (pi->pi_msi.maxmsgnum); 1360 else 1361 return (0); 1362 } 1363 1364 int 1365 pci_msix_enabled(struct pci_devinst *pi) 1366 { 1367 1368 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1369 } 1370 1371 void 1372 pci_generate_msix(struct pci_devinst *pi, int index) 1373 { 1374 struct msix_table_entry *mte; 1375 1376 if (!pci_msix_enabled(pi)) 1377 return; 1378 1379 if (pi->pi_msix.function_mask) 1380 return; 1381 1382 if (index >= pi->pi_msix.table_count) 1383 return; 1384 1385 mte = &pi->pi_msix.table[index]; 1386 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1387 /* XXX Set PBA bit if interrupt is disabled */ 1388 vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); 1389 } 1390 } 1391 1392 void 1393 pci_generate_msi(struct pci_devinst *pi, int index) 1394 { 1395 1396 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1397 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1398 pi->pi_msi.msg_data + index); 1399 } 1400 } 1401 1402 static bool 1403 pci_lintr_permitted(struct pci_devinst *pi) 1404 { 1405 uint16_t cmd; 1406 1407 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1408 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1409 (cmd & PCIM_CMD_INTxDIS))); 1410 } 1411 1412 void 1413 pci_lintr_request(struct pci_devinst *pi) 1414 { 1415 struct businfo *bi; 1416 struct slotinfo *si; 1417 int bestpin, bestcount, pin; 1418 1419 bi = pci_businfo[pi->pi_bus]; 1420 assert(bi != NULL); 1421 1422 /* 1423 * Just allocate a pin from our slot. The pin will be 1424 * assigned IRQs later when interrupts are routed. 1425 */ 1426 si = &bi->slotinfo[pi->pi_slot]; 1427 bestpin = 0; 1428 bestcount = si->si_intpins[0].ii_count; 1429 for (pin = 1; pin < 4; pin++) { 1430 if (si->si_intpins[pin].ii_count < bestcount) { 1431 bestpin = pin; 1432 bestcount = si->si_intpins[pin].ii_count; 1433 } 1434 } 1435 1436 si->si_intpins[bestpin].ii_count++; 1437 pi->pi_lintr.pin = bestpin + 1; 1438 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1439 } 1440 1441 static void 1442 pci_lintr_route(struct pci_devinst *pi) 1443 { 1444 struct businfo *bi; 1445 struct intxinfo *ii; 1446 1447 if (pi->pi_lintr.pin == 0) 1448 return; 1449 1450 bi = pci_businfo[pi->pi_bus]; 1451 assert(bi != NULL); 1452 ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; 1453 1454 /* 1455 * Attempt to allocate an I/O APIC pin for this intpin if one 1456 * is not yet assigned. 1457 */ 1458 if (ii->ii_ioapic_irq == 0) 1459 ii->ii_ioapic_irq = ioapic_pci_alloc_irq(); 1460 assert(ii->ii_ioapic_irq > 0); 1461 1462 /* 1463 * Attempt to allocate a PIRQ pin for this intpin if one is 1464 * not yet assigned. 1465 */ 1466 if (ii->ii_pirq_pin == 0) 1467 ii->ii_pirq_pin = pirq_alloc_pin(pi->pi_vmctx); 1468 assert(ii->ii_pirq_pin > 0); 1469 1470 pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; 1471 pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; 1472 pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); 1473 } 1474 1475 void 1476 pci_lintr_assert(struct pci_devinst *pi) 1477 { 1478 1479 assert(pi->pi_lintr.pin > 0); 1480 1481 pthread_mutex_lock(&pi->pi_lintr.lock); 1482 if (pi->pi_lintr.state == IDLE) { 1483 if (pci_lintr_permitted(pi)) { 1484 pi->pi_lintr.state = ASSERTED; 1485 pci_irq_assert(pi); 1486 } else 1487 pi->pi_lintr.state = PENDING; 1488 } 1489 pthread_mutex_unlock(&pi->pi_lintr.lock); 1490 } 1491 1492 void 1493 pci_lintr_deassert(struct pci_devinst *pi) 1494 { 1495 1496 assert(pi->pi_lintr.pin > 0); 1497 1498 pthread_mutex_lock(&pi->pi_lintr.lock); 1499 if (pi->pi_lintr.state == ASSERTED) { 1500 pi->pi_lintr.state = IDLE; 1501 pci_irq_deassert(pi); 1502 } else if (pi->pi_lintr.state == PENDING) 1503 pi->pi_lintr.state = IDLE; 1504 pthread_mutex_unlock(&pi->pi_lintr.lock); 1505 } 1506 1507 static void 1508 pci_lintr_update(struct pci_devinst *pi) 1509 { 1510 1511 pthread_mutex_lock(&pi->pi_lintr.lock); 1512 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1513 pci_irq_deassert(pi); 1514 pi->pi_lintr.state = PENDING; 1515 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1516 pi->pi_lintr.state = ASSERTED; 1517 pci_irq_assert(pi); 1518 } 1519 pthread_mutex_unlock(&pi->pi_lintr.lock); 1520 } 1521 1522 int 1523 pci_count_lintr(int bus) 1524 { 1525 int count, slot, pin; 1526 struct slotinfo *slotinfo; 1527 1528 count = 0; 1529 if (pci_businfo[bus] != NULL) { 1530 for (slot = 0; slot < MAXSLOTS; slot++) { 1531 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1532 for (pin = 0; pin < 4; pin++) { 1533 if (slotinfo->si_intpins[pin].ii_count != 0) 1534 count++; 1535 } 1536 } 1537 } 1538 return (count); 1539 } 1540 1541 void 1542 pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) 1543 { 1544 struct businfo *bi; 1545 struct slotinfo *si; 1546 struct intxinfo *ii; 1547 int slot, pin; 1548 1549 if ((bi = pci_businfo[bus]) == NULL) 1550 return; 1551 1552 for (slot = 0; slot < MAXSLOTS; slot++) { 1553 si = &bi->slotinfo[slot]; 1554 for (pin = 0; pin < 4; pin++) { 1555 ii = &si->si_intpins[pin]; 1556 if (ii->ii_count != 0) 1557 cb(bus, slot, pin + 1, ii->ii_pirq_pin, 1558 ii->ii_ioapic_irq, arg); 1559 } 1560 } 1561 } 1562 1563 /* 1564 * Return 1 if the emulated device in 'slot' is a multi-function device. 1565 * Return 0 otherwise. 1566 */ 1567 static int 1568 pci_emul_is_mfdev(int bus, int slot) 1569 { 1570 struct businfo *bi; 1571 struct slotinfo *si; 1572 int f, numfuncs; 1573 1574 numfuncs = 0; 1575 if ((bi = pci_businfo[bus]) != NULL) { 1576 si = &bi->slotinfo[slot]; 1577 for (f = 0; f < MAXFUNCS; f++) { 1578 if (si->si_funcs[f].fi_devi != NULL) { 1579 numfuncs++; 1580 } 1581 } 1582 } 1583 return (numfuncs > 1); 1584 } 1585 1586 /* 1587 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1588 * whether or not is a multi-function being emulated in the pci 'slot'. 1589 */ 1590 static void 1591 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1592 { 1593 int mfdev; 1594 1595 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1596 mfdev = pci_emul_is_mfdev(bus, slot); 1597 switch (bytes) { 1598 case 1: 1599 case 2: 1600 *rv &= ~PCIM_MFDEV; 1601 if (mfdev) { 1602 *rv |= PCIM_MFDEV; 1603 } 1604 break; 1605 case 4: 1606 *rv &= ~(PCIM_MFDEV << 16); 1607 if (mfdev) { 1608 *rv |= (PCIM_MFDEV << 16); 1609 } 1610 break; 1611 } 1612 } 1613 } 1614 1615 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 1616 1617 static int 1618 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1619 uint32_t *eax, void *arg) 1620 { 1621 uint32_t x; 1622 1623 if (bytes != 4) { 1624 if (in) 1625 *eax = (bytes == 2) ? 0xffff : 0xff; 1626 return (0); 1627 } 1628 1629 if (in) { 1630 x = (cfgbus << 16) | 1631 (cfgslot << 11) | 1632 (cfgfunc << 8) | 1633 cfgoff; 1634 if (cfgenable) 1635 x |= CONF1_ENABLE; 1636 *eax = x; 1637 } else { 1638 x = *eax; 1639 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 1640 cfgoff = x & PCI_REGMAX; 1641 cfgfunc = (x >> 8) & PCI_FUNCMAX; 1642 cfgslot = (x >> 11) & PCI_SLOTMAX; 1643 cfgbus = (x >> 16) & PCI_BUSMAX; 1644 } 1645 1646 return (0); 1647 } 1648 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 1649 1650 static uint32_t 1651 bits_changed(uint32_t old, uint32_t new, uint32_t mask) 1652 { 1653 1654 return ((old ^ new) & mask); 1655 } 1656 1657 static void 1658 pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes) 1659 { 1660 int i; 1661 uint16_t old; 1662 1663 /* 1664 * The command register is at an offset of 4 bytes and thus the 1665 * guest could write 1, 2 or 4 bytes starting at this offset. 1666 */ 1667 1668 old = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 1669 CFGWRITE(pi, PCIR_COMMAND, new, bytes); /* update config */ 1670 new = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */ 1671 1672 /* 1673 * If the MMIO or I/O address space decoding has changed then 1674 * register/unregister all BARs that decode that address space. 1675 */ 1676 for (i = 0; i <= PCI_BARMAX; i++) { 1677 switch (pi->pi_bar[i].type) { 1678 case PCIBAR_NONE: 1679 case PCIBAR_MEMHI64: 1680 break; 1681 case PCIBAR_IO: 1682 /* I/O address space decoding changed? */ 1683 if (bits_changed(old, new, PCIM_CMD_PORTEN)) { 1684 if (porten(pi)) 1685 register_bar(pi, i); 1686 else 1687 unregister_bar(pi, i); 1688 } 1689 break; 1690 case PCIBAR_MEM32: 1691 case PCIBAR_MEM64: 1692 /* MMIO address space decoding changed? */ 1693 if (bits_changed(old, new, PCIM_CMD_MEMEN)) { 1694 if (memen(pi)) 1695 register_bar(pi, i); 1696 else 1697 unregister_bar(pi, i); 1698 } 1699 break; 1700 default: 1701 assert(0); 1702 } 1703 } 1704 1705 /* 1706 * If INTx has been unmasked and is pending, assert the 1707 * interrupt. 1708 */ 1709 pci_lintr_update(pi); 1710 } 1711 1712 static int 1713 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1714 uint32_t *eax, void *arg) 1715 { 1716 struct businfo *bi; 1717 struct slotinfo *si; 1718 struct pci_devinst *pi; 1719 struct pci_devemu *pe; 1720 int coff, idx, needcfg; 1721 uint64_t addr, bar, mask; 1722 1723 assert(bytes == 1 || bytes == 2 || bytes == 4); 1724 1725 if ((bi = pci_businfo[cfgbus]) != NULL) { 1726 si = &bi->slotinfo[cfgslot]; 1727 pi = si->si_funcs[cfgfunc].fi_devi; 1728 } else 1729 pi = NULL; 1730 1731 coff = cfgoff + (port - CONF1_DATA_PORT); 1732 1733 #if 0 1734 printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r", 1735 in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc); 1736 #endif 1737 1738 /* 1739 * Just return if there is no device at this cfgslot:cfgfunc, 1740 * if the guest is doing an un-aligned access, or if the config 1741 * address word isn't enabled. 1742 */ 1743 if (!cfgenable || pi == NULL || (coff & (bytes - 1)) != 0) { 1744 if (in) 1745 *eax = 0xffffffff; 1746 return (0); 1747 } 1748 1749 pe = pi->pi_d; 1750 1751 /* 1752 * Config read 1753 */ 1754 if (in) { 1755 /* Let the device emulation override the default handler */ 1756 if (pe->pe_cfgread != NULL) { 1757 needcfg = pe->pe_cfgread(ctx, vcpu, pi, 1758 coff, bytes, eax); 1759 } else { 1760 needcfg = 1; 1761 } 1762 1763 if (needcfg) { 1764 if (bytes == 1) 1765 *eax = pci_get_cfgdata8(pi, coff); 1766 else if (bytes == 2) 1767 *eax = pci_get_cfgdata16(pi, coff); 1768 else 1769 *eax = pci_get_cfgdata32(pi, coff); 1770 } 1771 1772 pci_emul_hdrtype_fixup(cfgbus, cfgslot, coff, bytes, eax); 1773 } else { 1774 /* Let the device emulation override the default handler */ 1775 if (pe->pe_cfgwrite != NULL && 1776 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 1777 return (0); 1778 1779 /* 1780 * Special handling for write to BAR registers 1781 */ 1782 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { 1783 /* 1784 * Ignore writes to BAR registers that are not 1785 * 4-byte aligned. 1786 */ 1787 if (bytes != 4 || (coff & 0x3) != 0) 1788 return (0); 1789 idx = (coff - PCIR_BAR(0)) / 4; 1790 mask = ~(pi->pi_bar[idx].size - 1); 1791 switch (pi->pi_bar[idx].type) { 1792 case PCIBAR_NONE: 1793 pi->pi_bar[idx].addr = bar = 0; 1794 break; 1795 case PCIBAR_IO: 1796 addr = *eax & mask; 1797 addr &= 0xffff; 1798 bar = addr | PCIM_BAR_IO_SPACE; 1799 /* 1800 * Register the new BAR value for interception 1801 */ 1802 if (addr != pi->pi_bar[idx].addr) { 1803 update_bar_address(pi, addr, idx, 1804 PCIBAR_IO); 1805 } 1806 break; 1807 case PCIBAR_MEM32: 1808 addr = bar = *eax & mask; 1809 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 1810 if (addr != pi->pi_bar[idx].addr) { 1811 update_bar_address(pi, addr, idx, 1812 PCIBAR_MEM32); 1813 } 1814 break; 1815 case PCIBAR_MEM64: 1816 addr = bar = *eax & mask; 1817 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 1818 PCIM_BAR_MEM_PREFETCH; 1819 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 1820 update_bar_address(pi, addr, idx, 1821 PCIBAR_MEM64); 1822 } 1823 break; 1824 case PCIBAR_MEMHI64: 1825 mask = ~(pi->pi_bar[idx - 1].size - 1); 1826 addr = ((uint64_t)*eax << 32) & mask; 1827 bar = addr >> 32; 1828 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 1829 update_bar_address(pi, addr, idx - 1, 1830 PCIBAR_MEMHI64); 1831 } 1832 break; 1833 default: 1834 assert(0); 1835 } 1836 pci_set_cfgdata32(pi, coff, bar); 1837 1838 } else if (pci_emul_iscap(pi, coff)) { 1839 pci_emul_capwrite(pi, coff, bytes, *eax); 1840 } else if (coff == PCIR_COMMAND) { 1841 pci_emul_cmdwrite(pi, *eax, bytes); 1842 } else { 1843 CFGWRITE(pi, coff, *eax, bytes); 1844 } 1845 } 1846 1847 return (0); 1848 } 1849 1850 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 1851 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 1852 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 1853 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 1854 1855 #define PCI_EMUL_TEST 1856 #ifdef PCI_EMUL_TEST 1857 /* 1858 * Define a dummy test device 1859 */ 1860 #define DIOSZ 8 1861 #define DMEMSZ 4096 1862 struct pci_emul_dsoftc { 1863 uint8_t ioregs[DIOSZ]; 1864 uint8_t memregs[DMEMSZ]; 1865 }; 1866 1867 #define PCI_EMUL_MSI_MSGS 4 1868 #define PCI_EMUL_MSIX_MSGS 16 1869 1870 static int 1871 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 1872 { 1873 int error; 1874 struct pci_emul_dsoftc *sc; 1875 1876 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 1877 1878 pi->pi_arg = sc; 1879 1880 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 1881 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 1882 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 1883 1884 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 1885 assert(error == 0); 1886 1887 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 1888 assert(error == 0); 1889 1890 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 1891 assert(error == 0); 1892 1893 return (0); 1894 } 1895 1896 static void 1897 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1898 uint64_t offset, int size, uint64_t value) 1899 { 1900 int i; 1901 struct pci_emul_dsoftc *sc = pi->pi_arg; 1902 1903 if (baridx == 0) { 1904 if (offset + size > DIOSZ) { 1905 printf("diow: iow too large, offset %ld size %d\n", 1906 offset, size); 1907 return; 1908 } 1909 1910 if (size == 1) { 1911 sc->ioregs[offset] = value & 0xff; 1912 } else if (size == 2) { 1913 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 1914 } else if (size == 4) { 1915 *(uint32_t *)&sc->ioregs[offset] = value; 1916 } else { 1917 printf("diow: iow unknown size %d\n", size); 1918 } 1919 1920 /* 1921 * Special magic value to generate an interrupt 1922 */ 1923 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 1924 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 1925 1926 if (value == 0xabcdef) { 1927 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 1928 pci_generate_msi(pi, i); 1929 } 1930 } 1931 1932 if (baridx == 1) { 1933 if (offset + size > DMEMSZ) { 1934 printf("diow: memw too large, offset %ld size %d\n", 1935 offset, size); 1936 return; 1937 } 1938 1939 if (size == 1) { 1940 sc->memregs[offset] = value; 1941 } else if (size == 2) { 1942 *(uint16_t *)&sc->memregs[offset] = value; 1943 } else if (size == 4) { 1944 *(uint32_t *)&sc->memregs[offset] = value; 1945 } else if (size == 8) { 1946 *(uint64_t *)&sc->memregs[offset] = value; 1947 } else { 1948 printf("diow: memw unknown size %d\n", size); 1949 } 1950 1951 /* 1952 * magic interrupt ?? 1953 */ 1954 } 1955 1956 if (baridx > 1) { 1957 printf("diow: unknown bar idx %d\n", baridx); 1958 } 1959 } 1960 1961 static uint64_t 1962 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1963 uint64_t offset, int size) 1964 { 1965 struct pci_emul_dsoftc *sc = pi->pi_arg; 1966 uint32_t value; 1967 1968 if (baridx == 0) { 1969 if (offset + size > DIOSZ) { 1970 printf("dior: ior too large, offset %ld size %d\n", 1971 offset, size); 1972 return (0); 1973 } 1974 1975 if (size == 1) { 1976 value = sc->ioregs[offset]; 1977 } else if (size == 2) { 1978 value = *(uint16_t *) &sc->ioregs[offset]; 1979 } else if (size == 4) { 1980 value = *(uint32_t *) &sc->ioregs[offset]; 1981 } else { 1982 printf("dior: ior unknown size %d\n", size); 1983 } 1984 } 1985 1986 if (baridx == 1) { 1987 if (offset + size > DMEMSZ) { 1988 printf("dior: memr too large, offset %ld size %d\n", 1989 offset, size); 1990 return (0); 1991 } 1992 1993 if (size == 1) { 1994 value = sc->memregs[offset]; 1995 } else if (size == 2) { 1996 value = *(uint16_t *) &sc->memregs[offset]; 1997 } else if (size == 4) { 1998 value = *(uint32_t *) &sc->memregs[offset]; 1999 } else if (size == 8) { 2000 value = *(uint64_t *) &sc->memregs[offset]; 2001 } else { 2002 printf("dior: ior unknown size %d\n", size); 2003 } 2004 } 2005 2006 2007 if (baridx > 1) { 2008 printf("dior: unknown bar idx %d\n", baridx); 2009 return (0); 2010 } 2011 2012 return (value); 2013 } 2014 2015 struct pci_devemu pci_dummy = { 2016 .pe_emu = "dummy", 2017 .pe_init = pci_emul_dinit, 2018 .pe_barwrite = pci_emul_diow, 2019 .pe_barread = pci_emul_dior 2020 }; 2021 PCI_EMUL_SET(pci_dummy); 2022 2023 #endif /* PCI_EMUL_TEST */ 2024