1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/linker_set.h> 34 #include <sys/errno.h> 35 36 #include <ctype.h> 37 #include <pthread.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <strings.h> 42 #include <assert.h> 43 #include <stdbool.h> 44 45 #include <machine/vmm.h> 46 #include <vmmapi.h> 47 48 #include "acpi.h" 49 #include "bhyverun.h" 50 #include "inout.h" 51 #include "ioapic.h" 52 #include "mem.h" 53 #include "pci_emul.h" 54 #include "pci_lpc.h" 55 56 #define CONF1_ADDR_PORT 0x0cf8 57 #define CONF1_DATA_PORT 0x0cfc 58 59 #define CONF1_ENABLE 0x80000000ul 60 61 #define CFGWRITE(pi,off,val,b) \ 62 do { \ 63 if ((b) == 1) { \ 64 pci_set_cfgdata8((pi),(off),(val)); \ 65 } else if ((b) == 2) { \ 66 pci_set_cfgdata16((pi),(off),(val)); \ 67 } else { \ 68 pci_set_cfgdata32((pi),(off),(val)); \ 69 } \ 70 } while (0) 71 72 #define MAXBUSES (PCI_BUSMAX + 1) 73 #define MAXSLOTS (PCI_SLOTMAX + 1) 74 #define MAXFUNCS (PCI_FUNCMAX + 1) 75 76 struct funcinfo { 77 char *fi_name; 78 char *fi_param; 79 struct pci_devinst *fi_devi; 80 }; 81 82 struct intxinfo { 83 int ii_count; 84 int ii_ioapic_irq; 85 }; 86 87 struct slotinfo { 88 struct intxinfo si_intpins[4]; 89 struct funcinfo si_funcs[MAXFUNCS]; 90 }; 91 92 struct businfo { 93 uint16_t iobase, iolimit; /* I/O window */ 94 uint32_t membase32, memlimit32; /* mmio window below 4GB */ 95 uint64_t membase64, memlimit64; /* mmio window above 4GB */ 96 struct slotinfo slotinfo[MAXSLOTS]; 97 }; 98 99 static struct businfo *pci_businfo[MAXBUSES]; 100 101 SET_DECLARE(pci_devemu_set, struct pci_devemu); 102 103 static uint64_t pci_emul_iobase; 104 static uint64_t pci_emul_membase32; 105 static uint64_t pci_emul_membase64; 106 107 #define PCI_EMUL_IOBASE 0x2000 108 #define PCI_EMUL_IOLIMIT 0x10000 109 110 #define PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ 111 112 #define PCI_EMUL_MEMBASE64 0xD000000000UL 113 #define 
PCI_EMUL_MEMLIMIT64 0xFD00000000UL 114 115 static struct pci_devemu *pci_emul_finddev(char *name); 116 static void pci_lintr_update(struct pci_devinst *pi); 117 118 static struct mem_range pci_mem_hole; 119 120 /* 121 * I/O access 122 */ 123 124 /* 125 * Slot options are in the form: 126 * 127 * <bus>:<slot>:<func>,<emul>[,<config>] 128 * <slot>[:<func>],<emul>[,<config>] 129 * 130 * slot is 0..31 131 * func is 0..7 132 * emul is a string describing the type of PCI device e.g. virtio-net 133 * config is an optional string, depending on the device, that can be 134 * used for configuration. 135 * Examples are: 136 * 1,virtio-net,tap0 137 * 3:0,dummy 138 */ 139 static void 140 pci_parse_slot_usage(char *aopt) 141 { 142 143 fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt); 144 } 145 146 int 147 pci_parse_slot(char *opt) 148 { 149 struct businfo *bi; 150 struct slotinfo *si; 151 char *emul, *config, *str, *cp; 152 int error, bnum, snum, fnum; 153 154 error = -1; 155 str = strdup(opt); 156 157 emul = config = NULL; 158 if ((cp = strchr(str, ',')) != NULL) { 159 *cp = '\0'; 160 emul = cp + 1; 161 if ((cp = strchr(emul, ',')) != NULL) { 162 *cp = '\0'; 163 config = cp + 1; 164 } 165 } else { 166 pci_parse_slot_usage(opt); 167 goto done; 168 } 169 170 /* <bus>:<slot>:<func> */ 171 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { 172 bnum = 0; 173 /* <slot>:<func> */ 174 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { 175 fnum = 0; 176 /* <slot> */ 177 if (sscanf(str, "%d", &snum) != 1) { 178 snum = -1; 179 } 180 } 181 } 182 183 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || 184 fnum < 0 || fnum >= MAXFUNCS) { 185 pci_parse_slot_usage(opt); 186 goto done; 187 } 188 189 if (pci_businfo[bnum] == NULL) 190 pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); 191 192 bi = pci_businfo[bnum]; 193 si = &bi->slotinfo[snum]; 194 195 if (si->si_funcs[fnum].fi_name != NULL) { 196 fprintf(stderr, "pci slot %d:%d already occupied!\n", 197 snum, 
fnum); 198 goto done; 199 } 200 201 if (pci_emul_finddev(emul) == NULL) { 202 fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n", 203 snum, fnum, emul); 204 goto done; 205 } 206 207 error = 0; 208 si->si_funcs[fnum].fi_name = emul; 209 si->si_funcs[fnum].fi_param = config; 210 211 done: 212 if (error) 213 free(str); 214 215 return (error); 216 } 217 218 static int 219 pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 220 { 221 222 if (offset < pi->pi_msix.pba_offset) 223 return (0); 224 225 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 226 return (0); 227 } 228 229 return (1); 230 } 231 232 int 233 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 234 uint64_t value) 235 { 236 int msix_entry_offset; 237 int tab_index; 238 char *dest; 239 240 /* support only 4 or 8 byte writes */ 241 if (size != 4 && size != 8) 242 return (-1); 243 244 /* 245 * Return if table index is beyond what device supports 246 */ 247 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 248 if (tab_index >= pi->pi_msix.table_count) 249 return (-1); 250 251 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 252 253 /* support only aligned writes */ 254 if ((msix_entry_offset % size) != 0) 255 return (-1); 256 257 dest = (char *)(pi->pi_msix.table + tab_index); 258 dest += msix_entry_offset; 259 260 if (size == 4) 261 *((uint32_t *)dest) = value; 262 else 263 *((uint64_t *)dest) = value; 264 265 return (0); 266 } 267 268 uint64_t 269 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 270 { 271 char *dest; 272 int msix_entry_offset; 273 int tab_index; 274 uint64_t retval = ~0; 275 276 /* 277 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 278 * table but we also allow 1 byte access to accomodate reads from 279 * ddb. 
280 */ 281 if (size != 1 && size != 4 && size != 8) 282 return (retval); 283 284 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 285 286 /* support only aligned reads */ 287 if ((msix_entry_offset % size) != 0) { 288 return (retval); 289 } 290 291 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 292 293 if (tab_index < pi->pi_msix.table_count) { 294 /* valid MSI-X Table access */ 295 dest = (char *)(pi->pi_msix.table + tab_index); 296 dest += msix_entry_offset; 297 298 if (size == 1) 299 retval = *((uint8_t *)dest); 300 else if (size == 4) 301 retval = *((uint32_t *)dest); 302 else 303 retval = *((uint64_t *)dest); 304 } else if (pci_valid_pba_offset(pi, offset)) { 305 /* return 0 for PBA access */ 306 retval = 0; 307 } 308 309 return (retval); 310 } 311 312 int 313 pci_msix_table_bar(struct pci_devinst *pi) 314 { 315 316 if (pi->pi_msix.table != NULL) 317 return (pi->pi_msix.table_bar); 318 else 319 return (-1); 320 } 321 322 int 323 pci_msix_pba_bar(struct pci_devinst *pi) 324 { 325 326 if (pi->pi_msix.table != NULL) 327 return (pi->pi_msix.pba_bar); 328 else 329 return (-1); 330 } 331 332 static int 333 pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 334 uint32_t *eax, void *arg) 335 { 336 struct pci_devinst *pdi = arg; 337 struct pci_devemu *pe = pdi->pi_d; 338 uint64_t offset; 339 int i; 340 341 for (i = 0; i <= PCI_BARMAX; i++) { 342 if (pdi->pi_bar[i].type == PCIBAR_IO && 343 port >= pdi->pi_bar[i].addr && 344 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 345 offset = port - pdi->pi_bar[i].addr; 346 if (in) 347 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 348 offset, bytes); 349 else 350 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 351 bytes, *eax); 352 return (0); 353 } 354 } 355 return (-1); 356 } 357 358 static int 359 pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 360 int size, uint64_t *val, void *arg1, long arg2) 361 { 362 struct pci_devinst *pdi = arg1; 363 struct pci_devemu *pe = 
pdi->pi_d; 364 uint64_t offset; 365 int bidx = (int) arg2; 366 367 assert(bidx <= PCI_BARMAX); 368 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 369 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 370 assert(addr >= pdi->pi_bar[bidx].addr && 371 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 372 373 offset = addr - pdi->pi_bar[bidx].addr; 374 375 if (dir == MEM_F_WRITE) 376 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val); 377 else 378 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size); 379 380 return (0); 381 } 382 383 384 static int 385 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 386 uint64_t *addr) 387 { 388 uint64_t base; 389 390 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 391 392 base = roundup2(*baseptr, size); 393 394 if (base + size <= limit) { 395 *addr = base; 396 *baseptr = base + size; 397 return (0); 398 } else 399 return (-1); 400 } 401 402 int 403 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 404 uint64_t size) 405 { 406 407 return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); 408 } 409 410 /* 411 * Register (or unregister) the MMIO or I/O region associated with the BAR 412 * register 'idx' of an emulated pci device. 
413 */ 414 static void 415 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 416 { 417 int error; 418 struct inout_port iop; 419 struct mem_range mr; 420 421 switch (pi->pi_bar[idx].type) { 422 case PCIBAR_IO: 423 bzero(&iop, sizeof(struct inout_port)); 424 iop.name = pi->pi_name; 425 iop.port = pi->pi_bar[idx].addr; 426 iop.size = pi->pi_bar[idx].size; 427 if (registration) { 428 iop.flags = IOPORT_F_INOUT; 429 iop.handler = pci_emul_io_handler; 430 iop.arg = pi; 431 error = register_inout(&iop); 432 } else 433 error = unregister_inout(&iop); 434 break; 435 case PCIBAR_MEM32: 436 case PCIBAR_MEM64: 437 bzero(&mr, sizeof(struct mem_range)); 438 mr.name = pi->pi_name; 439 mr.base = pi->pi_bar[idx].addr; 440 mr.size = pi->pi_bar[idx].size; 441 if (registration) { 442 mr.flags = MEM_F_RW; 443 mr.handler = pci_emul_mem_handler; 444 mr.arg1 = pi; 445 mr.arg2 = idx; 446 error = register_mem(&mr); 447 } else 448 error = unregister_mem(&mr); 449 break; 450 default: 451 error = EINVAL; 452 break; 453 } 454 assert(error == 0); 455 } 456 457 static void 458 unregister_bar(struct pci_devinst *pi, int idx) 459 { 460 461 modify_bar_registration(pi, idx, 0); 462 } 463 464 static void 465 register_bar(struct pci_devinst *pi, int idx) 466 { 467 468 modify_bar_registration(pi, idx, 1); 469 } 470 471 /* Are we decoding i/o port accesses for the emulated pci device? */ 472 static int 473 porten(struct pci_devinst *pi) 474 { 475 uint16_t cmd; 476 477 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 478 479 return (cmd & PCIM_CMD_PORTEN); 480 } 481 482 /* Are we decoding memory accesses for the emulated pci device? */ 483 static int 484 memen(struct pci_devinst *pi) 485 { 486 uint16_t cmd; 487 488 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 489 490 return (cmd & PCIM_CMD_MEMEN); 491 } 492 493 /* 494 * Update the MMIO or I/O address that is decoded by the BAR register. 
495 * 496 * If the pci device has enabled the address space decoding then intercept 497 * the address range decoded by the BAR register. 498 */ 499 static void 500 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 501 { 502 int decode; 503 504 if (pi->pi_bar[idx].type == PCIBAR_IO) 505 decode = porten(pi); 506 else 507 decode = memen(pi); 508 509 if (decode) 510 unregister_bar(pi, idx); 511 512 switch (type) { 513 case PCIBAR_IO: 514 case PCIBAR_MEM32: 515 pi->pi_bar[idx].addr = addr; 516 break; 517 case PCIBAR_MEM64: 518 pi->pi_bar[idx].addr &= ~0xffffffffUL; 519 pi->pi_bar[idx].addr |= addr; 520 break; 521 case PCIBAR_MEMHI64: 522 pi->pi_bar[idx].addr &= 0xffffffff; 523 pi->pi_bar[idx].addr |= addr; 524 break; 525 default: 526 assert(0); 527 } 528 529 if (decode) 530 register_bar(pi, idx); 531 } 532 533 int 534 pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, 535 enum pcibar_type type, uint64_t size) 536 { 537 int error; 538 uint64_t *baseptr, limit, addr, mask, lobits, bar; 539 540 assert(idx >= 0 && idx <= PCI_BARMAX); 541 542 if ((size & (size - 1)) != 0) 543 size = 1UL << flsl(size); /* round up to a power of 2 */ 544 545 /* Enforce minimum BAR sizes required by the PCI standard */ 546 if (type == PCIBAR_IO) { 547 if (size < 4) 548 size = 4; 549 } else { 550 if (size < 16) 551 size = 16; 552 } 553 554 switch (type) { 555 case PCIBAR_NONE: 556 baseptr = NULL; 557 addr = mask = lobits = 0; 558 break; 559 case PCIBAR_IO: 560 baseptr = &pci_emul_iobase; 561 limit = PCI_EMUL_IOLIMIT; 562 mask = PCIM_BAR_IO_BASE; 563 lobits = PCIM_BAR_IO_SPACE; 564 break; 565 case PCIBAR_MEM64: 566 /* 567 * XXX 568 * Some drivers do not work well if the 64-bit BAR is allocated 569 * above 4GB. Allow for this by allocating small requests under 570 * 4GB unless then allocation size is larger than some arbitrary 571 * number (32MB currently). 
572 */ 573 if (size > 32 * 1024 * 1024) { 574 /* 575 * XXX special case for device requiring peer-peer DMA 576 */ 577 if (size == 0x100000000UL) 578 baseptr = &hostbase; 579 else 580 baseptr = &pci_emul_membase64; 581 limit = PCI_EMUL_MEMLIMIT64; 582 mask = PCIM_BAR_MEM_BASE; 583 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 584 PCIM_BAR_MEM_PREFETCH; 585 break; 586 } else { 587 baseptr = &pci_emul_membase32; 588 limit = PCI_EMUL_MEMLIMIT32; 589 mask = PCIM_BAR_MEM_BASE; 590 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 591 } 592 break; 593 case PCIBAR_MEM32: 594 baseptr = &pci_emul_membase32; 595 limit = PCI_EMUL_MEMLIMIT32; 596 mask = PCIM_BAR_MEM_BASE; 597 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 598 break; 599 default: 600 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 601 assert(0); 602 } 603 604 if (baseptr != NULL) { 605 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 606 if (error != 0) 607 return (error); 608 } 609 610 pdi->pi_bar[idx].type = type; 611 pdi->pi_bar[idx].addr = addr; 612 pdi->pi_bar[idx].size = size; 613 614 /* Initialize the BAR register in config space */ 615 bar = (addr & mask) | lobits; 616 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 617 618 if (type == PCIBAR_MEM64) { 619 assert(idx + 1 <= PCI_BARMAX); 620 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 621 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 622 } 623 624 register_bar(pdi, idx); 625 626 return (0); 627 } 628 629 #define CAP_START_OFFSET 0x40 630 static int 631 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 632 { 633 int i, capoff, reallen; 634 uint16_t sts; 635 636 assert(caplen > 0); 637 638 reallen = roundup2(caplen, 4); /* dword aligned */ 639 640 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 641 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) 642 capoff = CAP_START_OFFSET; 643 else 644 capoff = pi->pi_capend + 1; 645 646 /* Check if we have enough space */ 647 if (capoff + reallen > PCI_REGMAX + 1) 648 return 
(-1); 649 650 /* Set the previous capability pointer */ 651 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 652 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 653 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 654 } else 655 pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); 656 657 /* Copy the capability */ 658 for (i = 0; i < caplen; i++) 659 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 660 661 /* Set the next capability pointer */ 662 pci_set_cfgdata8(pi, capoff + 1, 0); 663 664 pi->pi_prevcap = capoff; 665 pi->pi_capend = capoff + reallen - 1; 666 return (0); 667 } 668 669 static struct pci_devemu * 670 pci_emul_finddev(char *name) 671 { 672 struct pci_devemu **pdpp, *pdp; 673 674 SET_FOREACH(pdpp, pci_devemu_set) { 675 pdp = *pdpp; 676 if (!strcmp(pdp->pe_emu, name)) { 677 return (pdp); 678 } 679 } 680 681 return (NULL); 682 } 683 684 static int 685 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, 686 int func, struct funcinfo *fi) 687 { 688 struct pci_devinst *pdi; 689 int err; 690 691 pdi = malloc(sizeof(struct pci_devinst)); 692 bzero(pdi, sizeof(*pdi)); 693 694 pdi->pi_vmctx = ctx; 695 pdi->pi_bus = bus; 696 pdi->pi_slot = slot; 697 pdi->pi_func = func; 698 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 699 pdi->pi_lintr.pin = 0; 700 pdi->pi_lintr.state = IDLE; 701 pdi->pi_lintr.ioapic_irq = 0; 702 pdi->pi_d = pde; 703 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 704 705 /* Disable legacy interrupts */ 706 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 707 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 708 709 pci_set_cfgdata8(pdi, PCIR_COMMAND, 710 PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); 711 712 err = (*pde->pe_init)(ctx, pdi, fi->fi_param); 713 if (err == 0) 714 fi->fi_devi = pdi; 715 else 716 free(pdi); 717 718 return (err); 719 } 720 721 void 722 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 723 { 724 int mmc; 725 726 CTASSERT(sizeof(struct msicap) == 14); 727 728 /* 
Number of msi messages must be a power of 2 between 1 and 32 */ 729 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 730 mmc = ffs(msgnum) - 1; 731 732 bzero(msicap, sizeof(struct msicap)); 733 msicap->capid = PCIY_MSI; 734 msicap->nextptr = nextptr; 735 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 736 } 737 738 int 739 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 740 { 741 struct msicap msicap; 742 743 pci_populate_msicap(&msicap, msgnum, 0); 744 745 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 746 } 747 748 static void 749 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 750 uint32_t msix_tab_size) 751 { 752 CTASSERT(sizeof(struct msixcap) == 12); 753 754 assert(msix_tab_size % 4096 == 0); 755 756 bzero(msixcap, sizeof(struct msixcap)); 757 msixcap->capid = PCIY_MSIX; 758 759 /* 760 * Message Control Register, all fields set to 761 * zero except for the Table Size. 762 * Note: Table size N is encoded as N-1 763 */ 764 msixcap->msgctrl = msgnum - 1; 765 766 /* 767 * MSI-X BAR setup: 768 * - MSI-X table start at offset 0 769 * - PBA table starts at a 4K aligned offset after the MSI-X table 770 */ 771 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 772 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 773 } 774 775 static void 776 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 777 { 778 int i, table_size; 779 780 assert(table_entries > 0); 781 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 782 783 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 784 pi->pi_msix.table = malloc(table_size); 785 bzero(pi->pi_msix.table, table_size); 786 787 /* set mask bit of vector control register */ 788 for (i = 0; i < table_entries; i++) 789 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 790 } 791 792 int 793 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 794 { 795 uint32_t tab_size; 796 struct msixcap msixcap; 797 798 
assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 799 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 800 801 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 802 803 /* Align table size to nearest 4K */ 804 tab_size = roundup2(tab_size, 4096); 805 806 pi->pi_msix.table_bar = barnum; 807 pi->pi_msix.pba_bar = barnum; 808 pi->pi_msix.table_offset = 0; 809 pi->pi_msix.table_count = msgnum; 810 pi->pi_msix.pba_offset = tab_size; 811 pi->pi_msix.pba_size = PBA_SIZE(msgnum); 812 813 pci_msix_table_init(pi, msgnum); 814 815 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); 816 817 /* allocate memory for MSI-X Table and PBA */ 818 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 819 tab_size + pi->pi_msix.pba_size); 820 821 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 822 sizeof(msixcap))); 823 } 824 825 void 826 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 827 int bytes, uint32_t val) 828 { 829 uint16_t msgctrl, rwmask; 830 int off, table_bar; 831 832 off = offset - capoff; 833 table_bar = pi->pi_msix.table_bar; 834 /* Message Control Register */ 835 if (off == 2 && bytes == 2) { 836 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 837 msgctrl = pci_get_cfgdata16(pi, offset); 838 msgctrl &= ~rwmask; 839 msgctrl |= val & rwmask; 840 val = msgctrl; 841 842 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 843 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 844 pci_lintr_update(pi); 845 } 846 847 CFGWRITE(pi, offset, val, bytes); 848 } 849 850 void 851 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 852 int bytes, uint32_t val) 853 { 854 uint16_t msgctrl, rwmask, msgdata, mme; 855 uint32_t addrlo; 856 857 /* 858 * If guest is writing to the message control register make sure 859 * we do not overwrite read-only fields. 
860 */ 861 if ((offset - capoff) == 2 && bytes == 2) { 862 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 863 msgctrl = pci_get_cfgdata16(pi, offset); 864 msgctrl &= ~rwmask; 865 msgctrl |= val & rwmask; 866 val = msgctrl; 867 868 addrlo = pci_get_cfgdata32(pi, capoff + 4); 869 if (msgctrl & PCIM_MSICTRL_64BIT) 870 msgdata = pci_get_cfgdata16(pi, capoff + 12); 871 else 872 msgdata = pci_get_cfgdata16(pi, capoff + 8); 873 874 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 875 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 876 if (pi->pi_msi.enabled) { 877 pi->pi_msi.addr = addrlo; 878 pi->pi_msi.msg_data = msgdata; 879 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 880 } else { 881 pi->pi_msi.maxmsgnum = 0; 882 } 883 pci_lintr_update(pi); 884 } 885 886 CFGWRITE(pi, offset, val, bytes); 887 } 888 889 void 890 pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 891 int bytes, uint32_t val) 892 { 893 894 /* XXX don't write to the readonly parts */ 895 CFGWRITE(pi, offset, val, bytes); 896 } 897 898 #define PCIECAP_VERSION 0x2 899 int 900 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 901 { 902 int err; 903 struct pciecap pciecap; 904 905 CTASSERT(sizeof(struct pciecap) == 60); 906 907 if (type != PCIEM_TYPE_ROOT_PORT) 908 return (-1); 909 910 bzero(&pciecap, sizeof(pciecap)); 911 912 pciecap.capid = PCIY_EXPRESS; 913 pciecap.pcie_capabilities = PCIECAP_VERSION | PCIEM_TYPE_ROOT_PORT; 914 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 915 pciecap.link_status = 0x11; /* gen1, x1 */ 916 917 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 918 return (err); 919 } 920 921 /* 922 * This function assumes that 'coff' is in the capabilities region of the 923 * config space. 
924 */ 925 static void 926 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) 927 { 928 int capid; 929 uint8_t capoff, nextoff; 930 931 /* Do not allow un-aligned writes */ 932 if ((offset & (bytes - 1)) != 0) 933 return; 934 935 /* Find the capability that we want to update */ 936 capoff = CAP_START_OFFSET; 937 while (1) { 938 nextoff = pci_get_cfgdata8(pi, capoff + 1); 939 if (nextoff == 0) 940 break; 941 if (offset >= capoff && offset < nextoff) 942 break; 943 944 capoff = nextoff; 945 } 946 assert(offset >= capoff); 947 948 /* 949 * Capability ID and Next Capability Pointer are readonly. 950 * However, some o/s's do 4-byte writes that include these. 951 * For this case, trim the write back to 2 bytes and adjust 952 * the data. 953 */ 954 if (offset == capoff || offset == capoff + 1) { 955 if (offset == capoff && bytes == 4) { 956 bytes = 2; 957 offset += 2; 958 val >>= 16; 959 } else 960 return; 961 } 962 963 capid = pci_get_cfgdata8(pi, capoff); 964 switch (capid) { 965 case PCIY_MSI: 966 msicap_cfgwrite(pi, capoff, offset, bytes, val); 967 break; 968 case PCIY_MSIX: 969 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 970 break; 971 case PCIY_EXPRESS: 972 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 973 break; 974 default: 975 break; 976 } 977 } 978 979 static int 980 pci_emul_iscap(struct pci_devinst *pi, int offset) 981 { 982 uint16_t sts; 983 984 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 985 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 986 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) 987 return (1); 988 } 989 return (0); 990 } 991 992 static int 993 pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 994 int size, uint64_t *val, void *arg1, long arg2) 995 { 996 /* 997 * Ignore writes; return 0xff's for reads. The mem read code 998 * will take care of truncating to the correct size. 
999 */ 1000 if (dir == MEM_F_READ) { 1001 *val = 0xffffffffffffffff; 1002 } 1003 1004 return (0); 1005 } 1006 1007 #define BUSIO_ROUNDUP 32 1008 #define BUSMEM_ROUNDUP (1024 * 1024) 1009 1010 int 1011 init_pci(struct vmctx *ctx) 1012 { 1013 struct pci_devemu *pde; 1014 struct businfo *bi; 1015 struct slotinfo *si; 1016 struct funcinfo *fi; 1017 size_t lowmem; 1018 int bus, slot, func; 1019 int error; 1020 1021 pci_emul_iobase = PCI_EMUL_IOBASE; 1022 pci_emul_membase32 = vm_get_lowmem_limit(ctx); 1023 pci_emul_membase64 = PCI_EMUL_MEMBASE64; 1024 1025 for (bus = 0; bus < MAXBUSES; bus++) { 1026 if ((bi = pci_businfo[bus]) == NULL) 1027 continue; 1028 /* 1029 * Keep track of the i/o and memory resources allocated to 1030 * this bus. 1031 */ 1032 bi->iobase = pci_emul_iobase; 1033 bi->membase32 = pci_emul_membase32; 1034 bi->membase64 = pci_emul_membase64; 1035 1036 for (slot = 0; slot < MAXSLOTS; slot++) { 1037 si = &bi->slotinfo[slot]; 1038 for (func = 0; func < MAXFUNCS; func++) { 1039 fi = &si->si_funcs[func]; 1040 if (fi->fi_name == NULL) 1041 continue; 1042 pde = pci_emul_finddev(fi->fi_name); 1043 assert(pde != NULL); 1044 error = pci_emul_init(ctx, pde, bus, slot, 1045 func, fi); 1046 if (error) 1047 return (error); 1048 } 1049 } 1050 1051 /* 1052 * Add some slop to the I/O and memory resources decoded by 1053 * this bus to give a guest some flexibility if it wants to 1054 * reprogram the BARs. 
1055 */ 1056 pci_emul_iobase += BUSIO_ROUNDUP; 1057 pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); 1058 bi->iolimit = pci_emul_iobase; 1059 1060 pci_emul_membase32 += BUSMEM_ROUNDUP; 1061 pci_emul_membase32 = roundup2(pci_emul_membase32, 1062 BUSMEM_ROUNDUP); 1063 bi->memlimit32 = pci_emul_membase32; 1064 1065 pci_emul_membase64 += BUSMEM_ROUNDUP; 1066 pci_emul_membase64 = roundup2(pci_emul_membase64, 1067 BUSMEM_ROUNDUP); 1068 bi->memlimit64 = pci_emul_membase64; 1069 } 1070 1071 /* 1072 * The guest physical memory map looks like the following: 1073 * [0, lowmem) guest system memory 1074 * [lowmem, lowmem_limit) memory hole (may be absent) 1075 * [lowmem_limit, 4GB) PCI hole (32-bit BAR allocation) 1076 * [4GB, 4GB + highmem) 1077 * 1078 * Accesses to memory addresses that are not allocated to system 1079 * memory or PCI devices return 0xff's. 1080 */ 1081 error = vm_get_memory_seg(ctx, 0, &lowmem, NULL); 1082 assert(error == 0); 1083 1084 memset(&pci_mem_hole, 0, sizeof(struct mem_range)); 1085 pci_mem_hole.name = "PCI hole"; 1086 pci_mem_hole.flags = MEM_F_RW; 1087 pci_mem_hole.base = lowmem; 1088 pci_mem_hole.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1089 pci_mem_hole.handler = pci_emul_fallback_handler; 1090 1091 error = register_mem_fallback(&pci_mem_hole); 1092 assert(error == 0); 1093 1094 return (0); 1095 } 1096 1097 static void 1098 pci_prt_entry(int bus, int slot, int pin, int ioapic_irq, void *arg) 1099 { 1100 int *count; 1101 1102 count = arg; 1103 dsdt_line(" Package (0x04)"); 1104 dsdt_line(" {"); 1105 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1106 dsdt_line(" 0x%02X,", pin - 1); 1107 dsdt_line(" Zero,"); 1108 dsdt_line(" 0x%X", ioapic_irq); 1109 dsdt_line(" }%s", *count == 1 ? "" : ","); 1110 (*count)--; 1111 } 1112 1113 /* 1114 * A bhyve virtual machine has a flat PCI hierarchy with a root port 1115 * corresponding to each PCI bus. 
 */
static void
pci_bus_write_dsdt(int bus)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	int count, slot, func;

	/*
	 * If there are no devices on this 'bus' then just return.
	 */
	if ((bi = pci_businfo[bus]) == NULL) {
		/*
		 * Bus 0 is special because it decodes the I/O ports used
		 * for PCI config space access even if there are no devices
		 * on it.
		 */
		if (bus != 0)
			return;
	}

	dsdt_indent(1);
	dsdt_line("Scope (_SB)");
	dsdt_line("{");
	dsdt_line(" Device (PC%02X)", bus);
	dsdt_line(" {");
	dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))");
	dsdt_line(" Name (_ADR, Zero)");

	dsdt_line(" Method (_BBN, 0, NotSerialized)");
	dsdt_line(" {");
	dsdt_line(" Return (0x%08X)", bus);
	dsdt_line(" }");
	dsdt_line(" Name (_CRS, ResourceTemplate ()");
	dsdt_line(" {");
	/* The bus number itself is a one-entry bus-number resource. */
	dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, "
	    "MaxFixed, PosDecode,");
	dsdt_line(" 0x0000, // Granularity");
	dsdt_line(" 0x%04X, // Range Minimum", bus);
	dsdt_line(" 0x%04X, // Range Maximum", bus);
	dsdt_line(" 0x0000, // Translation Offset");
	dsdt_line(" 0x0001, // Length");
	dsdt_line(" ,, )");

	if (bus == 0) {
		/* Bus 0 also claims the CONF1 config-access ports at 0xCF8. */
		dsdt_indent(3);
		dsdt_fixed_ioport(0xCF8, 8);
		dsdt_unindent(3);

		/* I/O ports below the config-access window. */
		dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line(" 0x0000, // Granularity");
		dsdt_line(" 0x0000, // Range Minimum");
		dsdt_line(" 0x0CF7, // Range Maximum");
		dsdt_line(" 0x0000, // Translation Offset");
		dsdt_line(" 0x0CF8, // Length");
		dsdt_line(" ,, , TypeStatic)");

		/* I/O ports from 0x0D00 up to the start of the emulated BARs. */
		dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line(" 0x0000, // Granularity");
		dsdt_line(" 0x0D00, // Range Minimum");
		dsdt_line(" 0x%04X, // Range Maximum",
		    PCI_EMUL_IOBASE - 1);
		dsdt_line(" 0x0000, // Translation Offset");
		dsdt_line(" 0x%04X, // Length",
		    PCI_EMUL_IOBASE - 0x0D00);
		dsdt_line(" ,, , TypeStatic)");

		/* No devices on bus 0: close _CRS and skip per-device work. */
		if (bi == NULL) {
			dsdt_line(" })");
			goto done;
		}
	}
	assert(bi != NULL);

	/* i/o window */
	dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
	    "PosDecode, EntireRange,");
	dsdt_line(" 0x0000, // Granularity");
	dsdt_line(" 0x%04X, // Range Minimum", bi->iobase);
	dsdt_line(" 0x%04X, // Range Maximum",
	    bi->iolimit - 1);
	dsdt_line(" 0x0000, // Translation Offset");
	dsdt_line(" 0x%04X, // Length",
	    bi->iolimit - bi->iobase);
	dsdt_line(" ,, , TypeStatic)");

	/* mmio window (32-bit) */
	dsdt_line(" DWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line(" 0x00000000, // Granularity");
	dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32);
	dsdt_line(" 0x%08X, // Range Maximum\n",
	    bi->memlimit32 - 1);
	dsdt_line(" 0x00000000, // Translation Offset");
	dsdt_line(" 0x%08X, // Length\n",
	    bi->memlimit32 - bi->membase32);
	dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");

	/* mmio window (64-bit) */
	dsdt_line(" QWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line(" 0x0000000000000000, // Granularity");
	dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64);
	dsdt_line(" 0x%016lX, // Range Maximum\n",
	    bi->memlimit64 - 1);
	dsdt_line(" 0x0000000000000000, // Translation Offset");
	dsdt_line(" 0x%016lX, // Length\n",
	    bi->memlimit64 - bi->membase64);
	dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");
	dsdt_line(" })");

	/* Emit the PCI interrupt routing table (_PRT) if any INTx in use. */
	count = pci_count_lintr(bus);
	if (count != 0) {
		dsdt_indent(2);
		dsdt_line("Name (_PRT, Package (0x%02X)", count);
		dsdt_line("{");
		pci_walk_lintr(bus, pci_prt_entry, &count);
		dsdt_line("})");
		dsdt_unindent(2);
	}

	/* Give each emulated device a chance to add its own DSDT entries. */
	dsdt_indent(2);
	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (func = 0; func < MAXFUNCS; func++) {
			pi = si->si_funcs[func].fi_devi;
			if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
				pi->pi_d->pe_write_dsdt(pi);
		}
	}
	dsdt_unindent(2);
done:
	dsdt_line(" }");
	dsdt_line("}");
	dsdt_unindent(1);
}

/*
 * Emit ACPI DSDT entries for every possible PCI bus.
 */
void
pci_write_dsdt(void)
{
	int bus;

	for (bus = 0; bus < MAXBUSES; bus++)
		pci_bus_write_dsdt(bus);
}

/*
 * Return non-zero if MSI is enabled for this device.
 */
int
pci_msi_enabled(struct pci_devinst *pi)
{
	return (pi->pi_msi.enabled);
}

/*
 * Return the number of MSI messages the device may generate,
 * or 0 when MSI is disabled.
 */
int
pci_msi_maxmsgnum(struct pci_devinst *pi)
{
	if (pi->pi_msi.enabled)
		return (pi->pi_msi.maxmsgnum);
	else
		return (0);
}

/*
 * MSI-X is only reported enabled while MSI is not: when both are
 * configured, MSI takes precedence here.
 */
int
pci_msix_enabled(struct pci_devinst *pi)
{

	return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}

/*
 * Deliver MSI-X message 'index' to the guest, unless MSI-X is
 * unusable, the function is masked, the index is out of range,
 * or the individual vector is masked.
 */
void
pci_generate_msix(struct pci_devinst *pi, int index)
{
	struct msix_table_entry *mte;

	if (!pci_msix_enabled(pi))
		return;

	if (pi->pi_msix.function_mask)
		return;

	if (index >= pi->pi_msix.table_count)
		return;

	mte = &pi->pi_msix.table[index];
	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		/* XXX Set PBA bit if interrupt is disabled */
		vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
	}
}

/*
 * Deliver MSI message 'index' to the guest; silently ignored when MSI
 * is disabled or 'index' exceeds the configured message count.
 */
void
pci_generate_msi(struct pci_devinst *pi, int index)
{

	if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
		vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
		    pi->pi_msi.msg_data + index);
	}
}

/*
 * Legacy INTx may only be asserted while MSI and MSI-X are both off
 * and the INTx-disable bit in the command register is clear.
 */
static bool
pci_lintr_permitted(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
	return (!(pi->pi_msi.enabled ||
pi->pi_msix.enabled ||
	    (cmd & PCIM_CMD_INTxDIS)));
}

/*
 * Allocate a legacy INTx pin for 'pi': pick the least-used of the four
 * intpins in the device's slot, and lazily allocate an I/O APIC IRQ the
 * first time a pin is used.  The assignment is reflected in the INTLINE
 * and INTPIN config registers.  Returns 0 on success, -1 if no I/O APIC
 * IRQ could be allocated.
 */
int
pci_lintr_request(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct slotinfo *si;
	int bestpin, bestcount, irq, pin;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);

	/*
	 * First, allocate a pin from our slot.
	 */
	si = &bi->slotinfo[pi->pi_slot];
	bestpin = 0;
	bestcount = si->si_intpins[0].ii_count;
	for (pin = 1; pin < 4; pin++) {
		if (si->si_intpins[pin].ii_count < bestcount) {
			bestpin = pin;
			bestcount = si->si_intpins[pin].ii_count;
		}
	}

	/*
	 * Attempt to allocate an I/O APIC pin for this intpin. If
	 * 8259A support is added we will need a separate field to
	 * assign the intpin to an input pin on the PCI interrupt
	 * router.
	 */
	if (si->si_intpins[bestpin].ii_count == 0) {
		irq = ioapic_pci_alloc_irq();
		if (irq < 0)
			return (-1);
		si->si_intpins[bestpin].ii_ioapic_irq = irq;
	} else
		irq = si->si_intpins[bestpin].ii_ioapic_irq;
	si->si_intpins[bestpin].ii_count++;

	/* INTPIN is 1-based: pin 0 is "no interrupt". */
	pi->pi_lintr.pin = bestpin + 1;
	pi->pi_lintr.ioapic_irq = irq;
	pci_set_cfgdata8(pi, PCIR_INTLINE, irq);
	pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1);
	return (0);
}

/*
 * Assert the device's INTx line.  If INTx delivery is currently not
 * permitted (MSI/MSI-X enabled or INTx disabled) the assertion is
 * recorded as PENDING and replayed later by pci_lintr_update().
 */
void
pci_lintr_assert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == IDLE) {
		if (pci_lintr_permitted(pi)) {
			pi->pi_lintr.state = ASSERTED;
			vm_ioapic_assert_irq(pi->pi_vmctx,
			    pi->pi_lintr.ioapic_irq);
		} else
			pi->pi_lintr.state = PENDING;
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/*
 * Deassert the device's INTx line; a PENDING (never delivered)
 * assertion is simply cancelled.
 */
void
pci_lintr_deassert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED) {
		pi->pi_lintr.state =
IDLE;
		vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
	} else if (pi->pi_lintr.state == PENDING)
		pi->pi_lintr.state = IDLE;
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/*
 * Re-evaluate the INTx state after a change that may affect whether
 * INTx delivery is permitted (e.g. a command-register or MSI state
 * change): an ASSERTED interrupt that is no longer permitted becomes
 * PENDING, and a PENDING one that is now permitted is delivered.
 */
static void
pci_lintr_update(struct pci_devinst *pi)
{

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) {
		vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
		pi->pi_lintr.state = PENDING;
	} else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) {
		pi->pi_lintr.state = ASSERTED;
		vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

/*
 * Count the number of in-use INTx pins on 'bus'.
 */
int
pci_count_lintr(int bus)
{
	int count, slot, pin;
	struct slotinfo *slotinfo;

	count = 0;
	if (pci_businfo[bus] != NULL) {
		for (slot = 0; slot < MAXSLOTS; slot++) {
			slotinfo = &pci_businfo[bus]->slotinfo[slot];
			for (pin = 0; pin < 4; pin++) {
				if (slotinfo->si_intpins[pin].ii_count != 0)
					count++;
			}
		}
	}
	return (count);
}

/*
 * Invoke 'cb' for every in-use INTx pin on 'bus'.  The callback receives
 * the 1-based pin number and the I/O APIC IRQ assigned to it.
 */
void
pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct intxinfo *ii;
	int slot, pin;

	if ((bi = pci_businfo[bus]) == NULL)
		return;

	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (pin = 0; pin < 4; pin++) {
			ii = &si->si_intpins[pin];
			if (ii->ii_count != 0)
				cb(bus, slot, pin + 1, ii->ii_ioapic_irq, arg);
		}
	}
}

/*
 * Return 1 if the emulated device in 'slot' is a multi-function device.
 * Return 0 otherwise.
 */
static int
pci_emul_is_mfdev(int bus, int slot)
{
	struct businfo *bi;
	struct slotinfo *si;
	int f, numfuncs;

	numfuncs = 0;
	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		for (f = 0; f < MAXFUNCS; f++) {
			if (si->si_funcs[f].fi_devi != NULL) {
				numfuncs++;
			}
		}
	}
	/* More than one populated function => multi-function device. */
	return (numfuncs > 1);
}

/*
 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
 * whether or not is a multi-function being emulated in the pci 'slot'.
 */
static void
pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
{
	int mfdev;

	/* Only fix up reads whose byte range covers the HDRTYPE register. */
	if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
		mfdev = pci_emul_is_mfdev(bus, slot);
		switch (bytes) {
		case 1:
		case 2:
			*rv &= ~PCIM_MFDEV;
			if (mfdev) {
				*rv |= PCIM_MFDEV;
			}
			break;
		case 4:
			/*
			 * A 4-byte read starting below HDRTYPE has the
			 * HDRTYPE byte shifted up 16 bits.
			 */
			*rv &= ~(PCIM_MFDEV << 16);
			if (mfdev) {
				*rv |= (PCIM_MFDEV << 16);
			}
			break;
		}
	}
}

/* Current CONF1 address-port selection (bus/slot/function/offset). */
static int cfgbus, cfgslot, cfgfunc, cfgoff;

/*
 * Handler for the CONF1 address port (0xCF8): latches the bus/slot/
 * function/offset on writes, and returns the latched value (with the
 * enable bit set) on reads.  Only 4-byte accesses are meaningful.
 */
static int
pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	uint32_t x;

	if (bytes != 4) {
		if (in)
			*eax = (bytes == 2) ?
0xffff : 0xff;
		return (0);
	}

	if (in) {
		x = (cfgbus << 16) |
		    (cfgslot << 11) |
		    (cfgfunc << 8) |
		    cfgoff;
		*eax = x | CONF1_ENABLE;
	} else {
		x = *eax;
		cfgoff = x & PCI_REGMAX;
		cfgfunc = (x >> 8) & PCI_FUNCMAX;
		cfgslot = (x >> 11) & PCI_SLOTMAX;
		cfgbus = (x >> 16) & PCI_BUSMAX;
	}

	return (0);
}
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);

/*
 * Return the bits of 'mask' that differ between 'old' and 'new'.
 */
static uint32_t
bits_changed(uint32_t old, uint32_t new, uint32_t mask)
{

	return ((old ^ new) & mask);
}

/*
 * Emulate a guest write to the command register: apply the write and
 * react to changes in the I/O and memory space-enable bits by
 * registering or unregistering the affected BARs.
 */
static void
pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
{
	int i;
	uint16_t old;

	/*
	 * The command register is at an offset of 4 bytes and thus the
	 * guest could write 1, 2 or 4 bytes starting at this offset.
	 */

	old = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */
	CFGWRITE(pi, PCIR_COMMAND, new, bytes); /* update config */
	new = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */

	/*
	 * If the MMIO or I/O address space decoding has changed then
	 * register/unregister all BARs that decode that address space.
	 */
	for (i = 0; i <= PCI_BARMAX; i++) {
		switch (pi->pi_bar[i].type) {
		case PCIBAR_NONE:
		case PCIBAR_MEMHI64:
			break;
		case PCIBAR_IO:
			/* I/O address space decoding changed? */
			if (bits_changed(old, new, PCIM_CMD_PORTEN)) {
				if (porten(pi))
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		case PCIBAR_MEM32:
		case PCIBAR_MEM64:
			/* MMIO address space decoding changed?
 */
			if (bits_changed(old, new, PCIM_CMD_MEMEN)) {
				if (memen(pi))
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		default:
			assert(0);
		}
	}

	/*
	 * If INTx has been unmasked and is pending, assert the
	 * interrupt.
	 */
	pci_lintr_update(pi);
}

/*
 * Handler for the CONF1 data ports (0xCFC-0xCFF): performs the config
 * space read or write selected by the latched address-port value,
 * optionally delegating to the device emulation's own handlers.
 */
static int
pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
    uint32_t *eax, void *arg)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	struct pci_devemu *pe;
	int coff, idx, needcfg;
	uint64_t addr, bar, mask;

	assert(bytes == 1 || bytes == 2 || bytes == 4);

	if ((bi = pci_businfo[cfgbus]) != NULL) {
		si = &bi->slotinfo[cfgslot];
		pi = si->si_funcs[cfgfunc].fi_devi;
	} else
		pi = NULL;

	/* The port within 0xCFC-0xCFF selects the byte within the dword. */
	coff = cfgoff + (port - CONF1_DATA_PORT);

#if 0
	printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r",
	    in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc);
#endif

	/*
	 * Just return if there is no device at this cfgslot:cfgfunc or
	 * if the guest is doing an un-aligned access
	 */
	if (pi == NULL || (coff & (bytes - 1)) != 0) {
		if (in)
			*eax = 0xffffffff;
		return (0);
	}

	pe = pi->pi_d;

	/*
	 * Config read
	 */
	if (in) {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgread != NULL) {
			needcfg = pe->pe_cfgread(ctx, vcpu, pi,
			    coff, bytes, eax);
		} else {
			needcfg = 1;
		}

		if (needcfg) {
			if (bytes == 1)
				*eax = pci_get_cfgdata8(pi, coff);
			else if (bytes == 2)
				*eax = pci_get_cfgdata16(pi, coff);
			else
				*eax = pci_get_cfgdata32(pi, coff);
		}

		pci_emul_hdrtype_fixup(cfgbus, cfgslot, coff, bytes, eax);
	} else {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgwrite != NULL &&
(*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
			return (0);

		/*
		 * Special handling for write to BAR registers
		 */
		if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
			/*
			 * Ignore writes to BAR registers that are not
			 * 4-byte aligned.
			 */
			if (bytes != 4 || (coff & 0x3) != 0)
				return (0);
			idx = (coff - PCIR_BAR(0)) / 4;
			/* BAR sizing: low bits covered by the size are read-only. */
			mask = ~(pi->pi_bar[idx].size - 1);
			switch (pi->pi_bar[idx].type) {
			case PCIBAR_NONE:
				pi->pi_bar[idx].addr = bar = 0;
				break;
			case PCIBAR_IO:
				/* I/O BARs decode at most 16 bits of address. */
				addr = *eax & mask;
				addr &= 0xffff;
				bar = addr | PCIM_BAR_IO_SPACE;
				/*
				 * Register the new BAR value for interception
				 */
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_IO);
				}
				break;
			case PCIBAR_MEM32:
				addr = bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM32);
				}
				break;
			case PCIBAR_MEM64:
				/* This write updates only the low 32 bits. */
				addr = bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
				    PCIM_BAR_MEM_PREFETCH;
				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM64);
				}
				break;
			case PCIBAR_MEMHI64:
				/*
				 * High dword of a 64-bit BAR: the state is
				 * kept in the preceding (low) BAR slot.
				 */
				mask = ~(pi->pi_bar[idx - 1].size - 1);
				addr = ((uint64_t)*eax << 32) & mask;
				bar = addr >> 32;
				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
					update_bar_address(pi, addr, idx - 1,
					    PCIBAR_MEMHI64);
				}
				break;
			default:
				assert(0);
			}
			pci_set_cfgdata32(pi, coff, bar);

		} else if (pci_emul_iscap(pi, coff)) {
			pci_emul_capwrite(pi, coff, bytes, *eax);
		} else if (coff == PCIR_COMMAND) {
			pci_emul_cmdwrite(pi, *eax, bytes);
		} else {
			CFGWRITE(pi, coff, *eax, bytes);
		}
	}

	return (0);
}

INOUT_PORT(pci_cfgdata,
CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 1745 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 1746 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 1747 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 1748 1749 /* 1750 * I/O ports to configure PCI IRQ routing. We ignore all writes to it. 1751 */ 1752 static int 1753 pci_irq_port_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1754 uint32_t *eax, void *arg) 1755 { 1756 assert(in == 0); 1757 return (0); 1758 } 1759 INOUT_PORT(pci_irq, 0xC00, IOPORT_F_OUT, pci_irq_port_handler); 1760 INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler); 1761 SYSRES_IO(0xC00, 2); 1762 1763 #define PCI_EMUL_TEST 1764 #ifdef PCI_EMUL_TEST 1765 /* 1766 * Define a dummy test device 1767 */ 1768 #define DIOSZ 8 1769 #define DMEMSZ 4096 1770 struct pci_emul_dsoftc { 1771 uint8_t ioregs[DIOSZ]; 1772 uint8_t memregs[DMEMSZ]; 1773 }; 1774 1775 #define PCI_EMUL_MSI_MSGS 4 1776 #define PCI_EMUL_MSIX_MSGS 16 1777 1778 static int 1779 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 1780 { 1781 int error; 1782 struct pci_emul_dsoftc *sc; 1783 1784 sc = malloc(sizeof(struct pci_emul_dsoftc)); 1785 memset(sc, 0, sizeof(struct pci_emul_dsoftc)); 1786 1787 pi->pi_arg = sc; 1788 1789 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 1790 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 1791 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 1792 1793 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 1794 assert(error == 0); 1795 1796 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 1797 assert(error == 0); 1798 1799 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 1800 assert(error == 0); 1801 1802 return (0); 1803 } 1804 1805 static void 1806 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1807 uint64_t offset, int size, uint64_t value) 1808 { 1809 int i; 1810 struct 
pci_emul_dsoftc *sc = pi->pi_arg; 1811 1812 if (baridx == 0) { 1813 if (offset + size > DIOSZ) { 1814 printf("diow: iow too large, offset %ld size %d\n", 1815 offset, size); 1816 return; 1817 } 1818 1819 if (size == 1) { 1820 sc->ioregs[offset] = value & 0xff; 1821 } else if (size == 2) { 1822 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 1823 } else if (size == 4) { 1824 *(uint32_t *)&sc->ioregs[offset] = value; 1825 } else { 1826 printf("diow: iow unknown size %d\n", size); 1827 } 1828 1829 /* 1830 * Special magic value to generate an interrupt 1831 */ 1832 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 1833 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 1834 1835 if (value == 0xabcdef) { 1836 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 1837 pci_generate_msi(pi, i); 1838 } 1839 } 1840 1841 if (baridx == 1) { 1842 if (offset + size > DMEMSZ) { 1843 printf("diow: memw too large, offset %ld size %d\n", 1844 offset, size); 1845 return; 1846 } 1847 1848 if (size == 1) { 1849 sc->memregs[offset] = value; 1850 } else if (size == 2) { 1851 *(uint16_t *)&sc->memregs[offset] = value; 1852 } else if (size == 4) { 1853 *(uint32_t *)&sc->memregs[offset] = value; 1854 } else if (size == 8) { 1855 *(uint64_t *)&sc->memregs[offset] = value; 1856 } else { 1857 printf("diow: memw unknown size %d\n", size); 1858 } 1859 1860 /* 1861 * magic interrupt ?? 
1862 */ 1863 } 1864 1865 if (baridx > 1) { 1866 printf("diow: unknown bar idx %d\n", baridx); 1867 } 1868 } 1869 1870 static uint64_t 1871 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1872 uint64_t offset, int size) 1873 { 1874 struct pci_emul_dsoftc *sc = pi->pi_arg; 1875 uint32_t value; 1876 1877 if (baridx == 0) { 1878 if (offset + size > DIOSZ) { 1879 printf("dior: ior too large, offset %ld size %d\n", 1880 offset, size); 1881 return (0); 1882 } 1883 1884 if (size == 1) { 1885 value = sc->ioregs[offset]; 1886 } else if (size == 2) { 1887 value = *(uint16_t *) &sc->ioregs[offset]; 1888 } else if (size == 4) { 1889 value = *(uint32_t *) &sc->ioregs[offset]; 1890 } else { 1891 printf("dior: ior unknown size %d\n", size); 1892 } 1893 } 1894 1895 if (baridx == 1) { 1896 if (offset + size > DMEMSZ) { 1897 printf("dior: memr too large, offset %ld size %d\n", 1898 offset, size); 1899 return (0); 1900 } 1901 1902 if (size == 1) { 1903 value = sc->memregs[offset]; 1904 } else if (size == 2) { 1905 value = *(uint16_t *) &sc->memregs[offset]; 1906 } else if (size == 4) { 1907 value = *(uint32_t *) &sc->memregs[offset]; 1908 } else if (size == 8) { 1909 value = *(uint64_t *) &sc->memregs[offset]; 1910 } else { 1911 printf("dior: ior unknown size %d\n", size); 1912 } 1913 } 1914 1915 1916 if (baridx > 1) { 1917 printf("dior: unknown bar idx %d\n", baridx); 1918 return (0); 1919 } 1920 1921 return (value); 1922 } 1923 1924 struct pci_devemu pci_dummy = { 1925 .pe_emu = "dummy", 1926 .pe_init = pci_emul_dinit, 1927 .pe_barwrite = pci_emul_diow, 1928 .pe_barread = pci_emul_dior 1929 }; 1930 PCI_EMUL_SET(pci_dummy); 1931 1932 #endif /* PCI_EMUL_TEST */ 1933