1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/errno.h>

#include <ctype.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <stdbool.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include "acpi.h"
#include "bhyverun.h"
#include "inout.h"
#include "ioapic.h"
#include "mem.h"
#include "pci_emul.h"
#include "pci_lpc.h"

/* Legacy PCI configuration mechanism #1 i/o ports. */
#define CONF1_ADDR_PORT    0x0cf8
#define CONF1_DATA_PORT    0x0cfc

#define CONF1_ENABLE       0x80000000ul

/*
 * Write 'val' into the config space of 'pi' at offset 'off' with an
 * access width of 'b' bytes (1, 2, or anything else means 4).
 */
#define CFGWRITE(pi,off,val,b)                                          \
do {                                                                    \
        if ((b) == 1) {                                                 \
                pci_set_cfgdata8((pi),(off),(val));                     \
        } else if ((b) == 2) {                                          \
                pci_set_cfgdata16((pi),(off),(val));                    \
        } else {                                                        \
                pci_set_cfgdata32((pi),(off),(val));                    \
        }                                                               \
} while (0)

#define MAXBUSES        (PCI_BUSMAX + 1)
#define MAXSLOTS        (PCI_SLOTMAX + 1)
#define MAXFUNCS        (PCI_FUNCMAX + 1)

/* Per-function state: name/config from the command line plus the instance. */
struct funcinfo {
        char    *fi_name;               /* emulation name, e.g. "virtio-net" */
        char    *fi_param;              /* optional device config string */
        struct pci_devinst *fi_devi;    /* set once the device is initialized */
};

/* Per-intpin sharing state for INTx routing. */
struct intxinfo {
        int     ii_count;               /* devices sharing this intpin */
        int     ii_ioapic_irq;          /* I/O APIC irq assigned to the pin */
};

struct slotinfo {
        struct intxinfo si_intpins[4];  /* INTA# through INTD# */
        struct funcinfo si_funcs[MAXFUNCS];
};

/* Resource windows decoded by a bus, filled in by init_pci(). */
struct businfo {
        uint16_t iobase, iolimit;               /* I/O window */
        uint32_t membase32, memlimit32;         /* mmio window below 4GB */
        uint64_t membase64, memlimit64;         /* mmio window above 4GB */
        struct slotinfo slotinfo[MAXSLOTS];
};

static struct businfo *pci_businfo[MAXBUSES];

SET_DECLARE(pci_devemu_set, struct pci_devemu);

/* Next free address in each of the three BAR allocation regions. */
static uint64_t pci_emul_iobase;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;

#define PCI_EMUL_IOBASE         0x2000
#define PCI_EMUL_IOLIMIT        0x10000

#define PCI_EMUL_MEMLIMIT32     0xE0000000              /* 3.5GB */

#define PCI_EMUL_MEMBASE64      0xD000000000UL
#define PCI_EMUL_MEMLIMIT64     0xFD00000000UL

static struct pci_devemu *pci_emul_finddev(char *name);
static void pci_lintr_update(struct pci_devinst *pi);

/* Fallback range covering the 32-bit PCI hole; registered in init_pci(). */
static struct mem_range pci_mem_hole;

/*
 * I/O access
 */

/*
 * Slot options are in the form:
 *
 *  <bus>:<slot>:<func>,<emul>[,<config>]
 *  <slot>[:<func>],<emul>[,<config>]
 *
 *  slot is 0..31
 *  func is 0..7
 *  emul is a string describing the type of PCI device e.g. virtio-net
 *  config is an optional string, depending on the device, that can be
 *  used for configuration.
 *   Examples are:
 *     1,virtio-net,tap0
 *     3:0,dummy
 */
static void
pci_parse_slot_usage(char *aopt)
{

        fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt);
}

/*
 * Parse one "-s" slot option (syntax above) and record the emulation name
 * and config string in the appropriate bus/slot/function.  Returns 0 on
 * success and -1 on a malformed option, an occupied slot, or an unknown
 * emulation name.
 */
int
pci_parse_slot(char *opt)
{
        struct businfo *bi;
        struct slotinfo *si;
        char *emul, *config, *str, *cp;
        int error, bnum, snum, fnum;

        error = -1;
        /* Work on a copy; fi_name/fi_param keep pointing into it on success. */
        str = strdup(opt);

        emul = config = NULL;
        if ((cp = strchr(str, ',')) != NULL) {
                *cp = '\0';
                emul = cp + 1;
                if ((cp = strchr(emul, ',')) != NULL) {
                        *cp = '\0';
                        config = cp + 1;
                }
        } else {
                pci_parse_slot_usage(opt);
                goto done;
        }

        /* <bus>:<slot>:<func> */
        if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) {
                bnum = 0;
                /* <slot>:<func> */
                if (sscanf(str, "%d:%d", &snum, &fnum) != 2) {
                        fnum = 0;
                        /* <slot> */
                        if (sscanf(str, "%d", &snum) != 1) {
                                snum = -1;
                        }
                }
        }

        if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS ||
            fnum < 0 || fnum >= MAXFUNCS) {
                pci_parse_slot_usage(opt);
                goto done;
        }

        /* Lazily allocate per-bus bookkeeping on first use of the bus. */
        if (pci_businfo[bnum] == NULL)
                pci_businfo[bnum] = calloc(1, sizeof(struct businfo));

        bi = pci_businfo[bnum];
        si = &bi->slotinfo[snum];

        if (si->si_funcs[fnum].fi_name != NULL) {
                fprintf(stderr, "pci slot %d:%d already occupied!\n",
                        snum, fnum);
                goto done;
        }

        if (pci_emul_finddev(emul) == NULL) {
                fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n",
                        snum, fnum, emul);
                goto done;
        }

        error = 0;
        si->si_funcs[fnum].fi_name = emul;
        si->si_funcs[fnum].fi_param = config;

done:
        /* On failure release the copy; on success it is retained forever. */
        if (error)
                free(str);

        return (error);
}

/* Return 1 if 'offset' falls within the MSI-X PBA region of 'pi'. */
static int
pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset)
{

        if (offset < pi->pi_msix.pba_offset)
                return (0);

        if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
                return (0);
        }

        return (1);
}

/*
 * Handle a guest write to the MSI-X table BAR.  'offset' is relative to
 * the start of the table.  Returns 0 on success, -1 for an unsupported
 * size, unaligned access, or out-of-range table index.
 */
int
pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
                     uint64_t value)
{
        int msix_entry_offset;
        int tab_index;
        char *dest;

        /* support only 4 or 8 byte writes */
        if (size != 4 && size != 8)
                return (-1);

        /*
         * Return if table index is beyond what device supports
         */
        tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
        if (tab_index >= pi->pi_msix.table_count)
                return (-1);

        msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

        /* support only aligned writes */
        if ((msix_entry_offset % size) != 0)
                return (-1);

        dest = (char *)(pi->pi_msix.table + tab_index);
        dest += msix_entry_offset;

        if (size == 4)
                *((uint32_t *)dest) = value;
        else
                *((uint64_t *)dest) = value;

        return (0);
}

/*
 * Handle a guest read from the MSI-X table BAR.  Reads beyond the table
 * that fall inside the PBA return 0; anything else invalid returns all 1s.
 */
uint64_t
pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size)
{
        char *dest;
        int msix_entry_offset;
        int tab_index;
        uint64_t retval = ~0;

        /*
         * The PCI standard only allows 4 and 8 byte accesses to the MSI-X
         * table but we also allow 1 byte access to accommodate reads from
         * ddb.
         */
        if (size != 1 && size != 4 && size != 8)
                return (retval);

        msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

        /* support only aligned reads */
        if ((msix_entry_offset % size) != 0) {
                return (retval);
        }

        tab_index = offset / MSIX_TABLE_ENTRY_SIZE;

        if (tab_index < pi->pi_msix.table_count) {
                /* valid MSI-X Table access */
                dest = (char *)(pi->pi_msix.table + tab_index);
                dest += msix_entry_offset;

                if (size == 1)
                        retval = *((uint8_t *)dest);
                else if (size == 4)
                        retval = *((uint32_t *)dest);
                else
                        retval = *((uint64_t *)dest);
        } else if (pci_valid_pba_offset(pi, offset)) {
                /* return 0 for PBA access */
                retval = 0;
        }

        return (retval);
}

/* BAR index holding the MSI-X table, or -1 if MSI-X is not configured. */
int
pci_msix_table_bar(struct pci_devinst *pi)
{

        if (pi->pi_msix.table != NULL)
                return (pi->pi_msix.table_bar);
        else
                return (-1);
}

/* BAR index holding the MSI-X PBA, or -1 if MSI-X is not configured. */
int
pci_msix_pba_bar(struct pci_devinst *pi)
{

        if (pi->pi_msix.table != NULL)
                return (pi->pi_msix.pba_bar);
        else
                return (-1);
}

/*
 * inout handler for a device's I/O BARs: find the BAR covering 'port'
 * and forward the access to the emulation's barread/barwrite callback.
 */
static int
pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
                    uint32_t *eax, void *arg)
{
        struct pci_devinst *pdi = arg;
        struct pci_devemu *pe = pdi->pi_d;
        uint64_t offset;
        int i;

        for (i = 0; i <= PCI_BARMAX; i++) {
                if (pdi->pi_bar[i].type == PCIBAR_IO &&
                    port >= pdi->pi_bar[i].addr &&
                    port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
                        offset = port - pdi->pi_bar[i].addr;
                        if (in)
                                *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i,
                                    offset, bytes);
                        else
                                (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset,
                                    bytes, *eax);
                        return (0);
                }
        }
        return (-1);
}

/*
 * Memory handler for a device's MMIO BARs; 'arg2' carries the BAR index
 * registered in modify_bar_registration().
 */
static int
pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
                     int size, uint64_t *val, void *arg1, long arg2)
{
        struct pci_devinst *pdi = arg1;
        struct pci_devemu *pe = pdi->pi_d;
        uint64_t offset;
        int bidx = (int) arg2;

        assert(bidx <= PCI_BARMAX);
        assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
            pdi->pi_bar[bidx].type == PCIBAR_MEM64);
        assert(addr >= pdi->pi_bar[bidx].addr &&
            addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);

        offset = addr - pdi->pi_bar[bidx].addr;

        if (dir == MEM_F_WRITE)
                (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val);
        else
                *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size);

        return (0);
}


/*
 * Carve a naturally-aligned region of 'size' bytes out of the allocator
 * rooted at '*baseptr' without exceeding 'limit'.  On success the region's
 * start is returned in '*addr' and '*baseptr' is advanced past it.
 */
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
    uint64_t *addr)
{
        uint64_t base;

        assert((size & (size - 1)) == 0);       /* must be a power of 2 */

        base = roundup2(*baseptr, size);

        if (base + size <= limit) {
                *addr = base;
                *baseptr = base + size;
                return (0);
        } else
                return (-1);
}

/* Convenience wrapper around pci_emul_alloc_pbar() with no host base. */
int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
                   uint64_t size)
{

        return (pci_emul_alloc_pbar(pdi, idx, 0, type, size));
}

/*
 * Register (or unregister) the MMIO or I/O region associated with the BAR
 * register 'idx' of an emulated pci device.
 */
static void
modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
{
        int error;
        struct inout_port iop;
        struct mem_range mr;

        switch (pi->pi_bar[idx].type) {
        case PCIBAR_IO:
                /* I/O BARs are intercepted through the inout framework. */
                bzero(&iop, sizeof(struct inout_port));
                iop.name = pi->pi_name;
                iop.port = pi->pi_bar[idx].addr;
                iop.size = pi->pi_bar[idx].size;
                if (registration) {
                        iop.flags = IOPORT_F_INOUT;
                        iop.handler = pci_emul_io_handler;
                        iop.arg = pi;
                        error = register_inout(&iop);
                } else
                        error = unregister_inout(&iop);
                break;
        case PCIBAR_MEM32:
        case PCIBAR_MEM64:
                /* Memory BARs are intercepted through the mem framework. */
                bzero(&mr, sizeof(struct mem_range));
                mr.name = pi->pi_name;
                mr.base = pi->pi_bar[idx].addr;
                mr.size = pi->pi_bar[idx].size;
                if (registration) {
                        mr.flags = MEM_F_RW;
                        mr.handler = pci_emul_mem_handler;
                        mr.arg1 = pi;
                        mr.arg2 = idx;      /* BAR index, recovered by handler */
                        error = register_mem(&mr);
                } else
                        error = unregister_mem(&mr);
                break;
        default:
                error = EINVAL;
                break;
        }
        assert(error == 0);
}

/* Stop intercepting accesses to BAR 'idx'. */
static void
unregister_bar(struct pci_devinst *pi, int idx)
{

        modify_bar_registration(pi, idx, 0);
}

/* Start intercepting accesses to BAR 'idx'. */
static void
register_bar(struct pci_devinst *pi, int idx)
{

        modify_bar_registration(pi, idx, 1);
}

/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
{
        uint16_t cmd;

        cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

        return (cmd & PCIM_CMD_PORTEN);
}

/* Are we decoding memory accesses for the emulated pci device? */
static int
memen(struct pci_devinst *pi)
{
        uint16_t cmd;

        cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

        return (cmd & PCIM_CMD_MEMEN);
}

/*
 * Update the MMIO or I/O address that is decoded by the BAR register.
 *
 * If the pci device has enabled the address space decoding then intercept
 * the address range decoded by the BAR register.
 */
static void
update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
{
        int decode;

        if (pi->pi_bar[idx].type == PCIBAR_IO)
                decode = porten(pi);
        else
                decode = memen(pi);

        /* Drop the old intercept before moving the BAR. */
        if (decode)
                unregister_bar(pi, idx);

        switch (type) {
        case PCIBAR_IO:
        case PCIBAR_MEM32:
                pi->pi_bar[idx].addr = addr;
                break;
        case PCIBAR_MEM64:
                /* 'addr' is the new low 32 bits of a 64-bit BAR. */
                pi->pi_bar[idx].addr &= ~0xffffffffUL;
                pi->pi_bar[idx].addr |= addr;
                break;
        case PCIBAR_MEMHI64:
                /* 'addr' is the new high 32 bits, already shifted up. */
                pi->pi_bar[idx].addr &= 0xffffffff;
                pi->pi_bar[idx].addr |= addr;
                break;
        default:
                assert(0);
        }

        if (decode)
                register_bar(pi, idx);
}

/*
 * Allocate guest address space for BAR 'idx' of 'pdi', initialize the BAR
 * register in config space, and register the intercept.  'hostbase' is
 * used only for the peer-to-peer DMA special case below.
 */
int
pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
    enum pcibar_type type, uint64_t size)
{
        int error;
        uint64_t *baseptr, limit, addr, mask, lobits, bar;

        assert(idx >= 0 && idx <= PCI_BARMAX);

        if ((size & (size - 1)) != 0)
                size = 1UL << flsl(size);       /* round up to a power of 2 */

        /* Enforce minimum BAR sizes required by the PCI standard */
        if (type == PCIBAR_IO) {
                if (size < 4)
                        size = 4;
        } else {
                if (size < 16)
                        size = 16;
        }

        switch (type) {
        case PCIBAR_NONE:
                baseptr = NULL;
                addr = mask = lobits = 0;
                break;
        case PCIBAR_IO:
                baseptr = &pci_emul_iobase;
                limit = PCI_EMUL_IOLIMIT;
                mask = PCIM_BAR_IO_BASE;
                lobits = PCIM_BAR_IO_SPACE;
                break;
        case PCIBAR_MEM64:
                /*
                 * XXX
                 * Some drivers do not work well if the 64-bit BAR is allocated
                 * above 4GB. Allow for this by allocating small requests under
                 * 4GB unless the allocation size is larger than some arbitrary
                 * number (32MB currently).
                 */
                if (size > 32 * 1024 * 1024) {
                        /*
                         * XXX special case for device requiring peer-peer DMA
                         */
                        if (size == 0x100000000UL)
                                baseptr = &hostbase;
                        else
                                baseptr = &pci_emul_membase64;
                        limit = PCI_EMUL_MEMLIMIT64;
                        mask = PCIM_BAR_MEM_BASE;
                        lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
                            PCIM_BAR_MEM_PREFETCH;
                        break;
                } else {
                        /* Small 64-bit BAR placed in the 32-bit window. */
                        baseptr = &pci_emul_membase32;
                        limit = PCI_EMUL_MEMLIMIT32;
                        mask = PCIM_BAR_MEM_BASE;
                        lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
                }
                break;
        case PCIBAR_MEM32:
                baseptr = &pci_emul_membase32;
                limit = PCI_EMUL_MEMLIMIT32;
                mask = PCIM_BAR_MEM_BASE;
                lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
                break;
        default:
                printf("pci_emul_alloc_base: invalid bar type %d\n", type);
                assert(0);
        }

        if (baseptr != NULL) {
                error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
                if (error != 0)
                        return (error);
        }

        pdi->pi_bar[idx].type = type;
        pdi->pi_bar[idx].addr = addr;
        pdi->pi_bar[idx].size = size;

        /* Initialize the BAR register in config space */
        bar = (addr & mask) | lobits;
        pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);

        if (type == PCIBAR_MEM64) {
                /* A 64-bit BAR consumes the next BAR slot for the high bits. */
                assert(idx + 1 <= PCI_BARMAX);
                pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
                pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
        }

        register_bar(pdi, idx);

        return (0);
}

#define CAP_START_OFFSET        0x40
/*
 * Append a capability of 'caplen' bytes to the device's capability list,
 * dword-aligning its size and linking it from the previous capability (or
 * from PCIR_CAP_PTR for the first one).  Returns -1 if config space is full.
 */
static int
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
{
        int i, capoff, reallen;
        uint16_t sts;

        assert(caplen > 0);

        reallen = roundup2(caplen, 4);          /* dword aligned */

        sts = pci_get_cfgdata16(pi, PCIR_STATUS);
        if ((sts & PCIM_STATUS_CAPPRESENT) == 0)
                capoff = CAP_START_OFFSET;
        else
                capoff = pi->pi_capend + 1;

        /* Check if we have enough space */
        if (capoff + reallen > PCI_REGMAX + 1)
                return (-1);

        /* Set the previous capability pointer */
        if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
                pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
                pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
        } else
                pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff);

        /* Copy the capability */
        for (i = 0; i < caplen; i++)
                pci_set_cfgdata8(pi, capoff + i, capdata[i]);

        /* Set the next capability pointer */
        pci_set_cfgdata8(pi, capoff + 1, 0);

        pi->pi_prevcap = capoff;
        pi->pi_capend = capoff + reallen - 1;
        return (0);
}

/* Look up a device emulation by name in the linker set of emulations. */
static struct pci_devemu *
pci_emul_finddev(char *name)
{
        struct pci_devemu **pdpp, *pdp;

        SET_FOREACH(pdpp, pci_devemu_set) {
                pdp = *pdpp;
                if (!strcmp(pdp->pe_emu, name)) {
                        return (pdp);
                }
        }

        return (NULL);
}

/*
 * Instantiate one emulated device at bus/slot/func and call its pe_init
 * callback.  On success the instance is recorded in 'fi'; on failure the
 * partially-built instance is freed and the callback's error is returned.
 */
static int
pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot,
    int func, struct funcinfo *fi)
{
        struct pci_devinst *pdi;
        int err;

        pdi = calloc(1, sizeof(struct pci_devinst));

        pdi->pi_vmctx = ctx;
        pdi->pi_bus = bus;
        pdi->pi_slot = slot;
        pdi->pi_func = func;
        pthread_mutex_init(&pdi->pi_lintr.lock, NULL);
        pdi->pi_lintr.pin = 0;
        pdi->pi_lintr.state = IDLE;
        pdi->pi_lintr.ioapic_irq = 0;
        pdi->pi_d = pde;
        snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot);

        /* Disable legacy interrupts */
        pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
        pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);

        pci_set_cfgdata8(pdi, PCIR_COMMAND,
            PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

        err = (*pde->pe_init)(ctx, pdi, fi->fi_param);
        if (err == 0)
                fi->fi_devi = pdi;
        else
                free(pdi);

        return (err);
}

/*
 * Fill in an MSI capability structure advertising 'msgnum' messages and
 * 64-bit address support, chained to 'nextptr'.
 */
void
pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
{
        int mmc;

        CTASSERT(sizeof(struct msicap) == 14);

        /* Number of msi messages must be a power of 2 between 1 and 32 */
        assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
        mmc = ffs(msgnum) - 1;

        bzero(msicap, sizeof(struct msicap));
        msicap->capid = PCIY_MSI;
        msicap->nextptr = nextptr;
        msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
}

/* Add an MSI capability with 'msgnum' messages to the device. */
int
pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
{
        struct msicap msicap;

        pci_populate_msicap(&msicap, msgnum, 0);

        return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
}

/*
 * Fill in an MSI-X capability: table at offset 0 of BAR 'barnum', PBA
 * immediately after the (4K aligned) table.
 */
static void
pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
                     uint32_t msix_tab_size)
{
        CTASSERT(sizeof(struct msixcap) == 12);

        assert(msix_tab_size % 4096 == 0);

        bzero(msixcap, sizeof(struct msixcap));
        msixcap->capid = PCIY_MSIX;

        /*
         * Message Control Register, all fields set to
         * zero except for the Table Size.
         * Note: Table size N is encoded as N-1
         */
        msixcap->msgctrl = msgnum - 1;

        /*
         * MSI-X BAR setup:
         * - MSI-X table start at offset 0
         * - PBA table starts at a 4K aligned offset after the MSI-X table
         */
        msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK;
        msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK);
}

/*
 * Allocate the in-memory MSI-X table and mask every vector, as required
 * for the initial state of the table.
 */
static void
pci_msix_table_init(struct pci_devinst *pi, int table_entries)
{
        int i, table_size;

        assert(table_entries > 0);
        assert(table_entries <= MAX_MSIX_TABLE_ENTRIES);

        table_size = table_entries * MSIX_TABLE_ENTRY_SIZE;
        pi->pi_msix.table = calloc(1, table_size);

        /* set mask bit of vector control register */
        for (i = 0; i < table_entries; i++)
                pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK;
}

/*
 * Add an MSI-X capability with 'msgnum' vectors whose table and PBA both
 * live in 32-bit memory BAR 'barnum'.
 */
int
pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
{
        uint32_t tab_size;
        struct msixcap msixcap;

        assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES);
        assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0);

        tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE;

        /* Align table size to nearest 4K */
        tab_size = roundup2(tab_size, 4096);

        pi->pi_msix.table_bar = barnum;
        pi->pi_msix.pba_bar = barnum;
        pi->pi_msix.table_offset = 0;
        pi->pi_msix.table_count = msgnum;
        pi->pi_msix.pba_offset = tab_size;
        pi->pi_msix.pba_size = PBA_SIZE(msgnum);

        pci_msix_table_init(pi, msgnum);

        pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size);

        /* allocate memory for MSI-X Table and PBA */
        pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
            tab_size + pi->pi_msix.pba_size);

        return (pci_emul_add_capability(pi, (u_char *)&msixcap,
            sizeof(msixcap)));
}

/*
 * Config-space write handler for the MSI-X capability: only the enable
 * and function-mask bits of the message control register are writable.
 */
void
msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
                 int bytes, uint32_t val)
{
        uint16_t msgctrl, rwmask;
        int off, table_bar;

        off = offset - capoff;
        table_bar = pi->pi_msix.table_bar;
        /* Message Control Register */
        if (off == 2 && bytes == 2) {
                rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK;
                msgctrl = pci_get_cfgdata16(pi, offset);
                msgctrl &= ~rwmask;
                msgctrl |= val & rwmask;
                val = msgctrl;

                pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
                pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
                pci_lintr_update(pi);
        }

        CFGWRITE(pi, offset, val, bytes);
}

/*
 * Config-space write handler for the MSI capability: preserves read-only
 * message-control fields and latches the address/data/count into pi_msi.
 */
void
msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
                int bytes, uint32_t val)
{
        uint16_t msgctrl, rwmask, msgdata, mme;
        uint32_t addrlo;

        /*
         * If guest is writing to the message control register make sure
         * we do not overwrite read-only fields.
         */
        if ((offset - capoff) == 2 && bytes == 2) {
                rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
                msgctrl = pci_get_cfgdata16(pi, offset);
                msgctrl &= ~rwmask;
                msgctrl |= val & rwmask;
                val = msgctrl;

                addrlo = pci_get_cfgdata32(pi, capoff + 4);
                /* Message data location depends on 64-bit address support. */
                if (msgctrl & PCIM_MSICTRL_64BIT)
                        msgdata = pci_get_cfgdata16(pi, capoff + 12);
                else
                        msgdata = pci_get_cfgdata16(pi, capoff + 8);

                mme = msgctrl & PCIM_MSICTRL_MME_MASK;
                pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
                if (pi->pi_msi.enabled) {
                        pi->pi_msi.addr = addrlo;
                        pi->pi_msi.msg_data = msgdata;
                        pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
                } else {
                        pi->pi_msi.maxmsgnum = 0;
                }
                pci_lintr_update(pi);
        }

        CFGWRITE(pi, offset, val, bytes);
}

void
pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
                 int bytes, uint32_t val)
{

        /* XXX don't write to the readonly parts */
        CFGWRITE(pi, offset, val, bytes);
}

#define PCIECAP_VERSION 0x2
/*
 * Add a PCI Express capability; only root ports are supported and the
 * link is advertised as gen1 x1.
 */
int
pci_emul_add_pciecap(struct pci_devinst *pi, int type)
{
        int err;
        struct pciecap pciecap;

        CTASSERT(sizeof(struct pciecap) == 60);

        if (type != PCIEM_TYPE_ROOT_PORT)
                return (-1);

        bzero(&pciecap, sizeof(pciecap));

        pciecap.capid = PCIY_EXPRESS;
        pciecap.pcie_capabilities = PCIECAP_VERSION | PCIEM_TYPE_ROOT_PORT;
        pciecap.link_capabilities = 0x411;      /* gen1, x1 */
        pciecap.link_status = 0x11;             /* gen1, x1 */

        err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap));
        return (err);
}

/*
 * This function assumes that 'coff' is in the capabilities region of the
 * config space.
 */
static void
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val)
{
        int capid;
        uint8_t capoff, nextoff;

        /* Do not allow un-aligned writes */
        if ((offset & (bytes - 1)) != 0)
                return;

        /* Find the capability that we want to update */
        capoff = CAP_START_OFFSET;
        while (1) {
                nextoff = pci_get_cfgdata8(pi, capoff + 1);
                if (nextoff == 0)
                        break;
                if (offset >= capoff && offset < nextoff)
                        break;

                capoff = nextoff;
        }
        assert(offset >= capoff);

        /*
         * Capability ID and Next Capability Pointer are readonly.
         * However, some o/s's do 4-byte writes that include these.
         * For this case, trim the write back to 2 bytes and adjust
         * the data.
         */
        if (offset == capoff || offset == capoff + 1) {
                if (offset == capoff && bytes == 4) {
                        bytes = 2;
                        offset += 2;
                        val >>= 16;
                } else
                        return;
        }

        /* Dispatch on the capability type being written. */
        capid = pci_get_cfgdata8(pi, capoff);
        switch (capid) {
        case PCIY_MSI:
                msicap_cfgwrite(pi, capoff, offset, bytes, val);
                break;
        case PCIY_MSIX:
                msixcap_cfgwrite(pi, capoff, offset, bytes, val);
                break;
        case PCIY_EXPRESS:
                pciecap_cfgwrite(pi, capoff, offset, bytes, val);
                break;
        default:
                break;
        }
}

/* Return 1 if config-space offset 'offset' lies in the capability region. */
static int
pci_emul_iscap(struct pci_devinst *pi, int offset)
{
        uint16_t sts;

        sts = pci_get_cfgdata16(pi, PCIR_STATUS);
        if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
                if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend)
                        return (1);
        }
        return (0);
}

/* Handler for guest accesses to the unbacked PCI hole (see init_pci()). */
static int
pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
                          int size, uint64_t *val, void *arg1, long arg2)
{
        /*
         * Ignore writes; return 0xff's for reads. The mem read code
         * will take care of truncating to the correct size.
         */
        if (dir == MEM_F_READ) {
                *val = 0xffffffffffffffff;
        }

        return (0);
}

/* Per-bus slop added to resource windows for guest BAR reprogramming. */
#define BUSIO_ROUNDUP           32
#define BUSMEM_ROUNDUP          (1024 * 1024)

/*
 * Initialize all configured PCI devices, record the I/O and memory windows
 * decoded by each bus, and register the fallback handler for the PCI hole.
 * Returns 0 on success or a device init error.
 */
int
init_pci(struct vmctx *ctx)
{
        struct pci_devemu *pde;
        struct businfo *bi;
        struct slotinfo *si;
        struct funcinfo *fi;
        size_t lowmem;
        int bus, slot, func;
        int error;

        pci_emul_iobase = PCI_EMUL_IOBASE;
        pci_emul_membase32 = vm_get_lowmem_limit(ctx);
        pci_emul_membase64 = PCI_EMUL_MEMBASE64;

        for (bus = 0; bus < MAXBUSES; bus++) {
                if ((bi = pci_businfo[bus]) == NULL)
                        continue;
                /*
                 * Keep track of the i/o and memory resources allocated to
                 * this bus.
                 */
                bi->iobase = pci_emul_iobase;
                bi->membase32 = pci_emul_membase32;
                bi->membase64 = pci_emul_membase64;

                for (slot = 0; slot < MAXSLOTS; slot++) {
                        si = &bi->slotinfo[slot];
                        for (func = 0; func < MAXFUNCS; func++) {
                                fi = &si->si_funcs[func];
                                if (fi->fi_name == NULL)
                                        continue;
                                pde = pci_emul_finddev(fi->fi_name);
                                assert(pde != NULL);
                                error = pci_emul_init(ctx, pde, bus, slot,
                                    func, fi);
                                if (error)
                                        return (error);
                        }
                }

                /*
                 * Add some slop to the I/O and memory resources decoded by
                 * this bus to give a guest some flexibility if it wants to
                 * reprogram the BARs.
                 */
                pci_emul_iobase += BUSIO_ROUNDUP;
                pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
                bi->iolimit = pci_emul_iobase;

                pci_emul_membase32 += BUSMEM_ROUNDUP;
                pci_emul_membase32 = roundup2(pci_emul_membase32,
                    BUSMEM_ROUNDUP);
                bi->memlimit32 = pci_emul_membase32;

                pci_emul_membase64 += BUSMEM_ROUNDUP;
                pci_emul_membase64 = roundup2(pci_emul_membase64,
                    BUSMEM_ROUNDUP);
                bi->memlimit64 = pci_emul_membase64;
        }

        /*
         * The guest physical memory map looks like the following:
         * [0,              lowmem)         guest system memory
         * [lowmem,         lowmem_limit)   memory hole (may be absent)
         * [lowmem_limit,   4GB)            PCI hole (32-bit BAR allocation)
         * [4GB,            4GB + highmem)
         *
         * Accesses to memory addresses that are not allocated to system
         * memory or PCI devices return 0xff's.
         */
        error = vm_get_memory_seg(ctx, 0, &lowmem, NULL);
        assert(error == 0);

        memset(&pci_mem_hole, 0, sizeof(struct mem_range));
        pci_mem_hole.name = "PCI hole";
        pci_mem_hole.flags = MEM_F_RW;
        pci_mem_hole.base = lowmem;
        pci_mem_hole.size = (4ULL * 1024 * 1024 * 1024) - lowmem;
        pci_mem_hole.handler = pci_emul_fallback_handler;

        error = register_mem_fallback(&pci_mem_hole);
        assert(error == 0);

        return (0);
}

/*
 * Emit one _PRT package entry for a routed intpin; 'arg' is the remaining
 * entry count used to decide whether to emit a trailing comma.
 */
static void
pci_prt_entry(int bus, int slot, int pin, int ioapic_irq, void *arg)
{
        int *count;

        count = arg;
        dsdt_line(" Package (0x04)");
        dsdt_line(" {");
        dsdt_line(" 0x%X,", slot << 16 | 0xffff);
        dsdt_line(" 0x%02X,", pin - 1);
        dsdt_line(" Zero,");
        dsdt_line(" 0x%X", ioapic_irq);
        dsdt_line(" }%s", *count == 1 ? "" : ",");
        (*count)--;
}

/*
 * A bhyve virtual machine has a flat PCI hierarchy with a root port
 * corresponding to each PCI bus.
 */
static void
pci_bus_write_dsdt(int bus)
{
        struct businfo *bi;
        struct slotinfo *si;
        struct pci_devinst *pi;
        int count, slot, func;

        /*
         * If there are no devices on this 'bus' then just return.
         */
        if ((bi = pci_businfo[bus]) == NULL) {
                /*
                 * Bus 0 is special because it decodes the I/O ports used
                 * for PCI config space access even if there are no devices
                 * on it.
                 */
                if (bus != 0)
                        return;
        }

        dsdt_indent(1);
        dsdt_line("Scope (_SB)");
        dsdt_line("{");
        dsdt_line(" Device (PC%02X)", bus);
        dsdt_line(" {");
        dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))");
        dsdt_line(" Name (_ADR, Zero)");

        dsdt_line(" Method (_BBN, 0, NotSerialized)");
        dsdt_line(" {");
        dsdt_line(" Return (0x%08X)", bus);
        dsdt_line(" }");
        dsdt_line(" Name (_CRS, ResourceTemplate ()");
        dsdt_line(" {");
        dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, "
            "MaxFixed, PosDecode,");
        dsdt_line(" 0x0000, // Granularity");
        dsdt_line(" 0x%04X, // Range Minimum", bus);
        dsdt_line(" 0x%04X, // Range Maximum", bus);
        dsdt_line(" 0x0000, // Translation Offset");
        dsdt_line(" 0x0001, // Length");
        dsdt_line(" ,, )");

        if (bus == 0) {
                /* Bus 0 also claims the config access ports and legacy i/o. */
                dsdt_indent(3);
                dsdt_fixed_ioport(0xCF8, 8);
                dsdt_unindent(3);

                dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
                    "PosDecode, EntireRange,");
                dsdt_line(" 0x0000, // Granularity");
                dsdt_line(" 0x0000, // Range Minimum");
                dsdt_line(" 0x0CF7, // Range Maximum");
                dsdt_line(" 0x0000, // Translation Offset");
                dsdt_line(" 0x0CF8, // Length");
                dsdt_line(" ,, , TypeStatic)");

                dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
                    "PosDecode, EntireRange,");
                dsdt_line(" 0x0000, // Granularity");
                dsdt_line(" 0x0D00, // Range Minimum");
                dsdt_line(" 0x%04X, // Range Maximum",
                    PCI_EMUL_IOBASE - 1);
                dsdt_line(" 0x0000, // Translation Offset");
                dsdt_line(" 0x%04X, // Length",
                    PCI_EMUL_IOBASE - 0x0D00);
                dsdt_line(" ,, , TypeStatic)");

                if (bi == NULL) {
                        dsdt_line(" })");
                        goto done;
                }
        }
        assert(bi != NULL);

        /* i/o window */
        dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
            "PosDecode, EntireRange,");
        dsdt_line(" 0x0000, // Granularity");
        dsdt_line(" 0x%04X, // Range Minimum", bi->iobase);
        dsdt_line(" 0x%04X, // Range Maximum",
            bi->iolimit - 1);
        dsdt_line(" 0x0000, // Translation Offset");
        dsdt_line(" 0x%04X, // Length",
            bi->iolimit - bi->iobase);
        dsdt_line(" ,, , TypeStatic)");

        /* mmio window (32-bit) */
        dsdt_line(" DWordMemory (ResourceProducer, PosDecode, "
            "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
        dsdt_line(" 0x00000000, // Granularity");
        dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32);
        dsdt_line(" 0x%08X, // Range Maximum\n",
            bi->memlimit32 - 1);
        dsdt_line(" 0x00000000, // Translation Offset");
        dsdt_line(" 0x%08X, // Length\n",
            bi->memlimit32 - bi->membase32);
        dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");

        /* mmio window (64-bit) */
        dsdt_line(" QWordMemory (ResourceProducer, PosDecode, "
            "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
        dsdt_line(" 0x0000000000000000, // Granularity");
        dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64);
        dsdt_line(" 0x%016lX, // Range Maximum\n",
            bi->memlimit64 - 1);
        dsdt_line(" 0x0000000000000000, // Translation Offset");
        dsdt_line(" 0x%016lX, // Length\n",
            bi->memlimit64 - bi->membase64);
        dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");
        dsdt_line(" })");

        /* Interrupt routing table for any routed INTx pins. */
        count = pci_count_lintr(bus);
        if (count != 0) {
                dsdt_indent(2);
                dsdt_line("Name (_PRT, Package (0x%02X)", count);
                dsdt_line("{");
                pci_walk_lintr(bus, pci_prt_entry, &count);
                dsdt_line("})");
                dsdt_unindent(2);
        }

        /* Let each device emulation append its own DSDT content. */
        dsdt_indent(2);
        for (slot = 0; slot < MAXSLOTS; slot++) {
                si = &bi->slotinfo[slot];
                for (func = 0; func < MAXFUNCS; func++) {
                        pi = si->si_funcs[func].fi_devi;
                        if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
                                pi->pi_d->pe_write_dsdt(pi);
                }
        }
        dsdt_unindent(2);
done:
        dsdt_line(" }");
        dsdt_line("}");
        dsdt_unindent(1);
}

/* Emit DSDT content for every possible bus. */
void
pci_write_dsdt(void)
{
        int bus;

        for (bus = 0; bus < MAXBUSES; bus++)
                pci_bus_write_dsdt(bus);
}

/* Return non-zero if any device was configured on 'bus'. */
int
pci_bus_configured(int bus)
{
        assert(bus >= 0 && bus < MAXBUSES);
        return (pci_businfo[bus] != NULL);
}

int
pci_msi_enabled(struct pci_devinst *pi)
{
        return (pi->pi_msi.enabled);
}

/* Number of MSI messages the guest enabled, or 0 if MSI is disabled. */
int
pci_msi_maxmsgnum(struct pci_devinst *pi)
{
        if (pi->pi_msi.enabled)
                return (pi->pi_msi.maxmsgnum);
        else
                return (0);
}

/* MSI-X is only considered enabled when MSI is not also enabled. */
int
pci_msix_enabled(struct pci_devinst *pi)
{

        return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}

/*
 * Deliver MSI-X vector 'index' for 'pi' unless MSI-X is disabled, the
 * function is masked, the index is out of range, or the vector is masked.
 */
void
pci_generate_msix(struct pci_devinst *pi, int index)
{
        struct msix_table_entry *mte;

        if (!pci_msix_enabled(pi))
                return;

        if (pi->pi_msix.function_mask)
                return;

        if (index >= pi->pi_msix.table_count)
                return;

        mte = &pi->pi_msix.table[index];
        if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
                /* XXX Set PBA bit if interrupt is disabled */
                vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
        }
}

/* Deliver MSI message 'index' (added to the base message data) for 'pi'. */
void
pci_generate_msi(struct pci_devinst *pi, int index)
{

        if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
                vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
                    pi->pi_msi.msg_data + index);
        }
}

/*
 * INTx may only be asserted when neither MSI nor MSI-X is enabled and the
 * command register does not have INTx disabled.
 */
static bool
pci_lintr_permitted(struct pci_devinst *pi)
{
        uint16_t cmd;

        cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
        return (!(pi->pi_msi.enabled || pi->pi_msix.enabled ||
                (cmd & PCIM_CMD_INTxDIS)));
}

/*
 * Assign a legacy INTx pin to 'pi': choose the least-shared pin in its
 * slot, allocate (or reuse) an I/O APIC irq for it, and record the
 * routing in config space.  Returns -1 if no irq could be allocated.
 */
int
pci_lintr_request(struct pci_devinst *pi)
{
        struct businfo *bi;
        struct slotinfo *si;
        int bestpin, bestcount, irq, pin;

        bi = pci_businfo[pi->pi_bus];
        assert(bi != NULL);

        /*
         * First, allocate a pin from our slot.
         */
        si = &bi->slotinfo[pi->pi_slot];
        bestpin = 0;
        bestcount = si->si_intpins[0].ii_count;
        for (pin = 1; pin < 4; pin++) {
                if (si->si_intpins[pin].ii_count < bestcount) {
                        bestpin = pin;
                        bestcount = si->si_intpins[pin].ii_count;
                }
        }

        /*
         * Attempt to allocate an I/O APIC pin for this intpin.  If
         * 8259A support is added we will need a separate field to
         * assign the intpin to an input pin on the PCI interrupt
         * router.
         */
        if (si->si_intpins[bestpin].ii_count == 0) {
                irq = ioapic_pci_alloc_irq();
                if (irq < 0)
                        return (-1);
                si->si_intpins[bestpin].ii_ioapic_irq = irq;
        } else
                irq = si->si_intpins[bestpin].ii_ioapic_irq;
        si->si_intpins[bestpin].ii_count++;

        pi->pi_lintr.pin = bestpin + 1;
        pi->pi_lintr.ioapic_irq = irq;
        pci_set_cfgdata8(pi, PCIR_INTLINE, irq);
        pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1);
        return (0);
}

/*
 * Assert the device's INTx line; if delivery is not currently permitted
 * the assertion is recorded as PENDING and replayed by pci_lintr_update().
 */
void
pci_lintr_assert(struct pci_devinst *pi)
{

        assert(pi->pi_lintr.pin > 0);

        pthread_mutex_lock(&pi->pi_lintr.lock);
        if (pi->pi_lintr.state == IDLE) {
                if (pci_lintr_permitted(pi)) {
                        pi->pi_lintr.state = ASSERTED;
                        vm_ioapic_assert_irq(pi->pi_vmctx,
                            pi->pi_lintr.ioapic_irq);
                } else
                        pi->pi_lintr.state = PENDING;
        }
        pthread_mutex_unlock(&pi->pi_lintr.lock);
}

void
pci_lintr_deassert(struct pci_devinst *pi)
{
1399 1400 assert(pi->pi_lintr.pin > 0); 1401 1402 pthread_mutex_lock(&pi->pi_lintr.lock); 1403 if (pi->pi_lintr.state == ASSERTED) { 1404 pi->pi_lintr.state = IDLE; 1405 vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq); 1406 } else if (pi->pi_lintr.state == PENDING) 1407 pi->pi_lintr.state = IDLE; 1408 pthread_mutex_unlock(&pi->pi_lintr.lock); 1409 } 1410 1411 static void 1412 pci_lintr_update(struct pci_devinst *pi) 1413 { 1414 1415 pthread_mutex_lock(&pi->pi_lintr.lock); 1416 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1417 vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq); 1418 pi->pi_lintr.state = PENDING; 1419 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1420 pi->pi_lintr.state = ASSERTED; 1421 vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq); 1422 } 1423 pthread_mutex_unlock(&pi->pi_lintr.lock); 1424 } 1425 1426 int 1427 pci_count_lintr(int bus) 1428 { 1429 int count, slot, pin; 1430 struct slotinfo *slotinfo; 1431 1432 count = 0; 1433 if (pci_businfo[bus] != NULL) { 1434 for (slot = 0; slot < MAXSLOTS; slot++) { 1435 slotinfo = &pci_businfo[bus]->slotinfo[slot]; 1436 for (pin = 0; pin < 4; pin++) { 1437 if (slotinfo->si_intpins[pin].ii_count != 0) 1438 count++; 1439 } 1440 } 1441 } 1442 return (count); 1443 } 1444 1445 void 1446 pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) 1447 { 1448 struct businfo *bi; 1449 struct slotinfo *si; 1450 struct intxinfo *ii; 1451 int slot, pin; 1452 1453 if ((bi = pci_businfo[bus]) == NULL) 1454 return; 1455 1456 for (slot = 0; slot < MAXSLOTS; slot++) { 1457 si = &bi->slotinfo[slot]; 1458 for (pin = 0; pin < 4; pin++) { 1459 ii = &si->si_intpins[pin]; 1460 if (ii->ii_count != 0) 1461 cb(bus, slot, pin + 1, ii->ii_ioapic_irq, arg); 1462 } 1463 } 1464 } 1465 1466 /* 1467 * Return 1 if the emulated device in 'slot' is a multi-function device. 1468 * Return 0 otherwise. 
1469 */ 1470 static int 1471 pci_emul_is_mfdev(int bus, int slot) 1472 { 1473 struct businfo *bi; 1474 struct slotinfo *si; 1475 int f, numfuncs; 1476 1477 numfuncs = 0; 1478 if ((bi = pci_businfo[bus]) != NULL) { 1479 si = &bi->slotinfo[slot]; 1480 for (f = 0; f < MAXFUNCS; f++) { 1481 if (si->si_funcs[f].fi_devi != NULL) { 1482 numfuncs++; 1483 } 1484 } 1485 } 1486 return (numfuncs > 1); 1487 } 1488 1489 /* 1490 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1491 * whether or not is a multi-function being emulated in the pci 'slot'. 1492 */ 1493 static void 1494 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) 1495 { 1496 int mfdev; 1497 1498 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1499 mfdev = pci_emul_is_mfdev(bus, slot); 1500 switch (bytes) { 1501 case 1: 1502 case 2: 1503 *rv &= ~PCIM_MFDEV; 1504 if (mfdev) { 1505 *rv |= PCIM_MFDEV; 1506 } 1507 break; 1508 case 4: 1509 *rv &= ~(PCIM_MFDEV << 16); 1510 if (mfdev) { 1511 *rv |= (PCIM_MFDEV << 16); 1512 } 1513 break; 1514 } 1515 } 1516 } 1517 1518 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; 1519 1520 static int 1521 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1522 uint32_t *eax, void *arg) 1523 { 1524 uint32_t x; 1525 1526 if (bytes != 4) { 1527 if (in) 1528 *eax = (bytes == 2) ? 
0xffff : 0xff; 1529 return (0); 1530 } 1531 1532 if (in) { 1533 x = (cfgbus << 16) | 1534 (cfgslot << 11) | 1535 (cfgfunc << 8) | 1536 cfgoff; 1537 if (cfgenable) 1538 x |= CONF1_ENABLE; 1539 *eax = x; 1540 } else { 1541 x = *eax; 1542 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; 1543 cfgoff = x & PCI_REGMAX; 1544 cfgfunc = (x >> 8) & PCI_FUNCMAX; 1545 cfgslot = (x >> 11) & PCI_SLOTMAX; 1546 cfgbus = (x >> 16) & PCI_BUSMAX; 1547 } 1548 1549 return (0); 1550 } 1551 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 1552 1553 static uint32_t 1554 bits_changed(uint32_t old, uint32_t new, uint32_t mask) 1555 { 1556 1557 return ((old ^ new) & mask); 1558 } 1559 1560 static void 1561 pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes) 1562 { 1563 int i; 1564 uint16_t old; 1565 1566 /* 1567 * The command register is at an offset of 4 bytes and thus the 1568 * guest could write 1, 2 or 4 bytes starting at this offset. 1569 */ 1570 1571 old = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 1572 CFGWRITE(pi, PCIR_COMMAND, new, bytes); /* update config */ 1573 new = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */ 1574 1575 /* 1576 * If the MMIO or I/O address space decoding has changed then 1577 * register/unregister all BARs that decode that address space. 1578 */ 1579 for (i = 0; i <= PCI_BARMAX; i++) { 1580 switch (pi->pi_bar[i].type) { 1581 case PCIBAR_NONE: 1582 case PCIBAR_MEMHI64: 1583 break; 1584 case PCIBAR_IO: 1585 /* I/O address space decoding changed? */ 1586 if (bits_changed(old, new, PCIM_CMD_PORTEN)) { 1587 if (porten(pi)) 1588 register_bar(pi, i); 1589 else 1590 unregister_bar(pi, i); 1591 } 1592 break; 1593 case PCIBAR_MEM32: 1594 case PCIBAR_MEM64: 1595 /* MMIO address space decoding changed? 
*/ 1596 if (bits_changed(old, new, PCIM_CMD_MEMEN)) { 1597 if (memen(pi)) 1598 register_bar(pi, i); 1599 else 1600 unregister_bar(pi, i); 1601 } 1602 break; 1603 default: 1604 assert(0); 1605 } 1606 } 1607 1608 /* 1609 * If INTx has been unmasked and is pending, assert the 1610 * interrupt. 1611 */ 1612 pci_lintr_update(pi); 1613 } 1614 1615 static int 1616 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1617 uint32_t *eax, void *arg) 1618 { 1619 struct businfo *bi; 1620 struct slotinfo *si; 1621 struct pci_devinst *pi; 1622 struct pci_devemu *pe; 1623 int coff, idx, needcfg; 1624 uint64_t addr, bar, mask; 1625 1626 assert(bytes == 1 || bytes == 2 || bytes == 4); 1627 1628 if ((bi = pci_businfo[cfgbus]) != NULL) { 1629 si = &bi->slotinfo[cfgslot]; 1630 pi = si->si_funcs[cfgfunc].fi_devi; 1631 } else 1632 pi = NULL; 1633 1634 coff = cfgoff + (port - CONF1_DATA_PORT); 1635 1636 #if 0 1637 printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r", 1638 in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc); 1639 #endif 1640 1641 /* 1642 * Just return if there is no device at this cfgslot:cfgfunc, 1643 * if the guest is doing an un-aligned access, or if the config 1644 * address word isn't enabled. 
1645 */ 1646 if (!cfgenable || pi == NULL || (coff & (bytes - 1)) != 0) { 1647 if (in) 1648 *eax = 0xffffffff; 1649 return (0); 1650 } 1651 1652 pe = pi->pi_d; 1653 1654 /* 1655 * Config read 1656 */ 1657 if (in) { 1658 /* Let the device emulation override the default handler */ 1659 if (pe->pe_cfgread != NULL) { 1660 needcfg = pe->pe_cfgread(ctx, vcpu, pi, 1661 coff, bytes, eax); 1662 } else { 1663 needcfg = 1; 1664 } 1665 1666 if (needcfg) { 1667 if (bytes == 1) 1668 *eax = pci_get_cfgdata8(pi, coff); 1669 else if (bytes == 2) 1670 *eax = pci_get_cfgdata16(pi, coff); 1671 else 1672 *eax = pci_get_cfgdata32(pi, coff); 1673 } 1674 1675 pci_emul_hdrtype_fixup(cfgbus, cfgslot, coff, bytes, eax); 1676 } else { 1677 /* Let the device emulation override the default handler */ 1678 if (pe->pe_cfgwrite != NULL && 1679 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 1680 return (0); 1681 1682 /* 1683 * Special handling for write to BAR registers 1684 */ 1685 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { 1686 /* 1687 * Ignore writes to BAR registers that are not 1688 * 4-byte aligned. 
1689 */ 1690 if (bytes != 4 || (coff & 0x3) != 0) 1691 return (0); 1692 idx = (coff - PCIR_BAR(0)) / 4; 1693 mask = ~(pi->pi_bar[idx].size - 1); 1694 switch (pi->pi_bar[idx].type) { 1695 case PCIBAR_NONE: 1696 pi->pi_bar[idx].addr = bar = 0; 1697 break; 1698 case PCIBAR_IO: 1699 addr = *eax & mask; 1700 addr &= 0xffff; 1701 bar = addr | PCIM_BAR_IO_SPACE; 1702 /* 1703 * Register the new BAR value for interception 1704 */ 1705 if (addr != pi->pi_bar[idx].addr) { 1706 update_bar_address(pi, addr, idx, 1707 PCIBAR_IO); 1708 } 1709 break; 1710 case PCIBAR_MEM32: 1711 addr = bar = *eax & mask; 1712 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 1713 if (addr != pi->pi_bar[idx].addr) { 1714 update_bar_address(pi, addr, idx, 1715 PCIBAR_MEM32); 1716 } 1717 break; 1718 case PCIBAR_MEM64: 1719 addr = bar = *eax & mask; 1720 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 1721 PCIM_BAR_MEM_PREFETCH; 1722 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 1723 update_bar_address(pi, addr, idx, 1724 PCIBAR_MEM64); 1725 } 1726 break; 1727 case PCIBAR_MEMHI64: 1728 mask = ~(pi->pi_bar[idx - 1].size - 1); 1729 addr = ((uint64_t)*eax << 32) & mask; 1730 bar = addr >> 32; 1731 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 1732 update_bar_address(pi, addr, idx - 1, 1733 PCIBAR_MEMHI64); 1734 } 1735 break; 1736 default: 1737 assert(0); 1738 } 1739 pci_set_cfgdata32(pi, coff, bar); 1740 1741 } else if (pci_emul_iscap(pi, coff)) { 1742 pci_emul_capwrite(pi, coff, bytes, *eax); 1743 } else if (coff == PCIR_COMMAND) { 1744 pci_emul_cmdwrite(pi, *eax, bytes); 1745 } else { 1746 CFGWRITE(pi, coff, *eax, bytes); 1747 } 1748 } 1749 1750 return (0); 1751 } 1752 1753 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 1754 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 1755 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 1756 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 1757 1758 /* 
1759 * I/O ports to configure PCI IRQ routing. We ignore all writes to it. 1760 */ 1761 static int 1762 pci_irq_port_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1763 uint32_t *eax, void *arg) 1764 { 1765 assert(in == 0); 1766 return (0); 1767 } 1768 INOUT_PORT(pci_irq, 0xC00, IOPORT_F_OUT, pci_irq_port_handler); 1769 INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler); 1770 SYSRES_IO(0xC00, 2); 1771 1772 #define PCI_EMUL_TEST 1773 #ifdef PCI_EMUL_TEST 1774 /* 1775 * Define a dummy test device 1776 */ 1777 #define DIOSZ 8 1778 #define DMEMSZ 4096 1779 struct pci_emul_dsoftc { 1780 uint8_t ioregs[DIOSZ]; 1781 uint8_t memregs[DMEMSZ]; 1782 }; 1783 1784 #define PCI_EMUL_MSI_MSGS 4 1785 #define PCI_EMUL_MSIX_MSGS 16 1786 1787 static int 1788 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 1789 { 1790 int error; 1791 struct pci_emul_dsoftc *sc; 1792 1793 sc = calloc(1, sizeof(struct pci_emul_dsoftc)); 1794 1795 pi->pi_arg = sc; 1796 1797 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 1798 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 1799 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 1800 1801 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 1802 assert(error == 0); 1803 1804 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 1805 assert(error == 0); 1806 1807 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 1808 assert(error == 0); 1809 1810 return (0); 1811 } 1812 1813 static void 1814 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1815 uint64_t offset, int size, uint64_t value) 1816 { 1817 int i; 1818 struct pci_emul_dsoftc *sc = pi->pi_arg; 1819 1820 if (baridx == 0) { 1821 if (offset + size > DIOSZ) { 1822 printf("diow: iow too large, offset %ld size %d\n", 1823 offset, size); 1824 return; 1825 } 1826 1827 if (size == 1) { 1828 sc->ioregs[offset] = value & 0xff; 1829 } else if (size == 2) { 1830 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 1831 } else if (size == 4) { 1832 
*(uint32_t *)&sc->ioregs[offset] = value; 1833 } else { 1834 printf("diow: iow unknown size %d\n", size); 1835 } 1836 1837 /* 1838 * Special magic value to generate an interrupt 1839 */ 1840 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 1841 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 1842 1843 if (value == 0xabcdef) { 1844 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 1845 pci_generate_msi(pi, i); 1846 } 1847 } 1848 1849 if (baridx == 1) { 1850 if (offset + size > DMEMSZ) { 1851 printf("diow: memw too large, offset %ld size %d\n", 1852 offset, size); 1853 return; 1854 } 1855 1856 if (size == 1) { 1857 sc->memregs[offset] = value; 1858 } else if (size == 2) { 1859 *(uint16_t *)&sc->memregs[offset] = value; 1860 } else if (size == 4) { 1861 *(uint32_t *)&sc->memregs[offset] = value; 1862 } else if (size == 8) { 1863 *(uint64_t *)&sc->memregs[offset] = value; 1864 } else { 1865 printf("diow: memw unknown size %d\n", size); 1866 } 1867 1868 /* 1869 * magic interrupt ?? 1870 */ 1871 } 1872 1873 if (baridx > 1) { 1874 printf("diow: unknown bar idx %d\n", baridx); 1875 } 1876 } 1877 1878 static uint64_t 1879 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1880 uint64_t offset, int size) 1881 { 1882 struct pci_emul_dsoftc *sc = pi->pi_arg; 1883 uint32_t value; 1884 1885 if (baridx == 0) { 1886 if (offset + size > DIOSZ) { 1887 printf("dior: ior too large, offset %ld size %d\n", 1888 offset, size); 1889 return (0); 1890 } 1891 1892 if (size == 1) { 1893 value = sc->ioregs[offset]; 1894 } else if (size == 2) { 1895 value = *(uint16_t *) &sc->ioregs[offset]; 1896 } else if (size == 4) { 1897 value = *(uint32_t *) &sc->ioregs[offset]; 1898 } else { 1899 printf("dior: ior unknown size %d\n", size); 1900 } 1901 } 1902 1903 if (baridx == 1) { 1904 if (offset + size > DMEMSZ) { 1905 printf("dior: memr too large, offset %ld size %d\n", 1906 offset, size); 1907 return (0); 1908 } 1909 1910 if (size == 1) { 1911 value = 
sc->memregs[offset]; 1912 } else if (size == 2) { 1913 value = *(uint16_t *) &sc->memregs[offset]; 1914 } else if (size == 4) { 1915 value = *(uint32_t *) &sc->memregs[offset]; 1916 } else if (size == 8) { 1917 value = *(uint64_t *) &sc->memregs[offset]; 1918 } else { 1919 printf("dior: ior unknown size %d\n", size); 1920 } 1921 } 1922 1923 1924 if (baridx > 1) { 1925 printf("dior: unknown bar idx %d\n", baridx); 1926 return (0); 1927 } 1928 1929 return (value); 1930 } 1931 1932 struct pci_devemu pci_dummy = { 1933 .pe_emu = "dummy", 1934 .pe_init = pci_emul_dinit, 1935 .pe_barwrite = pci_emul_diow, 1936 .pe_barread = pci_emul_dior 1937 }; 1938 PCI_EMUL_SET(pci_dummy); 1939 1940 #endif /* PCI_EMUL_TEST */ 1941