1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/linker_set.h> 34 #include <sys/errno.h> 35 36 #include <ctype.h> 37 #include <pthread.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <strings.h> 42 #include <assert.h> 43 #include <stdbool.h> 44 45 #include <machine/vmm.h> 46 #include <vmmapi.h> 47 48 #include "acpi.h" 49 #include "bhyverun.h" 50 #include "inout.h" 51 #include "ioapic.h" 52 #include "mem.h" 53 #include "pci_emul.h" 54 #include "pci_lpc.h" 55 56 #define CONF1_ADDR_PORT 0x0cf8 57 #define CONF1_DATA_PORT 0x0cfc 58 59 #define CONF1_ENABLE 0x80000000ul 60 61 #define CFGWRITE(pi,off,val,b) \ 62 do { \ 63 if ((b) == 1) { \ 64 pci_set_cfgdata8((pi),(off),(val)); \ 65 } else if ((b) == 2) { \ 66 pci_set_cfgdata16((pi),(off),(val)); \ 67 } else { \ 68 pci_set_cfgdata32((pi),(off),(val)); \ 69 } \ 70 } while (0) 71 72 #define MAXSLOTS (PCI_SLOTMAX + 1) 73 #define MAXFUNCS (PCI_FUNCMAX + 1) 74 75 struct funcinfo { 76 char *fi_name; 77 char *fi_param; 78 struct pci_devinst *fi_devi; 79 }; 80 81 struct intxinfo { 82 int ii_count; 83 int ii_ioapic_irq; 84 }; 85 86 struct slotinfo { 87 struct intxinfo si_intpins[4]; 88 struct funcinfo si_funcs[MAXFUNCS]; 89 } pci_slotinfo[MAXSLOTS]; 90 91 SET_DECLARE(pci_devemu_set, struct pci_devemu); 92 93 static uint64_t pci_emul_iobase; 94 static uint64_t pci_emul_membase32; 95 static uint64_t pci_emul_membase64; 96 97 #define PCI_EMUL_IOBASE 0x2000 98 #define PCI_EMUL_IOLIMIT 0x10000 99 100 #define PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ 101 102 #define PCI_EMUL_MEMBASE64 0xD000000000UL 103 #define PCI_EMUL_MEMLIMIT64 0xFD00000000UL 104 105 static struct pci_devemu *pci_emul_finddev(char *name); 106 static void pci_lintr_update(struct pci_devinst *pi); 107 108 static int pci_emul_devices; 109 static struct mem_range pci_mem_hole; 110 111 /* 112 * I/O access 113 */ 114 115 /* 116 * Slot options are in the form: 117 * 118 * <slot>[:<func>],<emul>[,<config>] 119 * 120 * slot is 0..31 121 * func is 0..7 122 * emul is a string describing the type of PCI device e.g. virtio-net 123 * config is an optional string, depending on the device, that can be 124 * used for configuration. 125 * Examples are: 126 * 1,virtio-net,tap0 127 * 3:0,dummy 128 */ 129 static void 130 pci_parse_slot_usage(char *aopt) 131 { 132 133 fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt); 134 } 135 136 int 137 pci_parse_slot(char *opt) 138 { 139 char *slot, *func, *emul, *config; 140 char *str, *cpy; 141 int error, snum, fnum; 142 143 error = -1; 144 str = cpy = strdup(opt); 145 146 slot = strsep(&str, ","); 147 func = NULL; 148 if (strchr(slot, ':') != NULL) { 149 func = cpy; 150 (void) strsep(&func, ":"); 151 } 152 153 emul = strsep(&str, ","); 154 config = str; 155 156 if (emul == NULL) { 157 pci_parse_slot_usage(opt); 158 goto done; 159 } 160 161 snum = atoi(slot); 162 fnum = func ? atoi(func) : 0; 163 164 if (snum < 0 || snum >= MAXSLOTS || fnum < 0 || fnum >= MAXFUNCS) { 165 pci_parse_slot_usage(opt); 166 goto done; 167 } 168 169 if (pci_slotinfo[snum].si_funcs[fnum].fi_name != NULL) { 170 fprintf(stderr, "pci slot %d:%d already occupied!\n", 171 snum, fnum); 172 goto done; 173 } 174 175 if (pci_emul_finddev(emul) == NULL) { 176 fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n", 177 snum, fnum, emul); 178 goto done; 179 } 180 181 error = 0; 182 pci_slotinfo[snum].si_funcs[fnum].fi_name = emul; 183 pci_slotinfo[snum].si_funcs[fnum].fi_param = config; 184 185 done: 186 if (error) 187 free(cpy); 188 189 return (error); 190 } 191 192 static int 193 pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) 194 { 195 196 if (offset < pi->pi_msix.pba_offset) 197 return (0); 198 199 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 200 return (0); 201 } 202 203 return (1); 204 } 205 206 int 207 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, 208 uint64_t value) 209 { 210 int msix_entry_offset; 211 int tab_index; 212 char *dest; 213 214 /* support only 4 or 8 byte writes */ 215 if (size != 4 && size != 8) 216 return (-1); 217 218 /* 219 * Return if table index is beyond what device supports 220 */ 221 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 222 if (tab_index >= pi->pi_msix.table_count) 223 return (-1); 224 225 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 226 227 /* support only aligned writes */ 228 if ((msix_entry_offset % size) != 0) 229 return (-1); 230 231 dest = (char *)(pi->pi_msix.table + tab_index); 232 dest += msix_entry_offset; 233 234 if (size == 4) 235 *((uint32_t *)dest) = value; 236 else 237 *((uint64_t *)dest) = value; 238 239 return (0); 240 } 241 242 uint64_t 243 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) 244 { 245 char *dest; 246 int msix_entry_offset; 247 int tab_index; 248 uint64_t retval = ~0; 249 250 /* 251 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X 252 * table but we also allow 1 byte access to accomodate reads from 253 * ddb. 254 */ 255 if (size != 1 && size != 4 && size != 8) 256 return (retval); 257 258 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 259 260 /* support only aligned reads */ 261 if ((msix_entry_offset % size) != 0) { 262 return (retval); 263 } 264 265 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 266 267 if (tab_index < pi->pi_msix.table_count) { 268 /* valid MSI-X Table access */ 269 dest = (char *)(pi->pi_msix.table + tab_index); 270 dest += msix_entry_offset; 271 272 if (size == 1) 273 retval = *((uint8_t *)dest); 274 else if (size == 4) 275 retval = *((uint32_t *)dest); 276 else 277 retval = *((uint64_t *)dest); 278 } else if (pci_valid_pba_offset(pi, offset)) { 279 /* return 0 for PBA access */ 280 retval = 0; 281 } 282 283 return (retval); 284 } 285 286 int 287 pci_msix_table_bar(struct pci_devinst *pi) 288 { 289 290 if (pi->pi_msix.table != NULL) 291 return (pi->pi_msix.table_bar); 292 else 293 return (-1); 294 } 295 296 int 297 pci_msix_pba_bar(struct pci_devinst *pi) 298 { 299 300 if (pi->pi_msix.table != NULL) 301 return (pi->pi_msix.pba_bar); 302 else 303 return (-1); 304 } 305 306 static int 307 pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 308 uint32_t *eax, void *arg) 309 { 310 struct pci_devinst *pdi = arg; 311 struct pci_devemu *pe = pdi->pi_d; 312 uint64_t offset; 313 int i; 314 315 for (i = 0; i <= PCI_BARMAX; i++) { 316 if (pdi->pi_bar[i].type == PCIBAR_IO && 317 port >= pdi->pi_bar[i].addr && 318 port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { 319 offset = port - pdi->pi_bar[i].addr; 320 if (in) 321 *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, 322 offset, bytes); 323 else 324 (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, 325 bytes, *eax); 326 return (0); 327 } 328 } 329 return (-1); 330 } 331 332 static int 333 pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 334 int size, uint64_t *val, void *arg1, long arg2) 335 { 336 struct pci_devinst *pdi = arg1; 337 struct pci_devemu *pe = pdi->pi_d; 338 uint64_t offset; 339 int bidx = (int) arg2; 340 341 assert(bidx <= PCI_BARMAX); 342 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || 343 pdi->pi_bar[bidx].type == PCIBAR_MEM64); 344 assert(addr >= pdi->pi_bar[bidx].addr && 345 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); 346 347 offset = addr - pdi->pi_bar[bidx].addr; 348 349 if (dir == MEM_F_WRITE) 350 (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val); 351 else 352 *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size); 353 354 return (0); 355 } 356 357 358 static int 359 pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, 360 uint64_t *addr) 361 { 362 uint64_t base; 363 364 assert((size & (size - 1)) == 0); /* must be a power of 2 */ 365 366 base = roundup2(*baseptr, size); 367 368 if (base + size <= limit) { 369 *addr = base; 370 *baseptr = base + size; 371 return (0); 372 } else 373 return (-1); 374 } 375 376 int 377 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, 378 uint64_t size) 379 { 380 381 return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); 382 } 383 384 /* 385 * Register (or unregister) the MMIO or I/O region associated with the BAR 386 * register 'idx' of an emulated pci device. 387 */ 388 static void 389 modify_bar_registration(struct pci_devinst *pi, int idx, int registration) 390 { 391 int error; 392 struct inout_port iop; 393 struct mem_range mr; 394 395 switch (pi->pi_bar[idx].type) { 396 case PCIBAR_IO: 397 bzero(&iop, sizeof(struct inout_port)); 398 iop.name = pi->pi_name; 399 iop.port = pi->pi_bar[idx].addr; 400 iop.size = pi->pi_bar[idx].size; 401 if (registration) { 402 iop.flags = IOPORT_F_INOUT; 403 iop.handler = pci_emul_io_handler; 404 iop.arg = pi; 405 error = register_inout(&iop); 406 } else 407 error = unregister_inout(&iop); 408 break; 409 case PCIBAR_MEM32: 410 case PCIBAR_MEM64: 411 bzero(&mr, sizeof(struct mem_range)); 412 mr.name = pi->pi_name; 413 mr.base = pi->pi_bar[idx].addr; 414 mr.size = pi->pi_bar[idx].size; 415 if (registration) { 416 mr.flags = MEM_F_RW; 417 mr.handler = pci_emul_mem_handler; 418 mr.arg1 = pi; 419 mr.arg2 = idx; 420 error = register_mem(&mr); 421 } else 422 error = unregister_mem(&mr); 423 break; 424 default: 425 error = EINVAL; 426 break; 427 } 428 assert(error == 0); 429 } 430 431 static void 432 unregister_bar(struct pci_devinst *pi, int idx) 433 { 434 435 modify_bar_registration(pi, idx, 0); 436 } 437 438 static void 439 register_bar(struct pci_devinst *pi, int idx) 440 { 441 442 modify_bar_registration(pi, idx, 1); 443 } 444 445 /* Are we decoding i/o port accesses for the emulated pci device? */ 446 static int 447 porten(struct pci_devinst *pi) 448 { 449 uint16_t cmd; 450 451 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 452 453 return (cmd & PCIM_CMD_PORTEN); 454 } 455 456 /* Are we decoding memory accesses for the emulated pci device? */ 457 static int 458 memen(struct pci_devinst *pi) 459 { 460 uint16_t cmd; 461 462 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 463 464 return (cmd & PCIM_CMD_MEMEN); 465 } 466 467 /* 468 * Update the MMIO or I/O address that is decoded by the BAR register. 469 * 470 * If the pci device has enabled the address space decoding then intercept 471 * the address range decoded by the BAR register. 472 */ 473 static void 474 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) 475 { 476 int decode; 477 478 if (pi->pi_bar[idx].type == PCIBAR_IO) 479 decode = porten(pi); 480 else 481 decode = memen(pi); 482 483 if (decode) 484 unregister_bar(pi, idx); 485 486 switch (type) { 487 case PCIBAR_IO: 488 case PCIBAR_MEM32: 489 pi->pi_bar[idx].addr = addr; 490 break; 491 case PCIBAR_MEM64: 492 pi->pi_bar[idx].addr &= ~0xffffffffUL; 493 pi->pi_bar[idx].addr |= addr; 494 break; 495 case PCIBAR_MEMHI64: 496 pi->pi_bar[idx].addr &= 0xffffffff; 497 pi->pi_bar[idx].addr |= addr; 498 break; 499 default: 500 assert(0); 501 } 502 503 if (decode) 504 register_bar(pi, idx); 505 } 506 507 int 508 pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, 509 enum pcibar_type type, uint64_t size) 510 { 511 int error; 512 uint64_t *baseptr, limit, addr, mask, lobits, bar; 513 514 assert(idx >= 0 && idx <= PCI_BARMAX); 515 516 if ((size & (size - 1)) != 0) 517 size = 1UL << flsl(size); /* round up to a power of 2 */ 518 519 /* Enforce minimum BAR sizes required by the PCI standard */ 520 if (type == PCIBAR_IO) { 521 if (size < 4) 522 size = 4; 523 } else { 524 if (size < 16) 525 size = 16; 526 } 527 528 switch (type) { 529 case PCIBAR_NONE: 530 baseptr = NULL; 531 addr = mask = lobits = 0; 532 break; 533 case PCIBAR_IO: 534 baseptr = &pci_emul_iobase; 535 limit = PCI_EMUL_IOLIMIT; 536 mask = PCIM_BAR_IO_BASE; 537 lobits = PCIM_BAR_IO_SPACE; 538 break; 539 case PCIBAR_MEM64: 540 /* 541 * XXX 542 * Some drivers do not work well if the 64-bit BAR is allocated 543 * above 4GB. Allow for this by allocating small requests under 544 * 4GB unless then allocation size is larger than some arbitrary 545 * number (32MB currently). 546 */ 547 if (size > 32 * 1024 * 1024) { 548 /* 549 * XXX special case for device requiring peer-peer DMA 550 */ 551 if (size == 0x100000000UL) 552 baseptr = &hostbase; 553 else 554 baseptr = &pci_emul_membase64; 555 limit = PCI_EMUL_MEMLIMIT64; 556 mask = PCIM_BAR_MEM_BASE; 557 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 558 PCIM_BAR_MEM_PREFETCH; 559 break; 560 } else { 561 baseptr = &pci_emul_membase32; 562 limit = PCI_EMUL_MEMLIMIT32; 563 mask = PCIM_BAR_MEM_BASE; 564 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; 565 } 566 break; 567 case PCIBAR_MEM32: 568 baseptr = &pci_emul_membase32; 569 limit = PCI_EMUL_MEMLIMIT32; 570 mask = PCIM_BAR_MEM_BASE; 571 lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 572 break; 573 default: 574 printf("pci_emul_alloc_base: invalid bar type %d\n", type); 575 assert(0); 576 } 577 578 if (baseptr != NULL) { 579 error = pci_emul_alloc_resource(baseptr, limit, size, &addr); 580 if (error != 0) 581 return (error); 582 } 583 584 pdi->pi_bar[idx].type = type; 585 pdi->pi_bar[idx].addr = addr; 586 pdi->pi_bar[idx].size = size; 587 588 /* Initialize the BAR register in config space */ 589 bar = (addr & mask) | lobits; 590 pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); 591 592 if (type == PCIBAR_MEM64) { 593 assert(idx + 1 <= PCI_BARMAX); 594 pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; 595 pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); 596 } 597 598 register_bar(pdi, idx); 599 600 return (0); 601 } 602 603 #define CAP_START_OFFSET 0x40 604 static int 605 pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) 606 { 607 int i, capoff, capid, reallen; 608 uint16_t sts; 609 610 static u_char endofcap[4] = { 611 PCIY_RESERVED, 0, 0, 0 612 }; 613 614 assert(caplen > 0 && capdata[0] != PCIY_RESERVED); 615 616 reallen = roundup2(caplen, 4); /* dword aligned */ 617 618 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 619 if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { 620 capoff = CAP_START_OFFSET; 621 pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); 622 pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); 623 } else { 624 capoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR); 625 while (1) { 626 assert((capoff & 0x3) == 0); 627 capid = pci_get_cfgdata8(pi, capoff); 628 if (capid == PCIY_RESERVED) 629 break; 630 capoff = pci_get_cfgdata8(pi, capoff + 1); 631 } 632 } 633 634 /* Check if we have enough space */ 635 if (capoff + reallen + sizeof(endofcap) > PCI_REGMAX + 1) 636 return (-1); 637 638 /* Copy the capability */ 639 for (i = 0; i < caplen; i++) 640 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 641 642 /* Set the next capability pointer */ 643 pci_set_cfgdata8(pi, capoff + 1, capoff + reallen); 644 645 /* Copy of the reserved capability which serves as the end marker */ 646 for (i = 0; i < sizeof(endofcap); i++) 647 pci_set_cfgdata8(pi, capoff + reallen + i, endofcap[i]); 648 649 return (0); 650 } 651 652 static struct pci_devemu * 653 pci_emul_finddev(char *name) 654 { 655 struct pci_devemu **pdpp, *pdp; 656 657 SET_FOREACH(pdpp, pci_devemu_set) { 658 pdp = *pdpp; 659 if (!strcmp(pdp->pe_emu, name)) { 660 return (pdp); 661 } 662 } 663 664 return (NULL); 665 } 666 667 static int 668 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, int func, 669 char *params) 670 { 671 struct pci_devinst *pdi; 672 int err; 673 674 pdi = malloc(sizeof(struct pci_devinst)); 675 bzero(pdi, sizeof(*pdi)); 676 677 pdi->pi_vmctx = ctx; 678 pdi->pi_bus = 0; 679 pdi->pi_slot = slot; 680 pdi->pi_func = func; 681 pthread_mutex_init(&pdi->pi_lintr.lock, NULL); 682 pdi->pi_lintr.pin = 0; 683 pdi->pi_lintr.state = IDLE; 684 pdi->pi_lintr.ioapic_irq = 0; 685 pdi->pi_d = pde; 686 snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); 687 688 /* Disable legacy interrupts */ 689 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); 690 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); 691 692 pci_set_cfgdata8(pdi, PCIR_COMMAND, 693 PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); 694 695 err = (*pde->pe_init)(ctx, pdi, params); 696 if (err != 0) { 697 free(pdi); 698 } else { 699 pci_emul_devices++; 700 pci_slotinfo[slot].si_funcs[func].fi_devi = pdi; 701 } 702 703 return (err); 704 } 705 706 void 707 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) 708 { 709 int mmc; 710 711 CTASSERT(sizeof(struct msicap) == 14); 712 713 /* Number of msi messages must be a power of 2 between 1 and 32 */ 714 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); 715 mmc = ffs(msgnum) - 1; 716 717 bzero(msicap, sizeof(struct msicap)); 718 msicap->capid = PCIY_MSI; 719 msicap->nextptr = nextptr; 720 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 721 } 722 723 int 724 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) 725 { 726 struct msicap msicap; 727 728 pci_populate_msicap(&msicap, msgnum, 0); 729 730 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); 731 } 732 733 static void 734 pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, 735 uint32_t msix_tab_size, int nextptr) 736 { 737 CTASSERT(sizeof(struct msixcap) == 12); 738 739 assert(msix_tab_size % 4096 == 0); 740 741 bzero(msixcap, sizeof(struct msixcap)); 742 msixcap->capid = PCIY_MSIX; 743 msixcap->nextptr = nextptr; 744 745 /* 746 * Message Control Register, all fields set to 747 * zero except for the Table Size. 748 * Note: Table size N is encoded as N-1 749 */ 750 msixcap->msgctrl = msgnum - 1; 751 752 /* 753 * MSI-X BAR setup: 754 * - MSI-X table start at offset 0 755 * - PBA table starts at a 4K aligned offset after the MSI-X table 756 */ 757 msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; 758 msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); 759 } 760 761 static void 762 pci_msix_table_init(struct pci_devinst *pi, int table_entries) 763 { 764 int i, table_size; 765 766 assert(table_entries > 0); 767 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); 768 769 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; 770 pi->pi_msix.table = malloc(table_size); 771 bzero(pi->pi_msix.table, table_size); 772 773 /* set mask bit of vector control register */ 774 for (i = 0; i < table_entries; i++) 775 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; 776 } 777 778 int 779 pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) 780 { 781 uint16_t pba_index; 782 uint32_t tab_size; 783 struct msixcap msixcap; 784 785 assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); 786 assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); 787 788 tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; 789 790 /* Align table size to nearest 4K */ 791 tab_size = roundup2(tab_size, 4096); 792 793 pi->pi_msix.table_bar = barnum; 794 pi->pi_msix.pba_bar = barnum; 795 pi->pi_msix.table_offset = 0; 796 pi->pi_msix.table_count = msgnum; 797 pi->pi_msix.pba_offset = tab_size; 798 799 /* calculate the MMIO size required for MSI-X PBA */ 800 pba_index = (msgnum - 1) / (PBA_TABLE_ENTRY_SIZE * 8); 801 pi->pi_msix.pba_size = (pba_index + 1) * PBA_TABLE_ENTRY_SIZE; 802 803 pci_msix_table_init(pi, msgnum); 804 805 pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size, 0); 806 807 /* allocate memory for MSI-X Table and PBA */ 808 pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, 809 tab_size + pi->pi_msix.pba_size); 810 811 return (pci_emul_add_capability(pi, (u_char *)&msixcap, 812 sizeof(msixcap))); 813 } 814 815 void 816 msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 817 int bytes, uint32_t val) 818 { 819 uint16_t msgctrl, rwmask; 820 int off, table_bar; 821 822 off = offset - capoff; 823 table_bar = pi->pi_msix.table_bar; 824 /* Message Control Register */ 825 if (off == 2 && bytes == 2) { 826 rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; 827 msgctrl = pci_get_cfgdata16(pi, offset); 828 msgctrl &= ~rwmask; 829 msgctrl |= val & rwmask; 830 val = msgctrl; 831 832 pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; 833 pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; 834 pci_lintr_update(pi); 835 } 836 837 CFGWRITE(pi, offset, val, bytes); 838 } 839 840 void 841 msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 842 int bytes, uint32_t val) 843 { 844 uint16_t msgctrl, rwmask, msgdata, mme; 845 uint32_t addrlo; 846 847 /* 848 * If guest is writing to the message control register make sure 849 * we do not overwrite read-only fields. 850 */ 851 if ((offset - capoff) == 2 && bytes == 2) { 852 rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; 853 msgctrl = pci_get_cfgdata16(pi, offset); 854 msgctrl &= ~rwmask; 855 msgctrl |= val & rwmask; 856 val = msgctrl; 857 858 addrlo = pci_get_cfgdata32(pi, capoff + 4); 859 if (msgctrl & PCIM_MSICTRL_64BIT) 860 msgdata = pci_get_cfgdata16(pi, capoff + 12); 861 else 862 msgdata = pci_get_cfgdata16(pi, capoff + 8); 863 864 mme = msgctrl & PCIM_MSICTRL_MME_MASK; 865 pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; 866 if (pi->pi_msi.enabled) { 867 pi->pi_msi.addr = addrlo; 868 pi->pi_msi.msg_data = msgdata; 869 pi->pi_msi.maxmsgnum = 1 << (mme >> 4); 870 } else { 871 pi->pi_msi.maxmsgnum = 0; 872 } 873 pci_lintr_update(pi); 874 } 875 876 CFGWRITE(pi, offset, val, bytes); 877 } 878 879 void 880 pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, 881 int bytes, uint32_t val) 882 { 883 884 /* XXX don't write to the readonly parts */ 885 CFGWRITE(pi, offset, val, bytes); 886 } 887 888 #define PCIECAP_VERSION 0x2 889 int 890 pci_emul_add_pciecap(struct pci_devinst *pi, int type) 891 { 892 int err; 893 struct pciecap pciecap; 894 895 CTASSERT(sizeof(struct pciecap) == 60); 896 897 if (type != PCIEM_TYPE_ROOT_PORT) 898 return (-1); 899 900 bzero(&pciecap, sizeof(pciecap)); 901 902 pciecap.capid = PCIY_EXPRESS; 903 pciecap.pcie_capabilities = PCIECAP_VERSION | PCIEM_TYPE_ROOT_PORT; 904 pciecap.link_capabilities = 0x411; /* gen1, x1 */ 905 pciecap.link_status = 0x11; /* gen1, x1 */ 906 907 err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); 908 return (err); 909 } 910 911 /* 912 * This function assumes that 'coff' is in the capabilities region of the 913 * config space. 914 */ 915 static void 916 pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) 917 { 918 int capid; 919 uint8_t capoff, nextoff; 920 921 /* Do not allow un-aligned writes */ 922 if ((offset & (bytes - 1)) != 0) 923 return; 924 925 /* Find the capability that we want to update */ 926 capoff = CAP_START_OFFSET; 927 while (1) { 928 capid = pci_get_cfgdata8(pi, capoff); 929 if (capid == PCIY_RESERVED) 930 break; 931 932 nextoff = pci_get_cfgdata8(pi, capoff + 1); 933 if (offset >= capoff && offset < nextoff) 934 break; 935 936 capoff = nextoff; 937 } 938 assert(offset >= capoff); 939 940 /* 941 * Capability ID and Next Capability Pointer are readonly. 942 * However, some o/s's do 4-byte writes that include these. 943 * For this case, trim the write back to 2 bytes and adjust 944 * the data. 945 */ 946 if (offset == capoff || offset == capoff + 1) { 947 if (offset == capoff && bytes == 4) { 948 bytes = 2; 949 offset += 2; 950 val >>= 16; 951 } else 952 return; 953 } 954 955 switch (capid) { 956 case PCIY_MSI: 957 msicap_cfgwrite(pi, capoff, offset, bytes, val); 958 break; 959 case PCIY_MSIX: 960 msixcap_cfgwrite(pi, capoff, offset, bytes, val); 961 break; 962 case PCIY_EXPRESS: 963 pciecap_cfgwrite(pi, capoff, offset, bytes, val); 964 break; 965 default: 966 break; 967 } 968 } 969 970 static int 971 pci_emul_iscap(struct pci_devinst *pi, int offset) 972 { 973 int found; 974 uint16_t sts; 975 uint8_t capid, lastoff; 976 977 found = 0; 978 sts = pci_get_cfgdata16(pi, PCIR_STATUS); 979 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { 980 lastoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR); 981 while (1) { 982 assert((lastoff & 0x3) == 0); 983 capid = pci_get_cfgdata8(pi, lastoff); 984 if (capid == PCIY_RESERVED) 985 break; 986 lastoff = pci_get_cfgdata8(pi, lastoff + 1); 987 } 988 if (offset >= CAP_START_OFFSET && offset <= lastoff) 989 found = 1; 990 } 991 return (found); 992 } 993 994 static int 995 pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, 996 int size, uint64_t *val, void *arg1, long arg2) 997 { 998 /* 999 * Ignore writes; return 0xff's for reads. The mem read code 1000 * will take care of truncating to the correct size. 1001 */ 1002 if (dir == MEM_F_READ) { 1003 *val = 0xffffffffffffffff; 1004 } 1005 1006 return (0); 1007 } 1008 1009 int 1010 init_pci(struct vmctx *ctx) 1011 { 1012 struct pci_devemu *pde; 1013 struct funcinfo *fi; 1014 size_t lowmem; 1015 int slot, func; 1016 int error; 1017 1018 pci_emul_iobase = PCI_EMUL_IOBASE; 1019 pci_emul_membase32 = vm_get_lowmem_limit(ctx); 1020 pci_emul_membase64 = PCI_EMUL_MEMBASE64; 1021 1022 for (slot = 0; slot < MAXSLOTS; slot++) { 1023 for (func = 0; func < MAXFUNCS; func++) { 1024 fi = &pci_slotinfo[slot].si_funcs[func]; 1025 if (fi->fi_name != NULL) { 1026 pde = pci_emul_finddev(fi->fi_name); 1027 assert(pde != NULL); 1028 error = pci_emul_init(ctx, pde, slot, func, 1029 fi->fi_param); 1030 if (error) 1031 return (error); 1032 } 1033 } 1034 } 1035 1036 /* 1037 * The guest physical memory map looks like the following: 1038 * [0, lowmem) guest system memory 1039 * [lowmem, lowmem_limit) memory hole (may be absent) 1040 * [lowmem_limit, 4GB) PCI hole (32-bit BAR allocation) 1041 * [4GB, 4GB + highmem) 1042 * 1043 * Accesses to memory addresses that are not allocated to system 1044 * memory or PCI devices return 0xff's. 1045 */ 1046 error = vm_get_memory_seg(ctx, 0, &lowmem, NULL); 1047 assert(error == 0); 1048 1049 memset(&pci_mem_hole, 0, sizeof(struct mem_range)); 1050 pci_mem_hole.name = "PCI hole"; 1051 pci_mem_hole.flags = MEM_F_RW; 1052 pci_mem_hole.base = lowmem; 1053 pci_mem_hole.size = (4ULL * 1024 * 1024 * 1024) - lowmem; 1054 pci_mem_hole.handler = pci_emul_fallback_handler; 1055 1056 error = register_mem_fallback(&pci_mem_hole); 1057 assert(error == 0); 1058 1059 return (0); 1060 } 1061 1062 static void 1063 pci_prt_entry(int slot, int pin, int ioapic_irq, void *arg) 1064 { 1065 int *count; 1066 1067 count = arg; 1068 dsdt_line(" Package (0x04)"); 1069 dsdt_line(" {"); 1070 dsdt_line(" 0x%X,", slot << 16 | 0xffff); 1071 dsdt_line(" 0x%02X,", pin - 1); 1072 dsdt_line(" Zero,"); 1073 dsdt_line(" 0x%X", ioapic_irq); 1074 dsdt_line(" }%s", *count == 1 ? "" : ","); 1075 (*count)--; 1076 } 1077 1078 void 1079 pci_write_dsdt(void) 1080 { 1081 struct pci_devinst *pi; 1082 int count, slot, func; 1083 1084 dsdt_indent(1); 1085 dsdt_line("Scope (_SB)"); 1086 dsdt_line("{"); 1087 dsdt_line(" Device (PCI0)"); 1088 dsdt_line(" {"); 1089 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); 1090 dsdt_line(" Name (_ADR, Zero)"); 1091 dsdt_line(" Name (_CRS, ResourceTemplate ()"); 1092 dsdt_line(" {"); 1093 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " 1094 "MaxFixed, PosDecode,"); 1095 dsdt_line(" 0x0000, // Granularity"); 1096 dsdt_line(" 0x0000, // Range Minimum"); 1097 dsdt_line(" 0x00FF, // Range Maximum"); 1098 dsdt_line(" 0x0000, // Translation Offset"); 1099 dsdt_line(" 0x0100, // Length"); 1100 dsdt_line(" ,, )"); 1101 dsdt_indent(3); 1102 dsdt_fixed_ioport(0xCF8, 8); 1103 dsdt_unindent(3); 1104 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1105 "PosDecode, EntireRange,"); 1106 dsdt_line(" 0x0000, // Granularity"); 1107 dsdt_line(" 0x0000, // Range Minimum"); 1108 dsdt_line(" 0x0CF7, // Range Maximum"); 1109 dsdt_line(" 0x0000, // Translation Offset"); 1110 dsdt_line(" 0x0CF8, // Length"); 1111 dsdt_line(" ,, , TypeStatic)"); 1112 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " 1113 "PosDecode, EntireRange,"); 1114 dsdt_line(" 0x0000, // Granularity"); 1115 dsdt_line(" 0x0D00, // Range Minimum"); 1116 dsdt_line(" 0xFFFF, // Range Maximum"); 1117 dsdt_line(" 0x0000, // Translation Offset"); 1118 dsdt_line(" 0xF300, // Length"); 1119 dsdt_line(" ,, , TypeStatic)"); 1120 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " 1121 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1122 dsdt_line(" 0x00000000, // Granularity"); 1123 dsdt_line(" 0x%08lX, // Range Minimum\n", 1124 pci_mem_hole.base); 1125 dsdt_line(" 0x%08X, // Range Maximum\n", 1126 PCI_EMUL_MEMLIMIT32 - 1); 1127 dsdt_line(" 0x00000000, // Translation Offset"); 1128 dsdt_line(" 0x%08lX, // Length\n", 1129 PCI_EMUL_MEMLIMIT32 - pci_mem_hole.base); 1130 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1131 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " 1132 "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); 1133 dsdt_line(" 0x0000000000000000, // Granularity"); 1134 dsdt_line(" 0x%016lX, // Range Minimum\n", 1135 PCI_EMUL_MEMBASE64); 1136 dsdt_line(" 0x%016lX, // Range Maximum\n", 1137 PCI_EMUL_MEMLIMIT64 - 1); 1138 dsdt_line(" 0x0000000000000000, // Translation Offset"); 1139 dsdt_line(" 0x%016lX, // Length\n", 1140 PCI_EMUL_MEMLIMIT64 - PCI_EMUL_MEMBASE64); 1141 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); 1142 dsdt_line(" })"); 1143 count = pci_count_lintr(); 1144 if (count != 0) { 1145 dsdt_indent(2); 1146 dsdt_line("Name (_PRT, Package (0x%02X)", count); 1147 dsdt_line("{"); 1148 pci_walk_lintr(pci_prt_entry, &count); 1149 dsdt_line("})"); 1150 dsdt_unindent(2); 1151 } 1152 1153 dsdt_indent(2); 1154 for (slot = 0; slot < MAXSLOTS; slot++) { 1155 for (func = 0; func < MAXFUNCS; func++) { 1156 pi = pci_slotinfo[slot].si_funcs[func].fi_devi; 1157 if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) 1158 pi->pi_d->pe_write_dsdt(pi); 1159 } 1160 } 1161 dsdt_unindent(2); 1162 1163 dsdt_line(" }"); 1164 dsdt_line("}"); 1165 dsdt_unindent(1); 1166 } 1167 1168 int 1169 pci_msi_enabled(struct pci_devinst *pi) 1170 { 1171 return (pi->pi_msi.enabled); 1172 } 1173 1174 int 1175 pci_msi_maxmsgnum(struct pci_devinst *pi) 1176 { 1177 if (pi->pi_msi.enabled) 1178 return (pi->pi_msi.maxmsgnum); 1179 else 1180 return (0); 1181 } 1182 1183 int 1184 pci_msix_enabled(struct pci_devinst *pi) 1185 { 1186 1187 return (pi->pi_msix.enabled && !pi->pi_msi.enabled); 1188 } 1189 1190 void 1191 pci_generate_msix(struct pci_devinst *pi, int index) 1192 { 1193 struct msix_table_entry *mte; 1194 1195 if (!pci_msix_enabled(pi)) 1196 return; 1197 1198 if (pi->pi_msix.function_mask) 1199 return; 1200 1201 if (index >= pi->pi_msix.table_count) 1202 return; 1203 1204 mte = &pi->pi_msix.table[index]; 1205 if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 1206 /* XXX Set PBA bit if interrupt is disabled */ 1207 vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); 1208 } 1209 } 1210 1211 void 1212 pci_generate_msi(struct pci_devinst *pi, int index) 1213 { 1214 1215 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { 1216 vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, 1217 pi->pi_msi.msg_data + index); 1218 } 1219 } 1220 1221 static bool 1222 pci_lintr_permitted(struct pci_devinst *pi) 1223 { 1224 uint16_t cmd; 1225 1226 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); 1227 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || 1228 (cmd & PCIM_CMD_INTxDIS))); 1229 } 1230 1231 int 1232 pci_lintr_request(struct pci_devinst *pi) 1233 { 1234 struct slotinfo *si; 1235 int bestpin, bestcount, irq, pin; 1236 1237 /* 1238 * First, allocate a pin from our slot. 1239 */ 1240 si = &pci_slotinfo[pi->pi_slot]; 1241 bestpin = 0; 1242 bestcount = si->si_intpins[0].ii_count; 1243 for (pin = 1; pin < 4; pin++) { 1244 if (si->si_intpins[pin].ii_count < bestcount) { 1245 bestpin = pin; 1246 bestcount = si->si_intpins[pin].ii_count; 1247 } 1248 } 1249 1250 /* 1251 * Attempt to allocate an I/O APIC pin for this intpin. If 1252 * 8259A support is added we will need a separate field to 1253 * assign the intpin to an input pin on the PCI interrupt 1254 * router. 1255 */ 1256 if (si->si_intpins[bestpin].ii_count == 0) { 1257 irq = ioapic_pci_alloc_irq(); 1258 if (irq < 0) 1259 return (-1); 1260 si->si_intpins[bestpin].ii_ioapic_irq = irq; 1261 } else 1262 irq = si->si_intpins[bestpin].ii_ioapic_irq; 1263 si->si_intpins[bestpin].ii_count++; 1264 1265 pi->pi_lintr.pin = bestpin + 1; 1266 pi->pi_lintr.ioapic_irq = irq; 1267 pci_set_cfgdata8(pi, PCIR_INTLINE, irq); 1268 pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); 1269 return (0); 1270 } 1271 1272 void 1273 pci_lintr_assert(struct pci_devinst *pi) 1274 { 1275 1276 assert(pi->pi_lintr.pin > 0); 1277 1278 pthread_mutex_lock(&pi->pi_lintr.lock); 1279 if (pi->pi_lintr.state == IDLE) { 1280 if (pci_lintr_permitted(pi)) { 1281 pi->pi_lintr.state = ASSERTED; 1282 vm_ioapic_assert_irq(pi->pi_vmctx, 1283 pi->pi_lintr.ioapic_irq); 1284 } else 1285 pi->pi_lintr.state = PENDING; 1286 } 1287 pthread_mutex_unlock(&pi->pi_lintr.lock); 1288 } 1289 1290 void 1291 pci_lintr_deassert(struct pci_devinst *pi) 1292 { 1293 1294 assert(pi->pi_lintr.pin > 0); 1295 1296 pthread_mutex_lock(&pi->pi_lintr.lock); 1297 if (pi->pi_lintr.state == ASSERTED) { 1298 pi->pi_lintr.state = IDLE; 1299 vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq); 1300 } else if (pi->pi_lintr.state == PENDING) 1301 pi->pi_lintr.state = IDLE; 1302 pthread_mutex_unlock(&pi->pi_lintr.lock); 1303 } 1304 1305 static void 1306 pci_lintr_update(struct pci_devinst *pi) 1307 { 1308 1309 pthread_mutex_lock(&pi->pi_lintr.lock); 1310 if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { 1311 vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq); 1312 pi->pi_lintr.state = PENDING; 1313 } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { 1314 pi->pi_lintr.state = ASSERTED; 1315 vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq); 1316 } 1317 pthread_mutex_unlock(&pi->pi_lintr.lock); 1318 } 1319 1320 int 1321 pci_count_lintr(void) 1322 { 1323 int count, slot, pin; 1324 1325 count = 0; 1326 for (slot = 0; slot < MAXSLOTS; slot++) { 1327 for (pin = 0; pin < 4; pin++) { 1328 if (pci_slotinfo[slot].si_intpins[pin].ii_count != 0) 1329 count++; 1330 } 1331 } 1332 return (count); 1333 } 1334 1335 void 1336 pci_walk_lintr(pci_lintr_cb cb, void *arg) 1337 { 1338 struct intxinfo *ii; 1339 int slot, pin; 1340 1341 for (slot = 0; slot < MAXSLOTS; slot++) { 1342 for (pin = 0; pin < 4; pin++) { 1343 ii = &pci_slotinfo[slot].si_intpins[pin]; 1344 if (ii->ii_count != 0) 1345 cb(slot, pin + 1, ii->ii_ioapic_irq, arg); 1346 } 1347 } 1348 } 1349 1350 /* 1351 * Return 1 if the emulated device in 'slot' is a multi-function device. 1352 * Return 0 otherwise. 1353 */ 1354 static int 1355 pci_emul_is_mfdev(int slot) 1356 { 1357 int f, numfuncs; 1358 1359 numfuncs = 0; 1360 for (f = 0; f < MAXFUNCS; f++) { 1361 if (pci_slotinfo[slot].si_funcs[f].fi_devi != NULL) { 1362 numfuncs++; 1363 } 1364 } 1365 return (numfuncs > 1); 1366 } 1367 1368 /* 1369 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on 1370 * whether or not is a multi-function being emulated in the pci 'slot'. 1371 */ 1372 static void 1373 pci_emul_hdrtype_fixup(int slot, int off, int bytes, uint32_t *rv) 1374 { 1375 int mfdev; 1376 1377 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { 1378 mfdev = pci_emul_is_mfdev(slot); 1379 switch (bytes) { 1380 case 1: 1381 case 2: 1382 *rv &= ~PCIM_MFDEV; 1383 if (mfdev) { 1384 *rv |= PCIM_MFDEV; 1385 } 1386 break; 1387 case 4: 1388 *rv &= ~(PCIM_MFDEV << 16); 1389 if (mfdev) { 1390 *rv |= (PCIM_MFDEV << 16); 1391 } 1392 break; 1393 } 1394 } 1395 } 1396 1397 static int cfgbus, cfgslot, cfgfunc, cfgoff; 1398 1399 static int 1400 pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1401 uint32_t *eax, void *arg) 1402 { 1403 uint32_t x; 1404 1405 if (bytes != 4) { 1406 if (in) 1407 *eax = (bytes == 2) ? 0xffff : 0xff; 1408 return (0); 1409 } 1410 1411 if (in) { 1412 x = (cfgbus << 16) | 1413 (cfgslot << 11) | 1414 (cfgfunc << 8) | 1415 cfgoff; 1416 *eax = x | CONF1_ENABLE; 1417 } else { 1418 x = *eax; 1419 cfgoff = x & PCI_REGMAX; 1420 cfgfunc = (x >> 8) & PCI_FUNCMAX; 1421 cfgslot = (x >> 11) & PCI_SLOTMAX; 1422 cfgbus = (x >> 16) & PCI_BUSMAX; 1423 } 1424 1425 return (0); 1426 } 1427 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); 1428 1429 static uint32_t 1430 bits_changed(uint32_t old, uint32_t new, uint32_t mask) 1431 { 1432 1433 return ((old ^ new) & mask); 1434 } 1435 1436 static void 1437 pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes) 1438 { 1439 int i; 1440 uint16_t old; 1441 1442 /* 1443 * The command register is at an offset of 4 bytes and thus the 1444 * guest could write 1, 2 or 4 bytes starting at this offset. 1445 */ 1446 1447 old = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ 1448 CFGWRITE(pi, PCIR_COMMAND, new, bytes); /* update config */ 1449 new = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */ 1450 1451 /* 1452 * If the MMIO or I/O address space decoding has changed then 1453 * register/unregister all BARs that decode that address space. 1454 */ 1455 for (i = 0; i <= PCI_BARMAX; i++) { 1456 switch (pi->pi_bar[i].type) { 1457 case PCIBAR_NONE: 1458 case PCIBAR_MEMHI64: 1459 break; 1460 case PCIBAR_IO: 1461 /* I/O address space decoding changed? */ 1462 if (bits_changed(old, new, PCIM_CMD_PORTEN)) { 1463 if (porten(pi)) 1464 register_bar(pi, i); 1465 else 1466 unregister_bar(pi, i); 1467 } 1468 break; 1469 case PCIBAR_MEM32: 1470 case PCIBAR_MEM64: 1471 /* MMIO address space decoding changed? */ 1472 if (bits_changed(old, new, PCIM_CMD_MEMEN)) { 1473 if (memen(pi)) 1474 register_bar(pi, i); 1475 else 1476 unregister_bar(pi, i); 1477 } 1478 break; 1479 default: 1480 assert(0); 1481 } 1482 } 1483 1484 /* 1485 * If INTx has been unmasked and is pending, assert the 1486 * interrupt. 1487 */ 1488 pci_lintr_update(pi); 1489 } 1490 1491 static int 1492 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1493 uint32_t *eax, void *arg) 1494 { 1495 struct pci_devinst *pi; 1496 struct pci_devemu *pe; 1497 int coff, idx, needcfg; 1498 uint64_t addr, bar, mask; 1499 1500 assert(bytes == 1 || bytes == 2 || bytes == 4); 1501 1502 if (cfgbus == 0) 1503 pi = pci_slotinfo[cfgslot].si_funcs[cfgfunc].fi_devi; 1504 else 1505 pi = NULL; 1506 1507 coff = cfgoff + (port - CONF1_DATA_PORT); 1508 1509 #if 0 1510 printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r", 1511 in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc); 1512 #endif 1513 1514 /* 1515 * Just return if there is no device at this cfgslot:cfgfunc or 1516 * if the guest is doing an un-aligned access 1517 */ 1518 if (pi == NULL || (coff & (bytes - 1)) != 0) { 1519 if (in) 1520 *eax = 0xffffffff; 1521 return (0); 1522 } 1523 1524 pe = pi->pi_d; 1525 1526 /* 1527 * Config read 1528 */ 1529 if (in) { 1530 /* Let the device emulation override the default handler */ 1531 if (pe->pe_cfgread != NULL) { 1532 needcfg = pe->pe_cfgread(ctx, vcpu, pi, 1533 coff, bytes, eax); 1534 } else { 1535 needcfg = 1; 1536 } 1537 1538 if (needcfg) { 1539 if (bytes == 1) 1540 *eax = pci_get_cfgdata8(pi, coff); 1541 else if (bytes == 2) 1542 *eax = pci_get_cfgdata16(pi, coff); 1543 else 1544 *eax = pci_get_cfgdata32(pi, coff); 1545 } 1546 1547 pci_emul_hdrtype_fixup(cfgslot, coff, bytes, eax); 1548 } else { 1549 /* Let the device emulation override the default handler */ 1550 if (pe->pe_cfgwrite != NULL && 1551 (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) 1552 return (0); 1553 1554 /* 1555 * Special handling for write to BAR registers 1556 */ 1557 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { 1558 /* 1559 * Ignore writes to BAR registers that are not 1560 * 4-byte aligned. 1561 */ 1562 if (bytes != 4 || (coff & 0x3) != 0) 1563 return (0); 1564 idx = (coff - PCIR_BAR(0)) / 4; 1565 mask = ~(pi->pi_bar[idx].size - 1); 1566 switch (pi->pi_bar[idx].type) { 1567 case PCIBAR_NONE: 1568 pi->pi_bar[idx].addr = bar = 0; 1569 break; 1570 case PCIBAR_IO: 1571 addr = *eax & mask; 1572 addr &= 0xffff; 1573 bar = addr | PCIM_BAR_IO_SPACE; 1574 /* 1575 * Register the new BAR value for interception 1576 */ 1577 if (addr != pi->pi_bar[idx].addr) { 1578 update_bar_address(pi, addr, idx, 1579 PCIBAR_IO); 1580 } 1581 break; 1582 case PCIBAR_MEM32: 1583 addr = bar = *eax & mask; 1584 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; 1585 if (addr != pi->pi_bar[idx].addr) { 1586 update_bar_address(pi, addr, idx, 1587 PCIBAR_MEM32); 1588 } 1589 break; 1590 case PCIBAR_MEM64: 1591 addr = bar = *eax & mask; 1592 bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | 1593 PCIM_BAR_MEM_PREFETCH; 1594 if (addr != (uint32_t)pi->pi_bar[idx].addr) { 1595 update_bar_address(pi, addr, idx, 1596 PCIBAR_MEM64); 1597 } 1598 break; 1599 case PCIBAR_MEMHI64: 1600 mask = ~(pi->pi_bar[idx - 1].size - 1); 1601 addr = ((uint64_t)*eax << 32) & mask; 1602 bar = addr >> 32; 1603 if (bar != pi->pi_bar[idx - 1].addr >> 32) { 1604 update_bar_address(pi, addr, idx - 1, 1605 PCIBAR_MEMHI64); 1606 } 1607 break; 1608 default: 1609 assert(0); 1610 } 1611 pci_set_cfgdata32(pi, coff, bar); 1612 1613 } else if (pci_emul_iscap(pi, coff)) { 1614 pci_emul_capwrite(pi, coff, bytes, *eax); 1615 } else if (coff == PCIR_COMMAND) { 1616 pci_emul_cmdwrite(pi, *eax, bytes); 1617 } else { 1618 CFGWRITE(pi, coff, *eax, bytes); 1619 } 1620 } 1621 1622 return (0); 1623 } 1624 1625 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); 1626 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); 1627 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); 1628 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); 1629 1630 /* 1631 * I/O ports to configure PCI IRQ routing. We ignore all writes to it. 1632 */ 1633 static int 1634 pci_irq_port_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, 1635 uint32_t *eax, void *arg) 1636 { 1637 assert(in == 0); 1638 return (0); 1639 } 1640 INOUT_PORT(pci_irq, 0xC00, IOPORT_F_OUT, pci_irq_port_handler); 1641 INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler); 1642 SYSRES_IO(0xC00, 2); 1643 1644 #define PCI_EMUL_TEST 1645 #ifdef PCI_EMUL_TEST 1646 /* 1647 * Define a dummy test device 1648 */ 1649 #define DIOSZ 20 1650 #define DMEMSZ 4096 1651 struct pci_emul_dsoftc { 1652 uint8_t ioregs[DIOSZ]; 1653 uint8_t memregs[DMEMSZ]; 1654 }; 1655 1656 #define PCI_EMUL_MSI_MSGS 4 1657 #define PCI_EMUL_MSIX_MSGS 16 1658 1659 static int 1660 pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 1661 { 1662 int error; 1663 struct pci_emul_dsoftc *sc; 1664 1665 sc = malloc(sizeof(struct pci_emul_dsoftc)); 1666 memset(sc, 0, sizeof(struct pci_emul_dsoftc)); 1667 1668 pi->pi_arg = sc; 1669 1670 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); 1671 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); 1672 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); 1673 1674 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); 1675 assert(error == 0); 1676 1677 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); 1678 assert(error == 0); 1679 1680 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); 1681 assert(error == 0); 1682 1683 return (0); 1684 } 1685 1686 static void 1687 pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1688 uint64_t offset, int size, uint64_t value) 1689 { 1690 int i; 1691 struct pci_emul_dsoftc *sc = pi->pi_arg; 1692 1693 if (baridx == 0) { 1694 if (offset + size > DIOSZ) { 1695 printf("diow: iow too large, offset %ld size %d\n", 1696 offset, size); 1697 return; 1698 } 1699 1700 if (size == 1) { 1701 sc->ioregs[offset] = value & 0xff; 1702 } else if (size == 2) { 1703 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; 1704 } else if (size == 4) { 1705 *(uint32_t *)&sc->ioregs[offset] = value; 1706 } else { 1707 printf("diow: iow unknown size %d\n", size); 1708 } 1709 1710 /* 1711 * Special magic value to generate an interrupt 1712 */ 1713 if (offset == 4 && size == 4 && pci_msi_enabled(pi)) 1714 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); 1715 1716 if (value == 0xabcdef) { 1717 for (i = 0; i < pci_msi_maxmsgnum(pi); i++) 1718 pci_generate_msi(pi, i); 1719 } 1720 } 1721 1722 if (baridx == 1) { 1723 if (offset + size > DMEMSZ) { 1724 printf("diow: memw too large, offset %ld size %d\n", 1725 offset, size); 1726 return; 1727 } 1728 1729 if (size == 1) { 1730 sc->memregs[offset] = value; 1731 } else if (size == 2) { 1732 *(uint16_t *)&sc->memregs[offset] = value; 1733 } else if (size == 4) { 1734 *(uint32_t *)&sc->memregs[offset] = value; 1735 } else if (size == 8) { 1736 *(uint64_t *)&sc->memregs[offset] = value; 1737 } else { 1738 printf("diow: memw unknown size %d\n", size); 1739 } 1740 1741 /* 1742 * magic interrupt ?? 1743 */ 1744 } 1745 1746 if (baridx > 1) { 1747 printf("diow: unknown bar idx %d\n", baridx); 1748 } 1749 } 1750 1751 static uint64_t 1752 pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1753 uint64_t offset, int size) 1754 { 1755 struct pci_emul_dsoftc *sc = pi->pi_arg; 1756 uint32_t value; 1757 1758 if (baridx == 0) { 1759 if (offset + size > DIOSZ) { 1760 printf("dior: ior too large, offset %ld size %d\n", 1761 offset, size); 1762 return (0); 1763 } 1764 1765 if (size == 1) { 1766 value = sc->ioregs[offset]; 1767 } else if (size == 2) { 1768 value = *(uint16_t *) &sc->ioregs[offset]; 1769 } else if (size == 4) { 1770 value = *(uint32_t *) &sc->ioregs[offset]; 1771 } else { 1772 printf("dior: ior unknown size %d\n", size); 1773 } 1774 } 1775 1776 if (baridx == 1) { 1777 if (offset + size > DMEMSZ) { 1778 printf("dior: memr too large, offset %ld size %d\n", 1779 offset, size); 1780 return (0); 1781 } 1782 1783 if (size == 1) { 1784 value = sc->memregs[offset]; 1785 } else if (size == 2) { 1786 value = *(uint16_t *) &sc->memregs[offset]; 1787 } else if (size == 4) { 1788 value = *(uint32_t *) &sc->memregs[offset]; 1789 } else if (size == 8) { 1790 value = *(uint64_t *) &sc->memregs[offset]; 1791 } else { 1792 printf("dior: ior unknown size %d\n", size); 1793 } 1794 } 1795 1796 1797 if (baridx > 1) { 1798 printf("dior: unknown bar idx %d\n", baridx); 1799 return (0); 1800 } 1801 1802 return (value); 1803 } 1804 1805 struct pci_devemu pci_dummy = { 1806 .pe_emu = "dummy", 1807 .pe_init = pci_emul_dinit, 1808 .pe_barwrite = pci_emul_diow, 1809 .pe_barread = pci_emul_dior 1810 }; 1811 PCI_EMUL_SET(pci_dummy); 1812 1813 #endif /* PCI_EMUL_TEST */ 1814