1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG 5 * Author: Corvin Köhne <c.koehne@beckhoff.com> 6 */ 7 8 #include <sys/types.h> 9 #include <sys/mman.h> 10 #include <sys/sysctl.h> 11 12 #include <dev/pci/pcireg.h> 13 14 #include <err.h> 15 #include <errno.h> 16 #include <fcntl.h> 17 #include <string.h> 18 #include <unistd.h> 19 20 #include "amd64/e820.h" 21 #include "pci_gvt-d-opregion.h" 22 #include "pci_passthru.h" 23 #include "pciids_intel_gpus.h" 24 25 #define KB (1024UL) 26 #define MB (1024 * KB) 27 #define GB (1024 * MB) 28 29 #ifndef _PATH_MEM 30 #define _PATH_MEM "/dev/mem" 31 #endif 32 33 #define PCI_VENDOR_INTEL 0x8086 34 35 #define PCIR_BDSM 0x5C /* Base of Data Stolen Memory register */ 36 #define PCIR_BDSM_GEN11 0xC0 37 #define PCIR_ASLS_CTL 0xFC /* Opregion start address register */ 38 39 #define PCIM_BDSM_GSM_ALIGNMENT \ 40 0x00100000 /* Graphics Stolen Memory is 1 MB aligned */ 41 42 #define BDSM_GEN11_MMIO_ADDRESS 0x1080C0 43 44 #define GVT_D_MAP_GSM 0 45 #define GVT_D_MAP_OPREGION 1 46 #define GVT_D_MAP_VBT 2 47 48 static uint64_t 49 gvt_d_dsmbase_read(struct pci_devinst *pi, int baridx __unused, uint64_t offset, 50 int size) 51 { 52 switch (size) { 53 case 1: 54 return (pci_get_cfgdata8(pi, PCIR_BDSM_GEN11 + offset)); 55 case 2: 56 return (pci_get_cfgdata16(pi, PCIR_BDSM_GEN11 + offset)); 57 case 4: 58 return (pci_get_cfgdata32(pi, PCIR_BDSM_GEN11 + offset)); 59 default: 60 return (UINT64_MAX); 61 } 62 } 63 64 static void 65 gvt_d_dsmbase_write(struct pci_devinst *pi, int baridx __unused, 66 uint64_t offset, int size, uint64_t val) 67 { 68 switch (size) { 69 case 1: 70 pci_set_cfgdata8(pi, PCIR_BDSM_GEN11 + offset, val); 71 break; 72 case 2: 73 pci_set_cfgdata16(pi, PCIR_BDSM_GEN11 + offset, val); 74 break; 75 case 4: 76 pci_set_cfgdata32(pi, PCIR_BDSM_GEN11 + offset, val); 77 break; 78 default: 79 break; 80 } 81 } 82 83 static int 84 set_bdsm_gen3(struct pci_devinst *const pi, vm_paddr_t bdsm_gpa) 85 { 86 struct passthru_softc *sc = pi->pi_arg; 87 uint32_t bdsm; 88 int error; 89 90 bdsm = pci_host_read_config(passthru_get_sel(sc), PCIR_BDSM, 4); 91 92 /* Protect the BDSM register in PCI space. */ 93 pci_set_cfgdata32(pi, PCIR_BDSM, 94 bdsm_gpa | (bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1))); 95 error = set_pcir_handler(sc, PCIR_BDSM, 4, passthru_cfgread_emulate, 96 passthru_cfgwrite_emulate); 97 if (error) { 98 warnx("%s: Failed to setup handler for BDSM register!", __func__); 99 return (error); 100 } 101 102 return (0); 103 } 104 105 static int 106 set_bdsm_gen11(struct pci_devinst *const pi, vm_paddr_t bdsm_gpa) 107 { 108 struct passthru_softc *sc = pi->pi_arg; 109 uint64_t bdsm; 110 int error; 111 112 bdsm = pci_host_read_config(passthru_get_sel(sc), PCIR_BDSM_GEN11, 8); 113 114 /* Protect the BDSM register in PCI space. */ 115 pci_set_cfgdata32(pi, PCIR_BDSM_GEN11, 116 bdsm_gpa | (bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1))); 117 pci_set_cfgdata32(pi, PCIR_BDSM_GEN11 + 4, bdsm_gpa >> 32); 118 error = set_pcir_handler(sc, PCIR_BDSM_GEN11, 8, passthru_cfgread_emulate, 119 passthru_cfgwrite_emulate); 120 if (error) { 121 warnx("%s: Failed to setup handler for BDSM register!\n", __func__); 122 return (error); 123 } 124 125 /* Protect the BDSM register in MMIO space. */ 126 error = passthru_set_bar_handler(sc, 0, BDSM_GEN11_MMIO_ADDRESS, sizeof(uint64_t), 127 gvt_d_dsmbase_read, gvt_d_dsmbase_write); 128 if (error) { 129 warnx("%s: Failed to setup handler for BDSM mirror!\n", __func__); 130 return (error); 131 } 132 133 return (0); 134 } 135 136 struct igd_ops { 137 int (*set_bdsm)(struct pci_devinst *const pi, vm_paddr_t bdsm_gpa); 138 }; 139 140 static const struct igd_ops igd_ops_gen3 = { .set_bdsm = set_bdsm_gen3 }; 141 142 static const struct igd_ops igd_ops_gen11 = { .set_bdsm = set_bdsm_gen11 }; 143 144 struct igd_device { 145 uint32_t device_id; 146 const struct igd_ops *ops; 147 }; 148 149 #define IGD_DEVICE(_device_id, _ops) \ 150 { \ 151 .device_id = (_device_id), \ 152 .ops = (_ops), \ 153 } 154 155 static const struct igd_device igd_devices[] = { 156 INTEL_I915G_IDS(IGD_DEVICE, &igd_ops_gen3), 157 INTEL_I915GM_IDS(IGD_DEVICE, &igd_ops_gen3), 158 INTEL_I945G_IDS(IGD_DEVICE, &igd_ops_gen3), 159 INTEL_I945GM_IDS(IGD_DEVICE, &igd_ops_gen3), 160 INTEL_VLV_IDS(IGD_DEVICE, &igd_ops_gen3), 161 INTEL_PNV_IDS(IGD_DEVICE, &igd_ops_gen3), 162 INTEL_I965GM_IDS(IGD_DEVICE, &igd_ops_gen3), 163 INTEL_GM45_IDS(IGD_DEVICE, &igd_ops_gen3), 164 INTEL_G45_IDS(IGD_DEVICE, &igd_ops_gen3), 165 INTEL_ILK_IDS(IGD_DEVICE, &igd_ops_gen3), 166 INTEL_SNB_IDS(IGD_DEVICE, &igd_ops_gen3), 167 INTEL_IVB_IDS(IGD_DEVICE, &igd_ops_gen3), 168 INTEL_HSW_IDS(IGD_DEVICE, &igd_ops_gen3), 169 INTEL_BDW_IDS(IGD_DEVICE, &igd_ops_gen3), 170 INTEL_CHV_IDS(IGD_DEVICE, &igd_ops_gen3), 171 INTEL_SKL_IDS(IGD_DEVICE, &igd_ops_gen3), 172 INTEL_BXT_IDS(IGD_DEVICE, &igd_ops_gen3), 173 INTEL_KBL_IDS(IGD_DEVICE, &igd_ops_gen3), 174 INTEL_CFL_IDS(IGD_DEVICE, &igd_ops_gen3), 175 INTEL_WHL_IDS(IGD_DEVICE, &igd_ops_gen3), 176 INTEL_CML_IDS(IGD_DEVICE, &igd_ops_gen3), 177 INTEL_GLK_IDS(IGD_DEVICE, &igd_ops_gen3), 178 INTEL_CNL_IDS(IGD_DEVICE, &igd_ops_gen3), 179 INTEL_ICL_IDS(IGD_DEVICE, &igd_ops_gen11), 180 INTEL_EHL_IDS(IGD_DEVICE, &igd_ops_gen11), 181 INTEL_JSL_IDS(IGD_DEVICE, &igd_ops_gen11), 182 INTEL_TGL_IDS(IGD_DEVICE, &igd_ops_gen11), 183 INTEL_RKL_IDS(IGD_DEVICE, &igd_ops_gen11), 184 INTEL_ADLS_IDS(IGD_DEVICE, &igd_ops_gen11), 185 INTEL_ADLP_IDS(IGD_DEVICE, &igd_ops_gen11), 186 INTEL_ADLN_IDS(IGD_DEVICE, &igd_ops_gen11), 187 INTEL_RPLS_IDS(IGD_DEVICE, &igd_ops_gen11), 188 INTEL_RPLU_IDS(IGD_DEVICE, &igd_ops_gen11), 189 INTEL_RPLP_IDS(IGD_DEVICE, &igd_ops_gen11), 190 }; 191 192 static const struct igd_ops * 193 get_igd_ops(struct pci_devinst *const pi) 194 { 195 struct passthru_softc *sc = pi->pi_arg; 196 uint16_t device_id; 197 198 device_id = pci_host_read_config(passthru_get_sel(sc), PCIR_DEVICE, 199 0x02); 200 for (size_t i = 0; i < nitems(igd_devices); i++) { 201 if (igd_devices[i].device_id != device_id) 202 continue; 203 204 return (igd_devices[i].ops); 205 } 206 207 return (NULL); 208 } 209 210 static int 211 gvt_d_probe(struct pci_devinst *const pi) 212 { 213 struct passthru_softc *sc; 214 uint16_t vendor; 215 uint8_t class; 216 217 sc = pi->pi_arg; 218 219 vendor = pci_host_read_config(passthru_get_sel(sc), PCIR_VENDOR, 0x02); 220 if (vendor != PCI_VENDOR_INTEL) 221 return (ENXIO); 222 223 class = pci_host_read_config(passthru_get_sel(sc), PCIR_CLASS, 0x01); 224 if (class != PCIC_DISPLAY) 225 return (ENXIO); 226 227 return (0); 228 } 229 230 static vm_paddr_t 231 gvt_d_alloc_mmio_memory(const vm_paddr_t host_address, const vm_paddr_t length, 232 const vm_paddr_t alignment, const enum e820_memory_type type) 233 { 234 vm_paddr_t address; 235 236 /* Try to reuse host address. */ 237 address = e820_alloc(host_address, length, E820_ALIGNMENT_NONE, type, 238 E820_ALLOCATE_SPECIFIC); 239 if (address != 0) { 240 return (address); 241 } 242 243 /* 244 * We're not able to reuse the host address. Fall back to the highest usable 245 * address below 4 GB. 246 */ 247 return ( 248 e820_alloc(4 * GB, length, alignment, type, E820_ALLOCATE_HIGHEST)); 249 } 250 251 /* 252 * Note that the graphics stolen memory is somehow confusing. On the one hand 253 * the Intel Open Source HD Graphics Programmers' Reference Manual states that 254 * it's only GPU accessible. As the CPU can't access the area, the guest 255 * shouldn't need it. On the other hand, the Intel GOP driver refuses to work 256 * properly, if it's not set to a proper address. 257 * 258 * Intel itself maps it into the guest by EPT [1]. At the moment, we're not 259 * aware of any situation where this EPT mapping is required, so we don't do it 260 * yet. 261 * 262 * Intel also states that the Windows driver for Tiger Lake reads the address of 263 * the graphics stolen memory [2]. As the GVT-d code doesn't support Tiger Lake 264 * in its first implementation, we can't check how it behaves. We should keep an 265 * eye on it. 266 * 267 * [1] 268 * https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L655-L657 269 * [2] 270 * https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L626-L629 271 */ 272 static int 273 gvt_d_setup_gsm(struct pci_devinst *const pi) 274 { 275 struct passthru_softc *sc; 276 struct passthru_mmio_mapping *gsm; 277 const struct igd_ops *igd_ops; 278 size_t sysctl_len; 279 int error; 280 281 sc = pi->pi_arg; 282 283 gsm = passthru_get_mmio(sc, GVT_D_MAP_GSM); 284 if (gsm == NULL) { 285 warnx("%s: Unable to access gsm", __func__); 286 return (-1); 287 } 288 289 sysctl_len = sizeof(gsm->hpa); 290 error = sysctlbyname("hw.intel_graphics_stolen_base", &gsm->hpa, 291 &sysctl_len, NULL, 0); 292 if (error) { 293 warn("%s: Unable to get graphics stolen memory base", 294 __func__); 295 return (-1); 296 } 297 sysctl_len = sizeof(gsm->len); 298 error = sysctlbyname("hw.intel_graphics_stolen_size", &gsm->len, 299 &sysctl_len, NULL, 0); 300 if (error) { 301 warn("%s: Unable to get graphics stolen memory length", 302 __func__); 303 return (-1); 304 } 305 gsm->hva = NULL; /* unused */ 306 gsm->gva = NULL; /* unused */ 307 gsm->gpa = gvt_d_alloc_mmio_memory(gsm->hpa, gsm->len, 308 PCIM_BDSM_GSM_ALIGNMENT, E820_TYPE_RESERVED); 309 if (gsm->gpa == 0) { 310 warnx( 311 "%s: Unable to add Graphics Stolen Memory to E820 table (hpa 0x%lx len 0x%lx)", 312 __func__, gsm->hpa, gsm->len); 313 e820_dump_table(); 314 return (-1); 315 } 316 if (gsm->gpa != gsm->hpa) { 317 /* 318 * ACRN source code implies that graphics driver for newer Intel 319 * platforms like Tiger Lake will read the Graphics Stolen Memory 320 * address from an MMIO register. We have three options to solve this 321 * issue: 322 * 1. Patch the value in the MMIO register 323 * This could have unintended side effects. Without any 324 * documentation how this register is used by the GPU, don't do 325 * it. 326 * 2. Trap the MMIO register 327 * It's not possible to trap a single MMIO register. We need to 328 * trap a whole page. Trapping a bunch of MMIO register could 329 * degrade the performance noticeably. We have to test it. 330 * 3. Use an 1:1 host to guest mapping 331 * Maybe not always possible. As far as we know, no supported 332 * platform requires a 1:1 mapping. For that reason, just log a 333 * warning. 334 */ 335 warnx( 336 "Warning: Unable to reuse host address of Graphics Stolen Memory. GPU passthrough might not work properly."); 337 } 338 339 igd_ops = get_igd_ops(pi); 340 if (igd_ops == NULL) { 341 warn("%s: Unknown IGD device. It's not supported yet!", 342 __func__); 343 return (-1); 344 } 345 346 return (igd_ops->set_bdsm(pi, gsm->gpa)); 347 } 348 349 static int 350 gvt_d_setup_vbt(struct pci_devinst *const pi, int memfd, uint64_t vbt_hpa, 351 uint64_t vbt_len, vm_paddr_t *vbt_gpa) 352 { 353 struct passthru_softc *sc; 354 struct passthru_mmio_mapping *vbt; 355 356 sc = pi->pi_arg; 357 358 vbt = passthru_get_mmio(sc, GVT_D_MAP_VBT); 359 if (vbt == NULL) { 360 warnx("%s: Unable to access VBT", __func__); 361 return (-1); 362 } 363 364 vbt->hpa = vbt_hpa; 365 vbt->len = vbt_len; 366 367 vbt->hva = mmap(NULL, vbt->len, PROT_READ, MAP_SHARED, memfd, vbt->hpa); 368 if (vbt->hva == MAP_FAILED) { 369 warn("%s: Unable to map VBT", __func__); 370 return (-1); 371 } 372 373 vbt->gpa = gvt_d_alloc_mmio_memory(vbt->hpa, vbt->len, 374 E820_ALIGNMENT_NONE, E820_TYPE_NVS); 375 if (vbt->gpa == 0) { 376 warnx( 377 "%s: Unable to add VBT to E820 table (hpa 0x%lx len 0x%lx)", 378 __func__, vbt->hpa, vbt->len); 379 munmap(vbt->hva, vbt->len); 380 e820_dump_table(); 381 return (-1); 382 } 383 vbt->gva = vm_map_gpa(pi->pi_vmctx, vbt->gpa, vbt->len); 384 if (vbt->gva == NULL) { 385 warnx("%s: Unable to map guest VBT", __func__); 386 munmap(vbt->hva, vbt->len); 387 return (-1); 388 } 389 390 if (vbt->gpa != vbt->hpa) { 391 /* 392 * A 1:1 host to guest mapping is not required but this could 393 * change in the future. 394 */ 395 warnx( 396 "Warning: Unable to reuse host address of VBT. GPU passthrough might not work properly."); 397 } 398 399 memcpy(vbt->gva, vbt->hva, vbt->len); 400 401 /* 402 * Return the guest physical address. It's used to patch the OpRegion 403 * properly. 404 */ 405 *vbt_gpa = vbt->gpa; 406 407 return (0); 408 } 409 410 static int 411 gvt_d_setup_opregion(struct pci_devinst *const pi) 412 { 413 struct passthru_softc *sc; 414 struct passthru_mmio_mapping *opregion; 415 struct igd_opregion *opregion_ptr; 416 struct igd_opregion_header *header; 417 vm_paddr_t vbt_gpa = 0; 418 vm_paddr_t vbt_hpa; 419 uint64_t asls; 420 int error = 0; 421 int memfd; 422 423 sc = pi->pi_arg; 424 425 memfd = open(_PATH_MEM, O_RDONLY, 0); 426 if (memfd < 0) { 427 warn("%s: Failed to open %s", __func__, _PATH_MEM); 428 return (-1); 429 } 430 431 opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION); 432 if (opregion == NULL) { 433 warnx("%s: Unable to access opregion", __func__); 434 close(memfd); 435 return (-1); 436 } 437 438 asls = pci_host_read_config(passthru_get_sel(sc), PCIR_ASLS_CTL, 4); 439 440 header = mmap(NULL, sizeof(*header), PROT_READ, MAP_SHARED, memfd, 441 asls); 442 if (header == MAP_FAILED) { 443 warn("%s: Unable to map OpRegion header", __func__); 444 close(memfd); 445 return (-1); 446 } 447 if (memcmp(header->sign, IGD_OPREGION_HEADER_SIGN, 448 sizeof(header->sign)) != 0) { 449 warnx("%s: Invalid OpRegion signature", __func__); 450 munmap(header, sizeof(*header)); 451 close(memfd); 452 return (-1); 453 } 454 455 opregion->hpa = asls; 456 opregion->len = header->size * KB; 457 munmap(header, sizeof(*header)); 458 459 if (opregion->len != sizeof(struct igd_opregion)) { 460 warnx("%s: Invalid OpRegion size of 0x%lx", __func__, 461 opregion->len); 462 close(memfd); 463 return (-1); 464 } 465 466 opregion->hva = mmap(NULL, opregion->len, PROT_READ, MAP_SHARED, memfd, 467 opregion->hpa); 468 if (opregion->hva == MAP_FAILED) { 469 warn("%s: Unable to map host OpRegion", __func__); 470 close(memfd); 471 return (-1); 472 } 473 474 opregion_ptr = (struct igd_opregion *)opregion->hva; 475 if (opregion_ptr->mbox3.rvda != 0) { 476 /* 477 * OpRegion v2.0 contains a physical address to the VBT. This 478 * address is useless in a guest environment. It's possible to 479 * patch that but we don't support that yet. So, the only thing 480 * we can do is give up. 481 */ 482 if (opregion_ptr->header.over == 0x02000000) { 483 warnx( 484 "%s: VBT lays outside OpRegion. That's not yet supported for a version 2.0 OpRegion", 485 __func__); 486 close(memfd); 487 return (-1); 488 } 489 vbt_hpa = opregion->hpa + opregion_ptr->mbox3.rvda; 490 if (vbt_hpa < opregion->hpa) { 491 warnx( 492 "%s: overflow when calculating VBT address (OpRegion @ 0x%lx, RVDA = 0x%lx)", 493 __func__, opregion->hpa, opregion_ptr->mbox3.rvda); 494 close(memfd); 495 return (-1); 496 } 497 498 if ((error = gvt_d_setup_vbt(pi, memfd, vbt_hpa, 499 opregion_ptr->mbox3.rvds, &vbt_gpa)) != 0) { 500 close(memfd); 501 return (error); 502 } 503 } 504 505 close(memfd); 506 507 opregion->gpa = gvt_d_alloc_mmio_memory(opregion->hpa, opregion->len, 508 E820_ALIGNMENT_NONE, E820_TYPE_NVS); 509 if (opregion->gpa == 0) { 510 warnx( 511 "%s: Unable to add OpRegion to E820 table (hpa 0x%lx len 0x%lx)", 512 __func__, opregion->hpa, opregion->len); 513 e820_dump_table(); 514 return (-1); 515 } 516 opregion->gva = vm_map_gpa(pi->pi_vmctx, opregion->gpa, opregion->len); 517 if (opregion->gva == NULL) { 518 warnx("%s: Unable to map guest OpRegion", __func__); 519 return (-1); 520 } 521 if (opregion->gpa != opregion->hpa) { 522 /* 523 * A 1:1 host to guest mapping is not required but this could 524 * change in the future. 525 */ 526 warnx( 527 "Warning: Unable to reuse host address of OpRegion. GPU passthrough might not work properly."); 528 } 529 530 memcpy(opregion->gva, opregion->hva, opregion->len); 531 532 /* 533 * Patch the VBT address to match our guest physical address. 534 */ 535 if (vbt_gpa != 0) { 536 if (vbt_gpa < opregion->gpa) { 537 warnx( 538 "%s: invalid guest VBT address 0x%16lx (OpRegion @ 0x%16lx)", 539 __func__, vbt_gpa, opregion->gpa); 540 return (-1); 541 } 542 543 ((struct igd_opregion *)opregion->gva)->mbox3.rvda = vbt_gpa - opregion->gpa; 544 } 545 546 pci_set_cfgdata32(pi, PCIR_ASLS_CTL, opregion->gpa); 547 548 return (set_pcir_handler(sc, PCIR_ASLS_CTL, 4, passthru_cfgread_emulate, 549 passthru_cfgwrite_emulate)); 550 } 551 552 static int 553 gvt_d_init(struct pci_devinst *const pi, nvlist_t *const nvl __unused) 554 { 555 int error; 556 557 if ((error = gvt_d_setup_gsm(pi)) != 0) { 558 warnx("%s: Unable to setup Graphics Stolen Memory", __func__); 559 goto done; 560 } 561 562 if ((error = gvt_d_setup_opregion(pi)) != 0) { 563 warnx("%s: Unable to setup OpRegion", __func__); 564 goto done; 565 } 566 567 done: 568 return (error); 569 } 570 571 static void 572 gvt_d_deinit(struct pci_devinst *const pi) 573 { 574 struct passthru_softc *sc; 575 struct passthru_mmio_mapping *opregion; 576 577 sc = pi->pi_arg; 578 579 opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION); 580 581 /* HVA is only set, if it's initialized */ 582 if (opregion->hva) 583 munmap((void *)opregion->hva, opregion->len); 584 } 585 586 static struct passthru_dev gvt_d_dev = { 587 .probe = gvt_d_probe, 588 .init = gvt_d_init, 589 .deinit = gvt_d_deinit, 590 }; 591 PASSTHRU_DEV_SET(gvt_d_dev); 592