/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
 * Author: Corvin Köhne <c.koehne@beckhoff.com>
 */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/sysctl.h>

#include <dev/pci/pcireg.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#include "amd64/e820.h"
#include "pci_gvt-d-opregion.h"
#include "pci_passthru.h"

#define KB (1024UL)
#define MB (1024 * KB)
#define GB (1024 * MB)

#ifndef _PATH_MEM
#define _PATH_MEM "/dev/mem"
#endif

#define PCI_VENDOR_INTEL 0x8086

#define PCIR_BDSM 0x5C	   /* Base of Data Stolen Memory register */
#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */

#define PCIM_BDSM_GSM_ALIGNMENT \
	0x00100000 /* Graphics Stolen Memory is 1 MB aligned */

/* Indices into the passthru softc's MMIO mapping table. */
#define GVT_D_MAP_GSM 0
#define GVT_D_MAP_OPREGION 1
#define GVT_D_MAP_VBT 2

/*
 * Probe whether this passthru device is an Intel display controller; only
 * those are handled by the GVT-d device model.
 *
 * Returns 0 on a match, ENXIO otherwise.
 */
static int
gvt_d_probe(struct pci_devinst *const pi)
{
	struct passthru_softc *sc;
	uint16_t vendor;
	uint8_t class;

	sc = pi->pi_arg;

	vendor = pci_host_read_config(passthru_get_sel(sc), PCIR_VENDOR, 0x02);
	if (vendor != PCI_VENDOR_INTEL)
		return (ENXIO);

	class = pci_host_read_config(passthru_get_sel(sc), PCIR_CLASS, 0x01);
	if (class != PCIC_DISPLAY)
		return (ENXIO);

	return (0);
}

/*
 * Reserve a guest-physical range of the given length/type in the E820 map.
 * The host physical address is tried first (a 1:1 mapping keeps firmware and
 * drivers happiest); if that range is taken, fall back to the highest free
 * range below 4 GB honoring the requested alignment.
 *
 * Returns the guest physical address, or 0 if no space could be found.
 */
static vm_paddr_t
gvt_d_alloc_mmio_memory(const vm_paddr_t host_address, const vm_paddr_t length,
    const vm_paddr_t alignment, const enum e820_memory_type type)
{
	vm_paddr_t address;

	/* Try to reuse host address. */
	address = e820_alloc(host_address, length, E820_ALIGNMENT_NONE, type,
	    E820_ALLOCATE_SPECIFIC);
	if (address != 0) {
		return (address);
	}

	/*
	 * We're not able to reuse the host address. Fall back to the highest
	 * usable address below 4 GB.
	 */
	return (
	    e820_alloc(4 * GB, length, alignment, type, E820_ALLOCATE_HIGHEST));
}

/*
 * Note that the graphics stolen memory is somehow confusing. On the one hand
 * the Intel Open Source HD Graphics Programmers' Reference Manual states that
 * it's only GPU accessible. As the CPU can't access the area, the guest
 * shouldn't need it. On the other hand, the Intel GOP driver refuses to work
 * properly, if it's not set to a proper address.
 *
 * Intel itself maps it into the guest by EPT [1]. At the moment, we're not
 * aware of any situation where this EPT mapping is required, so we don't do it
 * yet.
 *
 * Intel also states that the Windows driver for Tiger Lake reads the address of
 * the graphics stolen memory [2]. As the GVT-d code doesn't support Tiger Lake
 * in its first implementation, we can't check how it behaves. We should keep an
 * eye on it.
 *
 * [1]
 * https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L655-L657
 * [2]
 * https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L626-L629
 */
static int
gvt_d_setup_gsm(struct pci_devinst *const pi)
{
	struct passthru_softc *sc;
	struct passthru_mmio_mapping *gsm;
	size_t sysctl_len;
	uint32_t bdsm;
	int error;

	sc = pi->pi_arg;

	gsm = passthru_get_mmio(sc, GVT_D_MAP_GSM);
	if (gsm == NULL) {
		warnx("%s: Unable to access gsm", __func__);
		return (-1);
	}

	/* The host kernel exposes GSM base/size via sysctl. */
	sysctl_len = sizeof(gsm->hpa);
	error = sysctlbyname("hw.intel_graphics_stolen_base", &gsm->hpa,
	    &sysctl_len, NULL, 0);
	if (error) {
		warn("%s: Unable to get graphics stolen memory base",
		    __func__);
		return (-1);
	}
	sysctl_len = sizeof(gsm->len);
	error = sysctlbyname("hw.intel_graphics_stolen_size", &gsm->len,
	    &sysctl_len, NULL, 0);
	if (error) {
		warn("%s: Unable to get graphics stolen memory length",
		    __func__);
		return (-1);
	}
	gsm->hva = NULL; /* unused */
	gsm->gva = NULL; /* unused */
	gsm->gpa = gvt_d_alloc_mmio_memory(gsm->hpa, gsm->len,
	    PCIM_BDSM_GSM_ALIGNMENT, E820_TYPE_RESERVED);
	if (gsm->gpa == 0) {
		warnx(
		    "%s: Unable to add Graphics Stolen Memory to E820 table (hpa 0x%lx len 0x%lx)",
		    __func__, gsm->hpa, gsm->len);
		e820_dump_table();
		return (-1);
	}
	if (gsm->gpa != gsm->hpa) {
		/*
		 * ACRN source code implies that graphics driver for newer Intel
		 * platforms like Tiger Lake will read the Graphics Stolen Memory
		 * address from an MMIO register. We have three options to solve this
		 * issue:
		 *    1. Patch the value in the MMIO register
		 *       This could have unintended side effects. Without any
		 *       documentation how this register is used by the GPU, don't do
		 *       it.
		 *    2. Trap the MMIO register
		 *       It's not possible to trap a single MMIO register. We need to
		 *       trap a whole page. Trapping a bunch of MMIO register could
		 *       degrade the performance noticeably. We have to test it.
		 *    3. Use an 1:1 host to guest mapping
		 *       Maybe not always possible. As far as we know, no supported
		 *       platform requires a 1:1 mapping. For that reason, just log a
		 *       warning.
		 */
		warnx(
		    "Warning: Unable to reuse host address of Graphics Stolen Memory. GPU passthrough might not work properly.");
	}

	/*
	 * Emulate the BDSM register: keep the host's low (flag) bits but
	 * report our guest GSM base to the guest.
	 */
	bdsm = pci_host_read_config(passthru_get_sel(sc), PCIR_BDSM, 4);
	pci_set_cfgdata32(pi, PCIR_BDSM,
	    gsm->gpa | (bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1)));

	return (set_pcir_handler(sc, PCIR_BDSM, 4, passthru_cfgread_emulate,
	    passthru_cfgwrite_emulate));
}

/*
 * Map the host Video BIOS Table (VBT) and copy it into a freshly allocated
 * guest-physical range.
 *
 * On success, *vbt_gpa receives the guest physical address of the copy and 0
 * is returned; the host mapping (vbt->hva) stays alive until gvt_d_deinit.
 * Returns -1 on failure.
 */
static int
gvt_d_setup_vbt(struct pci_devinst *const pi, int memfd, uint64_t vbt_hpa,
    uint64_t vbt_len, vm_paddr_t *vbt_gpa)
{
	struct passthru_softc *sc;
	struct passthru_mmio_mapping *vbt;

	sc = pi->pi_arg;

	vbt = passthru_get_mmio(sc, GVT_D_MAP_VBT);
	if (vbt == NULL) {
		warnx("%s: Unable to access VBT", __func__);
		return (-1);
	}

	vbt->hpa = vbt_hpa;
	vbt->len = vbt_len;

	vbt->hva = mmap(NULL, vbt->len, PROT_READ, MAP_SHARED, memfd, vbt->hpa);
	if (vbt->hva == MAP_FAILED) {
		warn("%s: Unable to map VBT", __func__);
		return (-1);
	}

	vbt->gpa = gvt_d_alloc_mmio_memory(vbt->hpa, vbt->len,
	    E820_ALIGNMENT_NONE, E820_TYPE_NVS);
	if (vbt->gpa == 0) {
		warnx(
		    "%s: Unable to add VBT to E820 table (hpa 0x%lx len 0x%lx)",
		    __func__, vbt->hpa, vbt->len);
		munmap(vbt->hva, vbt->len);
		e820_dump_table();
		return (-1);
	}
	vbt->gva = vm_map_gpa(pi->pi_vmctx, vbt->gpa, vbt->len);
	if (vbt->gva == NULL) {
		warnx("%s: Unable to map guest VBT", __func__);
		munmap(vbt->hva, vbt->len);
		return (-1);
	}

	if (vbt->gpa != vbt->hpa) {
		/*
		 * A 1:1 host to guest mapping is not required but this could
		 * change in the future.
		 */
		warnx(
		    "Warning: Unable to reuse host address of VBT. GPU passthrough might not work properly.");
	}

	memcpy(vbt->gva, vbt->hva, vbt->len);

	/*
	 * Return the guest physical address. It's used to patch the OpRegion
	 * properly.
	 */
	*vbt_gpa = vbt->gpa;

	return (0);
}

/*
 * Copy the host IGD OpRegion into the guest and emulate the ASLS register so
 * the guest graphics driver finds it. For OpRegion v2.1+ with an external
 * VBT (RVDA != 0), the VBT is copied as well and the guest copy's RVDA is
 * patched to the new relative offset.
 *
 * Returns 0 on success, -1 (or the gvt_d_setup_vbt error) on failure.
 */
static int
gvt_d_setup_opregion(struct pci_devinst *const pi)
{
	struct passthru_softc *sc;
	struct passthru_mmio_mapping *opregion;
	struct igd_opregion *opregion_ptr;
	struct igd_opregion_header *header;
	vm_paddr_t vbt_gpa = 0;
	vm_paddr_t vbt_hpa;
	uint64_t asls;
	int error = 0;
	int memfd;

	sc = pi->pi_arg;

	memfd = open(_PATH_MEM, O_RDONLY, 0);
	if (memfd < 0) {
		warn("%s: Failed to open %s", __func__, _PATH_MEM);
		return (-1);
	}

	opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION);
	if (opregion == NULL) {
		warnx("%s: Unable to access opregion", __func__);
		close(memfd);
		return (-1);
	}

	/* ASLS holds the host physical address of the OpRegion. */
	asls = pci_host_read_config(passthru_get_sel(sc), PCIR_ASLS_CTL, 4);

	/* Map just the header first to validate it and learn the size. */
	header = mmap(NULL, sizeof(*header), PROT_READ, MAP_SHARED, memfd,
	    asls);
	if (header == MAP_FAILED) {
		warn("%s: Unable to map OpRegion header", __func__);
		close(memfd);
		return (-1);
	}
	if (memcmp(header->sign, IGD_OPREGION_HEADER_SIGN,
	    sizeof(header->sign)) != 0) {
		warnx("%s: Invalid OpRegion signature", __func__);
		munmap(header, sizeof(*header));
		close(memfd);
		return (-1);
	}

	opregion->hpa = asls;
	opregion->len = header->size * KB; /* header->size is in KB units */
	munmap(header, sizeof(*header));

	if (opregion->len != sizeof(struct igd_opregion)) {
		warnx("%s: Invalid OpRegion size of 0x%lx", __func__,
		    opregion->len);
		close(memfd);
		return (-1);
	}

	opregion->hva = mmap(NULL, opregion->len, PROT_READ, MAP_SHARED, memfd,
	    opregion->hpa);
	if (opregion->hva == MAP_FAILED) {
		warn("%s: Unable to map host OpRegion", __func__);
		close(memfd);
		return (-1);
	}

	opregion_ptr = (struct igd_opregion *)opregion->hva;
	if (opregion_ptr->mbox3.rvda != 0) {
		/*
		 * OpRegion v2.0 contains a physical address to the VBT. This
		 * address is useless in a guest environment. It's possible to
		 * patch that but we don't support that yet. So, the only thing
		 * we can do is give up.
		 */
		if (opregion_ptr->header.over == 0x02000000) {
			warnx(
			    "%s: VBT lays outside OpRegion. That's not yet supported for a version 2.0 OpRegion",
			    __func__);
			close(memfd);
			return (-1);
		}
		/* v2.1+: RVDA is an offset relative to the OpRegion base. */
		vbt_hpa = opregion->hpa + opregion_ptr->mbox3.rvda;
		if (vbt_hpa < opregion->hpa) {
			warnx(
			    "%s: overflow when calculating VBT address (OpRegion @ 0x%lx, RVDA = 0x%lx)",
			    __func__, opregion->hpa, opregion_ptr->mbox3.rvda);
			close(memfd);
			return (-1);
		}

		if ((error = gvt_d_setup_vbt(pi, memfd, vbt_hpa,
		    opregion_ptr->mbox3.rvds, &vbt_gpa)) != 0) {
			close(memfd);
			return (error);
		}
	}

	close(memfd);

	opregion->gpa = gvt_d_alloc_mmio_memory(opregion->hpa, opregion->len,
	    E820_ALIGNMENT_NONE, E820_TYPE_NVS);
	if (opregion->gpa == 0) {
		warnx(
		    "%s: Unable to add OpRegion to E820 table (hpa 0x%lx len 0x%lx)",
		    __func__, opregion->hpa, opregion->len);
		e820_dump_table();
		return (-1);
	}
	opregion->gva = vm_map_gpa(pi->pi_vmctx, opregion->gpa, opregion->len);
	if (opregion->gva == NULL) {
		warnx("%s: Unable to map guest OpRegion", __func__);
		return (-1);
	}
	if (opregion->gpa != opregion->hpa) {
		/*
		 * A 1:1 host to guest mapping is not required but this could
		 * change in the future.
		 */
		warnx(
		    "Warning: Unable to reuse host address of OpRegion. GPU passthrough might not work properly.");
	}

	memcpy(opregion->gva, opregion->hva, opregion->len);

	/*
	 * Patch the VBT address to match our guest physical address.
	 */
	if (vbt_gpa != 0) {
		if (vbt_gpa < opregion->gpa) {
			warnx(
			    "%s: invalid guest VBT address 0x%16lx (OpRegion @ 0x%16lx)",
			    __func__, vbt_gpa, opregion->gpa);
			return (-1);
		}

		((struct igd_opregion *)opregion->gva)->mbox3.rvda = vbt_gpa - opregion->gpa;
	}

	pci_set_cfgdata32(pi, PCIR_ASLS_CTL, opregion->gpa);

	return (set_pcir_handler(sc, PCIR_ASLS_CTL, 4, passthru_cfgread_emulate,
	    passthru_cfgwrite_emulate));
}

static int
gvt_d_init(struct pci_devinst *const pi, nvlist_t *const nvl __unused)
{
	int error;

	if ((error = gvt_d_setup_gsm(pi)) != 0) {
		warnx("%s: Unable to setup Graphics Stolen Memory", __func__);
		goto done;
	}

	if ((error = gvt_d_setup_opregion(pi)) != 0) {
		warnx("%s: Unable to setup OpRegion", __func__);
		goto done;
	}

done:
	return (error);
}

/*
 * Tear down the host mappings created during init. Both the OpRegion and the
 * VBT host mappings are kept alive for the lifetime of the device, so both
 * have to be released here (the original code leaked the VBT mapping).
 */
static void
gvt_d_deinit(struct pci_devinst *const pi)
{
	struct passthru_softc *sc;
	struct passthru_mmio_mapping *opregion;
	struct passthru_mmio_mapping *vbt;

	sc = pi->pi_arg;

	/* HVA is only set, if it's initialized */
	vbt = passthru_get_mmio(sc, GVT_D_MAP_VBT);
	if (vbt != NULL && vbt->hva != NULL)
		munmap((void *)vbt->hva, vbt->len);

	opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION);
	if (opregion != NULL && opregion->hva != NULL)
		munmap((void *)opregion->hva, opregion->len);
}

static struct passthru_dev gvt_d_dev = {
	.probe = gvt_d_probe,
	.init = gvt_d_init,
	.deinit = gvt_d_deinit,
};
PASSTHRU_DEV_SET(gvt_d_dev);