xref: /freebsd/usr.sbin/bhyve/amd64/pci_gvt-d.c (revision b2221534a7bc16ea879c9fbb1a1fe4b337d2623b)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
 * Author: Corvin Köhne <c.koehne@beckhoff.com>
 */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/sysctl.h>

#include <dev/pci/pcireg.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#include "amd64/e820.h"
#include "pci_gvt-d-opregion.h"
#include "pci_passthru.h"

#define KB (1024UL)
#define MB (1024 * KB)
#define GB (1024 * MB)

#ifndef _PATH_MEM
#define _PATH_MEM "/dev/mem"
#endif

#define PCI_VENDOR_INTEL 0x8086

#define PCIR_BDSM 0x5C	   /* Base of Data Stolen Memory register */
#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */

#define PCIM_BDSM_GSM_ALIGNMENT \
	0x00100000 /* Graphics Stolen Memory is 1 MB aligned */
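
/* Slots in the passthru MMIO mapping table used by this device model. */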
#define GVT_D_MAP_GSM 0
#define GVT_D_MAP_OPREGION 1
#define GVT_D_MAP_VBT 2
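
/*
 * Probe matches Intel display-class devices only; everything else is left to
 * the generic passthru code.
 */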
static int
gvt_d_probe(struct pci_devinst *const pi)
{
	struct passthru_softc *sc;
	uint16_t vendor;
	uint8_t class;

	sc = pi->pi_arg;

	vendor = pci_host_read_config(passthru_get_sel(sc), PCIR_VENDOR, 0x02);
	if (vendor != PCI_VENDOR_INTEL)
		return (ENXIO);

	class = pci_host_read_config(passthru_get_sel(sc), PCIR_CLASS, 0x01);
	if (class != PCIC_DISPLAY)
		return (ENXIO);

	return (0);
}

static vm_paddr_t
gvt_d_alloc_mmio_memory(const vm_paddr_t host_address, const vm_paddr_t length,
    const vm_paddr_t alignment, const enum e820_memory_type type)
{
	vm_paddr_t address;

	/* Try to reuse the host address. */
	address = e820_alloc(host_address, length, E820_ALIGNMENT_NONE, type,
	    E820_ALLOCATE_SPECIFIC);
	if (address != 0) {
		return (address);
	}

	/*
	 * We're not able to reuse the host address. Fall back to the highest
	 * usable address below 4 GB.
	 */
	return (
	    e820_alloc(4 * GB, length, alignment, type, E820_ALLOCATE_HIGHEST));
}

/*
 * Note that the graphics stolen memory is somewhat confusing. On the one hand
 * the Intel Open Source HD Graphics Programmers' Reference Manual states that
 * it's only GPU accessible. As the CPU can't access the area, the guest
 * shouldn't need it. On the other hand, the Intel GOP driver refuses to work
 * properly if it's not set to a proper address.
 *
 * Intel itself maps it into the guest by EPT [1]. At the moment, we're not
 * aware of any situation where this EPT mapping is required, so we don't do it
 * yet.
 *
 * Intel also states that the Windows driver for Tiger Lake reads the address
 * of the graphics stolen memory [2]. As the GVT-d code doesn't support Tiger
 * Lake in its first implementation, we can't check how it behaves. We should
 * keep an eye on it.
 *
 * [1]
 * https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L655-L657
 * [2]
 * https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L626-L629
 */
static int
gvt_d_setup_gsm(struct pci_devinst *const pi)
{
	struct passthru_softc *sc;
	struct passthru_mmio_mapping *gsm;
	size_t sysctl_len;
	uint32_t bdsm;
	int error;

	sc = pi->pi_arg;

	gsm = passthru_get_mmio(sc, GVT_D_MAP_GSM);
	if (gsm == NULL) {
		warnx("%s: Unable to access gsm", __func__);
		return (-1);
	}
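
	/*
	 * The host kernel exports the location of the graphics stolen memory
	 * via sysctl; reuse those values instead of deriving them from the
	 * hardware ourselves.
	 */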
	sysctl_len = sizeof(gsm->hpa);
	error = sysctlbyname("hw.intel_graphics_stolen_base", &gsm->hpa,
	    &sysctl_len, NULL, 0);
	if (error) {
		warn("%s: Unable to get graphics stolen memory base",
		    __func__);
		return (-1);
	}
	sysctl_len = sizeof(gsm->len);
	error = sysctlbyname("hw.intel_graphics_stolen_size", &gsm->len,
	    &sysctl_len, NULL, 0);
	if (error) {
		warn("%s: Unable to get graphics stolen memory length",
		    __func__);
		return (-1);
	}
	gsm->hva = NULL; /* unused */
	gsm->gva = NULL; /* unused */
	gsm->gpa = gvt_d_alloc_mmio_memory(gsm->hpa, gsm->len,
	    PCIM_BDSM_GSM_ALIGNMENT, E820_TYPE_RESERVED);
	if (gsm->gpa == 0) {
		warnx(
		    "%s: Unable to add Graphics Stolen Memory to E820 table (hpa 0x%lx len 0x%lx)",
		    __func__, gsm->hpa, gsm->len);
		e820_dump_table();
		return (-1);
	}
	if (gsm->gpa != gsm->hpa) {
		/*
		 * The ACRN source code implies that the graphics driver for
		 * newer Intel platforms like Tiger Lake will read the
		 * Graphics Stolen Memory address from an MMIO register. We
		 * have three options to solve this issue:
		 *    1. Patch the value in the MMIO register
		 *       This could have unintended side effects. Without any
		 *       documentation on how this register is used by the
		 *       GPU, don't do it.
		 *    2. Trap the MMIO register
		 *       It's not possible to trap a single MMIO register; we
		 *       would have to trap a whole page. Trapping a bunch of
		 *       MMIO registers could degrade performance noticeably,
		 *       so this would have to be tested.
		 *    3. Use a 1:1 host to guest mapping
		 *       This may not always be possible. As far as we know,
		 *       no supported platform requires a 1:1 mapping. For
		 *       that reason, just log a warning.
		 */
		warnx(
		    "Warning: Unable to reuse host address of Graphics Stolen Memory. GPU passthrough might not work properly.");
	}
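
	/*
	 * Patch BDSM so the guest sees its copy of the stolen memory,
	 * preserving the host's bits below the 1 MB alignment boundary, and
	 * emulate further config space accesses to the register.
	 */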
	bdsm = pci_host_read_config(passthru_get_sel(sc), PCIR_BDSM, 4);
	pci_set_cfgdata32(pi, PCIR_BDSM,
	    gsm->gpa | (bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1)));

	return (set_pcir_handler(sc, PCIR_BDSM, 4, passthru_cfgread_emulate,
	    passthru_cfgwrite_emulate));
}
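
/*
 * Map the host's Video BIOS Table (VBT) and copy it into a newly allocated
 * guest memory region. The caller patches the OpRegion to point at the guest
 * copy.
 */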
181 static int
gvt_d_setup_vbt(struct pci_devinst * const pi,int memfd,uint64_t vbt_hpa,uint64_t vbt_len,vm_paddr_t * vbt_gpa)182 gvt_d_setup_vbt(struct pci_devinst *const pi, int memfd, uint64_t vbt_hpa,
183     uint64_t vbt_len, vm_paddr_t *vbt_gpa)
184 {
185 	struct passthru_softc *sc;
186 	struct passthru_mmio_mapping *vbt;
187 
188 	sc = pi->pi_arg;
189 
190 	vbt = passthru_get_mmio(sc, GVT_D_MAP_VBT);
191 	if (vbt == NULL) {
192 		warnx("%s: Unable to access VBT", __func__);
193 		return (-1);
194 	}
195 
196 	vbt->hpa = vbt_hpa;
197 	vbt->len = vbt_len;
198 
199 	vbt->hva = mmap(NULL, vbt->len, PROT_READ, MAP_SHARED, memfd, vbt->hpa);
200 	if (vbt->hva == MAP_FAILED) {
201 		warn("%s: Unable to map VBT", __func__);
202 		return (-1);
203 	}
204 
205 	vbt->gpa = gvt_d_alloc_mmio_memory(vbt->hpa, vbt->len,
206 	    E820_ALIGNMENT_NONE, E820_TYPE_NVS);
207 	if (vbt->gpa == 0) {
208 		warnx(
209 		    "%s: Unable to add VBT to E820 table (hpa 0x%lx len 0x%lx)",
210 		    __func__, vbt->hpa, vbt->len);
211 		munmap(vbt->hva, vbt->len);
212 		e820_dump_table();
213 		return (-1);
214 	}
215 	vbt->gva = vm_map_gpa(pi->pi_vmctx, vbt->gpa, vbt->len);
216 	if (vbt->gva == NULL) {
217 		warnx("%s: Unable to map guest VBT", __func__);
218 		munmap(vbt->hva, vbt->len);
219 		return (-1);
220 	}
221 
222 	if (vbt->gpa != vbt->hpa) {
223 		/*
224 		 * A 1:1 host to guest mapping is not required but this could
225 		 * change in the future.
226 		 */
227 		warnx(
228 		    "Warning: Unable to reuse host address of VBT. GPU passthrough might not work properly.");
229 	}
230 
231 	memcpy(vbt->gva, vbt->hva, vbt->len);
232 
233 	/*
234 	 * Return the guest physical address. It's used to patch the OpRegion
235 	 * properly.
236 	 */
237 	*vbt_gpa = vbt->gpa;
238 
239 	return (0);
240 }
241 
242 static int
gvt_d_setup_opregion(struct pci_devinst * const pi)243 gvt_d_setup_opregion(struct pci_devinst *const pi)
244 {
245 	struct passthru_softc *sc;
246 	struct passthru_mmio_mapping *opregion;
247 	struct igd_opregion *opregion_ptr;
248 	struct igd_opregion_header *header;
249 	vm_paddr_t vbt_gpa = 0;
250 	vm_paddr_t vbt_hpa;
251 	uint64_t asls;
252 	int error = 0;
253 	int memfd;
254 
255 	sc = pi->pi_arg;
256 
257 	memfd = open(_PATH_MEM, O_RDONLY, 0);
258 	if (memfd < 0) {
259 		warn("%s: Failed to open %s", __func__, _PATH_MEM);
260 		return (-1);
261 	}
262 
263 	opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION);
264 	if (opregion == NULL) {
265 		warnx("%s: Unable to access opregion", __func__);
266 		close(memfd);
267 		return (-1);
268 	}
269 
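
	/* ASLS holds the host physical address of the OpRegion. */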
	asls = pci_host_read_config(passthru_get_sel(sc), PCIR_ASLS_CTL, 4);

	header = mmap(NULL, sizeof(*header), PROT_READ, MAP_SHARED, memfd,
	    asls);
	if (header == MAP_FAILED) {
		warn("%s: Unable to map OpRegion header", __func__);
		close(memfd);
		return (-1);
	}
	if (memcmp(header->sign, IGD_OPREGION_HEADER_SIGN,
	    sizeof(header->sign)) != 0) {
		warnx("%s: Invalid OpRegion signature", __func__);
		munmap(header, sizeof(*header));
		close(memfd);
		return (-1);
	}
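
	/* The OpRegion header reports the total size in KB units. */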
	opregion->hpa = asls;
	opregion->len = header->size * KB;
	munmap(header, sizeof(*header));

	if (opregion->len != sizeof(struct igd_opregion)) {
		warnx("%s: Invalid OpRegion size of 0x%lx", __func__,
		    opregion->len);
		close(memfd);
		return (-1);
	}

	opregion->hva = mmap(NULL, opregion->len, PROT_READ, MAP_SHARED, memfd,
	    opregion->hpa);
	if (opregion->hva == MAP_FAILED) {
		warn("%s: Unable to map host OpRegion", __func__);
		close(memfd);
		return (-1);
	}

	opregion_ptr = (struct igd_opregion *)opregion->hva;
	if (opregion_ptr->mbox3.rvda != 0) {
		/*
		 * On a version 2.0 OpRegion, RVDA holds an absolute physical
		 * address of the VBT. This address is useless in a guest
		 * environment. It would be possible to patch it, but we don't
		 * support that yet, so the only thing we can do is give up.
		 * On newer versions, RVDA is an offset relative to the
		 * OpRegion base, which we can relocate.
		 */
		if (opregion_ptr->header.over == 0x02000000) {
			warnx(
			    "%s: VBT lies outside OpRegion. That's not yet supported for a version 2.0 OpRegion",
			    __func__);
			close(memfd);
			return (-1);
		}
		vbt_hpa = opregion->hpa + opregion_ptr->mbox3.rvda;
		if (vbt_hpa < opregion->hpa) {
			warnx(
			    "%s: overflow when calculating VBT address (OpRegion @ 0x%lx, RVDA = 0x%lx)",
			    __func__, opregion->hpa, opregion_ptr->mbox3.rvda);
			close(memfd);
			return (-1);
		}

		if ((error = gvt_d_setup_vbt(pi, memfd, vbt_hpa,
		    opregion_ptr->mbox3.rvds, &vbt_gpa)) != 0) {
			close(memfd);
			return (error);
		}
	}

	close(memfd);
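
	/* Reserve a guest E820 region for the OpRegion and map it. */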
	opregion->gpa = gvt_d_alloc_mmio_memory(opregion->hpa, opregion->len,
	    E820_ALIGNMENT_NONE, E820_TYPE_NVS);
	if (opregion->gpa == 0) {
		warnx(
		    "%s: Unable to add OpRegion to E820 table (hpa 0x%lx len 0x%lx)",
		    __func__, opregion->hpa, opregion->len);
		e820_dump_table();
		return (-1);
	}
	opregion->gva = vm_map_gpa(pi->pi_vmctx, opregion->gpa, opregion->len);
	if (opregion->gva == NULL) {
		warnx("%s: Unable to map guest OpRegion", __func__);
		return (-1);
	}
	if (opregion->gpa != opregion->hpa) {
		/*
		 * A 1:1 host to guest mapping is not required but this could
		 * change in the future.
		 */
		warnx(
		    "Warning: Unable to reuse host address of OpRegion. GPU passthrough might not work properly.");
	}

	memcpy(opregion->gva, opregion->hva, opregion->len);

	/*
	 * Patch the VBT address to match our guest physical address.
	 */
	if (vbt_gpa != 0) {
		if (vbt_gpa < opregion->gpa) {
			warnx(
			    "%s: invalid guest VBT address 0x%16lx (OpRegion @ 0x%16lx)",
			    __func__, vbt_gpa, opregion->gpa);
			return (-1);
		}

		((struct igd_opregion *)opregion->gva)->mbox3.rvda = vbt_gpa -
		    opregion->gpa;
	}

	pci_set_cfgdata32(pi, PCIR_ASLS_CTL, opregion->gpa);

	return (set_pcir_handler(sc, PCIR_ASLS_CTL, 4, passthru_cfgread_emulate,
	    passthru_cfgwrite_emulate));
}

static int
gvt_d_init(struct pci_devinst *const pi, nvlist_t *const nvl __unused)
{
	int error;

	if ((error = gvt_d_setup_gsm(pi)) != 0) {
		warnx("%s: Unable to set up Graphics Stolen Memory", __func__);
		goto done;
	}

	if ((error = gvt_d_setup_opregion(pi)) != 0) {
		warnx("%s: Unable to set up OpRegion", __func__);
		goto done;
	}

done:
	return (error);
}

static void
gvt_d_deinit(struct pci_devinst *const pi)
{
	struct passthru_softc *sc;
	struct passthru_mmio_mapping *opregion;

	sc = pi->pi_arg;

	opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION);

	/* The HVA is only set if it was initialized. */
	if (opregion->hva)
		munmap((void *)opregion->hva, opregion->len);
}
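
/*
 * Glue for the generic passthru framework: probe matches supported devices,
 * init and deinit set up and tear down the GVT-d specific state.
 */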
static struct passthru_dev gvt_d_dev = {
	.probe = gvt_d_probe,
	.init = gvt_d_init,
	.deinit = gvt_d_deinit,
};
PASSTHRU_DEV_SET(gvt_d_dev);