xref: /freebsd/usr.sbin/bhyve/amd64/pci_gvt-d.c (revision e425e601b9781c3585fcee4adf29a295a6b2aa45)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
5  * Author: Corvin Köhne <c.koehne@beckhoff.com>
6  */
7 
8 #include <sys/types.h>
9 #include <sys/mman.h>
10 #include <sys/sysctl.h>
11 
12 #include <dev/pci/pcireg.h>
13 
14 #include <err.h>
15 #include <errno.h>
16 #include <fcntl.h>
17 #include <string.h>
18 #include <unistd.h>
19 
20 #include "amd64/e820.h"
21 #include "pci_gvt-d-opregion.h"
22 #include "pci_passthru.h"
23 
/* Binary size units; unsigned long so that e.g. 4 * GB does not overflow int. */
#define KB (1024UL)
#define MB (1024 * KB)
#define GB (1024 * MB)

/* Fallback in case _PATH_MEM has not been defined by an included header. */
#ifndef _PATH_MEM
#define _PATH_MEM "/dev/mem"
#endif

#define PCI_VENDOR_INTEL 0x8086

#define PCIR_BDSM 0x5C	   /* Base of Data Stolen Memory register */
#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */

#define PCIM_BDSM_GSM_ALIGNMENT \
	0x00100000 /* Graphics Stolen Memory is 1 MB aligned */

/* Indices handed to passthru_get_mmio() to select one of the mappings. */
#define GVT_D_MAP_GSM 0
#define GVT_D_MAP_OPREGION 1
42 
43 static int
gvt_d_probe(struct pci_devinst * const pi)44 gvt_d_probe(struct pci_devinst *const pi)
45 {
46 	struct passthru_softc *sc;
47 	uint16_t vendor;
48 	uint8_t class;
49 
50 	sc = pi->pi_arg;
51 
52 	vendor = pci_host_read_config(passthru_get_sel(sc), PCIR_VENDOR, 0x02);
53 	if (vendor != PCI_VENDOR_INTEL)
54 		return (ENXIO);
55 
56 	class = pci_host_read_config(passthru_get_sel(sc), PCIR_CLASS, 0x01);
57 	if (class != PCIC_DISPLAY)
58 		return (ENXIO);
59 
60 	return (0);
61 }
62 
63 static vm_paddr_t
gvt_d_alloc_mmio_memory(const vm_paddr_t host_address,const vm_paddr_t length,const vm_paddr_t alignment,const enum e820_memory_type type)64 gvt_d_alloc_mmio_memory(const vm_paddr_t host_address, const vm_paddr_t length,
65     const vm_paddr_t alignment, const enum e820_memory_type type)
66 {
67 	vm_paddr_t address;
68 
69 	/* Try to reuse host address. */
70 	address = e820_alloc(host_address, length, E820_ALIGNMENT_NONE, type,
71 	    E820_ALLOCATE_SPECIFIC);
72 	if (address != 0) {
73 		return (address);
74 	}
75 
76 	/*
77 	 * We're not able to reuse the host address. Fall back to the highest usable
78 	 * address below 4 GB.
79 	 */
80 	return (
81 	    e820_alloc(4 * GB, length, alignment, type, E820_ALLOCATE_HIGHEST));
82 }
83 
84 /*
85  * Note that the graphics stolen memory is somehow confusing. On the one hand
86  * the Intel Open Source HD Graphics Programmers' Reference Manual states that
87  * it's only GPU accessible. As the CPU can't access the area, the guest
88  * shouldn't need it. On the other hand, the Intel GOP driver refuses to work
89  * properly, if it's not set to a proper address.
90  *
91  * Intel itself maps it into the guest by EPT [1]. At the moment, we're not
92  * aware of any situation where this EPT mapping is required, so we don't do it
93  * yet.
94  *
95  * Intel also states that the Windows driver for Tiger Lake reads the address of
96  * the graphics stolen memory [2]. As the GVT-d code doesn't support Tiger Lake
97  * in its first implementation, we can't check how it behaves. We should keep an
98  * eye on it.
99  *
100  * [1]
101  * https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L655-L657
102  * [2]
103  * https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L626-L629
104  */
105 static int
gvt_d_setup_gsm(struct pci_devinst * const pi)106 gvt_d_setup_gsm(struct pci_devinst *const pi)
107 {
108 	struct passthru_softc *sc;
109 	struct passthru_mmio_mapping *gsm;
110 	size_t sysctl_len;
111 	uint32_t bdsm;
112 	int error;
113 
114 	sc = pi->pi_arg;
115 
116 	gsm = passthru_get_mmio(sc, GVT_D_MAP_GSM);
117 	if (gsm == NULL) {
118 		warnx("%s: Unable to access gsm", __func__);
119 		return (-1);
120 	}
121 
122 	sysctl_len = sizeof(gsm->hpa);
123 	error = sysctlbyname("hw.intel_graphics_stolen_base", &gsm->hpa,
124 	    &sysctl_len, NULL, 0);
125 	if (error) {
126 		warn("%s: Unable to get graphics stolen memory base",
127 		    __func__);
128 		return (-1);
129 	}
130 	sysctl_len = sizeof(gsm->len);
131 	error = sysctlbyname("hw.intel_graphics_stolen_size", &gsm->len,
132 	    &sysctl_len, NULL, 0);
133 	if (error) {
134 		warn("%s: Unable to get graphics stolen memory length",
135 		    __func__);
136 		return (-1);
137 	}
138 	gsm->hva = NULL; /* unused */
139 	gsm->gva = NULL; /* unused */
140 	gsm->gpa = gvt_d_alloc_mmio_memory(gsm->hpa, gsm->len,
141 	    PCIM_BDSM_GSM_ALIGNMENT, E820_TYPE_RESERVED);
142 	if (gsm->gpa == 0) {
143 		warnx(
144 		    "%s: Unable to add Graphics Stolen Memory to E820 table (hpa 0x%lx len 0x%lx)",
145 		    __func__, gsm->hpa, gsm->len);
146 		e820_dump_table();
147 		return (-1);
148 	}
149 	if (gsm->gpa != gsm->hpa) {
150 		/*
151 		 * ACRN source code implies that graphics driver for newer Intel
152 		 * platforms like Tiger Lake will read the Graphics Stolen Memory
153 		 * address from an MMIO register. We have three options to solve this
154 		 * issue:
155 		 *    1. Patch the value in the MMIO register
156 		 *       This could have unintended side effects. Without any
157 		 *       documentation how this register is used by the GPU, don't do
158 		 *       it.
159 		 *    2. Trap the MMIO register
160 		 *       It's not possible to trap a single MMIO register. We need to
161 		 *       trap a whole page. Trapping a bunch of MMIO register could
162 		 *       degrade the performance noticeably. We have to test it.
163 		 *    3. Use an 1:1 host to guest mapping
164 		 *       Maybe not always possible. As far as we know, no supported
165 		 *       platform requires a 1:1 mapping. For that reason, just log a
166 		 *       warning.
167 		 */
168 		warnx(
169 		    "Warning: Unable to reuse host address of Graphics Stolen Memory. GPU passthrough might not work properly.");
170 	}
171 
172 	bdsm = pci_host_read_config(passthru_get_sel(sc), PCIR_BDSM, 4);
173 	pci_set_cfgdata32(pi, PCIR_BDSM,
174 	    gsm->gpa | (bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1)));
175 
176 	return (set_pcir_handler(sc, PCIR_BDSM, 4, passthru_cfgread_emulate,
177 	    passthru_cfgwrite_emulate));
178 }
179 
180 static int
gvt_d_setup_opregion(struct pci_devinst * const pi)181 gvt_d_setup_opregion(struct pci_devinst *const pi)
182 {
183 	struct passthru_softc *sc;
184 	struct passthru_mmio_mapping *opregion;
185 	struct igd_opregion_header *header;
186 	uint64_t asls;
187 	int memfd;
188 
189 	sc = pi->pi_arg;
190 
191 	memfd = open(_PATH_MEM, O_RDONLY, 0);
192 	if (memfd < 0) {
193 		warn("%s: Failed to open %s", __func__, _PATH_MEM);
194 		return (-1);
195 	}
196 
197 	opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION);
198 	if (opregion == NULL) {
199 		warnx("%s: Unable to access opregion", __func__);
200 		close(memfd);
201 		return (-1);
202 	}
203 
204 	asls = pci_host_read_config(passthru_get_sel(sc), PCIR_ASLS_CTL, 4);
205 
206 	header = mmap(NULL, sizeof(*header), PROT_READ, MAP_SHARED, memfd,
207 	    asls);
208 	if (header == MAP_FAILED) {
209 		warn("%s: Unable to map OpRegion header", __func__);
210 		close(memfd);
211 		return (-1);
212 	}
213 	if (memcmp(header->sign, IGD_OPREGION_HEADER_SIGN,
214 	    sizeof(header->sign)) != 0) {
215 		warnx("%s: Invalid OpRegion signature", __func__);
216 		munmap(header, sizeof(*header));
217 		close(memfd);
218 		return (-1);
219 	}
220 
221 	opregion->hpa = asls;
222 	opregion->len = header->size * KB;
223 	munmap(header, sizeof(*header));
224 
225 	if (opregion->len != sizeof(struct igd_opregion)) {
226 		warnx("%s: Invalid OpRegion size of 0x%lx", __func__,
227 		    opregion->len);
228 		close(memfd);
229 		return (-1);
230 	}
231 
232 	opregion->hva = mmap(NULL, opregion->len, PROT_READ, MAP_SHARED, memfd,
233 	    opregion->hpa);
234 	if (opregion->hva == MAP_FAILED) {
235 		warn("%s: Unable to map host OpRegion", __func__);
236 		close(memfd);
237 		return (-1);
238 	}
239 	close(memfd);
240 
241 	opregion->gpa = gvt_d_alloc_mmio_memory(opregion->hpa, opregion->len,
242 	    E820_ALIGNMENT_NONE, E820_TYPE_NVS);
243 	if (opregion->gpa == 0) {
244 		warnx(
245 		    "%s: Unable to add OpRegion to E820 table (hpa 0x%lx len 0x%lx)",
246 		    __func__, opregion->hpa, opregion->len);
247 		e820_dump_table();
248 		return (-1);
249 	}
250 	opregion->gva = vm_map_gpa(pi->pi_vmctx, opregion->gpa, opregion->len);
251 	if (opregion->gva == NULL) {
252 		warnx("%s: Unable to map guest OpRegion", __func__);
253 		return (-1);
254 	}
255 	if (opregion->gpa != opregion->hpa) {
256 		/*
257 		 * A 1:1 host to guest mapping is not required but this could
258 		 * change in the future.
259 		 */
260 		warnx(
261 		    "Warning: Unable to reuse host address of OpRegion. GPU passthrough might not work properly.");
262 	}
263 
264 	memcpy(opregion->gva, opregion->hva, opregion->len);
265 
266 	pci_set_cfgdata32(pi, PCIR_ASLS_CTL, opregion->gpa);
267 
268 	return (set_pcir_handler(sc, PCIR_ASLS_CTL, 4, passthru_cfgread_emulate,
269 	    passthru_cfgwrite_emulate));
270 }
271 
272 static int
gvt_d_init(struct pci_devinst * const pi,nvlist_t * const nvl __unused)273 gvt_d_init(struct pci_devinst *const pi, nvlist_t *const nvl __unused)
274 {
275 	int error;
276 
277 	if ((error = gvt_d_setup_gsm(pi)) != 0) {
278 		warnx("%s: Unable to setup Graphics Stolen Memory", __func__);
279 		goto done;
280 	}
281 
282 	if ((error = gvt_d_setup_opregion(pi)) != 0) {
283 		warnx("%s: Unable to setup OpRegion", __func__);
284 		goto done;
285 	}
286 
287 done:
288 	return (error);
289 }
290 
291 static void
gvt_d_deinit(struct pci_devinst * const pi)292 gvt_d_deinit(struct pci_devinst *const pi)
293 {
294 	struct passthru_softc *sc;
295 	struct passthru_mmio_mapping *opregion;
296 
297 	sc = pi->pi_arg;
298 
299 	opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION);
300 
301 	/* HVA is only set, if it's initialized */
302 	if (opregion->hva)
303 		munmap((void *)opregion->hva, opregion->len);
304 }
305 
306 static struct passthru_dev gvt_d_dev = {
307 	.probe = gvt_d_probe,
308 	.init = gvt_d_init,
309 	.deinit = gvt_d_deinit,
310 };
311 PASSTHRU_DEV_SET(gvt_d_dev);
312