xref: /freebsd/usr.sbin/bhyve/amd64/e820.c (revision 87b759f0fa1f7554d50ce640c40138512bbded44)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
5  * Author: Corvin Köhne <c.koehne@beckhoff.com>
6  */
7 
8 #include <sys/types.h>
9 #include <sys/queue.h>
10 
11 #include <machine/vmm.h>
12 
13 #include <assert.h>
14 #include <err.h>
15 #include <errno.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 
20 #include "debug.h"
21 #include "e820.h"
22 #include "qemu_fwcfg.h"
23 
24 /*
25  * E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it
26  * works on physical addresses. If vm_paddr_t is larger than uint64_t E820 can't
27  * hold all possible physical addresses and we can get into trouble.
28  */
29 static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t),
30     "Unable to represent physical memory by E820 table");
31 
32 #define E820_FWCFG_FILE_NAME "etc/e820"
33 
34 #define KB (1024UL)
35 #define MB (1024 * KB)
36 #define GB (1024 * MB)
37 
/*
 * Fixed E820 memory holes:
 * [    A0000,    C0000) VGA
 * [    C0000,   100000) ROM
 */
43 #define E820_VGA_MEM_BASE 0xA0000
44 #define E820_VGA_MEM_END 0xC0000
45 #define E820_ROM_MEM_BASE 0xC0000
46 #define E820_ROM_MEM_END 0x100000
47 
48 struct e820_element {
49 	TAILQ_ENTRY(e820_element) chain;
50 	uint64_t base;
51 	uint64_t end;
52 	enum e820_memory_type type;
53 };
54 static TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER(
55     e820_table);
56 
57 static struct e820_element *
58 e820_element_alloc(uint64_t base, uint64_t end, enum e820_memory_type type)
59 {
60 	struct e820_element *element;
61 
62 	element = calloc(1, sizeof(*element));
63 	if (element == NULL) {
64 		return (NULL);
65 	}
66 
67 	element->base = base;
68 	element->end = end;
69 	element->type = type;
70 
71 	return (element);
72 }
73 
74 static const char *
75 e820_get_type_name(const enum e820_memory_type type)
76 {
77 	switch (type) {
78 	case E820_TYPE_MEMORY:
79 		return ("RAM");
80 	case E820_TYPE_RESERVED:
81 		return ("Reserved");
82 	case E820_TYPE_ACPI:
83 		return ("ACPI");
84 	case E820_TYPE_NVS:
85 		return ("NVS");
86 	default:
87 		return ("Unknown");
88 	}
89 }
90 
91 void
92 e820_dump_table(void)
93 {
94 	struct e820_element *element;
95 	uint64_t i;
96 
97 	EPRINTLN("E820 map:");
98 
99 	i = 0;
100 	TAILQ_FOREACH(element, &e820_table, chain) {
101 		EPRINTLN("  (%4lu) [%16lx, %16lx] %s", i,
102 		    element->base, element->end,
103 		    e820_get_type_name(element->type));
104 
105 		++i;
106 	}
107 }
108 
109 static struct qemu_fwcfg_item *
110 e820_get_fwcfg_item(void)
111 {
112 	struct qemu_fwcfg_item *fwcfg_item;
113 	struct e820_element *element;
114 	struct e820_entry *entries;
115 	int count, i;
116 
117 	count = 0;
118 	TAILQ_FOREACH(element, &e820_table, chain) {
119 		++count;
120 	}
121 	if (count == 0) {
122 		warnx("%s: E820 table empty", __func__);
123 		return (NULL);
124 	}
125 
126 	fwcfg_item = calloc(1, sizeof(struct qemu_fwcfg_item));
127 	if (fwcfg_item == NULL) {
128 		return (NULL);
129 	}
130 
131 	fwcfg_item->size = count * sizeof(struct e820_entry);
132 	fwcfg_item->data = calloc(count, sizeof(struct e820_entry));
133 	if (fwcfg_item->data == NULL) {
134 		free(fwcfg_item);
135 		return (NULL);
136 	}
137 
138 	i = 0;
139 	entries = (struct e820_entry *)fwcfg_item->data;
140 	TAILQ_FOREACH(element, &e820_table, chain) {
141 		struct e820_entry *entry = &entries[i];
142 
143 		entry->base = element->base;
144 		entry->length = element->end - element->base;
145 		entry->type = element->type;
146 
147 		++i;
148 	}
149 
150 	return (fwcfg_item);
151 }
152 
153 static int
154 e820_add_entry(const uint64_t base, const uint64_t end,
155     const enum e820_memory_type type)
156 {
157 	struct e820_element *new_element;
158 	struct e820_element *element;
159 	struct e820_element *ram_element;
160 
161 	assert(end >= base);
162 
163 	new_element = e820_element_alloc(base, end, type);
164 	if (new_element == NULL) {
165 		return (ENOMEM);
166 	}
167 
168 	/*
169 	 * E820 table should always be sorted in ascending order. Therefore,
170 	 * search for a range whose end is larger than the base parameter.
171 	 */
172 	TAILQ_FOREACH(element, &e820_table, chain) {
173 		if (element->end > base) {
174 			break;
175 		}
176 	}
177 
178 	/*
179 	 * System memory requires special handling.
180 	 */
181 	if (type == E820_TYPE_MEMORY) {
182 		/*
183 		 * base is larger than of any existing element. Add new system
184 		 * memory at the end of the table.
185 		 */
186 		if (element == NULL) {
187 			TAILQ_INSERT_TAIL(&e820_table, new_element, chain);
188 			return (0);
189 		}
190 
191 		/*
192 		 * System memory shouldn't overlap with any existing element.
193 		 */
194 		assert(end >= element->base);
195 
196 		TAILQ_INSERT_BEFORE(element, new_element, chain);
197 
198 		return (0);
199 	}
200 
201 	/*
202 	 * If some one tries to allocate a specific address, it could happen, that
203 	 * this address is not allocatable. Therefore, do some checks. If the
204 	 * address is not allocatable, don't panic. The user may have a fallback and
205 	 * tries to allocate another address. This is true for the GVT-d emulation
206 	 * which tries to reuse the host address of the graphics stolen memory and
207 	 * falls back to allocating the highest address below 4 GB.
208 	 */
209 	if (element == NULL || element->type != E820_TYPE_MEMORY ||
210 	    (base < element->base || end > element->end))
211 		return (ENOMEM);
212 
213 	if (base == element->base && end == element->end) {
214 		/*
215 		 * The new entry replaces an existing one.
216 		 *
217 		 * Old table:
218 		 * 	[ 0x1000, 0x4000] RAM		<-- element
219 		 * New table:
220 		 *	[ 0x1000, 0x4000] Reserved
221 		 */
222 		TAILQ_INSERT_BEFORE(element, new_element, chain);
223 		TAILQ_REMOVE(&e820_table, element, chain);
224 		free(element);
225 	} else if (base == element->base) {
226 		/*
227 		 * New element at system memory base boundary. Add new
228 		 * element before current and adjust the base of the old
229 		 * element.
230 		 *
231 		 * Old table:
232 		 * 	[ 0x1000, 0x4000] RAM		<-- element
233 		 * New table:
234 		 * 	[ 0x1000, 0x2000] Reserved
235 		 * 	[ 0x2000, 0x4000] RAM		<-- element
236 		 */
237 		TAILQ_INSERT_BEFORE(element, new_element, chain);
238 		element->base = end;
239 	} else if (end == element->end) {
240 		/*
241 		 * New element at system memory end boundary. Add new
242 		 * element after current and adjust the end of the
243 		 * current element.
244 		 *
245 		 * Old table:
246 		 * 	[ 0x1000, 0x4000] RAM		<-- element
247 		 * New table:
248 		 * 	[ 0x1000, 0x3000] RAM		<-- element
249 		 * 	[ 0x3000, 0x4000] Reserved
250 		 */
251 		TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain);
252 		element->end = base;
253 	} else {
254 		/*
255 		 * New element inside system memory entry. Split it by
256 		 * adding a system memory element and the new element
257 		 * before current.
258 		 *
259 		 * Old table:
260 		 * 	[ 0x1000, 0x4000] RAM		<-- element
261 		 * New table:
262 		 * 	[ 0x1000, 0x2000] RAM
263 		 * 	[ 0x2000, 0x3000] Reserved
264 		 * 	[ 0x3000, 0x4000] RAM		<-- element
265 		 */
266 		ram_element = e820_element_alloc(element->base, base,
267 		    E820_TYPE_MEMORY);
268 		if (ram_element == NULL) {
269 			return (ENOMEM);
270 		}
271 		TAILQ_INSERT_BEFORE(element, ram_element, chain);
272 		TAILQ_INSERT_BEFORE(element, new_element, chain);
273 		element->base = end;
274 	}
275 
276 	return (0);
277 }
278 
279 static int
280 e820_add_memory_hole(const uint64_t base, const uint64_t end)
281 {
282 	struct e820_element *element;
283 	struct e820_element *ram_element;
284 
285 	assert(end >= base);
286 
287 	/*
288 	 * E820 table should be always sorted in ascending order. Therefore,
289 	 * search for an element which end is larger than the base parameter.
290 	 */
291 	TAILQ_FOREACH(element, &e820_table, chain) {
292 		if (element->end > base) {
293 			break;
294 		}
295 	}
296 
297 	if (element == NULL || end <= element->base) {
298 		/* Nothing to do. Hole already exists */
299 		return (0);
300 	}
301 
302 	/* Memory holes are only allowed in system memory */
303 	assert(element->type == E820_TYPE_MEMORY);
304 
305 	if (base == element->base) {
306 		/*
307 		 * New hole at system memory base boundary.
308 		 *
309 		 * Old table:
310 		 * 	[ 0x1000, 0x4000] RAM
311 		 * New table:
312 		 * 	[ 0x2000, 0x4000] RAM
313 		 */
314 		element->base = end;
315 	} else if (end == element->end) {
316 		/*
317 		 * New hole at system memory end boundary.
318 		 *
319 		 * Old table:
320 		 * 	[ 0x1000, 0x4000] RAM
321 		 * New table:
322 		 * 	[ 0x1000, 0x3000] RAM
323 		 */
324 		element->end = base;
325 	} else {
326 		/*
327 		 * New hole inside system memory entry. Split the system memory.
328 		 *
329 		 * Old table:
330 		 * 	[ 0x1000, 0x4000] RAM		<-- element
331 		 * New table:
332 		 * 	[ 0x1000, 0x2000] RAM
333 		 * 	[ 0x3000, 0x4000] RAM		<-- element
334 		 */
335 		ram_element = e820_element_alloc(element->base, base,
336 		    E820_TYPE_MEMORY);
337 		if (ram_element == NULL) {
338 			return (ENOMEM);
339 		}
340 		TAILQ_INSERT_BEFORE(element, ram_element, chain);
341 		element->base = end;
342 	}
343 
344 	return (0);
345 }
346 
347 static uint64_t
348 e820_alloc_highest(const uint64_t max_address, const uint64_t length,
349     const uint64_t alignment, const enum e820_memory_type type)
350 {
351 	struct e820_element *element;
352 
353 	TAILQ_FOREACH_REVERSE(element, &e820_table, e820_table, chain) {
354 		uint64_t address, base, end;
355 
356 		end = MIN(max_address, element->end);
357 		base = roundup2(element->base, alignment);
358 
359 		/*
360 		 * If end - length == 0, we would allocate memory at address 0. This
361 		 * address is mostly unusable and we should avoid allocating it.
362 		 * Therefore, search for another block in that case.
363 		 */
364 		if (element->type != E820_TYPE_MEMORY || end < base ||
365 		    end - base < length || end - length == 0) {
366 			continue;
367 		}
368 
369 		address = rounddown2(end - length, alignment);
370 
371 		if (e820_add_entry(address, address + length, type) != 0) {
372 			return (0);
373 		}
374 
375 		return (address);
376 	}
377 
378 	return (0);
379 }
380 
381 static uint64_t
382 e820_alloc_lowest(const uint64_t min_address, const uint64_t length,
383     const uint64_t alignment, const enum e820_memory_type type)
384 {
385 	struct e820_element *element;
386 
387 	TAILQ_FOREACH(element, &e820_table, chain) {
388 		uint64_t base, end;
389 
390 		end = element->end;
391 		base = MAX(min_address, roundup2(element->base, alignment));
392 
393 		/*
394 		 * If base == 0, we would allocate memory at address 0. This
395 		 * address is mostly unusable and we should avoid allocating it.
396 		 * Therefore, search for another block in that case.
397 		 */
398 		if (element->type != E820_TYPE_MEMORY || end < base ||
399 		    end - base < length || base == 0) {
400 			continue;
401 		}
402 
403 		if (e820_add_entry(base, base + length, type) != 0) {
404 			return (0);
405 		}
406 
407 		return (base);
408 	}
409 
410 	return (0);
411 }
412 
413 uint64_t
414 e820_alloc(const uint64_t address, const uint64_t length,
415     const uint64_t alignment, const enum e820_memory_type type,
416     const enum e820_allocation_strategy strategy)
417 {
418 	assert(powerof2(alignment));
419 	assert((address & (alignment - 1)) == 0);
420 
421 	switch (strategy) {
422 	case E820_ALLOCATE_ANY:
423 		/*
424 		 * Allocate any address. Therefore, ignore the address parameter
425 		 * and reuse the code path for allocating the lowest address.
426 		 */
427 		return (e820_alloc_lowest(0, length, alignment, type));
428 	case E820_ALLOCATE_LOWEST:
429 		return (e820_alloc_lowest(address, length, alignment, type));
430 	case E820_ALLOCATE_HIGHEST:
431 		return (e820_alloc_highest(address, length, alignment, type));
432 	case E820_ALLOCATE_SPECIFIC:
433 		if (e820_add_entry(address, address + length, type) != 0) {
434 			return (0);
435 		}
436 
437 		return (address);
438 	}
439 
440 	return (0);
441 }
442 
443 int
444 e820_init(struct vmctx *const ctx)
445 {
446 	uint64_t lowmem_size, highmem_size;
447 	int error;
448 
449 	TAILQ_INIT(&e820_table);
450 
451 	lowmem_size = vm_get_lowmem_size(ctx);
452 	error = e820_add_entry(0, lowmem_size, E820_TYPE_MEMORY);
453 	if (error) {
454 		warnx("%s: Could not add lowmem", __func__);
455 		return (error);
456 	}
457 
458 	highmem_size = vm_get_highmem_size(ctx);
459 	if (highmem_size != 0) {
460 		error = e820_add_entry(4 * GB, 4 * GB + highmem_size,
461 		    E820_TYPE_MEMORY);
462 		if (error) {
463 			warnx("%s: Could not add highmem", __func__);
464 			return (error);
465 		}
466 	}
467 
468 	error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END);
469 	if (error) {
470 		warnx("%s: Could not add VGA memory", __func__);
471 		return (error);
472 	}
473 
474 	error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END);
475 	if (error) {
476 		warnx("%s: Could not add ROM area", __func__);
477 		return (error);
478 	}
479 
480 	return (0);
481 }
482 
483 int
484 e820_finalize(void)
485 {
486 	struct qemu_fwcfg_item *e820_fwcfg_item;
487 	int error;
488 
489 	e820_fwcfg_item = e820_get_fwcfg_item();
490 	if (e820_fwcfg_item == NULL) {
491 		warnx("invalid e820 table");
492 		return (ENOMEM);
493 	}
494 	error = qemu_fwcfg_add_file("etc/e820",
495 	    e820_fwcfg_item->size, e820_fwcfg_item->data);
496 	if (error != 0) {
497 		warnx("could not add qemu fwcfg etc/e820");
498 		free(e820_fwcfg_item->data);
499 		free(e820_fwcfg_item);
500 		return (error);
501 	}
502 	free(e820_fwcfg_item);
503 
504 	return (0);
505 }
506