1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
5 * Author: Corvin Köhne <c.koehne@beckhoff.com>
6 */
7
8 #include <sys/types.h>
9 #include <sys/queue.h>
10
11 #include <machine/vmm.h>
12
13 #include <assert.h>
14 #include <err.h>
15 #include <errno.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19
20 #include "debug.h"
21 #include "e820.h"
22 #include "qemu_fwcfg.h"
23
/*
 * E820 always uses 64 bit entries. The emulation code uses vm_paddr_t because
 * it works on physical addresses. If vm_paddr_t were larger than uint64_t, the
 * E820 table could not hold every possible physical address, so refuse to
 * build in that case.
 */
static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t),
    "Unable to represent physical memory by E820 table");

/* Name of the fw_cfg file that carries the E820 table. */
#define	E820_FWCFG_FILE_NAME	"etc/e820"

/* Size units, in bytes. */
#define	KB	(1024UL)
#define	MB	(1024 * KB)
#define	GB	(1024 * MB)

/*
 * Fixed E820 memory holes that e820_init() cuts out of system memory:
 *   [ A0000,  C0000) VGA
 *   [ C0000, 100000) ROM
 */
#define	E820_VGA_MEM_BASE	0xA0000
#define	E820_VGA_MEM_END	0xC0000
#define	E820_ROM_MEM_BASE	0xC0000
#define	E820_ROM_MEM_END	0x100000
47
/*
 * One region of the guest physical address map.
 *
 * A region covers the half-open range [base, end): the exported E820 entry
 * length is computed as end - base.
 */
struct e820_element {
	TAILQ_ENTRY(e820_element) chain;	/* linkage inside e820_table */
	uint64_t base;				/* first address of the region */
	uint64_t end;				/* first address past the region */
	enum e820_memory_type type;		/* kind of memory in the region */
};

/*
 * The E820 table itself. The insertion code in this file expects the list to
 * be sorted by ascending address.
 */
static TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER(
    e820_table);
56
57 static struct e820_element *
e820_element_alloc(uint64_t base,uint64_t end,enum e820_memory_type type)58 e820_element_alloc(uint64_t base, uint64_t end, enum e820_memory_type type)
59 {
60 struct e820_element *element;
61
62 element = calloc(1, sizeof(*element));
63 if (element == NULL) {
64 return (NULL);
65 }
66
67 element->base = base;
68 element->end = end;
69 element->type = type;
70
71 return (element);
72 }
73
74 static const char *
e820_get_type_name(const enum e820_memory_type type)75 e820_get_type_name(const enum e820_memory_type type)
76 {
77 switch (type) {
78 case E820_TYPE_MEMORY:
79 return ("RAM");
80 case E820_TYPE_RESERVED:
81 return ("Reserved");
82 case E820_TYPE_ACPI:
83 return ("ACPI");
84 case E820_TYPE_NVS:
85 return ("NVS");
86 default:
87 return ("Unknown");
88 }
89 }
90
91 void
e820_dump_table(void)92 e820_dump_table(void)
93 {
94 struct e820_element *element;
95 uint64_t i;
96
97 EPRINTLN("E820 map:");
98
99 i = 0;
100 TAILQ_FOREACH(element, &e820_table, chain) {
101 EPRINTLN(" (%4lu) [%16lx, %16lx] %s", i,
102 element->base, element->end,
103 e820_get_type_name(element->type));
104
105 ++i;
106 }
107 }
108
109 static struct qemu_fwcfg_item *
e820_get_fwcfg_item(void)110 e820_get_fwcfg_item(void)
111 {
112 struct qemu_fwcfg_item *fwcfg_item;
113 struct e820_element *element;
114 struct e820_entry *entries;
115 int count, i;
116
117 count = 0;
118 TAILQ_FOREACH(element, &e820_table, chain) {
119 ++count;
120 }
121 if (count == 0) {
122 warnx("%s: E820 table empty", __func__);
123 return (NULL);
124 }
125
126 fwcfg_item = calloc(1, sizeof(struct qemu_fwcfg_item));
127 if (fwcfg_item == NULL) {
128 return (NULL);
129 }
130
131 fwcfg_item->size = count * sizeof(struct e820_entry);
132 fwcfg_item->data = calloc(count, sizeof(struct e820_entry));
133 if (fwcfg_item->data == NULL) {
134 free(fwcfg_item);
135 return (NULL);
136 }
137
138 i = 0;
139 entries = (struct e820_entry *)fwcfg_item->data;
140 TAILQ_FOREACH(element, &e820_table, chain) {
141 struct e820_entry *entry = &entries[i];
142
143 entry->base = element->base;
144 entry->length = element->end - element->base;
145 entry->type = element->type;
146
147 ++i;
148 }
149
150 return (fwcfg_item);
151 }
152
153 static int
e820_add_entry(const uint64_t base,const uint64_t end,const enum e820_memory_type type)154 e820_add_entry(const uint64_t base, const uint64_t end,
155 const enum e820_memory_type type)
156 {
157 struct e820_element *new_element;
158 struct e820_element *element;
159 struct e820_element *sib_element;
160 struct e820_element *ram_element;
161
162 assert(end >= base);
163
164 new_element = e820_element_alloc(base, end, type);
165 if (new_element == NULL) {
166 return (ENOMEM);
167 }
168
169 /*
170 * E820 table should always be sorted in ascending order. Therefore,
171 * search for a range whose end is larger than the base parameter.
172 */
173 TAILQ_FOREACH(element, &e820_table, chain) {
174 if (element->end > base) {
175 break;
176 }
177 }
178
179 /*
180 * System memory requires special handling.
181 */
182 if (type == E820_TYPE_MEMORY) {
183 /*
184 * base is larger than of any existing element. Add new system
185 * memory at the end of the table.
186 */
187 if (element == NULL) {
188 TAILQ_INSERT_TAIL(&e820_table, new_element, chain);
189 return (0);
190 }
191
192 /*
193 * System memory shouldn't overlap with any existing element.
194 */
195 assert(end >= element->base);
196
197 TAILQ_INSERT_BEFORE(element, new_element, chain);
198
199 return (0);
200 }
201
202 /*
203 * If some one tries to allocate a specific address, it could happen, that
204 * this address is not allocatable. Therefore, do some checks. If the
205 * address is not allocatable, don't panic. The user may have a fallback and
206 * tries to allocate another address. This is true for the GVT-d emulation
207 * which tries to reuse the host address of the graphics stolen memory and
208 * falls back to allocating the highest address below 4 GB.
209 */
210 if (element == NULL || element->type != E820_TYPE_MEMORY ||
211 (base < element->base || end > element->end))
212 return (ENOMEM);
213
214 if (base == element->base && end == element->end) {
215 /*
216 * The new entry replaces an existing one.
217 *
218 * Old table:
219 * [ 0x1000, 0x4000] RAM <-- element
220 * New table:
221 * [ 0x1000, 0x4000] Reserved
222 */
223 TAILQ_INSERT_BEFORE(element, new_element, chain);
224 TAILQ_REMOVE(&e820_table, element, chain);
225 free(element);
226 } else if (base == element->base) {
227 /*
228 * New element at system memory base boundary. Add new
229 * element before current and adjust the base of the old
230 * element.
231 *
232 * Old table:
233 * [ 0x1000, 0x4000] RAM <-- element
234 * New table:
235 * [ 0x1000, 0x2000] Reserved
236 * [ 0x2000, 0x4000] RAM <-- element
237 */
238 TAILQ_INSERT_BEFORE(element, new_element, chain);
239 element->base = end;
240 } else if (end == element->end) {
241 /*
242 * New element at system memory end boundary. Add new
243 * element after current and adjust the end of the
244 * current element.
245 *
246 * Old table:
247 * [ 0x1000, 0x4000] RAM <-- element
248 * New table:
249 * [ 0x1000, 0x3000] RAM <-- element
250 * [ 0x3000, 0x4000] Reserved
251 */
252 TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain);
253 element->end = base;
254 } else {
255 /*
256 * New element inside system memory entry. Split it by
257 * adding a system memory element and the new element
258 * before current.
259 *
260 * Old table:
261 * [ 0x1000, 0x4000] RAM <-- element
262 * New table:
263 * [ 0x1000, 0x2000] RAM
264 * [ 0x2000, 0x3000] Reserved
265 * [ 0x3000, 0x4000] RAM <-- element
266 */
267 ram_element = e820_element_alloc(element->base, base,
268 E820_TYPE_MEMORY);
269 if (ram_element == NULL) {
270 return (ENOMEM);
271 }
272 TAILQ_INSERT_BEFORE(element, ram_element, chain);
273 TAILQ_INSERT_BEFORE(element, new_element, chain);
274 element->base = end;
275 }
276
277 /*
278 * If the previous element has the same type and ends at our base
279 * boundary, we can merge both entries.
280 */
281 sib_element = TAILQ_PREV(new_element, e820_table, chain);
282 if (sib_element != NULL &&
283 sib_element->type == new_element->type &&
284 sib_element->end == new_element->base) {
285 new_element->base = sib_element->base;
286 TAILQ_REMOVE(&e820_table, sib_element, chain);
287 free(sib_element);
288 }
289
290 /*
291 * If the next element has the same type and starts at our end
292 * boundary, we can merge both entries.
293 */
294 sib_element = TAILQ_NEXT(new_element, chain);
295 if (sib_element != NULL &&
296 sib_element->type == new_element->type &&
297 sib_element->base == new_element->end) {
298 /* Merge new element into subsequent one. */
299 new_element->end = sib_element->end;
300 TAILQ_REMOVE(&e820_table, sib_element, chain);
301 free(sib_element);
302 }
303
304 return (0);
305 }
306
307 static int
e820_add_memory_hole(const uint64_t base,const uint64_t end)308 e820_add_memory_hole(const uint64_t base, const uint64_t end)
309 {
310 struct e820_element *element;
311 struct e820_element *ram_element;
312
313 assert(end >= base);
314
315 /*
316 * E820 table should be always sorted in ascending order. Therefore,
317 * search for an element which end is larger than the base parameter.
318 */
319 TAILQ_FOREACH(element, &e820_table, chain) {
320 if (element->end > base) {
321 break;
322 }
323 }
324
325 if (element == NULL || end <= element->base) {
326 /* Nothing to do. Hole already exists */
327 return (0);
328 }
329
330 /* Memory holes are only allowed in system memory */
331 assert(element->type == E820_TYPE_MEMORY);
332
333 if (base == element->base) {
334 /*
335 * New hole at system memory base boundary.
336 *
337 * Old table:
338 * [ 0x1000, 0x4000] RAM
339 * New table:
340 * [ 0x2000, 0x4000] RAM
341 */
342 element->base = end;
343 } else if (end == element->end) {
344 /*
345 * New hole at system memory end boundary.
346 *
347 * Old table:
348 * [ 0x1000, 0x4000] RAM
349 * New table:
350 * [ 0x1000, 0x3000] RAM
351 */
352 element->end = base;
353 } else {
354 /*
355 * New hole inside system memory entry. Split the system memory.
356 *
357 * Old table:
358 * [ 0x1000, 0x4000] RAM <-- element
359 * New table:
360 * [ 0x1000, 0x2000] RAM
361 * [ 0x3000, 0x4000] RAM <-- element
362 */
363 ram_element = e820_element_alloc(element->base, base,
364 E820_TYPE_MEMORY);
365 if (ram_element == NULL) {
366 return (ENOMEM);
367 }
368 TAILQ_INSERT_BEFORE(element, ram_element, chain);
369 element->base = end;
370 }
371
372 return (0);
373 }
374
375 static uint64_t
e820_alloc_highest(const uint64_t max_address,const uint64_t length,const uint64_t alignment,const enum e820_memory_type type)376 e820_alloc_highest(const uint64_t max_address, const uint64_t length,
377 const uint64_t alignment, const enum e820_memory_type type)
378 {
379 struct e820_element *element;
380
381 TAILQ_FOREACH_REVERSE(element, &e820_table, e820_table, chain) {
382 uint64_t address, base, end;
383
384 end = MIN(max_address, element->end);
385 base = roundup2(element->base, alignment);
386
387 /*
388 * If end - length == 0, we would allocate memory at address 0. This
389 * address is mostly unusable and we should avoid allocating it.
390 * Therefore, search for another block in that case.
391 */
392 if (element->type != E820_TYPE_MEMORY || end < base ||
393 end - base < length || end - length == 0) {
394 continue;
395 }
396
397 address = rounddown2(end - length, alignment);
398
399 if (e820_add_entry(address, address + length, type) != 0) {
400 return (0);
401 }
402
403 return (address);
404 }
405
406 return (0);
407 }
408
409 static uint64_t
e820_alloc_lowest(const uint64_t min_address,const uint64_t length,const uint64_t alignment,const enum e820_memory_type type)410 e820_alloc_lowest(const uint64_t min_address, const uint64_t length,
411 const uint64_t alignment, const enum e820_memory_type type)
412 {
413 struct e820_element *element;
414
415 TAILQ_FOREACH(element, &e820_table, chain) {
416 uint64_t base, end;
417
418 end = element->end;
419 base = MAX(min_address, roundup2(element->base, alignment));
420
421 /*
422 * If base == 0, we would allocate memory at address 0. This
423 * address is mostly unusable and we should avoid allocating it.
424 * Therefore, search for another block in that case.
425 */
426 if (element->type != E820_TYPE_MEMORY || end < base ||
427 end - base < length || base == 0) {
428 continue;
429 }
430
431 if (e820_add_entry(base, base + length, type) != 0) {
432 return (0);
433 }
434
435 return (base);
436 }
437
438 return (0);
439 }
440
441 uint64_t
e820_alloc(const uint64_t address,const uint64_t length,const uint64_t alignment,const enum e820_memory_type type,const enum e820_allocation_strategy strategy)442 e820_alloc(const uint64_t address, const uint64_t length,
443 const uint64_t alignment, const enum e820_memory_type type,
444 const enum e820_allocation_strategy strategy)
445 {
446 assert(powerof2(alignment));
447 assert((address & (alignment - 1)) == 0);
448
449 switch (strategy) {
450 case E820_ALLOCATE_ANY:
451 /*
452 * Allocate any address. Therefore, ignore the address parameter
453 * and reuse the code path for allocating the lowest address.
454 */
455 return (e820_alloc_lowest(0, length, alignment, type));
456 case E820_ALLOCATE_LOWEST:
457 return (e820_alloc_lowest(address, length, alignment, type));
458 case E820_ALLOCATE_HIGHEST:
459 return (e820_alloc_highest(address, length, alignment, type));
460 case E820_ALLOCATE_SPECIFIC:
461 if (e820_add_entry(address, address + length, type) != 0) {
462 return (0);
463 }
464
465 return (address);
466 }
467
468 return (0);
469 }
470
471 int
e820_init(struct vmctx * const ctx)472 e820_init(struct vmctx *const ctx)
473 {
474 uint64_t lowmem_size, highmem_size;
475 int error;
476
477 TAILQ_INIT(&e820_table);
478
479 lowmem_size = vm_get_lowmem_size(ctx);
480 error = e820_add_entry(0, lowmem_size, E820_TYPE_MEMORY);
481 if (error) {
482 warnx("%s: Could not add lowmem", __func__);
483 return (error);
484 }
485
486 highmem_size = vm_get_highmem_size(ctx);
487 if (highmem_size != 0) {
488 error = e820_add_entry(4 * GB, 4 * GB + highmem_size,
489 E820_TYPE_MEMORY);
490 if (error) {
491 warnx("%s: Could not add highmem", __func__);
492 return (error);
493 }
494 }
495
496 error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END);
497 if (error) {
498 warnx("%s: Could not add VGA memory", __func__);
499 return (error);
500 }
501
502 error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END);
503 if (error) {
504 warnx("%s: Could not add ROM area", __func__);
505 return (error);
506 }
507
508 return (0);
509 }
510
511 int
e820_finalize(void)512 e820_finalize(void)
513 {
514 struct qemu_fwcfg_item *e820_fwcfg_item;
515 int error;
516
517 e820_fwcfg_item = e820_get_fwcfg_item();
518 if (e820_fwcfg_item == NULL) {
519 warnx("invalid e820 table");
520 return (ENOMEM);
521 }
522 error = qemu_fwcfg_add_file("etc/e820",
523 e820_fwcfg_item->size, e820_fwcfg_item->data);
524 if (error != 0) {
525 warnx("could not add qemu fwcfg etc/e820");
526 free(e820_fwcfg_item->data);
527 free(e820_fwcfg_item);
528 return (error);
529 }
530 free(e820_fwcfg_item);
531
532 return (0);
533 }
534