1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
5 * Author: Corvin Köhne <c.koehne@beckhoff.com>
6 */
7
8 #include <sys/types.h>
9 #include <sys/queue.h>
10
11 #include <machine/vmm.h>
12
13 #include <assert.h>
14 #include <err.h>
15 #include <errno.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19
20 #include "e820.h"
21 #include "qemu_fwcfg.h"
22
/*
 * E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it
 * works on physical addresses. If vm_paddr_t were larger than uint64_t, E820
 * could not hold all possible physical addresses and we could get into trouble.
 * Refuse to compile in that case.
 */
static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t),
    "Unable to represent physical memory by E820 table");

/*
 * NOTE(review): presumably the fw_cfg file name under which the table is
 * published; it is not referenced in this part of the file.
 */
#define E820_FWCFG_FILE_NAME "etc/e820"

/* Byte-size multipliers; each one is 1024 times the previous one. */
#define KB (1024UL)
#define MB (1024 * KB)
#define GB (1024 * MB)

/*
 * Fixed E820 memory holes:
 * [ A0000, C0000) VGA
 * [ C0000, 100000) ROM
 */
#define E820_VGA_MEM_BASE 0xA0000
#define E820_VGA_MEM_END 0xC0000
#define E820_ROM_MEM_BASE 0xC0000
#define E820_ROM_MEM_END 0x100000
46
/*
 * One range of the guest physical address space.
 *
 * The range is half open, [base, end): the length exported to the guest is
 * computed as end - base, and neighbouring elements share a boundary value.
 */
struct e820_element {
	TAILQ_ENTRY(e820_element) chain;	/* linkage inside e820_table */
	uint64_t base;				/* first address of the range */
	uint64_t end;				/* first address past the range */
	enum e820_memory_type type;		/* kind of memory in the range */
};
/*
 * List of all elements. The insertion code in this file keeps it sorted by
 * ascending address.
 */
static TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER(
    e820_table);
55
56 static struct e820_element *
e820_element_alloc(uint64_t base,uint64_t end,enum e820_memory_type type)57 e820_element_alloc(uint64_t base, uint64_t end, enum e820_memory_type type)
58 {
59 struct e820_element *element;
60
61 element = calloc(1, sizeof(*element));
62 if (element == NULL) {
63 return (NULL);
64 }
65
66 element->base = base;
67 element->end = end;
68 element->type = type;
69
70 return (element);
71 }
72
73 static const char *
e820_get_type_name(const enum e820_memory_type type)74 e820_get_type_name(const enum e820_memory_type type)
75 {
76 switch (type) {
77 case E820_TYPE_MEMORY:
78 return ("RAM");
79 case E820_TYPE_RESERVED:
80 return ("Reserved");
81 case E820_TYPE_ACPI:
82 return ("ACPI");
83 case E820_TYPE_NVS:
84 return ("NVS");
85 default:
86 return ("Unknown");
87 }
88 }
89
90 void
e820_dump_table(void)91 e820_dump_table(void)
92 {
93 struct e820_element *element;
94 uint64_t i;
95
96 fprintf(stderr, "E820 map:\n");
97
98 i = 0;
99 TAILQ_FOREACH(element, &e820_table, chain) {
100 fprintf(stderr, " (%4lu) [%16lx, %16lx] %s\n", i,
101 element->base, element->end,
102 e820_get_type_name(element->type));
103
104 ++i;
105 }
106 }
107
108 struct qemu_fwcfg_item *
e820_get_fwcfg_item(void)109 e820_get_fwcfg_item(void)
110 {
111 struct qemu_fwcfg_item *fwcfg_item;
112 struct e820_element *element;
113 struct e820_entry *entries;
114 int count, i;
115
116 count = 0;
117 TAILQ_FOREACH(element, &e820_table, chain) {
118 ++count;
119 }
120 if (count == 0) {
121 warnx("%s: E820 table empty", __func__);
122 return (NULL);
123 }
124
125 fwcfg_item = calloc(1, sizeof(struct qemu_fwcfg_item));
126 if (fwcfg_item == NULL) {
127 return (NULL);
128 }
129
130 fwcfg_item->size = count * sizeof(struct e820_entry);
131 fwcfg_item->data = calloc(count, sizeof(struct e820_entry));
132 if (fwcfg_item->data == NULL) {
133 free(fwcfg_item);
134 return (NULL);
135 }
136
137 i = 0;
138 entries = (struct e820_entry *)fwcfg_item->data;
139 TAILQ_FOREACH(element, &e820_table, chain) {
140 struct e820_entry *entry = &entries[i];
141
142 entry->base = element->base;
143 entry->length = element->end - element->base;
144 entry->type = element->type;
145
146 ++i;
147 }
148
149 return (fwcfg_item);
150 }
151
152 static int
e820_add_entry(const uint64_t base,const uint64_t end,const enum e820_memory_type type)153 e820_add_entry(const uint64_t base, const uint64_t end,
154 const enum e820_memory_type type)
155 {
156 struct e820_element *new_element;
157 struct e820_element *element;
158 struct e820_element *ram_element;
159
160 assert(end >= base);
161
162 new_element = e820_element_alloc(base, end, type);
163 if (new_element == NULL) {
164 return (ENOMEM);
165 }
166
167 /*
168 * E820 table should always be sorted in ascending order. Therefore,
169 * search for a range whose end is larger than the base parameter.
170 */
171 TAILQ_FOREACH(element, &e820_table, chain) {
172 if (element->end > base) {
173 break;
174 }
175 }
176
177 /*
178 * System memory requires special handling.
179 */
180 if (type == E820_TYPE_MEMORY) {
181 /*
182 * base is larger than of any existing element. Add new system
183 * memory at the end of the table.
184 */
185 if (element == NULL) {
186 TAILQ_INSERT_TAIL(&e820_table, new_element, chain);
187 return (0);
188 }
189
190 /*
191 * System memory shouldn't overlap with any existing element.
192 */
193 assert(end >= element->base);
194
195 TAILQ_INSERT_BEFORE(element, new_element, chain);
196
197 return (0);
198 }
199
200 assert(element != NULL);
201 /* Non system memory should be allocated inside system memory. */
202 assert(element->type == E820_TYPE_MEMORY);
203 /* New element should fit into existing system memory element. */
204 assert(base >= element->base && end <= element->end);
205
206 if (base == element->base) {
207 /*
208 * New element at system memory base boundary. Add new
209 * element before current and adjust the base of the old
210 * element.
211 *
212 * Old table:
213 * [ 0x1000, 0x4000] RAM <-- element
214 * New table:
215 * [ 0x1000, 0x2000] Reserved
216 * [ 0x2000, 0x4000] RAM <-- element
217 */
218 TAILQ_INSERT_BEFORE(element, new_element, chain);
219 element->base = end;
220 } else if (end == element->end) {
221 /*
222 * New element at system memory end boundary. Add new
223 * element after current and adjust the end of the
224 * current element.
225 *
226 * Old table:
227 * [ 0x1000, 0x4000] RAM <-- element
228 * New table:
229 * [ 0x1000, 0x3000] RAM <-- element
230 * [ 0x3000, 0x4000] Reserved
231 */
232 TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain);
233 element->end = base;
234 } else {
235 /*
236 * New element inside system memory entry. Split it by
237 * adding a system memory element and the new element
238 * before current.
239 *
240 * Old table:
241 * [ 0x1000, 0x4000] RAM <-- element
242 * New table:
243 * [ 0x1000, 0x2000] RAM
244 * [ 0x2000, 0x3000] Reserved
245 * [ 0x3000, 0x4000] RAM <-- element
246 */
247 ram_element = e820_element_alloc(element->base, base,
248 E820_TYPE_MEMORY);
249 if (ram_element == NULL) {
250 return (ENOMEM);
251 }
252 TAILQ_INSERT_BEFORE(element, ram_element, chain);
253 TAILQ_INSERT_BEFORE(element, new_element, chain);
254 element->base = end;
255 }
256
257 return (0);
258 }
259
260 static int
e820_add_memory_hole(const uint64_t base,const uint64_t end)261 e820_add_memory_hole(const uint64_t base, const uint64_t end)
262 {
263 struct e820_element *element;
264 struct e820_element *ram_element;
265
266 assert(end >= base);
267
268 /*
269 * E820 table should be always sorted in ascending order. Therefore,
270 * search for an element which end is larger than the base parameter.
271 */
272 TAILQ_FOREACH(element, &e820_table, chain) {
273 if (element->end > base) {
274 break;
275 }
276 }
277
278 if (element == NULL || end <= element->base) {
279 /* Nothing to do. Hole already exists */
280 return (0);
281 }
282
283 /* Memory holes are only allowed in system memory */
284 assert(element->type == E820_TYPE_MEMORY);
285
286 if (base == element->base) {
287 /*
288 * New hole at system memory base boundary.
289 *
290 * Old table:
291 * [ 0x1000, 0x4000] RAM
292 * New table:
293 * [ 0x2000, 0x4000] RAM
294 */
295 element->base = end;
296 } else if (end == element->end) {
297 /*
298 * New hole at system memory end boundary.
299 *
300 * Old table:
301 * [ 0x1000, 0x4000] RAM
302 * New table:
303 * [ 0x1000, 0x3000] RAM
304 */
305 element->end = base;
306 } else {
307 /*
308 * New hole inside system memory entry. Split the system memory.
309 *
310 * Old table:
311 * [ 0x1000, 0x4000] RAM <-- element
312 * New table:
313 * [ 0x1000, 0x2000] RAM
314 * [ 0x3000, 0x4000] RAM <-- element
315 */
316 ram_element = e820_element_alloc(element->base, base,
317 E820_TYPE_MEMORY);
318 if (ram_element == NULL) {
319 return (ENOMEM);
320 }
321 TAILQ_INSERT_BEFORE(element, ram_element, chain);
322 element->base = end;
323 }
324
325 return (0);
326 }
327
328 static uint64_t
e820_alloc_highest(const uint64_t max_address,const uint64_t length,const uint64_t alignment,const enum e820_memory_type type)329 e820_alloc_highest(const uint64_t max_address, const uint64_t length,
330 const uint64_t alignment, const enum e820_memory_type type)
331 {
332 struct e820_element *element;
333
334 TAILQ_FOREACH_REVERSE(element, &e820_table, e820_table, chain) {
335 uint64_t address, base, end;
336
337 end = MIN(max_address, element->end);
338 base = roundup2(element->base, alignment);
339
340 /*
341 * If end - length == 0, we would allocate memory at address 0. This
342 * address is mostly unusable and we should avoid allocating it.
343 * Therefore, search for another block in that case.
344 */
345 if (element->type != E820_TYPE_MEMORY || end < base ||
346 end - base < length || end - length == 0) {
347 continue;
348 }
349
350 address = rounddown2(end - length, alignment);
351
352 if (e820_add_entry(address, address + length, type) != 0) {
353 return (0);
354 }
355
356 return (address);
357 }
358
359 return (0);
360 }
361
362 static uint64_t
e820_alloc_lowest(const uint64_t min_address,const uint64_t length,const uint64_t alignment,const enum e820_memory_type type)363 e820_alloc_lowest(const uint64_t min_address, const uint64_t length,
364 const uint64_t alignment, const enum e820_memory_type type)
365 {
366 struct e820_element *element;
367
368 TAILQ_FOREACH(element, &e820_table, chain) {
369 uint64_t base, end;
370
371 end = element->end;
372 base = MAX(min_address, roundup2(element->base, alignment));
373
374 /*
375 * If base == 0, we would allocate memory at address 0. This
376 * address is mostly unusable and we should avoid allocating it.
377 * Therefore, search for another block in that case.
378 */
379 if (element->type != E820_TYPE_MEMORY || end < base ||
380 end - base < length || base == 0) {
381 continue;
382 }
383
384 if (e820_add_entry(base, base + length, type) != 0) {
385 return (0);
386 }
387
388 return (base);
389 }
390
391 return (0);
392 }
393
394 uint64_t
e820_alloc(const uint64_t address,const uint64_t length,const uint64_t alignment,const enum e820_memory_type type,const enum e820_allocation_strategy strategy)395 e820_alloc(const uint64_t address, const uint64_t length,
396 const uint64_t alignment, const enum e820_memory_type type,
397 const enum e820_allocation_strategy strategy)
398 {
399 assert(powerof2(alignment));
400 assert((address & (alignment - 1)) == 0);
401
402 switch (strategy) {
403 case E820_ALLOCATE_ANY:
404 /*
405 * Allocate any address. Therefore, ignore the address parameter
406 * and reuse the code path for allocating the lowest address.
407 */
408 return (e820_alloc_lowest(0, length, alignment, type));
409 case E820_ALLOCATE_LOWEST:
410 return (e820_alloc_lowest(address, length, alignment, type));
411 case E820_ALLOCATE_HIGHEST:
412 return (e820_alloc_highest(address, length, alignment, type));
413 case E820_ALLOCATE_SPECIFIC:
414 if (e820_add_entry(address, address + length, type) != 0) {
415 return (0);
416 }
417
418 return (address);
419 }
420
421 return (0);
422 }
423
424 int
e820_init(struct vmctx * const ctx)425 e820_init(struct vmctx *const ctx)
426 {
427 uint64_t lowmem_size, highmem_size;
428 int error;
429
430 TAILQ_INIT(&e820_table);
431
432 lowmem_size = vm_get_lowmem_size(ctx);
433 error = e820_add_entry(0, lowmem_size, E820_TYPE_MEMORY);
434 if (error) {
435 warnx("%s: Could not add lowmem", __func__);
436 return (error);
437 }
438
439 highmem_size = vm_get_highmem_size(ctx);
440 if (highmem_size != 0) {
441 error = e820_add_entry(4 * GB, 4 * GB + highmem_size,
442 E820_TYPE_MEMORY);
443 if (error) {
444 warnx("%s: Could not add highmem", __func__);
445 return (error);
446 }
447 }
448
449 error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END);
450 if (error) {
451 warnx("%s: Could not add VGA memory", __func__);
452 return (error);
453 }
454
455 error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END);
456 if (error) {
457 warnx("%s: Could not add ROM area", __func__);
458 return (error);
459 }
460
461 return (0);
462 }
463