1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG 5 * Author: Corvin Köhne <c.koehne@beckhoff.com> 6 */ 7 8 #include <sys/types.h> 9 #include <sys/queue.h> 10 11 #include <machine/vmm.h> 12 13 #include <assert.h> 14 #include <err.h> 15 #include <errno.h> 16 #include <stdio.h> 17 #include <stdlib.h> 18 #include <string.h> 19 20 #include "e820.h" 21 #include "qemu_fwcfg.h" 22 23 /* 24 * E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it 25 * works on physical addresses. If vm_paddr_t is larger than uint64_t E820 can't 26 * hold all possible physical addresses and we can get into trouble. 27 */ 28 static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t), 29 "Unable to represent physical memory by E820 table"); 30 31 #define E820_FWCFG_FILE_NAME "etc/e820" 32 33 #define KB (1024UL) 34 #define MB (1024 * KB) 35 #define GB (1024 * MB) 36 37 /* 38 * Fix E820 memory holes: 39 * [ A0000, C0000) VGA 40 * [ C0000, 100000) ROM 41 */ 42 #define E820_VGA_MEM_BASE 0xA0000 43 #define E820_VGA_MEM_END 0xC0000 44 #define E820_ROM_MEM_BASE 0xC0000 45 #define E820_ROM_MEM_END 0x100000 46 47 struct e820_element { 48 TAILQ_ENTRY(e820_element) chain; 49 uint64_t base; 50 uint64_t end; 51 enum e820_memory_type type; 52 }; 53 static TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER( 54 e820_table); 55 56 static struct e820_element * 57 e820_element_alloc(uint64_t base, uint64_t end, enum e820_memory_type type) 58 { 59 struct e820_element *element; 60 61 element = calloc(1, sizeof(*element)); 62 if (element == NULL) { 63 return (NULL); 64 } 65 66 element->base = base; 67 element->end = end; 68 element->type = type; 69 70 return (element); 71 } 72 73 static const char * 74 e820_get_type_name(const enum e820_memory_type type) 75 { 76 switch (type) { 77 case E820_TYPE_MEMORY: 78 return ("RAM"); 79 case E820_TYPE_RESERVED: 80 return ("Reserved"); 81 case E820_TYPE_ACPI: 82 return ("ACPI"); 83 case E820_TYPE_NVS: 84 return ("NVS"); 85 default: 86 return ("Unknown"); 87 } 88 } 89 90 void 91 e820_dump_table(void) 92 { 93 struct e820_element *element; 94 uint64_t i; 95 96 fprintf(stderr, "E820 map:\n"); 97 98 i = 0; 99 TAILQ_FOREACH(element, &e820_table, chain) { 100 fprintf(stderr, " (%4lu) [%16lx, %16lx] %s\n", i, 101 element->base, element->end, 102 e820_get_type_name(element->type)); 103 104 ++i; 105 } 106 } 107 108 static struct qemu_fwcfg_item * 109 e820_get_fwcfg_item(void) 110 { 111 struct qemu_fwcfg_item *fwcfg_item; 112 struct e820_element *element; 113 struct e820_entry *entries; 114 int count, i; 115 116 count = 0; 117 TAILQ_FOREACH(element, &e820_table, chain) { 118 ++count; 119 } 120 if (count == 0) { 121 warnx("%s: E820 table empty", __func__); 122 return (NULL); 123 } 124 125 fwcfg_item = calloc(1, sizeof(struct qemu_fwcfg_item)); 126 if (fwcfg_item == NULL) { 127 return (NULL); 128 } 129 130 fwcfg_item->size = count * sizeof(struct e820_entry); 131 fwcfg_item->data = calloc(count, sizeof(struct e820_entry)); 132 if (fwcfg_item->data == NULL) { 133 free(fwcfg_item); 134 return (NULL); 135 } 136 137 i = 0; 138 entries = (struct e820_entry *)fwcfg_item->data; 139 TAILQ_FOREACH(element, &e820_table, chain) { 140 struct e820_entry *entry = &entries[i]; 141 142 entry->base = element->base; 143 entry->length = element->end - element->base; 144 entry->type = element->type; 145 146 ++i; 147 } 148 149 return (fwcfg_item); 150 } 151 152 static int 153 e820_add_entry(const uint64_t base, const uint64_t end, 154 const enum e820_memory_type type) 155 { 156 struct e820_element *new_element; 157 struct e820_element *element; 158 struct e820_element *ram_element; 159 160 assert(end >= base); 161 162 new_element = e820_element_alloc(base, end, type); 163 if (new_element == NULL) { 164 return (ENOMEM); 165 } 166 167 /* 168 * E820 table should always be sorted in ascending order. Therefore, 169 * search for a range whose end is larger than the base parameter. 170 */ 171 TAILQ_FOREACH(element, &e820_table, chain) { 172 if (element->end > base) { 173 break; 174 } 175 } 176 177 /* 178 * System memory requires special handling. 179 */ 180 if (type == E820_TYPE_MEMORY) { 181 /* 182 * base is larger than of any existing element. Add new system 183 * memory at the end of the table. 184 */ 185 if (element == NULL) { 186 TAILQ_INSERT_TAIL(&e820_table, new_element, chain); 187 return (0); 188 } 189 190 /* 191 * System memory shouldn't overlap with any existing element. 192 */ 193 assert(end >= element->base); 194 195 TAILQ_INSERT_BEFORE(element, new_element, chain); 196 197 return (0); 198 } 199 200 /* 201 * If some one tries to allocate a specific address, it could happen, that 202 * this address is not allocatable. Therefore, do some checks. If the 203 * address is not allocatable, don't panic. The user may have a fallback and 204 * tries to allocate another address. This is true for the GVT-d emulation 205 * which tries to reuse the host address of the graphics stolen memory and 206 * falls back to allocating the highest address below 4 GB. 207 */ 208 if (element == NULL || element->type != E820_TYPE_MEMORY || 209 (base < element->base || end > element->end)) 210 return (ENOMEM); 211 212 if (base == element->base) { 213 /* 214 * New element at system memory base boundary. Add new 215 * element before current and adjust the base of the old 216 * element. 217 * 218 * Old table: 219 * [ 0x1000, 0x4000] RAM <-- element 220 * New table: 221 * [ 0x1000, 0x2000] Reserved 222 * [ 0x2000, 0x4000] RAM <-- element 223 */ 224 TAILQ_INSERT_BEFORE(element, new_element, chain); 225 element->base = end; 226 } else if (end == element->end) { 227 /* 228 * New element at system memory end boundary. Add new 229 * element after current and adjust the end of the 230 * current element. 231 * 232 * Old table: 233 * [ 0x1000, 0x4000] RAM <-- element 234 * New table: 235 * [ 0x1000, 0x3000] RAM <-- element 236 * [ 0x3000, 0x4000] Reserved 237 */ 238 TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain); 239 element->end = base; 240 } else { 241 /* 242 * New element inside system memory entry. Split it by 243 * adding a system memory element and the new element 244 * before current. 245 * 246 * Old table: 247 * [ 0x1000, 0x4000] RAM <-- element 248 * New table: 249 * [ 0x1000, 0x2000] RAM 250 * [ 0x2000, 0x3000] Reserved 251 * [ 0x3000, 0x4000] RAM <-- element 252 */ 253 ram_element = e820_element_alloc(element->base, base, 254 E820_TYPE_MEMORY); 255 if (ram_element == NULL) { 256 return (ENOMEM); 257 } 258 TAILQ_INSERT_BEFORE(element, ram_element, chain); 259 TAILQ_INSERT_BEFORE(element, new_element, chain); 260 element->base = end; 261 } 262 263 return (0); 264 } 265 266 static int 267 e820_add_memory_hole(const uint64_t base, const uint64_t end) 268 { 269 struct e820_element *element; 270 struct e820_element *ram_element; 271 272 assert(end >= base); 273 274 /* 275 * E820 table should be always sorted in ascending order. Therefore, 276 * search for an element which end is larger than the base parameter. 277 */ 278 TAILQ_FOREACH(element, &e820_table, chain) { 279 if (element->end > base) { 280 break; 281 } 282 } 283 284 if (element == NULL || end <= element->base) { 285 /* Nothing to do. Hole already exists */ 286 return (0); 287 } 288 289 /* Memory holes are only allowed in system memory */ 290 assert(element->type == E820_TYPE_MEMORY); 291 292 if (base == element->base) { 293 /* 294 * New hole at system memory base boundary. 295 * 296 * Old table: 297 * [ 0x1000, 0x4000] RAM 298 * New table: 299 * [ 0x2000, 0x4000] RAM 300 */ 301 element->base = end; 302 } else if (end == element->end) { 303 /* 304 * New hole at system memory end boundary. 305 * 306 * Old table: 307 * [ 0x1000, 0x4000] RAM 308 * New table: 309 * [ 0x1000, 0x3000] RAM 310 */ 311 element->end = base; 312 } else { 313 /* 314 * New hole inside system memory entry. Split the system memory. 315 * 316 * Old table: 317 * [ 0x1000, 0x4000] RAM <-- element 318 * New table: 319 * [ 0x1000, 0x2000] RAM 320 * [ 0x3000, 0x4000] RAM <-- element 321 */ 322 ram_element = e820_element_alloc(element->base, base, 323 E820_TYPE_MEMORY); 324 if (ram_element == NULL) { 325 return (ENOMEM); 326 } 327 TAILQ_INSERT_BEFORE(element, ram_element, chain); 328 element->base = end; 329 } 330 331 return (0); 332 } 333 334 static uint64_t 335 e820_alloc_highest(const uint64_t max_address, const uint64_t length, 336 const uint64_t alignment, const enum e820_memory_type type) 337 { 338 struct e820_element *element; 339 340 TAILQ_FOREACH_REVERSE(element, &e820_table, e820_table, chain) { 341 uint64_t address, base, end; 342 343 end = MIN(max_address, element->end); 344 base = roundup2(element->base, alignment); 345 346 /* 347 * If end - length == 0, we would allocate memory at address 0. This 348 * address is mostly unusable and we should avoid allocating it. 349 * Therefore, search for another block in that case. 350 */ 351 if (element->type != E820_TYPE_MEMORY || end < base || 352 end - base < length || end - length == 0) { 353 continue; 354 } 355 356 address = rounddown2(end - length, alignment); 357 358 if (e820_add_entry(address, address + length, type) != 0) { 359 return (0); 360 } 361 362 return (address); 363 } 364 365 return (0); 366 } 367 368 static uint64_t 369 e820_alloc_lowest(const uint64_t min_address, const uint64_t length, 370 const uint64_t alignment, const enum e820_memory_type type) 371 { 372 struct e820_element *element; 373 374 TAILQ_FOREACH(element, &e820_table, chain) { 375 uint64_t base, end; 376 377 end = element->end; 378 base = MAX(min_address, roundup2(element->base, alignment)); 379 380 /* 381 * If base == 0, we would allocate memory at address 0. This 382 * address is mostly unusable and we should avoid allocating it. 383 * Therefore, search for another block in that case. 384 */ 385 if (element->type != E820_TYPE_MEMORY || end < base || 386 end - base < length || base == 0) { 387 continue; 388 } 389 390 if (e820_add_entry(base, base + length, type) != 0) { 391 return (0); 392 } 393 394 return (base); 395 } 396 397 return (0); 398 } 399 400 uint64_t 401 e820_alloc(const uint64_t address, const uint64_t length, 402 const uint64_t alignment, const enum e820_memory_type type, 403 const enum e820_allocation_strategy strategy) 404 { 405 assert(powerof2(alignment)); 406 assert((address & (alignment - 1)) == 0); 407 408 switch (strategy) { 409 case E820_ALLOCATE_ANY: 410 /* 411 * Allocate any address. Therefore, ignore the address parameter 412 * and reuse the code path for allocating the lowest address. 413 */ 414 return (e820_alloc_lowest(0, length, alignment, type)); 415 case E820_ALLOCATE_LOWEST: 416 return (e820_alloc_lowest(address, length, alignment, type)); 417 case E820_ALLOCATE_HIGHEST: 418 return (e820_alloc_highest(address, length, alignment, type)); 419 case E820_ALLOCATE_SPECIFIC: 420 if (e820_add_entry(address, address + length, type) != 0) { 421 return (0); 422 } 423 424 return (address); 425 } 426 427 return (0); 428 } 429 430 int 431 e820_init(struct vmctx *const ctx) 432 { 433 uint64_t lowmem_size, highmem_size; 434 int error; 435 436 TAILQ_INIT(&e820_table); 437 438 lowmem_size = vm_get_lowmem_size(ctx); 439 error = e820_add_entry(0, lowmem_size, E820_TYPE_MEMORY); 440 if (error) { 441 warnx("%s: Could not add lowmem", __func__); 442 return (error); 443 } 444 445 highmem_size = vm_get_highmem_size(ctx); 446 if (highmem_size != 0) { 447 error = e820_add_entry(4 * GB, 4 * GB + highmem_size, 448 E820_TYPE_MEMORY); 449 if (error) { 450 warnx("%s: Could not add highmem", __func__); 451 return (error); 452 } 453 } 454 455 error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END); 456 if (error) { 457 warnx("%s: Could not add VGA memory", __func__); 458 return (error); 459 } 460 461 error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END); 462 if (error) { 463 warnx("%s: Could not add ROM area", __func__); 464 return (error); 465 } 466 467 return (0); 468 } 469 470 int 471 e820_finalize(void) 472 { 473 struct qemu_fwcfg_item *e820_fwcfg_item; 474 int error; 475 476 e820_fwcfg_item = e820_get_fwcfg_item(); 477 if (e820_fwcfg_item == NULL) { 478 warnx("invalid e820 table"); 479 return (ENOMEM); 480 } 481 error = qemu_fwcfg_add_file("etc/e820", 482 e820_fwcfg_item->size, e820_fwcfg_item->data); 483 if (error != 0) { 484 warnx("could not add qemu fwcfg etc/e820"); 485 return (error); 486 } 487 free(e820_fwcfg_item); 488 489 return (0); 490 } 491