1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG 5 * Author: Corvin Köhne <c.koehne@beckhoff.com> 6 */ 7 8 #include <sys/types.h> 9 #include <sys/queue.h> 10 11 #include <machine/vmm.h> 12 13 #include <assert.h> 14 #include <err.h> 15 #include <errno.h> 16 #include <stdio.h> 17 #include <stdlib.h> 18 #include <string.h> 19 20 #include "debug.h" 21 #include "e820.h" 22 #include "qemu_fwcfg.h" 23 24 /* 25 * E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it 26 * works on physical addresses. If vm_paddr_t is larger than uint64_t E820 can't 27 * hold all possible physical addresses and we can get into trouble. 28 */ 29 static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t), 30 "Unable to represent physical memory by E820 table"); 31 32 #define E820_FWCFG_FILE_NAME "etc/e820" 33 34 #define KB (1024UL) 35 #define MB (1024 * KB) 36 #define GB (1024 * MB) 37 38 /* 39 * Fix E820 memory holes: 40 * [ A0000, C0000) VGA 41 * [ C0000, 100000) ROM 42 */ 43 #define E820_VGA_MEM_BASE 0xA0000 44 #define E820_VGA_MEM_END 0xC0000 45 #define E820_ROM_MEM_BASE 0xC0000 46 #define E820_ROM_MEM_END 0x100000 47 48 struct e820_element { 49 TAILQ_ENTRY(e820_element) chain; 50 uint64_t base; 51 uint64_t end; 52 enum e820_memory_type type; 53 }; 54 static TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER( 55 e820_table); 56 57 static struct e820_element * 58 e820_element_alloc(uint64_t base, uint64_t end, enum e820_memory_type type) 59 { 60 struct e820_element *element; 61 62 element = calloc(1, sizeof(*element)); 63 if (element == NULL) { 64 return (NULL); 65 } 66 67 element->base = base; 68 element->end = end; 69 element->type = type; 70 71 return (element); 72 } 73 74 static const char * 75 e820_get_type_name(const enum e820_memory_type type) 76 { 77 switch (type) { 78 case E820_TYPE_MEMORY: 79 return ("RAM"); 80 case E820_TYPE_RESERVED: 81 return ("Reserved"); 82 case E820_TYPE_ACPI: 83 return ("ACPI"); 84 case E820_TYPE_NVS: 85 return ("NVS"); 86 default: 87 return ("Unknown"); 88 } 89 } 90 91 void 92 e820_dump_table(void) 93 { 94 struct e820_element *element; 95 uint64_t i; 96 97 EPRINTLN("E820 map:"); 98 99 i = 0; 100 TAILQ_FOREACH(element, &e820_table, chain) { 101 EPRINTLN(" (%4lu) [%16lx, %16lx] %s", i, 102 element->base, element->end, 103 e820_get_type_name(element->type)); 104 105 ++i; 106 } 107 } 108 109 static struct qemu_fwcfg_item * 110 e820_get_fwcfg_item(void) 111 { 112 struct qemu_fwcfg_item *fwcfg_item; 113 struct e820_element *element; 114 struct e820_entry *entries; 115 int count, i; 116 117 count = 0; 118 TAILQ_FOREACH(element, &e820_table, chain) { 119 ++count; 120 } 121 if (count == 0) { 122 warnx("%s: E820 table empty", __func__); 123 return (NULL); 124 } 125 126 fwcfg_item = calloc(1, sizeof(struct qemu_fwcfg_item)); 127 if (fwcfg_item == NULL) { 128 return (NULL); 129 } 130 131 fwcfg_item->size = count * sizeof(struct e820_entry); 132 fwcfg_item->data = calloc(count, sizeof(struct e820_entry)); 133 if (fwcfg_item->data == NULL) { 134 free(fwcfg_item); 135 return (NULL); 136 } 137 138 i = 0; 139 entries = (struct e820_entry *)fwcfg_item->data; 140 TAILQ_FOREACH(element, &e820_table, chain) { 141 struct e820_entry *entry = &entries[i]; 142 143 entry->base = element->base; 144 entry->length = element->end - element->base; 145 entry->type = element->type; 146 147 ++i; 148 } 149 150 return (fwcfg_item); 151 } 152 153 static int 154 e820_add_entry(const uint64_t base, const uint64_t end, 155 const enum e820_memory_type type) 156 { 157 struct e820_element *new_element; 158 struct e820_element *element; 159 struct e820_element *sib_element; 160 struct e820_element *ram_element; 161 162 assert(end >= base); 163 164 new_element = e820_element_alloc(base, end, type); 165 if (new_element == NULL) { 166 return (ENOMEM); 167 } 168 169 /* 170 * E820 table should always be sorted in ascending order. Therefore, 171 * search for a range whose end is larger than the base parameter. 172 */ 173 TAILQ_FOREACH(element, &e820_table, chain) { 174 if (element->end > base) { 175 break; 176 } 177 } 178 179 /* 180 * System memory requires special handling. 181 */ 182 if (type == E820_TYPE_MEMORY) { 183 /* 184 * base is larger than of any existing element. Add new system 185 * memory at the end of the table. 186 */ 187 if (element == NULL) { 188 TAILQ_INSERT_TAIL(&e820_table, new_element, chain); 189 return (0); 190 } 191 192 /* 193 * System memory shouldn't overlap with any existing element. 194 */ 195 assert(end >= element->base); 196 197 TAILQ_INSERT_BEFORE(element, new_element, chain); 198 199 return (0); 200 } 201 202 /* 203 * If some one tries to allocate a specific address, it could happen, that 204 * this address is not allocatable. Therefore, do some checks. If the 205 * address is not allocatable, don't panic. The user may have a fallback and 206 * tries to allocate another address. This is true for the GVT-d emulation 207 * which tries to reuse the host address of the graphics stolen memory and 208 * falls back to allocating the highest address below 4 GB. 209 */ 210 if (element == NULL || element->type != E820_TYPE_MEMORY || 211 (base < element->base || end > element->end)) 212 return (ENOMEM); 213 214 if (base == element->base && end == element->end) { 215 /* 216 * The new entry replaces an existing one. 217 * 218 * Old table: 219 * [ 0x1000, 0x4000] RAM <-- element 220 * New table: 221 * [ 0x1000, 0x4000] Reserved 222 */ 223 TAILQ_INSERT_BEFORE(element, new_element, chain); 224 TAILQ_REMOVE(&e820_table, element, chain); 225 free(element); 226 } else if (base == element->base) { 227 /* 228 * New element at system memory base boundary. Add new 229 * element before current and adjust the base of the old 230 * element. 231 * 232 * Old table: 233 * [ 0x1000, 0x4000] RAM <-- element 234 * New table: 235 * [ 0x1000, 0x2000] Reserved 236 * [ 0x2000, 0x4000] RAM <-- element 237 */ 238 TAILQ_INSERT_BEFORE(element, new_element, chain); 239 element->base = end; 240 } else if (end == element->end) { 241 /* 242 * New element at system memory end boundary. Add new 243 * element after current and adjust the end of the 244 * current element. 245 * 246 * Old table: 247 * [ 0x1000, 0x4000] RAM <-- element 248 * New table: 249 * [ 0x1000, 0x3000] RAM <-- element 250 * [ 0x3000, 0x4000] Reserved 251 */ 252 TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain); 253 element->end = base; 254 } else { 255 /* 256 * New element inside system memory entry. Split it by 257 * adding a system memory element and the new element 258 * before current. 259 * 260 * Old table: 261 * [ 0x1000, 0x4000] RAM <-- element 262 * New table: 263 * [ 0x1000, 0x2000] RAM 264 * [ 0x2000, 0x3000] Reserved 265 * [ 0x3000, 0x4000] RAM <-- element 266 */ 267 ram_element = e820_element_alloc(element->base, base, 268 E820_TYPE_MEMORY); 269 if (ram_element == NULL) { 270 return (ENOMEM); 271 } 272 TAILQ_INSERT_BEFORE(element, ram_element, chain); 273 TAILQ_INSERT_BEFORE(element, new_element, chain); 274 element->base = end; 275 } 276 277 /* 278 * If the previous element has the same type and ends at our base 279 * boundary, we can merge both entries. 280 */ 281 sib_element = TAILQ_PREV(new_element, e820_table, chain); 282 if (sib_element != NULL && 283 sib_element->type == new_element->type && 284 sib_element->end == new_element->base) { 285 new_element->base = sib_element->base; 286 TAILQ_REMOVE(&e820_table, sib_element, chain); 287 free(sib_element); 288 } 289 290 /* 291 * If the next element has the same type and starts at our end 292 * boundary, we can merge both entries. 293 */ 294 sib_element = TAILQ_NEXT(new_element, chain); 295 if (sib_element != NULL && 296 sib_element->type == new_element->type && 297 sib_element->base == new_element->end) { 298 /* Merge new element into subsequent one. */ 299 new_element->end = sib_element->end; 300 TAILQ_REMOVE(&e820_table, sib_element, chain); 301 free(sib_element); 302 } 303 304 return (0); 305 } 306 307 static int 308 e820_add_memory_hole(const uint64_t base, const uint64_t end) 309 { 310 struct e820_element *element; 311 struct e820_element *ram_element; 312 313 assert(end >= base); 314 315 /* 316 * E820 table should be always sorted in ascending order. Therefore, 317 * search for an element which end is larger than the base parameter. 318 */ 319 TAILQ_FOREACH(element, &e820_table, chain) { 320 if (element->end > base) { 321 break; 322 } 323 } 324 325 if (element == NULL || end <= element->base) { 326 /* Nothing to do. Hole already exists */ 327 return (0); 328 } 329 330 /* Memory holes are only allowed in system memory */ 331 assert(element->type == E820_TYPE_MEMORY); 332 333 if (base == element->base) { 334 /* 335 * New hole at system memory base boundary. 336 * 337 * Old table: 338 * [ 0x1000, 0x4000] RAM 339 * New table: 340 * [ 0x2000, 0x4000] RAM 341 */ 342 element->base = end; 343 } else if (end == element->end) { 344 /* 345 * New hole at system memory end boundary. 346 * 347 * Old table: 348 * [ 0x1000, 0x4000] RAM 349 * New table: 350 * [ 0x1000, 0x3000] RAM 351 */ 352 element->end = base; 353 } else { 354 /* 355 * New hole inside system memory entry. Split the system memory. 356 * 357 * Old table: 358 * [ 0x1000, 0x4000] RAM <-- element 359 * New table: 360 * [ 0x1000, 0x2000] RAM 361 * [ 0x3000, 0x4000] RAM <-- element 362 */ 363 ram_element = e820_element_alloc(element->base, base, 364 E820_TYPE_MEMORY); 365 if (ram_element == NULL) { 366 return (ENOMEM); 367 } 368 TAILQ_INSERT_BEFORE(element, ram_element, chain); 369 element->base = end; 370 } 371 372 return (0); 373 } 374 375 static uint64_t 376 e820_alloc_highest(const uint64_t max_address, const uint64_t length, 377 const uint64_t alignment, const enum e820_memory_type type) 378 { 379 struct e820_element *element; 380 381 TAILQ_FOREACH_REVERSE(element, &e820_table, e820_table, chain) { 382 uint64_t address, base, end; 383 384 end = MIN(max_address, element->end); 385 base = roundup2(element->base, alignment); 386 387 /* 388 * If end - length == 0, we would allocate memory at address 0. This 389 * address is mostly unusable and we should avoid allocating it. 390 * Therefore, search for another block in that case. 391 */ 392 if (element->type != E820_TYPE_MEMORY || end < base || 393 end - base < length || end - length == 0) { 394 continue; 395 } 396 397 address = rounddown2(end - length, alignment); 398 399 if (e820_add_entry(address, address + length, type) != 0) { 400 return (0); 401 } 402 403 return (address); 404 } 405 406 return (0); 407 } 408 409 static uint64_t 410 e820_alloc_lowest(const uint64_t min_address, const uint64_t length, 411 const uint64_t alignment, const enum e820_memory_type type) 412 { 413 struct e820_element *element; 414 415 TAILQ_FOREACH(element, &e820_table, chain) { 416 uint64_t base, end; 417 418 end = element->end; 419 base = MAX(min_address, roundup2(element->base, alignment)); 420 421 /* 422 * If base == 0, we would allocate memory at address 0. This 423 * address is mostly unusable and we should avoid allocating it. 424 * Therefore, search for another block in that case. 425 */ 426 if (element->type != E820_TYPE_MEMORY || end < base || 427 end - base < length || base == 0) { 428 continue; 429 } 430 431 if (e820_add_entry(base, base + length, type) != 0) { 432 return (0); 433 } 434 435 return (base); 436 } 437 438 return (0); 439 } 440 441 uint64_t 442 e820_alloc(const uint64_t address, const uint64_t length, 443 const uint64_t alignment, const enum e820_memory_type type, 444 const enum e820_allocation_strategy strategy) 445 { 446 assert(powerof2(alignment)); 447 assert((address & (alignment - 1)) == 0); 448 449 switch (strategy) { 450 case E820_ALLOCATE_ANY: 451 /* 452 * Allocate any address. Therefore, ignore the address parameter 453 * and reuse the code path for allocating the lowest address. 454 */ 455 return (e820_alloc_lowest(0, length, alignment, type)); 456 case E820_ALLOCATE_LOWEST: 457 return (e820_alloc_lowest(address, length, alignment, type)); 458 case E820_ALLOCATE_HIGHEST: 459 return (e820_alloc_highest(address, length, alignment, type)); 460 case E820_ALLOCATE_SPECIFIC: 461 if (e820_add_entry(address, address + length, type) != 0) { 462 return (0); 463 } 464 465 return (address); 466 } 467 468 return (0); 469 } 470 471 int 472 e820_init(struct vmctx *const ctx) 473 { 474 uint64_t lowmem_size, highmem_size; 475 int error; 476 477 TAILQ_INIT(&e820_table); 478 479 lowmem_size = vm_get_lowmem_size(ctx); 480 error = e820_add_entry(0, lowmem_size, E820_TYPE_MEMORY); 481 if (error) { 482 warnx("%s: Could not add lowmem", __func__); 483 return (error); 484 } 485 486 highmem_size = vm_get_highmem_size(ctx); 487 if (highmem_size != 0) { 488 error = e820_add_entry(4 * GB, 4 * GB + highmem_size, 489 E820_TYPE_MEMORY); 490 if (error) { 491 warnx("%s: Could not add highmem", __func__); 492 return (error); 493 } 494 } 495 496 error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END); 497 if (error) { 498 warnx("%s: Could not add VGA memory", __func__); 499 return (error); 500 } 501 502 error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END); 503 if (error) { 504 warnx("%s: Could not add ROM area", __func__); 505 return (error); 506 } 507 508 return (0); 509 } 510 511 int 512 e820_finalize(void) 513 { 514 struct qemu_fwcfg_item *e820_fwcfg_item; 515 int error; 516 517 e820_fwcfg_item = e820_get_fwcfg_item(); 518 if (e820_fwcfg_item == NULL) { 519 warnx("invalid e820 table"); 520 return (ENOMEM); 521 } 522 error = qemu_fwcfg_add_file("etc/e820", 523 e820_fwcfg_item->size, e820_fwcfg_item->data); 524 if (error != 0) { 525 warnx("could not add qemu fwcfg etc/e820"); 526 free(e820_fwcfg_item->data); 527 free(e820_fwcfg_item); 528 return (error); 529 } 530 free(e820_fwcfg_item); 531 532 return (0); 533 } 534