1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG 5 * Author: Corvin Köhne <c.koehne@beckhoff.com> 6 */ 7 8 #include <sys/types.h> 9 #include <sys/queue.h> 10 11 #include <machine/vmm.h> 12 13 #include <assert.h> 14 #include <err.h> 15 #include <errno.h> 16 #include <stdio.h> 17 #include <stdlib.h> 18 #include <string.h> 19 20 #include "debug.h" 21 #include "e820.h" 22 #include "qemu_fwcfg.h" 23 24 /* 25 * E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it 26 * works on physical addresses. If vm_paddr_t is larger than uint64_t E820 can't 27 * hold all possible physical addresses and we can get into trouble. 28 */ 29 static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t), 30 "Unable to represent physical memory by E820 table"); 31 32 #define E820_FWCFG_FILE_NAME "etc/e820" 33 34 #define KB (1024UL) 35 #define MB (1024 * KB) 36 #define GB (1024 * MB) 37 38 /* 39 * Fix E820 memory holes: 40 * [ A0000, C0000) VGA 41 * [ C0000, 100000) ROM 42 */ 43 #define E820_VGA_MEM_BASE 0xA0000 44 #define E820_VGA_MEM_END 0xC0000 45 #define E820_ROM_MEM_BASE 0xC0000 46 #define E820_ROM_MEM_END 0x100000 47 48 struct e820_element { 49 TAILQ_ENTRY(e820_element) chain; 50 uint64_t base; 51 uint64_t end; 52 enum e820_memory_type type; 53 }; 54 static TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER( 55 e820_table); 56 57 static struct e820_element * 58 e820_element_alloc(uint64_t base, uint64_t end, enum e820_memory_type type) 59 { 60 struct e820_element *element; 61 62 element = calloc(1, sizeof(*element)); 63 if (element == NULL) { 64 return (NULL); 65 } 66 67 element->base = base; 68 element->end = end; 69 element->type = type; 70 71 return (element); 72 } 73 74 static const char * 75 e820_get_type_name(const enum e820_memory_type type) 76 { 77 switch (type) { 78 case E820_TYPE_MEMORY: 79 return ("RAM"); 80 case E820_TYPE_RESERVED: 81 return ("Reserved"); 82 case E820_TYPE_ACPI: 83 return ("ACPI"); 84 case E820_TYPE_NVS: 85 return ("NVS"); 86 default: 87 return ("Unknown"); 88 } 89 } 90 91 void 92 e820_dump_table(void) 93 { 94 struct e820_element *element; 95 uint64_t i; 96 97 EPRINTLN("E820 map:"); 98 99 i = 0; 100 TAILQ_FOREACH(element, &e820_table, chain) { 101 EPRINTLN(" (%4lu) [%16lx, %16lx] %s", i, 102 element->base, element->end, 103 e820_get_type_name(element->type)); 104 105 ++i; 106 } 107 } 108 109 static struct qemu_fwcfg_item * 110 e820_get_fwcfg_item(void) 111 { 112 struct qemu_fwcfg_item *fwcfg_item; 113 struct e820_element *element; 114 struct e820_entry *entries; 115 int count, i; 116 117 count = 0; 118 TAILQ_FOREACH(element, &e820_table, chain) { 119 ++count; 120 } 121 if (count == 0) { 122 warnx("%s: E820 table empty", __func__); 123 return (NULL); 124 } 125 126 fwcfg_item = calloc(1, sizeof(struct qemu_fwcfg_item)); 127 if (fwcfg_item == NULL) { 128 return (NULL); 129 } 130 131 fwcfg_item->size = count * sizeof(struct e820_entry); 132 fwcfg_item->data = calloc(count, sizeof(struct e820_entry)); 133 if (fwcfg_item->data == NULL) { 134 free(fwcfg_item); 135 return (NULL); 136 } 137 138 i = 0; 139 entries = (struct e820_entry *)fwcfg_item->data; 140 TAILQ_FOREACH(element, &e820_table, chain) { 141 struct e820_entry *entry = &entries[i]; 142 143 entry->base = element->base; 144 entry->length = element->end - element->base; 145 entry->type = element->type; 146 147 ++i; 148 } 149 150 return (fwcfg_item); 151 } 152 153 static int 154 e820_add_entry(const uint64_t base, const uint64_t end, 155 const enum e820_memory_type type) 156 { 157 struct e820_element *new_element; 158 struct e820_element *element; 159 struct e820_element *ram_element; 160 161 assert(end >= base); 162 163 new_element = e820_element_alloc(base, end, type); 164 if (new_element == NULL) { 165 return (ENOMEM); 166 } 167 168 /* 169 * E820 table should always be sorted in ascending order. Therefore, 170 * search for a range whose end is larger than the base parameter. 171 */ 172 TAILQ_FOREACH(element, &e820_table, chain) { 173 if (element->end > base) { 174 break; 175 } 176 } 177 178 /* 179 * System memory requires special handling. 180 */ 181 if (type == E820_TYPE_MEMORY) { 182 /* 183 * base is larger than of any existing element. Add new system 184 * memory at the end of the table. 185 */ 186 if (element == NULL) { 187 TAILQ_INSERT_TAIL(&e820_table, new_element, chain); 188 return (0); 189 } 190 191 /* 192 * System memory shouldn't overlap with any existing element. 193 */ 194 assert(end >= element->base); 195 196 TAILQ_INSERT_BEFORE(element, new_element, chain); 197 198 return (0); 199 } 200 201 assert(element != NULL); 202 /* Non system memory should be allocated inside system memory. */ 203 assert(element->type == E820_TYPE_MEMORY); 204 /* New element should fit into existing system memory element. */ 205 assert(base >= element->base && end <= element->end); 206 if (base == element->base && end == element->end) { 207 /* 208 * The new entry replaces an existing one. 209 * 210 * Old table: 211 * [ 0x1000, 0x4000] RAM <-- element 212 * New table: 213 * [ 0x1000, 0x4000] Reserved 214 */ 215 TAILQ_INSERT_BEFORE(element, new_element, chain); 216 TAILQ_REMOVE(&e820_table, element, chain); 217 free(element); 218 } else if (base == element->base) { 219 /* 220 * New element at system memory base boundary. Add new 221 * element before current and adjust the base of the old 222 * element. 223 * 224 * Old table: 225 * [ 0x1000, 0x4000] RAM <-- element 226 * New table: 227 * [ 0x1000, 0x2000] Reserved 228 * [ 0x2000, 0x4000] RAM <-- element 229 */ 230 TAILQ_INSERT_BEFORE(element, new_element, chain); 231 element->base = end; 232 } else if (end == element->end) { 233 /* 234 * New element at system memory end boundary. Add new 235 * element after current and adjust the end of the 236 * current element. 237 * 238 * Old table: 239 * [ 0x1000, 0x4000] RAM <-- element 240 * New table: 241 * [ 0x1000, 0x3000] RAM <-- element 242 * [ 0x3000, 0x4000] Reserved 243 */ 244 TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain); 245 element->end = base; 246 } else { 247 /* 248 * New element inside system memory entry. Split it by 249 * adding a system memory element and the new element 250 * before current. 251 * 252 * Old table: 253 * [ 0x1000, 0x4000] RAM <-- element 254 * New table: 255 * [ 0x1000, 0x2000] RAM 256 * [ 0x2000, 0x3000] Reserved 257 * [ 0x3000, 0x4000] RAM <-- element 258 */ 259 ram_element = e820_element_alloc(element->base, base, 260 E820_TYPE_MEMORY); 261 if (ram_element == NULL) { 262 return (ENOMEM); 263 } 264 TAILQ_INSERT_BEFORE(element, ram_element, chain); 265 TAILQ_INSERT_BEFORE(element, new_element, chain); 266 element->base = end; 267 } 268 269 return (0); 270 } 271 272 static int 273 e820_add_memory_hole(const uint64_t base, const uint64_t end) 274 { 275 struct e820_element *element; 276 struct e820_element *ram_element; 277 278 assert(end >= base); 279 280 /* 281 * E820 table should be always sorted in ascending order. Therefore, 282 * search for an element which end is larger than the base parameter. 283 */ 284 TAILQ_FOREACH(element, &e820_table, chain) { 285 if (element->end > base) { 286 break; 287 } 288 } 289 290 if (element == NULL || end <= element->base) { 291 /* Nothing to do. Hole already exists */ 292 return (0); 293 } 294 295 /* Memory holes are only allowed in system memory */ 296 assert(element->type == E820_TYPE_MEMORY); 297 298 if (base == element->base) { 299 /* 300 * New hole at system memory base boundary. 301 * 302 * Old table: 303 * [ 0x1000, 0x4000] RAM 304 * New table: 305 * [ 0x2000, 0x4000] RAM 306 */ 307 element->base = end; 308 } else if (end == element->end) { 309 /* 310 * New hole at system memory end boundary. 311 * 312 * Old table: 313 * [ 0x1000, 0x4000] RAM 314 * New table: 315 * [ 0x1000, 0x3000] RAM 316 */ 317 element->end = base; 318 } else { 319 /* 320 * New hole inside system memory entry. Split the system memory. 321 * 322 * Old table: 323 * [ 0x1000, 0x4000] RAM <-- element 324 * New table: 325 * [ 0x1000, 0x2000] RAM 326 * [ 0x3000, 0x4000] RAM <-- element 327 */ 328 ram_element = e820_element_alloc(element->base, base, 329 E820_TYPE_MEMORY); 330 if (ram_element == NULL) { 331 return (ENOMEM); 332 } 333 TAILQ_INSERT_BEFORE(element, ram_element, chain); 334 element->base = end; 335 } 336 337 return (0); 338 } 339 340 static uint64_t 341 e820_alloc_highest(const uint64_t max_address, const uint64_t length, 342 const uint64_t alignment, const enum e820_memory_type type) 343 { 344 struct e820_element *element; 345 346 TAILQ_FOREACH_REVERSE(element, &e820_table, e820_table, chain) { 347 uint64_t address, base, end; 348 349 end = MIN(max_address, element->end); 350 base = roundup2(element->base, alignment); 351 352 /* 353 * If end - length == 0, we would allocate memory at address 0. This 354 * address is mostly unusable and we should avoid allocating it. 355 * Therefore, search for another block in that case. 356 */ 357 if (element->type != E820_TYPE_MEMORY || end < base || 358 end - base < length || end - length == 0) { 359 continue; 360 } 361 362 address = rounddown2(end - length, alignment); 363 364 if (e820_add_entry(address, address + length, type) != 0) { 365 return (0); 366 } 367 368 return (address); 369 } 370 371 return (0); 372 } 373 374 static uint64_t 375 e820_alloc_lowest(const uint64_t min_address, const uint64_t length, 376 const uint64_t alignment, const enum e820_memory_type type) 377 { 378 struct e820_element *element; 379 380 TAILQ_FOREACH(element, &e820_table, chain) { 381 uint64_t base, end; 382 383 end = element->end; 384 base = MAX(min_address, roundup2(element->base, alignment)); 385 386 /* 387 * If base == 0, we would allocate memory at address 0. This 388 * address is mostly unusable and we should avoid allocating it. 389 * Therefore, search for another block in that case. 390 */ 391 if (element->type != E820_TYPE_MEMORY || end < base || 392 end - base < length || base == 0) { 393 continue; 394 } 395 396 if (e820_add_entry(base, base + length, type) != 0) { 397 return (0); 398 } 399 400 return (base); 401 } 402 403 return (0); 404 } 405 406 uint64_t 407 e820_alloc(const uint64_t address, const uint64_t length, 408 const uint64_t alignment, const enum e820_memory_type type, 409 const enum e820_allocation_strategy strategy) 410 { 411 assert(powerof2(alignment)); 412 assert((address & (alignment - 1)) == 0); 413 414 switch (strategy) { 415 case E820_ALLOCATE_ANY: 416 /* 417 * Allocate any address. Therefore, ignore the address parameter 418 * and reuse the code path for allocating the lowest address. 419 */ 420 return (e820_alloc_lowest(0, length, alignment, type)); 421 case E820_ALLOCATE_LOWEST: 422 return (e820_alloc_lowest(address, length, alignment, type)); 423 case E820_ALLOCATE_HIGHEST: 424 return (e820_alloc_highest(address, length, alignment, type)); 425 case E820_ALLOCATE_SPECIFIC: 426 if (e820_add_entry(address, address + length, type) != 0) { 427 return (0); 428 } 429 430 return (address); 431 } 432 433 return (0); 434 } 435 436 int 437 e820_init(struct vmctx *const ctx) 438 { 439 uint64_t lowmem_size, highmem_size; 440 int error; 441 442 TAILQ_INIT(&e820_table); 443 444 lowmem_size = vm_get_lowmem_size(ctx); 445 error = e820_add_entry(0, lowmem_size, E820_TYPE_MEMORY); 446 if (error) { 447 warnx("%s: Could not add lowmem", __func__); 448 return (error); 449 } 450 451 highmem_size = vm_get_highmem_size(ctx); 452 if (highmem_size != 0) { 453 error = e820_add_entry(4 * GB, 4 * GB + highmem_size, 454 E820_TYPE_MEMORY); 455 if (error) { 456 warnx("%s: Could not add highmem", __func__); 457 return (error); 458 } 459 } 460 461 error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END); 462 if (error) { 463 warnx("%s: Could not add VGA memory", __func__); 464 return (error); 465 } 466 467 error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END); 468 if (error) { 469 warnx("%s: Could not add ROM area", __func__); 470 return (error); 471 } 472 473 return (0); 474 } 475 476 int 477 e820_finalize(void) 478 { 479 struct qemu_fwcfg_item *e820_fwcfg_item; 480 int error; 481 482 e820_fwcfg_item = e820_get_fwcfg_item(); 483 if (e820_fwcfg_item == NULL) { 484 warnx("invalid e820 table"); 485 return (ENOMEM); 486 } 487 error = qemu_fwcfg_add_file("etc/e820", 488 e820_fwcfg_item->size, e820_fwcfg_item->data); 489 if (error != 0) { 490 warnx("could not add qemu fwcfg etc/e820"); 491 free(e820_fwcfg_item->data); 492 free(e820_fwcfg_item); 493 return (error); 494 } 495 free(e820_fwcfg_item); 496 497 return (0); 498 } 499