1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * kaslr.c 4 * 5 * This contains the routines needed to generate a reasonable level of 6 * entropy to choose a randomized kernel base address offset in support 7 * of Kernel Address Space Layout Randomization (KASLR). Additionally 8 * handles walking the physical memory maps (and tracking memory regions 9 * to avoid) in order to select a physical memory location that can 10 * contain the entire properly aligned running kernel image. 11 * 12 */ 13 14 /* 15 * isspace() in linux/ctype.h is expected by next_args() to filter 16 * out "space/lf/tab". While boot/ctype.h conflicts with linux/ctype.h, 17 * since isdigit() is implemented in both of them. Hence disable it 18 * here. 19 */ 20 #define BOOT_CTYPE_H 21 22 #include "misc.h" 23 #include "error.h" 24 #include "../string.h" 25 #include "efi.h" 26 27 #include <generated/compile.h> 28 #include <generated/utsversion.h> 29 #include <generated/utsrelease.h> 30 31 #define _SETUP 32 #include <asm/setup.h> /* For COMMAND_LINE_SIZE */ 33 #undef _SETUP 34 35 extern unsigned long get_cmd_line_ptr(void); 36 37 /* Simplified build-specific string for starting entropy. */ 38 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" 39 LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; 40 41 static unsigned long rotate_xor(unsigned long hash, const void *area, 42 size_t size) 43 { 44 size_t i; 45 unsigned long *ptr = (unsigned long *)area; 46 47 for (i = 0; i < size / sizeof(hash); i++) { 48 /* Rotate by odd number of bits and XOR. */ 49 hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); 50 hash ^= ptr[i]; 51 } 52 53 return hash; 54 } 55 56 /* Attempt to create a simple but unpredictable starting entropy. 
*/ 57 static unsigned long get_boot_seed(void) 58 { 59 unsigned long hash = 0; 60 61 hash = rotate_xor(hash, build_str, sizeof(build_str)); 62 hash = rotate_xor(hash, boot_params_ptr, sizeof(*boot_params_ptr)); 63 64 return hash; 65 } 66 67 #define KASLR_COMPRESSED_BOOT 68 #include "../../lib/kaslr.c" 69 70 71 /* Only supporting at most 4 unusable memmap regions with kaslr */ 72 #define MAX_MEMMAP_REGIONS 4 73 74 static bool memmap_too_large; 75 76 77 /* 78 * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit. 79 * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options. 80 */ 81 static u64 mem_limit; 82 83 /* Number of immovable memory regions */ 84 static int num_immovable_mem; 85 86 enum mem_avoid_index { 87 MEM_AVOID_ZO_RANGE = 0, 88 MEM_AVOID_INITRD, 89 MEM_AVOID_CMDLINE, 90 MEM_AVOID_BOOTPARAMS, 91 MEM_AVOID_MEMMAP_BEGIN, 92 MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1, 93 MEM_AVOID_MAX, 94 }; 95 96 static struct mem_vector mem_avoid[MEM_AVOID_MAX]; 97 98 static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) 99 { 100 /* Item one is entirely before item two. */ 101 if (one->start + one->size <= two->start) 102 return false; 103 /* Item one is entirely after item two. 
*/ 104 if (one->start >= two->start + two->size) 105 return false; 106 return true; 107 } 108 109 char *skip_spaces(const char *str) 110 { 111 while (isspace(*str)) 112 ++str; 113 return (char *)str; 114 } 115 #include "../../../../lib/ctype.c" 116 #include "../../../../lib/cmdline.c" 117 118 static int 119 parse_memmap(char *p, u64 *start, u64 *size) 120 { 121 char *oldp; 122 123 if (!p) 124 return -EINVAL; 125 126 /* We don't care about this option here */ 127 if (!strncmp(p, "exactmap", 8)) 128 return -EINVAL; 129 130 oldp = p; 131 *size = memparse(p, &p); 132 if (p == oldp) 133 return -EINVAL; 134 135 switch (*p) { 136 case '#': 137 case '$': 138 case '!': 139 *start = memparse(p + 1, &p); 140 return 0; 141 case '@': 142 /* 143 * memmap=nn@ss specifies usable region, should 144 * be skipped 145 */ 146 *size = 0; 147 fallthrough; 148 default: 149 /* 150 * If w/o offset, only size specified, memmap=nn[KMG] has the 151 * same behaviour as mem=nn[KMG]. It limits the max address 152 * system can use. Region above the limit should be avoided. 
153 */ 154 *start = 0; 155 return 0; 156 } 157 158 return -EINVAL; 159 } 160 161 static void mem_avoid_memmap(char *str) 162 { 163 static int i; 164 165 if (i >= MAX_MEMMAP_REGIONS) 166 return; 167 168 while (str && (i < MAX_MEMMAP_REGIONS)) { 169 int rc; 170 u64 start, size; 171 char *k = strchr(str, ','); 172 173 if (k) 174 *k++ = 0; 175 176 rc = parse_memmap(str, &start, &size); 177 if (rc < 0) 178 break; 179 str = k; 180 181 if (start == 0) { 182 /* Store the specified memory limit if size > 0 */ 183 if (size > 0 && size < mem_limit) 184 mem_limit = size; 185 186 continue; 187 } 188 189 mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start; 190 mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size; 191 i++; 192 } 193 194 /* More than 4 memmaps, fail kaslr */ 195 if ((i >= MAX_MEMMAP_REGIONS) && str) 196 memmap_too_large = true; 197 } 198 199 /* Store the number of 1GB huge pages which users specified: */ 200 static unsigned long max_gb_huge_pages; 201 202 static void parse_gb_huge_pages(char *param, char *val) 203 { 204 static bool gbpage_sz; 205 char *p; 206 207 if (!strcmp(param, "hugepagesz")) { 208 p = val; 209 if (memparse(p, &p) != PUD_SIZE) { 210 gbpage_sz = false; 211 return; 212 } 213 214 if (gbpage_sz) 215 warn("Repeatedly set hugeTLB page size of 1G!\n"); 216 gbpage_sz = true; 217 return; 218 } 219 220 if (!strcmp(param, "hugepages") && gbpage_sz) { 221 p = val; 222 if (boot_kstrtoul(p, 0, &max_gb_huge_pages)) 223 warn("Failed to parse hugepages= boot parameter\n"); 224 return; 225 } 226 } 227 228 static void handle_mem_options(void) 229 { 230 char *args = (char *)get_cmd_line_ptr(); 231 size_t len; 232 char *tmp_cmdline; 233 char *param, *val; 234 u64 mem_size; 235 236 if (!args) 237 return; 238 239 len = strnlen(args, COMMAND_LINE_SIZE-1); 240 tmp_cmdline = malloc(len + 1); 241 if (!tmp_cmdline) 242 error("Failed to allocate space for tmp_cmdline"); 243 244 memcpy(tmp_cmdline, args, len); 245 tmp_cmdline[len] = 0; 246 args = tmp_cmdline; 247 248 /* Chew 
leading spaces */ 249 args = skip_spaces(args); 250 251 while (*args) { 252 args = next_arg(args, ¶m, &val); 253 /* Stop at -- */ 254 if (!val && strcmp(param, "--") == 0) 255 break; 256 257 if (!strcmp(param, "memmap")) { 258 mem_avoid_memmap(val); 259 } else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) { 260 parse_gb_huge_pages(param, val); 261 } else if (!strcmp(param, "mem")) { 262 char *p = val; 263 264 if (!strcmp(p, "nopentium")) 265 continue; 266 mem_size = memparse(p, &p); 267 if (mem_size == 0) 268 break; 269 270 if (mem_size < mem_limit) 271 mem_limit = mem_size; 272 } 273 } 274 275 free(tmp_cmdline); 276 return; 277 } 278 279 /* 280 * In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM) 281 * on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit. 282 * 283 * The mem_avoid array is used to store the ranges that need to be avoided 284 * when KASLR searches for an appropriate random address. We must avoid any 285 * regions that are unsafe to overlap with during decompression, and other 286 * things like the initrd, cmdline and boot_params. This comment seeks to 287 * explain mem_avoid as clearly as possible since incorrect mem_avoid 288 * memory ranges lead to really hard to debug boot failures. 289 * 290 * The initrd, cmdline, and boot_params are trivial to identify for 291 * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and 292 * MEM_AVOID_BOOTPARAMS respectively below. 293 * 294 * What is not obvious how to avoid is the range of memory that is used 295 * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover 296 * the compressed kernel (ZO) and its run space, which is used to extract 297 * the uncompressed kernel (VO) and relocs. 298 * 299 * ZO's full run size sits against the end of the decompression buffer, so 300 * we can calculate where text, data, bss, etc of ZO are positioned more 301 * easily. 
 *
 * For additional background, the decompression calculations can be found
 * in header.S, and the memory diagram is based on the one found in misc.c.
 *
 * The following conditions are already enforced by the image layouts and
 * associated code:
 *  - input + input_size >= output + output_size
 *  - kernel_total_size <= init_size
 *  - kernel_total_size <= output_size (see Note below)
 *  - output + init_size >= output + output_size
 *
 * (Note that kernel_total_size and output_size have no fundamental
 * relationship, but output_size is passed to choose_random_location
 * as a maximum of the two. The diagram is showing a case where
 * kernel_total_size is larger than output_size, but this case is
 * handled by bumping output_size.)
 *
 * The above conditions can be illustrated by a diagram:
 *
 * 0   output            input            input+input_size    output+init_size
 * |     |                 |                             |             |
 * |     |                 |                             |             |
 * |-----|--------|--------|--------------|-----------|--|-------------|
 *                |                       |           |
 *                |                       |           |
 * output+init_size-ZO_INIT_SIZE  output+output_size  output+kernel_total_size
 *
 * [output, output+init_size) is the entire memory range used for
 * extracting the compressed image.
 *
 * [output, output+kernel_total_size) is the range needed for the
 * uncompressed kernel (VO) and its run size (bss, brk, etc).
 *
 * [output, output+output_size) is VO plus relocs (i.e. the entire
 * uncompressed payload contained by ZO). This is the area of the buffer
 * written to during decompression.
 *
 * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case
 * range of the copied ZO and decompression code. (i.e. the range
 * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.)
 *
 * [input, input+input_size) is the original copied compressed image (ZO)
 * (i.e. it does not include its run size).
This range must be avoided 345 * because it contains the data used for decompression. 346 * 347 * [input+input_size, output+init_size) is [_text, _end) for ZO. This 348 * range includes ZO's heap and stack, and must be avoided since it 349 * performs the decompression. 350 * 351 * Since the above two ranges need to be avoided and they are adjacent, 352 * they can be merged, resulting in: [input, output+init_size) which 353 * becomes the MEM_AVOID_ZO_RANGE below. 354 */ 355 static void mem_avoid_init(unsigned long input, unsigned long input_size, 356 unsigned long output) 357 { 358 unsigned long init_size = boot_params_ptr->hdr.init_size; 359 u64 initrd_start, initrd_size; 360 unsigned long cmd_line, cmd_line_size; 361 362 /* 363 * Avoid the region that is unsafe to overlap during 364 * decompression. 365 */ 366 mem_avoid[MEM_AVOID_ZO_RANGE].start = input; 367 mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; 368 369 /* Avoid initrd. */ 370 initrd_start = (u64)boot_params_ptr->ext_ramdisk_image << 32; 371 initrd_start |= boot_params_ptr->hdr.ramdisk_image; 372 initrd_size = (u64)boot_params_ptr->ext_ramdisk_size << 32; 373 initrd_size |= boot_params_ptr->hdr.ramdisk_size; 374 mem_avoid[MEM_AVOID_INITRD].start = initrd_start; 375 mem_avoid[MEM_AVOID_INITRD].size = initrd_size; 376 /* No need to set mapping for initrd, it will be handled in VO. */ 377 378 /* Avoid kernel command line. */ 379 cmd_line = get_cmd_line_ptr(); 380 /* Calculate size of cmd_line. */ 381 if (cmd_line) { 382 cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE-1) + 1; 383 mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; 384 mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; 385 } 386 387 /* Avoid boot parameters. */ 388 mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params_ptr; 389 mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params_ptr); 390 391 /* We don't need to set a mapping for setup_data. 
*/ 392 393 /* Mark the memmap regions we need to avoid */ 394 handle_mem_options(); 395 396 /* Enumerate the immovable memory regions */ 397 num_immovable_mem = count_immovable_mem_regions(); 398 } 399 400 /* 401 * Does this memory vector overlap a known avoided area? If so, record the 402 * overlap region with the lowest address. 403 */ 404 static bool mem_avoid_overlap(struct mem_vector *img, 405 struct mem_vector *overlap) 406 { 407 int i; 408 struct setup_data *ptr; 409 u64 earliest = img->start + img->size; 410 bool is_overlapping = false; 411 412 for (i = 0; i < MEM_AVOID_MAX; i++) { 413 if (mem_overlaps(img, &mem_avoid[i]) && 414 mem_avoid[i].start < earliest) { 415 *overlap = mem_avoid[i]; 416 earliest = overlap->start; 417 is_overlapping = true; 418 } 419 } 420 421 /* Avoid all entries in the setup_data linked list. */ 422 ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data; 423 while (ptr) { 424 struct mem_vector avoid; 425 426 avoid.start = (unsigned long)ptr; 427 avoid.size = sizeof(*ptr) + ptr->len; 428 429 if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { 430 *overlap = avoid; 431 earliest = overlap->start; 432 is_overlapping = true; 433 } 434 435 if (ptr->type == SETUP_INDIRECT && 436 ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) { 437 avoid.start = ((struct setup_indirect *)ptr->data)->addr; 438 avoid.size = ((struct setup_indirect *)ptr->data)->len; 439 440 if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { 441 *overlap = avoid; 442 earliest = overlap->start; 443 is_overlapping = true; 444 } 445 } 446 447 ptr = (struct setup_data *)(unsigned long)ptr->next; 448 } 449 450 return is_overlapping; 451 } 452 453 struct slot_area { 454 u64 addr; 455 unsigned long num; 456 }; 457 458 #define MAX_SLOT_AREA 100 459 460 static struct slot_area slot_areas[MAX_SLOT_AREA]; 461 static unsigned int slot_area_index; 462 static unsigned long slot_max; 463 464 static void store_slot_info(struct mem_vector 
*region, unsigned long image_size) 465 { 466 struct slot_area slot_area; 467 468 if (slot_area_index == MAX_SLOT_AREA) 469 return; 470 471 slot_area.addr = region->start; 472 slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN; 473 474 slot_areas[slot_area_index++] = slot_area; 475 slot_max += slot_area.num; 476 } 477 478 /* 479 * Skip as many 1GB huge pages as possible in the passed region 480 * according to the number which users specified: 481 */ 482 static void 483 process_gb_huge_pages(struct mem_vector *region, unsigned long image_size) 484 { 485 u64 pud_start, pud_end; 486 unsigned long gb_huge_pages; 487 struct mem_vector tmp; 488 489 if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) { 490 store_slot_info(region, image_size); 491 return; 492 } 493 494 /* Are there any 1GB pages in the region? */ 495 pud_start = ALIGN(region->start, PUD_SIZE); 496 pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE); 497 498 /* No good 1GB huge pages found: */ 499 if (pud_start >= pud_end) { 500 store_slot_info(region, image_size); 501 return; 502 } 503 504 /* Check if the head part of the region is usable. */ 505 if (pud_start >= region->start + image_size) { 506 tmp.start = region->start; 507 tmp.size = pud_start - region->start; 508 store_slot_info(&tmp, image_size); 509 } 510 511 /* Skip the good 1GB pages. */ 512 gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT; 513 if (gb_huge_pages > max_gb_huge_pages) { 514 pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT); 515 max_gb_huge_pages = 0; 516 } else { 517 max_gb_huge_pages -= gb_huge_pages; 518 } 519 520 /* Check if the tail part of the region is usable. */ 521 if (region->start + region->size >= pud_end + image_size) { 522 tmp.start = pud_end; 523 tmp.size = region->start + region->size - pud_end; 524 store_slot_info(&tmp, image_size); 525 } 526 } 527 528 static u64 slots_fetch_random(void) 529 { 530 unsigned long slot; 531 unsigned int i; 532 533 /* Handle case of no slots stored. 
*/ 534 if (slot_max == 0) 535 return 0; 536 537 slot = kaslr_get_random_long("Physical") % slot_max; 538 539 for (i = 0; i < slot_area_index; i++) { 540 if (slot >= slot_areas[i].num) { 541 slot -= slot_areas[i].num; 542 continue; 543 } 544 return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN); 545 } 546 547 if (i == slot_area_index) 548 debug_putstr("slots_fetch_random() failed!?\n"); 549 return 0; 550 } 551 552 static void __process_mem_region(struct mem_vector *entry, 553 unsigned long minimum, 554 unsigned long image_size) 555 { 556 struct mem_vector region, overlap; 557 u64 region_end; 558 559 /* Enforce minimum and memory limit. */ 560 region.start = max_t(u64, entry->start, minimum); 561 region_end = min(entry->start + entry->size, mem_limit); 562 563 /* Give up if slot area array is full. */ 564 while (slot_area_index < MAX_SLOT_AREA) { 565 /* Potentially raise address to meet alignment needs. */ 566 region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); 567 568 /* Did we raise the address above the passed in memory entry? */ 569 if (region.start > region_end) 570 return; 571 572 /* Reduce size by any delta from the original address. */ 573 region.size = region_end - region.start; 574 575 /* Return if region can't contain decompressed kernel */ 576 if (region.size < image_size) 577 return; 578 579 /* If nothing overlaps, store the region and return. */ 580 if (!mem_avoid_overlap(®ion, &overlap)) { 581 process_gb_huge_pages(®ion, image_size); 582 return; 583 } 584 585 /* Store beginning of region if holds at least image_size. */ 586 if (overlap.start >= region.start + image_size) { 587 region.size = overlap.start - region.start; 588 process_gb_huge_pages(®ion, image_size); 589 } 590 591 /* Clip off the overlapping region and start over. 
*/ 592 region.start = overlap.start + overlap.size; 593 } 594 } 595 596 static bool process_mem_region(struct mem_vector *region, 597 unsigned long minimum, 598 unsigned long image_size) 599 { 600 int i; 601 /* 602 * If no immovable memory found, or MEMORY_HOTREMOVE disabled, 603 * use @region directly. 604 */ 605 if (!num_immovable_mem) { 606 __process_mem_region(region, minimum, image_size); 607 608 if (slot_area_index == MAX_SLOT_AREA) { 609 debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n"); 610 return true; 611 } 612 return false; 613 } 614 615 #if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI) 616 /* 617 * If immovable memory found, filter the intersection between 618 * immovable memory and @region. 619 */ 620 for (i = 0; i < num_immovable_mem; i++) { 621 u64 start, end, entry_end, region_end; 622 struct mem_vector entry; 623 624 if (!mem_overlaps(region, &immovable_mem[i])) 625 continue; 626 627 start = immovable_mem[i].start; 628 end = start + immovable_mem[i].size; 629 region_end = region->start + region->size; 630 631 entry.start = clamp(region->start, start, end); 632 entry_end = clamp(region_end, start, end); 633 entry.size = entry_end - entry.start; 634 635 __process_mem_region(&entry, minimum, image_size); 636 637 if (slot_area_index == MAX_SLOT_AREA) { 638 debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n"); 639 return true; 640 } 641 } 642 #endif 643 return false; 644 } 645 646 #ifdef CONFIG_EFI 647 648 /* 649 * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are 650 * guaranteed to be free. 651 * 652 * Pick free memory more conservatively than the EFI spec allows: according to 653 * the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory and thus 654 * available to place the kernel image into, but in practice there's firmware 655 * where using that memory leads to crashes. 
Buggy vendor EFI code registers 656 * for an event that triggers on SetVirtualAddressMap(). The handler assumes 657 * that EFI_BOOT_SERVICES_DATA memory has not been touched by loader yet, which 658 * is probably true for Windows. 659 * 660 * Preserve EFI_BOOT_SERVICES_* regions until after SetVirtualAddressMap(). 661 */ 662 static inline bool memory_type_is_free(efi_memory_desc_t *md) 663 { 664 if (md->type == EFI_CONVENTIONAL_MEMORY) 665 return true; 666 667 if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) && 668 md->type == EFI_UNACCEPTED_MEMORY) 669 return true; 670 671 return false; 672 } 673 674 /* 675 * Returns true if we processed the EFI memmap, which we prefer over the E820 676 * table if it is available. 677 */ 678 static bool 679 process_efi_entries(unsigned long minimum, unsigned long image_size) 680 { 681 struct efi_info *e = &boot_params_ptr->efi_info; 682 bool efi_mirror_found = false; 683 struct mem_vector region; 684 efi_memory_desc_t *md; 685 unsigned long pmap; 686 char *signature; 687 u32 nr_desc; 688 int i; 689 690 signature = (char *)&e->efi_loader_signature; 691 if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) && 692 strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) 693 return false; 694 695 #ifdef CONFIG_X86_32 696 /* Can't handle data above 4GB at this time */ 697 if (e->efi_memmap_hi) { 698 warn("EFI memmap is above 4GB, can't be handled now on x86_32. 
EFI should be disabled.\n"); 699 return false; 700 } 701 pmap = e->efi_memmap; 702 #else 703 pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); 704 #endif 705 706 nr_desc = e->efi_memmap_size / e->efi_memdesc_size; 707 for (i = 0; i < nr_desc; i++) { 708 md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i); 709 if (md->attribute & EFI_MEMORY_MORE_RELIABLE) { 710 efi_mirror_found = true; 711 break; 712 } 713 } 714 715 for (i = 0; i < nr_desc; i++) { 716 md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i); 717 718 if (!memory_type_is_free(md)) 719 continue; 720 721 if (efi_soft_reserve_enabled() && 722 (md->attribute & EFI_MEMORY_SP)) 723 continue; 724 725 if (efi_mirror_found && 726 !(md->attribute & EFI_MEMORY_MORE_RELIABLE)) 727 continue; 728 729 region.start = md->phys_addr; 730 region.size = md->num_pages << EFI_PAGE_SHIFT; 731 if (process_mem_region(®ion, minimum, image_size)) 732 break; 733 } 734 return true; 735 } 736 #else 737 static inline bool 738 process_efi_entries(unsigned long minimum, unsigned long image_size) 739 { 740 return false; 741 } 742 #endif 743 744 static void process_e820_entries(unsigned long minimum, 745 unsigned long image_size) 746 { 747 int i; 748 struct mem_vector region; 749 struct boot_e820_entry *entry; 750 751 /* Verify potential e820 positions, appending to slots list. */ 752 for (i = 0; i < boot_params_ptr->e820_entries; i++) { 753 entry = &boot_params_ptr->e820_table[i]; 754 /* Skip non-RAM entries. */ 755 if (entry->type != E820_TYPE_RAM) 756 continue; 757 region.start = entry->addr; 758 region.size = entry->size; 759 if (process_mem_region(®ion, minimum, image_size)) 760 break; 761 } 762 } 763 764 /* 765 * If KHO is active, only process its scratch areas to ensure we are not 766 * stepping onto preserved memory. 
767 */ 768 static bool process_kho_entries(unsigned long minimum, unsigned long image_size) 769 { 770 struct kho_scratch *kho_scratch; 771 struct setup_data *ptr; 772 struct kho_data *kho; 773 int i, nr_areas = 0; 774 775 if (!IS_ENABLED(CONFIG_KEXEC_HANDOVER)) 776 return false; 777 778 ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data; 779 while (ptr) { 780 if (ptr->type == SETUP_KEXEC_KHO) { 781 kho = (struct kho_data *)(unsigned long)ptr->data; 782 kho_scratch = (void *)(unsigned long)kho->scratch_addr; 783 nr_areas = kho->scratch_size / sizeof(*kho_scratch); 784 break; 785 } 786 787 ptr = (struct setup_data *)(unsigned long)ptr->next; 788 } 789 790 if (!nr_areas) 791 return false; 792 793 for (i = 0; i < nr_areas; i++) { 794 struct kho_scratch *area = &kho_scratch[i]; 795 struct mem_vector region = { 796 .start = area->addr, 797 .size = area->size, 798 }; 799 800 if (process_mem_region(®ion, minimum, image_size)) 801 break; 802 } 803 804 return true; 805 } 806 807 static unsigned long find_random_phys_addr(unsigned long minimum, 808 unsigned long image_size) 809 { 810 u64 phys_addr; 811 812 /* Bail out early if it's impossible to succeed. */ 813 if (minimum + image_size > mem_limit) 814 return 0; 815 816 /* Check if we had too many memmaps. */ 817 if (memmap_too_large) { 818 debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n"); 819 return 0; 820 } 821 822 /* 823 * During kexec handover only process KHO scratch areas that are known 824 * not to contain any data that must be preserved. 825 */ 826 if (!process_kho_entries(minimum, image_size) && 827 !process_efi_entries(minimum, image_size)) 828 process_e820_entries(minimum, image_size); 829 830 phys_addr = slots_fetch_random(); 831 832 /* Perform a final check to make sure the address is in range. 
*/ 833 if (phys_addr < minimum || phys_addr + image_size > mem_limit) { 834 warn("Invalid physical address chosen!\n"); 835 return 0; 836 } 837 838 return (unsigned long)phys_addr; 839 } 840 841 static unsigned long find_random_virt_addr(unsigned long minimum, 842 unsigned long image_size) 843 { 844 unsigned long slots, random_addr; 845 846 /* 847 * There are how many CONFIG_PHYSICAL_ALIGN-sized slots 848 * that can hold image_size within the range of minimum to 849 * KERNEL_IMAGE_SIZE? 850 */ 851 slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN; 852 853 random_addr = kaslr_get_random_long("Virtual") % slots; 854 855 return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; 856 } 857 858 /* 859 * Since this function examines addresses much more numerically, 860 * it takes the input and output pointers as 'unsigned long'. 861 */ 862 void choose_random_location(unsigned long input, 863 unsigned long input_size, 864 unsigned long *output, 865 unsigned long output_size, 866 unsigned long *virt_addr) 867 { 868 unsigned long random_addr, min_addr; 869 870 if (cmdline_find_option_bool("nokaslr")) { 871 warn("KASLR disabled: 'nokaslr' on cmdline."); 872 return; 873 } 874 875 boot_params_ptr->hdr.loadflags |= KASLR_FLAG; 876 877 if (IS_ENABLED(CONFIG_X86_32)) 878 mem_limit = KERNEL_IMAGE_SIZE; 879 else 880 mem_limit = MAXMEM; 881 882 /* Record the various known unsafe memory ranges. */ 883 mem_avoid_init(input, input_size, *output); 884 885 /* 886 * Low end of the randomization range should be the 887 * smaller of 512M or the initial kernel image 888 * location: 889 */ 890 min_addr = min(*output, 512UL << 20); 891 /* Make sure minimum is aligned. */ 892 min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN); 893 894 /* Walk available memory entries to find a random address. 
*/ 895 random_addr = find_random_phys_addr(min_addr, output_size); 896 if (!random_addr) { 897 warn("Physical KASLR disabled: no suitable memory region!"); 898 } else { 899 /* Update the new physical address location. */ 900 if (*output != random_addr) 901 *output = random_addr; 902 } 903 904 905 /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */ 906 if (IS_ENABLED(CONFIG_X86_64)) 907 random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size); 908 *virt_addr = random_addr; 909 } 910