// SPDX-License-Identifier: GPL-2.0
/*
 * kaslr.c
 *
 * This contains the routines needed to generate a reasonable level of
 * entropy to choose a randomized kernel base address offset in support
 * of Kernel Address Space Layout Randomization (KASLR). Additionally
 * handles walking the physical memory maps (and tracking memory regions
 * to avoid) in order to select a physical memory location that can
 * contain the entire properly aligned running kernel image.
 *
 */

/*
 * isspace() in linux/ctype.h is expected by next_arg() to filter
 * out "space/lf/tab". boot/ctype.h conflicts with linux/ctype.h,
 * since isdigit() is implemented in both of them, so disable
 * boot/ctype.h here.
 */
#define BOOT_CTYPE_H

#include "misc.h"
#include "error.h"
#include "../string.h"

#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
#include <linux/utsname.h>
#include <linux/ctype.h>
#include <linux/efi.h>
#include <generated/utsrelease.h>
#include <asm/efi.h>

/* Macros used by the included decompressor code below. */
#define STATIC
#include <linux/decompress/mm.h>

#define _SETUP
#include <asm/setup.h>	/* For COMMAND_LINE_SIZE */
#undef _SETUP

#ifdef CONFIG_X86_5LEVEL
unsigned int __pgtable_l5_enabled;
unsigned int pgdir_shift __ro_after_init = 39;
unsigned int ptrs_per_p4d __ro_after_init = 1;
#endif

extern unsigned long get_cmd_line_ptr(void);

/* Used by PAGE_KERN* macros: */
pteval_t __default_kernel_pte_mask __read_mostly = ~0;

/* Simplified build-specific string for starting entropy. */
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;

static unsigned long rotate_xor(unsigned long hash, const void *area,
				size_t size)
{
	size_t i;
	unsigned long *ptr = (unsigned long *)area;

	for (i = 0; i < size / sizeof(hash); i++) {
		/* Rotate by odd number of bits and XOR. */
		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
		hash ^= ptr[i];
	}

	return hash;
}

/* Attempt to create a simple but unpredictable starting entropy. */
static unsigned long get_boot_seed(void)
{
	unsigned long hash = 0;

	hash = rotate_xor(hash, build_str, sizeof(build_str));
	hash = rotate_xor(hash, boot_params, sizeof(*boot_params));

	return hash;
}

#define KASLR_COMPRESSED_BOOT
#include "../../lib/kaslr.c"

/* Only supporting at most 4 unusable memmap regions with kaslr */
#define MAX_MEMMAP_REGIONS	4

static bool memmap_too_large;

/*
 * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit.
 * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options.
 */
static u64 mem_limit;

/* Number of immovable memory regions */
static int num_immovable_mem;

enum mem_avoid_index {
	MEM_AVOID_ZO_RANGE = 0,
	MEM_AVOID_INITRD,
	MEM_AVOID_CMDLINE,
	MEM_AVOID_BOOTPARAMS,
	MEM_AVOID_MEMMAP_BEGIN,
	MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
	MEM_AVOID_MAX,
};

static struct mem_vector mem_avoid[MEM_AVOID_MAX];

static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
{
	/* Item one is entirely before item two. */
	if (one->start + one->size <= two->start)
		return false;
	/* Item one is entirely after item two. */
	if (one->start >= two->start + two->size)
		return false;
	return true;
}

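/*
 * Worked example for mem_overlaps(), with hypothetical values:
 *
 *	one = { .start = 0x1000000, .size = 0x200000 }	-> [16M, 18M)
 *	two = { .start = 0x11f0000, .size = 0x100000 }	-> [~17.9M, ~18.9M)
 *
 * one->start + one->size (0x1200000) is above two->start (0x11f0000) and
 * one->start (0x1000000) is below two->start + two->size (0x12f0000), so
 * the ranges overlap. Both vectors are treated as half-open intervals
 * [start, start + size), so regions that merely touch do not count as
 * overlapping.
 */
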
char *skip_spaces(const char *str)
{
	while (isspace(*str))
		++str;
	return (char *)str;
}
#include "../../../../lib/ctype.c"
#include "../../../../lib/cmdline.c"

enum parse_mode {
	PARSE_MEMMAP,
	PARSE_EFI,
};

static int
parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
{
	char *oldp;

	if (!p)
		return -EINVAL;

	/* We don't care about this option here */
	if (!strncmp(p, "exactmap", 8))
		return -EINVAL;

	oldp = p;
	*size = memparse(p, &p);
	if (p == oldp)
		return -EINVAL;

	switch (*p) {
	case '#':
	case '$':
	case '!':
		*start = memparse(p + 1, &p);
		return 0;
	case '@':
		if (mode == PARSE_MEMMAP) {
			/*
			 * memmap=nn@ss specifies a usable region and
			 * should be skipped.
			 */
			*size = 0;
		} else {
			u64 flags;

			/*
			 * For efi_fake_mem=nn@ss:attr, the attr specifies
			 * flags that might imply a soft-reservation.
			 */
			*start = memparse(p + 1, &p);
			if (p && *p == ':') {
				p++;
				if (kstrtoull(p, 0, &flags) < 0)
					*size = 0;
				else if (flags & EFI_MEMORY_SP)
					return 0;
			}
			*size = 0;
		}
		fallthrough;
	default:
		/*
		 * If no offset is given and only a size is specified,
		 * memmap=nn[KMG] behaves like mem=nn[KMG]: it limits the
		 * maximum address the system can use, and the region
		 * above that limit must be avoided.
		 */
		*start = 0;
		return 0;
	}

	return -EINVAL;
}

static void mem_avoid_memmap(enum parse_mode mode, char *str)
{
	static int i;

	if (i >= MAX_MEMMAP_REGIONS)
		return;

	while (str && (i < MAX_MEMMAP_REGIONS)) {
		int rc;
		u64 start, size;
		char *k = strchr(str, ',');

		if (k)
			*k++ = 0;

		rc = parse_memmap(str, &start, &size, mode);
		if (rc < 0)
			break;
		str = k;

		if (start == 0) {
			/* Store the specified memory limit if size > 0 */
			if (size > 0 && size < mem_limit)
				mem_limit = size;

			continue;
		}

		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
		mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
		i++;
	}

	/* More than MAX_MEMMAP_REGIONS unusable regions: disable KASLR. */
	if ((i >= MAX_MEMMAP_REGIONS) && str)
		memmap_too_large = true;
}

/* Store the number of 1GB huge pages which the user specified: */
static unsigned long max_gb_huge_pages;

static void parse_gb_huge_pages(char *param, char *val)
{
	static bool gbpage_sz;
	char *p;

	if (!strcmp(param, "hugepagesz")) {
		p = val;
		if (memparse(p, &p) != PUD_SIZE) {
			gbpage_sz = false;
			return;
		}

		if (gbpage_sz)
			warn("Repeatedly set hugeTLB page size of 1G!\n");
		gbpage_sz = true;
		return;
	}

	if (!strcmp(param, "hugepages") && gbpage_sz) {
		p = val;
		max_gb_huge_pages = simple_strtoull(p, &p, 0);
		return;
	}
}

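/*
 * Example of the stateful hugepage parse above, for a hypothetical
 * command line containing:
 *
 *	hugepagesz=1G hugepages=4 hugepagesz=2M hugepages=512
 *
 * The first pair sets gbpage_sz and records max_gb_huge_pages = 4. The
 * second "hugepagesz=2M" clears gbpage_sz again (2M != PUD_SIZE), so the
 * following "hugepages=512" is ignored here. "hugepages=" only counts
 * toward max_gb_huge_pages when it follows a 1G "hugepagesz=" option.
 */
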
static void handle_mem_options(void)
{
	char *args = (char *)get_cmd_line_ptr();
	size_t len;
	char *tmp_cmdline;
	char *param, *val;
	u64 mem_size;

	if (!args)
		return;

	len = strnlen(args, COMMAND_LINE_SIZE - 1);
	tmp_cmdline = malloc(len + 1);
	if (!tmp_cmdline)
		error("Failed to allocate space for tmp_cmdline");

	memcpy(tmp_cmdline, args, len);
	tmp_cmdline[len] = 0;
	args = tmp_cmdline;

	/* Chew leading spaces */
	args = skip_spaces(args);

	while (*args) {
		args = next_arg(args, &param, &val);
		/* Stop at -- */
		if (!val && strcmp(param, "--") == 0)
			break;

		if (!strcmp(param, "memmap")) {
			mem_avoid_memmap(PARSE_MEMMAP, val);
		} else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) {
			parse_gb_huge_pages(param, val);
		} else if (!strcmp(param, "mem")) {
			char *p = val;

			if (!strcmp(p, "nopentium"))
				continue;
			mem_size = memparse(p, &p);
			if (mem_size == 0)
				break;

			if (mem_size < mem_limit)
				mem_limit = mem_size;
		} else if (!strcmp(param, "efi_fake_mem")) {
			mem_avoid_memmap(PARSE_EFI, val);
		}
	}

	free(tmp_cmdline);
}

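/*
 * Example of how handle_mem_options() treats a hypothetical command line:
 *
 *	mem=4G memmap=64M$0x80000000 efi_fake_mem=1G@0x100000000:attr
 *
 * "mem=4G" lowers mem_limit to 4G, "memmap=64M$0x80000000" records the
 * 64M region at 2G via mem_avoid_memmap(PARSE_MEMMAP, ...), and the
 * efi_fake_mem argument is handed to mem_avoid_memmap(PARSE_EFI, ...),
 * which only records the range if "attr" contains EFI_MEMORY_SP (i.e.
 * it implies a soft reservation). The values are made up purely to show
 * which branch each option takes.
 */
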
/*
 * In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM)
 * on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit.
 *
 * The mem_avoid array is used to store the ranges that need to be avoided
 * when KASLR searches for an appropriate random address. We must avoid any
 * regions that are unsafe to overlap with during decompression, and other
 * things like the initrd, cmdline and boot_params. This comment seeks to
 * explain mem_avoid as clearly as possible since incorrect mem_avoid
 * memory ranges lead to boot failures that are very hard to debug.
 *
 * The initrd, cmdline, and boot_params are trivial to identify for
 * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and
 * MEM_AVOID_BOOTPARAMS respectively below.
 *
 * What is less obvious is how to avoid the range of memory that is used
 * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover
 * the compressed kernel (ZO) and its run space, which is used to extract
 * the uncompressed kernel (VO) and relocs.
 *
 * ZO's full run size sits against the end of the decompression buffer, so
 * we can calculate where the text, data, bss, etc. of ZO are positioned
 * more easily.
 *
 * For additional background, the decompression calculations can be found
 * in header.S, and the memory diagram is based on the one found in misc.c.
 *
 * The following conditions are already enforced by the image layouts and
 * associated code:
 *  - input + input_size >= output + output_size
 *  - kernel_total_size <= init_size
 *  - kernel_total_size <= output_size (see Note below)
 *  - output + init_size >= output + output_size
 *
 * (Note that kernel_total_size and output_size have no fundamental
 * relationship, but output_size is passed to choose_random_location
 * as a maximum of the two. The diagram shows a case where
 * kernel_total_size is larger than output_size, but this case is
 * handled by bumping output_size.)
 *
 * The above conditions can be illustrated by a diagram:
 *
 * 0   output            input            input+input_size    output+init_size
 * |     |                 |                          |                |
 * |     |                 |                          |                |
 * |-----|--------|--------|--------------|-----------|--|-------------|
 *                |                       |              |
 *                |                       |              |
 * output+init_size-ZO_INIT_SIZE   output+output_size  output+kernel_total_size
 *
 * [output, output+init_size) is the entire memory range used for
 * extracting the compressed image.
 *
 * [output, output+kernel_total_size) is the range needed for the
 * uncompressed kernel (VO) and its run size (bss, brk, etc).
 *
 * [output, output+output_size) is VO plus relocs (i.e. the entire
 * uncompressed payload contained by ZO). This is the area of the buffer
 * written to during decompression.
 *
 * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case
 * range of the copied ZO and decompression code. (i.e. the range of size
 * ZO_INIT_SIZE counted backwards from output+init_size.)
 *
 * [input, input+input_size) is the original copied compressed image (ZO)
 * (i.e. it does not include its run size). This range must be avoided
 * because it contains the data used for decompression.
 *
 * [input+input_size, output+init_size) is [_text, _end) for ZO. This
 * range includes ZO's heap and stack, and must be avoided since the code
 * running there performs the decompression.
 *
 * Since the above two ranges need to be avoided and they are adjacent,
 * they can be merged, resulting in: [input, output+init_size), which
 * becomes the MEM_AVOID_ZO_RANGE below.
 */
static void mem_avoid_init(unsigned long input, unsigned long input_size,
			   unsigned long output)
{
	unsigned long init_size = boot_params->hdr.init_size;
	u64 initrd_start, initrd_size;
	unsigned long cmd_line, cmd_line_size;

	/*
	 * Avoid the region that is unsafe to overlap during
	 * decompression.
	 */
	mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
	mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
	add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start,
			 mem_avoid[MEM_AVOID_ZO_RANGE].size);

	/* Avoid initrd. */
	initrd_start = (u64)boot_params->ext_ramdisk_image << 32;
	initrd_start |= boot_params->hdr.ramdisk_image;
	initrd_size = (u64)boot_params->ext_ramdisk_size << 32;
	initrd_size |= boot_params->hdr.ramdisk_size;
	mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
	mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
	/* No need to set mapping for initrd, it will be handled in VO. */

	/* Avoid kernel command line. */
	cmd_line = get_cmd_line_ptr();
	/* Calculate size of cmd_line. */
	if (cmd_line) {
		cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE - 1) + 1;
		mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
		mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
		add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
				 mem_avoid[MEM_AVOID_CMDLINE].size);
	}

	/* Avoid boot parameters. */
	mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
	mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
	add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
			 mem_avoid[MEM_AVOID_BOOTPARAMS].size);

	/* We don't need to set a mapping for setup_data. */

	/* Mark the memmap regions we need to avoid */
	handle_mem_options();

	/* Enumerate the immovable memory regions */
	num_immovable_mem = count_immovable_mem_regions();

#ifdef CONFIG_X86_VERBOSE_BOOTUP
	/* Make sure video RAM can be used. */
	add_identity_map(0, PMD_SIZE);
#endif
}

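/*
 * Worked example for MEM_AVOID_ZO_RANGE, with hypothetical numbers:
 * assume the boot loader copied ZO to input = 0x3000000 with
 * input_size = 0x800000, the decompression buffer starts at
 * output = 0x1000000, and boot_params->hdr.init_size = 0x3e00000. Then:
 *
 *	MEM_AVOID_ZO_RANGE = [input, output + init_size)
 *	                   = [0x3000000, 0x4e00000)
 *
 * i.e. everything from the copied ZO image up to the end of its run
 * space is excluded, which is exactly the merged range described in the
 * block comment above mem_avoid_init().
 */
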
/*
 * Does this memory vector overlap a known avoided area? If so, record the
 * overlap region with the lowest address.
 */
static bool mem_avoid_overlap(struct mem_vector *img,
			      struct mem_vector *overlap)
{
	int i;
	struct setup_data *ptr;
	u64 earliest = img->start + img->size;
	bool is_overlapping = false;

	for (i = 0; i < MEM_AVOID_MAX; i++) {
		if (mem_overlaps(img, &mem_avoid[i]) &&
		    mem_avoid[i].start < earliest) {
			*overlap = mem_avoid[i];
			earliest = overlap->start;
			is_overlapping = true;
		}
	}

	/* Avoid all entries in the setup_data linked list. */
	ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
	while (ptr) {
		struct mem_vector avoid;

		avoid.start = (unsigned long)ptr;
		avoid.size = sizeof(*ptr) + ptr->len;

		if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
			*overlap = avoid;
			earliest = overlap->start;
			is_overlapping = true;
		}

		if (ptr->type == SETUP_INDIRECT &&
		    ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
			avoid.start = ((struct setup_indirect *)ptr->data)->addr;
			avoid.size = ((struct setup_indirect *)ptr->data)->len;

			if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
				*overlap = avoid;
				earliest = overlap->start;
				is_overlapping = true;
			}
		}

		ptr = (struct setup_data *)(unsigned long)ptr->next;
	}

	return is_overlapping;
}

struct slot_area {
	u64 addr;
	unsigned long num;
};

#define MAX_SLOT_AREA 100

static struct slot_area slot_areas[MAX_SLOT_AREA];
static unsigned int slot_area_index;
static unsigned long slot_max;

static void store_slot_info(struct mem_vector *region, unsigned long image_size)
{
	struct slot_area slot_area;

	if (slot_area_index == MAX_SLOT_AREA)
		return;

	slot_area.addr = region->start;
	slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN;

	slot_areas[slot_area_index++] = slot_area;
	slot_max += slot_area.num;
}

/*
 * Skip as many 1GB huge pages as possible in the passed region
 * according to the number that the user specified:
 */
static void
process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
{
	u64 pud_start, pud_end;
	unsigned long gb_huge_pages;
	struct mem_vector tmp;

	if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) {
		store_slot_info(region, image_size);
		return;
	}

	/* Are there any 1GB pages in the region? */
	pud_start = ALIGN(region->start, PUD_SIZE);
	pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE);

	/* No good 1GB huge pages found: */
	if (pud_start >= pud_end) {
		store_slot_info(region, image_size);
		return;
	}

	/* Check if the head part of the region is usable. */
	if (pud_start >= region->start + image_size) {
		tmp.start = region->start;
		tmp.size = pud_start - region->start;
		store_slot_info(&tmp, image_size);
	}

	/* Skip the good 1GB pages. */
	gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT;
	if (gb_huge_pages > max_gb_huge_pages) {
		pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT);
		max_gb_huge_pages = 0;
	} else {
		max_gb_huge_pages -= gb_huge_pages;
	}

	/* Check if the tail part of the region is usable. */
	if (region->start + region->size >= pud_end + image_size) {
		tmp.start = pud_end;
		tmp.size = region->start + region->size - pud_end;
		store_slot_info(&tmp, image_size);
	}
}

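/*
 * Worked example for the slot accounting above, with hypothetical
 * numbers (CONFIG_PHYSICAL_ALIGN = 2M): a free region
 * [0x40000000, 0xc0000000) with image_size = 32M and
 * max_gb_huge_pages = 1 gives pud_start = 0x40000000 and
 * pud_end = 0xc0000000, i.e. two aligned 1GB pages. One page,
 * [0x40000000, 0x80000000), is reserved for the huge page and skipped;
 * the head part is empty because pud_start equals region->start; the
 * tail [0x80000000, 0xc0000000) is handed to store_slot_info(), which
 * records 1 + (0x40000000 - 0x2000000) / 0x200000 = 497 candidate slots.
 */
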
static u64 slots_fetch_random(void)
{
	unsigned long slot;
	unsigned int i;

	/* Handle case of no slots stored. */
	if (slot_max == 0)
		return 0;

	slot = kaslr_get_random_long("Physical") % slot_max;

	for (i = 0; i < slot_area_index; i++) {
		if (slot >= slot_areas[i].num) {
			slot -= slot_areas[i].num;
			continue;
		}
		return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN);
	}

	if (i == slot_area_index)
		debug_putstr("slots_fetch_random() failed!?\n");
	return 0;
}

static void __process_mem_region(struct mem_vector *entry,
				 unsigned long minimum,
				 unsigned long image_size)
{
	struct mem_vector region, overlap;
	u64 region_end;

	/* Enforce minimum and memory limit. */
	region.start = max_t(u64, entry->start, minimum);
	region_end = min(entry->start + entry->size, mem_limit);

	/* Give up if slot area array is full. */
	while (slot_area_index < MAX_SLOT_AREA) {
		/* Potentially raise address to meet alignment needs. */
		region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);

		/* Did we raise the address above the passed-in memory entry? */
		if (region.start > region_end)
			return;

		/* Reduce size by any delta from the original address. */
		region.size = region_end - region.start;

		/* Return if region can't contain decompressed kernel */
		if (region.size < image_size)
			return;

		/* If nothing overlaps, store the region and return. */
		if (!mem_avoid_overlap(&region, &overlap)) {
			process_gb_huge_pages(&region, image_size);
			return;
		}

		/* Store beginning of region if it holds at least image_size. */
		if (overlap.start >= region.start + image_size) {
			region.size = overlap.start - region.start;
			process_gb_huge_pages(&region, image_size);
		}

		/* Clip off the overlapping region and start over. */
		region.start = overlap.start + overlap.size;
	}
}

static bool process_mem_region(struct mem_vector *region,
			       unsigned long minimum,
			       unsigned long image_size)
{
	int i;
	/*
	 * If no immovable memory found, or MEMORY_HOTREMOVE disabled,
	 * use @region directly.
	 */
	if (!num_immovable_mem) {
		__process_mem_region(region, minimum, image_size);

		if (slot_area_index == MAX_SLOT_AREA) {
			debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n");
			return true;
		}
		return false;
	}

#if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
	/*
	 * If immovable memory found, filter the intersection between
	 * immovable memory and @region.
	 */
	for (i = 0; i < num_immovable_mem; i++) {
		u64 start, end, entry_end, region_end;
		struct mem_vector entry;

		if (!mem_overlaps(region, &immovable_mem[i]))
			continue;

		start = immovable_mem[i].start;
		end = start + immovable_mem[i].size;
		region_end = region->start + region->size;

		entry.start = clamp(region->start, start, end);
		entry_end = clamp(region_end, start, end);
		entry.size = entry_end - entry.start;

		__process_mem_region(&entry, minimum, image_size);

		if (slot_area_index == MAX_SLOT_AREA) {
			debug_putstr("Aborted e820/efi memmap scan when walking immovable regions (slot_areas full)!\n");
			return true;
		}
	}
#endif
	return false;
}

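/*
 * Worked example for the immovable-memory clamping above, with
 * hypothetical numbers: for region = [0x100000000, 0x180000000) and
 * immovable_mem[i] = [0x140000000, 0x1c0000000), the two overlap and
 * the clamp() calls reduce the candidate to their intersection:
 *
 *	entry.start = clamp(0x100000000, 0x140000000, 0x1c0000000)
 *	            = 0x140000000
 *	entry_end   = clamp(0x180000000, 0x140000000, 0x1c0000000)
 *	            = 0x180000000
 *
 * so only [0x140000000, 0x180000000) is passed to __process_mem_region(),
 * keeping the randomized kernel inside memory that cannot be hot-removed.
 */
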
#ifdef CONFIG_EFI
/*
 * Returns true if we processed the EFI memmap, which we prefer over the E820
 * table if it is available.
 */
static bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
	struct efi_info *e = &boot_params->efi_info;
	bool efi_mirror_found = false;
	struct mem_vector region;
	efi_memory_desc_t *md;
	unsigned long pmap;
	char *signature;
	u32 nr_desc;
	int i;

	signature = (char *)&e->efi_loader_signature;
	if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
	    strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
		return false;

#ifdef CONFIG_X86_32
	/* Can't handle data above 4GB at this time */
	if (e->efi_memmap_hi) {
		warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
		return false;
	}
	pmap = e->efi_memmap;
#else
	pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
#endif

	nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
	for (i = 0; i < nr_desc; i++) {
		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
		if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
			efi_mirror_found = true;
			break;
		}
	}

	for (i = 0; i < nr_desc; i++) {
		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);

		/*
		 * Here we are more conservative in picking free memory than
		 * the EFI spec allows:
		 *
		 * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
		 * free memory and thus available to place the kernel image into,
		 * but in practice there's firmware where using that memory leads
		 * to crashes.
		 *
		 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
		 */
		if (md->type != EFI_CONVENTIONAL_MEMORY)
			continue;

		if (efi_soft_reserve_enabled() &&
		    (md->attribute & EFI_MEMORY_SP))
			continue;

		if (efi_mirror_found &&
		    !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
			continue;

		region.start = md->phys_addr;
		region.size = md->num_pages << EFI_PAGE_SHIFT;
		if (process_mem_region(&region, minimum, image_size))
			break;
	}
	return true;
}
#else
static inline bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
{
	return false;
}
#endif

static void process_e820_entries(unsigned long minimum,
				 unsigned long image_size)
{
	int i;
	struct mem_vector region;
	struct boot_e820_entry *entry;

	/* Verify potential e820 positions, appending to slots list. */
	for (i = 0; i < boot_params->e820_entries; i++) {
		entry = &boot_params->e820_table[i];
		/* Skip non-RAM entries. */
		if (entry->type != E820_TYPE_RAM)
			continue;
		region.start = entry->addr;
		region.size = entry->size;
		if (process_mem_region(&region, minimum, image_size))
			break;
	}
}

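/*
 * Example of which e820 entries survive the scan above, for a
 * hypothetical table:
 *
 *	[0x0000000000000000, 0x000000000009fc00)  usable
 *	[0x000000000009fc00, 0x00000000000a0000)  reserved
 *	[0x0000000000100000, 0x00000000bffe0000)  usable
 *	[0x00000000bffe0000, 0x00000000c0000000)  reserved
 *	[0x0000000100000000, 0x0000000240000000)  usable
 *
 * Only the E820_TYPE_RAM ("usable") entries become candidate regions;
 * the reserved entries are skipped here, and anything below "minimum"
 * or colliding with mem_avoid[] is trimmed later by
 * __process_mem_region().
 */
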
static unsigned long find_random_phys_addr(unsigned long minimum,
					   unsigned long image_size)
{
	u64 phys_addr;

	/* Bail out early if it's impossible to succeed. */
	if (minimum + image_size > mem_limit)
		return 0;

	/* Check if we had too many memmaps. */
	if (memmap_too_large) {
		debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
		return 0;
	}

	if (!process_efi_entries(minimum, image_size))
		process_e820_entries(minimum, image_size);

	phys_addr = slots_fetch_random();

	/* Perform a final check to make sure the address is in range. */
	if (phys_addr < minimum || phys_addr + image_size > mem_limit) {
		warn("Invalid physical address chosen!\n");
		return 0;
	}

	return (unsigned long)phys_addr;
}

static unsigned long find_random_virt_addr(unsigned long minimum,
					   unsigned long image_size)
{
	unsigned long slots, random_addr;

	/*
	 * How many CONFIG_PHYSICAL_ALIGN-sized slots can hold image_size
	 * within the range [minimum, KERNEL_IMAGE_SIZE)?
	 */
	slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN;

	random_addr = kaslr_get_random_long("Virtual") % slots;

	return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
}

/*
 * Since this function examines addresses much more numerically,
 * it takes the input and output pointers as 'unsigned long'.
 */
void choose_random_location(unsigned long input,
			    unsigned long input_size,
			    unsigned long *output,
			    unsigned long output_size,
			    unsigned long *virt_addr)
{
	unsigned long random_addr, min_addr;

	if (cmdline_find_option_bool("nokaslr")) {
		warn("KASLR disabled: 'nokaslr' on cmdline.");
		return;
	}

#ifdef CONFIG_X86_5LEVEL
	if (__read_cr4() & X86_CR4_LA57) {
		__pgtable_l5_enabled = 1;
		pgdir_shift = 48;
		ptrs_per_p4d = 512;
	}
#endif

	boot_params->hdr.loadflags |= KASLR_FLAG;

	/* Prepare to add new identity pagetables on demand. */
	initialize_identity_maps();

	if (IS_ENABLED(CONFIG_X86_32))
		mem_limit = KERNEL_IMAGE_SIZE;
	else
		mem_limit = MAXMEM;

	/* Record the various known unsafe memory ranges. */
	mem_avoid_init(input, input_size, *output);

	/*
	 * Low end of the randomization range should be the
	 * smaller of 512M or the initial kernel image
	 * location:
	 */
	min_addr = min(*output, 512UL << 20);
	/* Make sure minimum is aligned. */
	min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN);

	/* Walk available memory entries to find a random address. */
	random_addr = find_random_phys_addr(min_addr, output_size);
	if (!random_addr) {
		warn("Physical KASLR disabled: no suitable memory region!");
	} else {
		/* Update the new physical address location. */
		if (*output != random_addr) {
			add_identity_map(random_addr, output_size);
			*output = random_addr;
		}

		/*
		 * This loads the identity mapping page table.
		 * This should only be done if a new physical address
		 * is found for the kernel, otherwise we should keep
		 * the old page table so that it behaves like the
		 * "nokaslr" case.
		 */
		finalize_identity_maps();
	}

	/* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
	if (IS_ENABLED(CONFIG_X86_64))
		random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
	*virt_addr = random_addr;
}

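/*
 * Worked example for find_random_virt_addr(), with hypothetical numbers:
 * with KERNEL_IMAGE_SIZE = 1G, minimum = LOAD_PHYSICAL_ADDR = 16M,
 * image_size = 64M and CONFIG_PHYSICAL_ALIGN = 2M there are
 *
 *	1 + (1024M - 16M - 64M) / 2M = 473
 *
 * possible slots, and the returned virtual offset is
 * minimum + slot * 2M for a slot picked uniformly in [0, 473).
 */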