1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 1995 Linus Torvalds 4 * 5 * This file contains the setup_arch() code, which handles the architecture-dependent 6 * parts of early kernel initialization. 7 */ 8 #include <linux/acpi.h> 9 #include <linux/console.h> 10 #include <linux/cpu.h> 11 #include <linux/crash_dump.h> 12 #include <linux/dma-map-ops.h> 13 #include <linux/efi.h> 14 #include <linux/hugetlb.h> 15 #include <linux/ima.h> 16 #include <linux/init_ohci1394_dma.h> 17 #include <linux/initrd.h> 18 #include <linux/iscsi_ibft.h> 19 #include <linux/memblock.h> 20 #include <linux/panic_notifier.h> 21 #include <linux/pci.h> 22 #include <linux/random.h> 23 #include <linux/root_dev.h> 24 #include <linux/static_call.h> 25 #include <linux/swiotlb.h> 26 #include <linux/tboot.h> 27 #include <linux/usb/xhci-dbgp.h> 28 #include <linux/vmalloc.h> 29 30 #include <uapi/linux/mount.h> 31 32 #include <xen/xen.h> 33 34 #include <asm/apic.h> 35 #include <asm/bios_ebda.h> 36 #include <asm/bugs.h> 37 #include <asm/cacheinfo.h> 38 #include <asm/coco.h> 39 #include <asm/cpu.h> 40 #include <asm/efi.h> 41 #include <asm/gart.h> 42 #include <asm/hypervisor.h> 43 #include <asm/io_apic.h> 44 #include <asm/kasan.h> 45 #include <asm/kaslr.h> 46 #include <asm/mce.h> 47 #include <asm/memtype.h> 48 #include <asm/mtrr.h> 49 #include <asm/nmi.h> 50 #include <asm/numa.h> 51 #include <asm/olpc_ofw.h> 52 #include <asm/pci-direct.h> 53 #include <asm/prom.h> 54 #include <asm/proto.h> 55 #include <asm/realmode.h> 56 #include <asm/thermal.h> 57 #include <asm/unwind.h> 58 #include <asm/vsyscall.h> 59 60 /* 61 * max_low_pfn_mapped: highest directly mapped pfn < 4 GB 62 * max_pfn_mapped: highest directly mapped pfn > 4 GB 63 * 64 * The direct mapping only covers E820_TYPE_RAM regions, so the ranges and gaps are 65 * represented by pfn_mapped[]. 66 */ 67 unsigned long max_low_pfn_mapped; 68 unsigned long max_pfn_mapped; 69 70 #ifdef CONFIG_DMI 71 RESERVE_BRK(dmi_alloc, 65536); 72 #endif 73 74 75 unsigned long _brk_start = (unsigned long)__brk_base; 76 unsigned long _brk_end = (unsigned long)__brk_base; 77 78 struct boot_params boot_params; 79 80 /* 81 * These are the four main kernel memory regions, we put them into 82 * the resource tree so that kdump tools and other debugging tools 83 * recover it: 84 */ 85 86 static struct resource rodata_resource = { 87 .name = "Kernel rodata", 88 .start = 0, 89 .end = 0, 90 .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM 91 }; 92 93 static struct resource data_resource = { 94 .name = "Kernel data", 95 .start = 0, 96 .end = 0, 97 .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM 98 }; 99 100 static struct resource code_resource = { 101 .name = "Kernel code", 102 .start = 0, 103 .end = 0, 104 .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM 105 }; 106 107 static struct resource bss_resource = { 108 .name = "Kernel bss", 109 .start = 0, 110 .end = 0, 111 .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM 112 }; 113 114 115 #ifdef CONFIG_X86_32 116 /* CPU data as detected by the assembly code in head_32.S */ 117 struct cpuinfo_x86 new_cpu_data; 118 119 struct apm_info apm_info; 120 EXPORT_SYMBOL(apm_info); 121 122 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ 123 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) 124 struct ist_info ist_info; 125 EXPORT_SYMBOL(ist_info); 126 #else 127 struct ist_info ist_info; 128 #endif 129 130 #endif 131 132 struct cpuinfo_x86 boot_cpu_data __read_mostly; 133 EXPORT_SYMBOL(boot_cpu_data); 134 SYM_PIC_ALIAS(boot_cpu_data); 135 136 #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) 137 __visible unsigned long mmu_cr4_features __ro_after_init; 138 #else 139 __visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE; 140 #endif 141 142 #ifdef CONFIG_IMA 143 static phys_addr_t ima_kexec_buffer_phys; 144 static size_t ima_kexec_buffer_size; 145 #endif 146 147 /* Boot loader ID and version as integers, for the benefit of proc_dointvec */ 148 int bootloader_type, bootloader_version; 149 150 static const struct ctl_table x86_sysctl_table[] = { 151 { 152 .procname = "unknown_nmi_panic", 153 .data = &unknown_nmi_panic, 154 .maxlen = sizeof(int), 155 .mode = 0644, 156 .proc_handler = proc_dointvec, 157 }, 158 { 159 .procname = "panic_on_unrecovered_nmi", 160 .data = &panic_on_unrecovered_nmi, 161 .maxlen = sizeof(int), 162 .mode = 0644, 163 .proc_handler = proc_dointvec, 164 }, 165 { 166 .procname = "panic_on_io_nmi", 167 .data = &panic_on_io_nmi, 168 .maxlen = sizeof(int), 169 .mode = 0644, 170 .proc_handler = proc_dointvec, 171 }, 172 { 173 .procname = "bootloader_type", 174 .data = &bootloader_type, 175 .maxlen = sizeof(int), 176 .mode = 0444, 177 .proc_handler = proc_dointvec, 178 }, 179 { 180 .procname = "bootloader_version", 181 .data = &bootloader_version, 182 .maxlen = sizeof(int), 183 .mode = 0444, 184 .proc_handler = proc_dointvec, 185 }, 186 { 187 .procname = "io_delay_type", 188 .data = &io_delay_type, 189 .maxlen = sizeof(int), 190 .mode = 0644, 191 .proc_handler = proc_dointvec, 192 }, 193 #if defined(CONFIG_ACPI_SLEEP) 194 { 195 .procname = "acpi_video_flags", 196 .data = &acpi_realmode_flags, 197 .maxlen = sizeof(unsigned long), 198 .mode = 0644, 199 .proc_handler = proc_doulongvec_minmax, 200 }, 201 #endif 202 }; 203 204 static int __init init_x86_sysctl(void) 205 { 206 register_sysctl_init("kernel", x86_sysctl_table); 207 return 0; 208 } 209 arch_initcall(init_x86_sysctl); 210 211 /* 212 * Setup options 213 */ 214 struct screen_info screen_info; 215 EXPORT_SYMBOL(screen_info); 216 #if defined(CONFIG_FIRMWARE_EDID) 217 struct edid_info edid_info; 218 EXPORT_SYMBOL_GPL(edid_info); 219 #endif 220 221 extern int root_mountflags; 222 223 unsigned long saved_video_mode; 224 225 #define RAMDISK_IMAGE_START_MASK 0x07FF 226 #define RAMDISK_PROMPT_FLAG 0x8000 227 #define RAMDISK_LOAD_FLAG 0x4000 228 229 static char __initdata command_line[COMMAND_LINE_SIZE]; 230 #ifdef CONFIG_CMDLINE_BOOL 231 char builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; 232 bool builtin_cmdline_added __ro_after_init; 233 #endif 234 235 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) 236 struct edd edd; 237 #ifdef CONFIG_EDD_MODULE 238 EXPORT_SYMBOL(edd); 239 #endif 240 /** 241 * copy_edd() - Copy the BIOS EDD information 242 * from boot_params into a safe place. 243 * 244 */ 245 static inline void __init copy_edd(void) 246 { 247 memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, 248 sizeof(edd.mbr_signature)); 249 memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); 250 edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; 251 edd.edd_info_nr = boot_params.eddbuf_entries; 252 } 253 #else 254 static inline void __init copy_edd(void) 255 { 256 } 257 #endif 258 259 void * __init extend_brk(size_t size, size_t align) 260 { 261 size_t mask = align - 1; 262 void *ret; 263 264 BUG_ON(_brk_start == 0); 265 BUG_ON(align & mask); 266 267 _brk_end = (_brk_end + mask) & ~mask; 268 BUG_ON((char *)(_brk_end + size) > __brk_limit); 269 270 ret = (void *)_brk_end; 271 _brk_end += size; 272 273 memset(ret, 0, size); 274 275 return ret; 276 } 277 278 #ifdef CONFIG_X86_32 279 static void __init cleanup_highmap(void) 280 { 281 } 282 #endif 283 284 static void __init reserve_brk(void) 285 { 286 if (_brk_end > _brk_start) 287 memblock_reserve_kern(__pa_symbol(_brk_start), 288 _brk_end - _brk_start); 289 290 /* Mark brk area as locked down and no longer taking any 291 new allocations */ 292 _brk_start = 0; 293 } 294 295 #ifdef CONFIG_BLK_DEV_INITRD 296 297 static u64 __init get_ramdisk_image(void) 298 { 299 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 300 301 ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32; 302 303 if (ramdisk_image == 0) 304 ramdisk_image = phys_initrd_start; 305 306 return ramdisk_image; 307 } 308 static u64 __init get_ramdisk_size(void) 309 { 310 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 311 312 ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32; 313 314 if (ramdisk_size == 0) 315 ramdisk_size = phys_initrd_size; 316 317 return ramdisk_size; 318 } 319 320 static void __init relocate_initrd(void) 321 { 322 /* Assume only end is not page aligned */ 323 u64 ramdisk_image = get_ramdisk_image(); 324 u64 ramdisk_size = get_ramdisk_size(); 325 u64 area_size = PAGE_ALIGN(ramdisk_size); 326 int ret = 0; 327 328 /* We need to move the initrd down into directly mapped mem */ 329 u64 relocated_ramdisk = memblock_phys_alloc_range(area_size, PAGE_SIZE, 0, 330 PFN_PHYS(max_pfn_mapped)); 331 if (!relocated_ramdisk) 332 panic("Cannot find place for new RAMDISK of size %lld\n", 333 ramdisk_size); 334 335 initrd_start = relocated_ramdisk + PAGE_OFFSET; 336 initrd_end = initrd_start + ramdisk_size; 337 printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", 338 relocated_ramdisk, relocated_ramdisk + ramdisk_size - 1); 339 340 ret = copy_from_early_mem((void *)initrd_start, ramdisk_image, ramdisk_size); 341 if (ret) 342 panic("Copy RAMDISK failed\n"); 343 344 printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" 345 " [mem %#010llx-%#010llx]\n", 346 ramdisk_image, ramdisk_image + ramdisk_size - 1, 347 relocated_ramdisk, relocated_ramdisk + ramdisk_size - 1); 348 } 349 350 static void __init early_reserve_initrd(void) 351 { 352 /* Assume only end is not page aligned */ 353 u64 ramdisk_image = get_ramdisk_image(); 354 u64 ramdisk_size = get_ramdisk_size(); 355 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 356 357 if (!boot_params.hdr.type_of_loader || 358 !ramdisk_image || !ramdisk_size) 359 return; /* No initrd provided by bootloader */ 360 361 memblock_reserve_kern(ramdisk_image, ramdisk_end - ramdisk_image); 362 } 363 364 static void __init reserve_initrd(void) 365 { 366 /* Assume only end is not page aligned */ 367 u64 ramdisk_image = get_ramdisk_image(); 368 u64 ramdisk_size = get_ramdisk_size(); 369 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 370 371 if (!boot_params.hdr.type_of_loader || 372 !ramdisk_image || !ramdisk_size) 373 return; /* No initrd provided by bootloader */ 374 375 initrd_start = 0; 376 377 printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, 378 ramdisk_end - 1); 379 380 if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image), 381 PFN_DOWN(ramdisk_end))) { 382 /* All are mapped, easy case */ 383 initrd_start = ramdisk_image + PAGE_OFFSET; 384 initrd_end = initrd_start + ramdisk_size; 385 return; 386 } 387 388 relocate_initrd(); 389 390 memblock_phys_free(ramdisk_image, ramdisk_end - ramdisk_image); 391 } 392 393 #else 394 static void __init early_reserve_initrd(void) 395 { 396 } 397 static void __init reserve_initrd(void) 398 { 399 } 400 #endif /* CONFIG_BLK_DEV_INITRD */ 401 402 static void __init add_early_ima_buffer(u64 phys_addr) 403 { 404 #ifdef CONFIG_IMA 405 struct ima_setup_data *data; 406 407 data = early_memremap(phys_addr + sizeof(struct setup_data), sizeof(*data)); 408 if (!data) { 409 pr_warn("setup: failed to memremap ima_setup_data entry\n"); 410 return; 411 } 412 413 if (data->size) { 414 memblock_reserve_kern(data->addr, data->size); 415 ima_kexec_buffer_phys = data->addr; 416 ima_kexec_buffer_size = data->size; 417 } 418 419 early_memunmap(data, sizeof(*data)); 420 #else 421 pr_warn("Passed IMA kexec data, but CONFIG_IMA not set. Ignoring.\n"); 422 #endif 423 } 424 425 #if defined(CONFIG_HAVE_IMA_KEXEC) && !defined(CONFIG_OF_FLATTREE) 426 int __init ima_free_kexec_buffer(void) 427 { 428 if (!ima_kexec_buffer_size) 429 return -ENOENT; 430 431 memblock_free_late(ima_kexec_buffer_phys, 432 ima_kexec_buffer_size); 433 434 ima_kexec_buffer_phys = 0; 435 ima_kexec_buffer_size = 0; 436 437 return 0; 438 } 439 440 int __init ima_get_kexec_buffer(void **addr, size_t *size) 441 { 442 if (!ima_kexec_buffer_size) 443 return -ENOENT; 444 445 *addr = __va(ima_kexec_buffer_phys); 446 *size = ima_kexec_buffer_size; 447 448 return 0; 449 } 450 #endif 451 452 static void __init add_kho(u64 phys_addr, u32 data_len) 453 { 454 struct kho_data *kho; 455 u64 addr = phys_addr + sizeof(struct setup_data); 456 u64 size = data_len - sizeof(struct setup_data); 457 458 if (!IS_ENABLED(CONFIG_KEXEC_HANDOVER)) { 459 pr_warn("Passed KHO data, but CONFIG_KEXEC_HANDOVER not set. Ignoring.\n"); 460 return; 461 } 462 463 kho = early_memremap(addr, size); 464 if (!kho) { 465 pr_warn("setup: failed to memremap kho data (0x%llx, 0x%llx)\n", 466 addr, size); 467 return; 468 } 469 470 kho_populate(kho->fdt_addr, kho->fdt_size, kho->scratch_addr, kho->scratch_size); 471 472 early_memunmap(kho, size); 473 } 474 475 static void __init parse_setup_data(void) 476 { 477 struct setup_data *data; 478 u64 pa_data, pa_next; 479 480 pa_data = boot_params.hdr.setup_data; 481 while (pa_data) { 482 u32 data_len, data_type; 483 484 data = early_memremap(pa_data, sizeof(*data)); 485 data_len = data->len + sizeof(struct setup_data); 486 data_type = data->type; 487 pa_next = data->next; 488 early_memunmap(data, sizeof(*data)); 489 490 switch (data_type) { 491 case SETUP_E820_EXT: 492 e820__memory_setup_extended(pa_data, data_len); 493 break; 494 case SETUP_DTB: 495 add_dtb(pa_data); 496 break; 497 case SETUP_EFI: 498 parse_efi_setup(pa_data, data_len); 499 break; 500 case SETUP_IMA: 501 add_early_ima_buffer(pa_data); 502 break; 503 case SETUP_KEXEC_KHO: 504 add_kho(pa_data, data_len); 505 break; 506 case SETUP_RNG_SEED: 507 data = early_memremap(pa_data, data_len); 508 add_bootloader_randomness(data->data, data->len); 509 /* Zero seed for forward secrecy. */ 510 memzero_explicit(data->data, data->len); 511 /* Zero length in case we find ourselves back here by accident. */ 512 memzero_explicit(&data->len, sizeof(data->len)); 513 early_memunmap(data, data_len); 514 break; 515 default: 516 break; 517 } 518 pa_data = pa_next; 519 } 520 } 521 522 /* 523 * Translate the fields of 'struct boot_param' into global variables 524 * representing these parameters. 525 */ 526 static void __init parse_boot_params(void) 527 { 528 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); 529 screen_info = boot_params.screen_info; 530 #if defined(CONFIG_FIRMWARE_EDID) 531 edid_info = boot_params.edid_info; 532 #endif 533 #ifdef CONFIG_X86_32 534 apm_info.bios = boot_params.apm_bios_info; 535 ist_info = boot_params.ist_info; 536 #endif 537 saved_video_mode = boot_params.hdr.vid_mode; 538 bootloader_type = boot_params.hdr.type_of_loader; 539 if ((bootloader_type >> 4) == 0xe) { 540 bootloader_type &= 0xf; 541 bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4; 542 } 543 bootloader_version = bootloader_type & 0xf; 544 bootloader_version |= boot_params.hdr.ext_loader_ver << 4; 545 546 #ifdef CONFIG_BLK_DEV_RAM 547 rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; 548 #endif 549 #ifdef CONFIG_EFI 550 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, 551 EFI32_LOADER_SIGNATURE, 4)) { 552 set_bit(EFI_BOOT, &efi.flags); 553 } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, 554 EFI64_LOADER_SIGNATURE, 4)) { 555 set_bit(EFI_BOOT, &efi.flags); 556 set_bit(EFI_64BIT, &efi.flags); 557 } 558 #endif 559 560 if (!boot_params.hdr.root_flags) 561 root_mountflags &= ~MS_RDONLY; 562 } 563 564 static void __init memblock_x86_reserve_range_setup_data(void) 565 { 566 struct setup_indirect *indirect; 567 struct setup_data *data; 568 u64 pa_data, pa_next; 569 u32 len; 570 571 pa_data = boot_params.hdr.setup_data; 572 while (pa_data) { 573 data = early_memremap(pa_data, sizeof(*data)); 574 if (!data) { 575 pr_warn("setup: failed to memremap setup_data entry\n"); 576 return; 577 } 578 579 len = sizeof(*data); 580 pa_next = data->next; 581 582 memblock_reserve_kern(pa_data, sizeof(*data) + data->len); 583 584 if (data->type == SETUP_INDIRECT) { 585 len += data->len; 586 early_memunmap(data, sizeof(*data)); 587 data = early_memremap(pa_data, len); 588 if (!data) { 589 pr_warn("setup: failed to memremap indirect setup_data\n"); 590 return; 591 } 592 593 indirect = (struct setup_indirect *)data->data; 594 595 if (indirect->type != SETUP_INDIRECT) 596 memblock_reserve_kern(indirect->addr, indirect->len); 597 } 598 599 pa_data = pa_next; 600 early_memunmap(data, len); 601 } 602 } 603 604 static void __init arch_reserve_crashkernel(void) 605 { 606 unsigned long long crash_base, crash_size, low_size = 0; 607 bool high = false; 608 int ret; 609 610 if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) 611 return; 612 613 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), 614 &crash_size, &crash_base, 615 &low_size, &high); 616 if (ret) 617 return; 618 619 if (xen_pv_domain()) { 620 pr_info("Ignoring crashkernel for a Xen PV domain\n"); 621 return; 622 } 623 624 reserve_crashkernel_generic(crash_size, crash_base, low_size, high); 625 } 626 627 static struct resource standard_io_resources[] = { 628 { .name = "dma1", .start = 0x00, .end = 0x1f, 629 .flags = IORESOURCE_BUSY | IORESOURCE_IO }, 630 { .name = "pic1", .start = 0x20, .end = 0x21, 631 .flags = IORESOURCE_BUSY | IORESOURCE_IO }, 632 { .name = "timer0", .start = 0x40, .end = 0x43, 633 .flags = IORESOURCE_BUSY | IORESOURCE_IO }, 634 { .name = "timer1", .start = 0x50, .end = 0x53, 635 .flags = IORESOURCE_BUSY | IORESOURCE_IO }, 636 { .name = "keyboard", .start = 0x60, .end = 0x60, 637 .flags = IORESOURCE_BUSY | IORESOURCE_IO }, 638 { .name = "keyboard", .start = 0x64, .end = 0x64, 639 .flags = IORESOURCE_BUSY | IORESOURCE_IO }, 640 { .name = "dma page reg", .start = 0x80, .end = 0x8f, 641 .flags = IORESOURCE_BUSY | IORESOURCE_IO }, 642 { .name = "pic2", .start = 0xa0, .end = 0xa1, 643 .flags = IORESOURCE_BUSY | IORESOURCE_IO }, 644 { .name = "dma2", .start = 0xc0, .end = 0xdf, 645 .flags = IORESOURCE_BUSY | IORESOURCE_IO }, 646 { .name = "fpu", .start = 0xf0, .end = 0xff, 647 .flags = IORESOURCE_BUSY | IORESOURCE_IO } 648 }; 649 650 void __init reserve_standard_io_resources(void) 651 { 652 int i; 653 654 /* request I/O space for devices used on all i[345]86 PCs */ 655 for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) 656 request_resource(&ioport_resource, &standard_io_resources[i]); 657 658 } 659 660 static void __init setup_kernel_resources(void) 661 { 662 code_resource.start = __pa_symbol(_text); 663 code_resource.end = __pa_symbol(_etext)-1; 664 rodata_resource.start = __pa_symbol(__start_rodata); 665 rodata_resource.end = __pa_symbol(__end_rodata)-1; 666 data_resource.start = __pa_symbol(_sdata); 667 data_resource.end = __pa_symbol(_edata)-1; 668 bss_resource.start = __pa_symbol(__bss_start); 669 bss_resource.end = __pa_symbol(__bss_stop)-1; 670 671 insert_resource(&iomem_resource, &code_resource); 672 insert_resource(&iomem_resource, &rodata_resource); 673 insert_resource(&iomem_resource, &data_resource); 674 insert_resource(&iomem_resource, &bss_resource); 675 } 676 677 static bool __init snb_gfx_workaround_needed(void) 678 { 679 #ifdef CONFIG_PCI 680 int i; 681 u16 vendor, devid; 682 static const __initconst u16 snb_ids[] = { 683 0x0102, 684 0x0112, 685 0x0122, 686 0x0106, 687 0x0116, 688 0x0126, 689 0x010a, 690 }; 691 692 /* Assume no if something weird is going on with PCI */ 693 if (!early_pci_allowed()) 694 return false; 695 696 vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID); 697 if (vendor != 0x8086) 698 return false; 699 700 devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID); 701 for (i = 0; i < ARRAY_SIZE(snb_ids); i++) 702 if (devid == snb_ids[i]) 703 return true; 704 #endif 705 706 return false; 707 } 708 709 /* 710 * Sandy Bridge graphics has trouble with certain ranges, exclude 711 * them from allocation. 712 */ 713 static void __init trim_snb_memory(void) 714 { 715 static const __initconst unsigned long bad_pages[] = { 716 0x20050000, 717 0x20110000, 718 0x20130000, 719 0x20138000, 720 0x40004000, 721 }; 722 int i; 723 724 if (!snb_gfx_workaround_needed()) 725 return; 726 727 printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n"); 728 729 /* 730 * SandyBridge integrated graphics devices have a bug that prevents 731 * them from accessing certain memory ranges, namely anything below 732 * 1M and in the pages listed in bad_pages[] above. 733 * 734 * To avoid these pages being ever accessed by SNB gfx devices reserve 735 * bad_pages that have not already been reserved at boot time. 736 * All memory below the 1 MB mark is anyway reserved later during 737 * setup_arch(), so there is no need to reserve it here. 738 */ 739 740 for (i = 0; i < ARRAY_SIZE(bad_pages); i++) { 741 if (memblock_reserve(bad_pages[i], PAGE_SIZE)) 742 printk(KERN_WARNING "failed to reserve 0x%08lx\n", 743 bad_pages[i]); 744 } 745 } 746 747 static void __init trim_bios_range(void) 748 { 749 /* 750 * A special case is the first 4Kb of memory; 751 * This is a BIOS owned area, not kernel ram, but generally 752 * not listed as such in the E820 table. 753 * 754 * This typically reserves additional memory (64KiB by default) 755 * since some BIOSes are known to corrupt low memory. See the 756 * Kconfig help text for X86_RESERVE_LOW. 757 */ 758 e820__range_update(0, PAGE_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); 759 760 /* 761 * special case: Some BIOSes report the PC BIOS 762 * area (640Kb -> 1Mb) as RAM even though it is not. 763 * take them out. 764 */ 765 e820__range_remove(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_TYPE_RAM, 1); 766 767 e820__update_table(e820_table); 768 } 769 770 /* called before trim_bios_range() to spare extra sanitize */ 771 static void __init e820_add_kernel_range(void) 772 { 773 u64 start = __pa_symbol(_text); 774 u64 size = __pa_symbol(_end) - start; 775 776 /* 777 * Complain if .text .data and .bss are not marked as E820_TYPE_RAM and 778 * attempt to fix it by adding the range. We may have a confused BIOS, 779 * or the user may have used memmap=exactmap or memmap=xxM$yyM to 780 * exclude kernel range. If we really are running on top non-RAM, 781 * we will crash later anyways. 782 */ 783 if (e820__mapped_all(start, start + size, E820_TYPE_RAM)) 784 return; 785 786 pr_warn(".text .data .bss are not marked as E820_TYPE_RAM!\n"); 787 e820__range_remove(start, size, E820_TYPE_RAM, 0); 788 e820__range_add(start, size, E820_TYPE_RAM); 789 } 790 791 static void __init early_reserve_memory(void) 792 { 793 /* 794 * Reserve the memory occupied by the kernel between _text and 795 * __end_of_kernel_reserve symbols. Any kernel sections after the 796 * __end_of_kernel_reserve symbol must be explicitly reserved with a 797 * separate memblock_reserve() or they will be discarded. 798 */ 799 memblock_reserve_kern(__pa_symbol(_text), 800 (unsigned long)__end_of_kernel_reserve - (unsigned long)_text); 801 802 /* 803 * The first 4Kb of memory is a BIOS owned area, but generally it is 804 * not listed as such in the E820 table. 805 * 806 * Reserve the first 64K of memory since some BIOSes are known to 807 * corrupt low memory. After the real mode trampoline is allocated the 808 * rest of the memory below 640k is reserved. 809 * 810 * In addition, make sure page 0 is always reserved because on 811 * systems with L1TF its contents can be leaked to user processes. 812 */ 813 memblock_reserve(0, SZ_64K); 814 815 early_reserve_initrd(); 816 817 memblock_x86_reserve_range_setup_data(); 818 819 reserve_bios_regions(); 820 trim_snb_memory(); 821 } 822 823 /* 824 * Dump out kernel offset information on panic. 825 */ 826 static int 827 dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) 828 { 829 if (kaslr_enabled()) { 830 pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n", 831 kaslr_offset(), 832 __START_KERNEL, 833 __START_KERNEL_map, 834 MODULES_VADDR-1); 835 } else { 836 pr_emerg("Kernel Offset: disabled\n"); 837 } 838 839 return 0; 840 } 841 842 void x86_configure_nx(void) 843 { 844 if (boot_cpu_has(X86_FEATURE_NX)) 845 __supported_pte_mask |= _PAGE_NX; 846 else 847 __supported_pte_mask &= ~_PAGE_NX; 848 } 849 850 static void __init x86_report_nx(void) 851 { 852 if (!boot_cpu_has(X86_FEATURE_NX)) { 853 printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " 854 "missing in CPU!\n"); 855 } else { 856 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 857 printk(KERN_INFO "NX (Execute Disable) protection: active\n"); 858 #else 859 /* 32bit non-PAE kernel, NX cannot be used */ 860 printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " 861 "cannot be enabled: non-PAE kernel!\n"); 862 #endif 863 } 864 } 865 866 /* 867 * Determine if we were loaded by an EFI loader. If so, then we have also been 868 * passed the efi memmap, systab, etc., so we should use these data structures 869 * for initialization. Note, the efi init code path is determined by the 870 * global efi_enabled. This allows the same kernel image to be used on existing 871 * systems (with a traditional BIOS) as well as on EFI systems. 872 */ 873 /* 874 * setup_arch - architecture-specific boot-time initializations 875 * 876 * Note: On x86_64, fixmaps are ready for use even before this is called. 877 */ 878 879 void __init setup_arch(char **cmdline_p) 880 { 881 #ifdef CONFIG_X86_32 882 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 883 884 /* 885 * copy kernel address range established so far and switch 886 * to the proper swapper page table 887 */ 888 clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY, 889 initial_page_table + KERNEL_PGD_BOUNDARY, 890 KERNEL_PGD_PTRS); 891 892 load_cr3(swapper_pg_dir); 893 /* 894 * Note: Quark X1000 CPUs advertise PGE incorrectly and require 895 * a cr3 based tlb flush, so the following __flush_tlb_all() 896 * will not flush anything because the CPU quirk which clears 897 * X86_FEATURE_PGE has not been invoked yet. Though due to the 898 * load_cr3() above the TLB has been flushed already. The 899 * quirk is invoked before subsequent calls to __flush_tlb_all() 900 * so proper operation is guaranteed. 901 */ 902 __flush_tlb_all(); 903 #else 904 printk(KERN_INFO "Command line: %s\n", boot_command_line); 905 boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS; 906 #endif 907 908 #ifdef CONFIG_CMDLINE_BOOL 909 #ifdef CONFIG_CMDLINE_OVERRIDE 910 strscpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); 911 #else 912 if (builtin_cmdline[0]) { 913 /* append boot loader cmdline to builtin */ 914 strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); 915 strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); 916 strscpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); 917 } 918 #endif 919 builtin_cmdline_added = true; 920 #endif 921 922 strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE); 923 *cmdline_p = command_line; 924 925 /* 926 * If we have OLPC OFW, we might end up relocating the fixmap due to 927 * reserve_top(), so do this before touching the ioremap area. 928 */ 929 olpc_ofw_detect(); 930 931 idt_setup_early_traps(); 932 early_cpu_init(); 933 jump_label_init(); 934 static_call_init(); 935 early_ioremap_init(); 936 937 setup_olpc_ofw_pgd(); 938 939 parse_boot_params(); 940 941 x86_init.oem.arch_setup(); 942 943 /* 944 * Do some memory reservations *before* memory is added to memblock, so 945 * memblock allocations won't overwrite it. 946 * 947 * After this point, everything still needed from the boot loader or 948 * firmware or kernel text should be early reserved or marked not RAM in 949 * e820. All other memory is free game. 950 * 951 * This call needs to happen before e820__memory_setup() which calls the 952 * xen_memory_setup() on Xen dom0 which relies on the fact that those 953 * early reservations have happened already. 954 */ 955 early_reserve_memory(); 956 957 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; 958 e820__memory_setup(); 959 parse_setup_data(); 960 961 copy_edd(); 962 963 setup_initial_init_mm(_text, _etext, _edata, (void *)_brk_end); 964 965 /* 966 * x86_configure_nx() is called before parse_early_param() to detect 967 * whether hardware doesn't support NX (so that the early EHCI debug 968 * console setup can safely call set_fixmap()). 969 */ 970 x86_configure_nx(); 971 972 parse_early_param(); 973 974 if (efi_enabled(EFI_BOOT)) 975 efi_memblock_x86_reserve_range(); 976 977 x86_report_nx(); 978 979 apic_setup_apic_calls(); 980 981 if (acpi_mps_check()) { 982 #ifdef CONFIG_X86_LOCAL_APIC 983 apic_is_disabled = true; 984 #endif 985 setup_clear_cpu_cap(X86_FEATURE_APIC); 986 } 987 988 e820__finish_early_params(); 989 990 if (efi_enabled(EFI_BOOT)) 991 efi_init(); 992 993 reserve_ibft_region(); 994 x86_init.resources.dmi_setup(); 995 996 /* 997 * VMware detection requires dmi to be available, so this 998 * needs to be done after dmi_setup(), for the boot CPU. 999 * For some guest types (Xen PV, SEV-SNP, TDX) it is required to be 1000 * called before cache_bp_init() for setting up MTRR state. 1001 */ 1002 init_hypervisor_platform(); 1003 1004 tsc_early_init(); 1005 x86_init.resources.probe_roms(); 1006 1007 /* 1008 * Add resources for kernel text and data to the iomem_resource. 1009 * Do it after parse_early_param, so it can be debugged. 1010 */ 1011 setup_kernel_resources(); 1012 1013 e820_add_kernel_range(); 1014 trim_bios_range(); 1015 #ifdef CONFIG_X86_32 1016 if (ppro_with_ram_bug()) { 1017 e820__range_update(0x70000000ULL, 0x40000ULL, E820_TYPE_RAM, 1018 E820_TYPE_RESERVED); 1019 e820__update_table(e820_table); 1020 printk(KERN_INFO "fixed physical RAM map:\n"); 1021 e820__print_table("bad_ppro"); 1022 } 1023 #else 1024 early_gart_iommu_check(); 1025 #endif 1026 1027 /* 1028 * partially used pages are not usable - thus 1029 * we are rounding upwards: 1030 */ 1031 max_pfn = e820__end_of_ram_pfn(); 1032 1033 /* update e820 for memory not covered by WB MTRRs */ 1034 cache_bp_init(); 1035 if (mtrr_trim_uncached_memory(max_pfn)) 1036 max_pfn = e820__end_of_ram_pfn(); 1037 1038 max_possible_pfn = max_pfn; 1039 1040 /* 1041 * Define random base addresses for memory sections after max_pfn is 1042 * defined and before each memory section base is used. 1043 */ 1044 kernel_randomize_memory(); 1045 1046 #ifdef CONFIG_X86_32 1047 /* max_low_pfn get updated here */ 1048 find_low_pfn_range(); 1049 #else 1050 check_x2apic(); 1051 1052 /* How many end-of-memory variables you have, grandma! */ 1053 /* need this before calling reserve_initrd */ 1054 if (max_pfn > (1UL<<(32 - PAGE_SHIFT))) 1055 max_low_pfn = e820__end_of_low_ram_pfn(); 1056 else 1057 max_low_pfn = max_pfn; 1058 #endif 1059 1060 /* Find and reserve MPTABLE area */ 1061 x86_init.mpparse.find_mptable(); 1062 1063 early_alloc_pgt_buf(); 1064 1065 /* 1066 * Need to conclude brk, before e820__memblock_setup() 1067 * it could use memblock_find_in_range, could overlap with 1068 * brk area. 1069 */ 1070 reserve_brk(); 1071 1072 cleanup_highmap(); 1073 1074 e820__memblock_setup(); 1075 1076 /* 1077 * Needs to run after memblock setup because it needs the physical 1078 * memory size. 1079 */ 1080 mem_encrypt_setup_arch(); 1081 cc_random_init(); 1082 1083 efi_find_mirror(); 1084 efi_esrt_init(); 1085 efi_mokvar_table_init(); 1086 1087 /* 1088 * The EFI specification says that boot service code won't be 1089 * called after ExitBootServices(). This is, in fact, a lie. 1090 */ 1091 efi_reserve_boot_services(); 1092 1093 /* preallocate 4k for mptable mpc */ 1094 e820__memblock_alloc_reserved_mpc_new(); 1095 1096 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION 1097 setup_bios_corruption_check(); 1098 #endif 1099 1100 #ifdef CONFIG_X86_32 1101 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", 1102 (max_pfn_mapped<<PAGE_SHIFT) - 1); 1103 #endif 1104 1105 /* 1106 * Find free memory for the real mode trampoline and place it there. If 1107 * there is not enough free memory under 1M, on EFI-enabled systems 1108 * there will be additional attempt to reclaim the memory for the real 1109 * mode trampoline at efi_free_boot_services(). 1110 * 1111 * Unconditionally reserve the entire first 1M of RAM because BIOSes 1112 * are known to corrupt low memory and several hundred kilobytes are not 1113 * worth complex detection what memory gets clobbered. Windows does the 1114 * same thing for very similar reasons. 1115 * 1116 * Moreover, on machines with SandyBridge graphics or in setups that use 1117 * crashkernel the entire 1M is reserved anyway. 1118 * 1119 * Note the host kernel TDX also requires the first 1MB being reserved. 1120 */ 1121 x86_platform.realmode_reserve(); 1122 1123 init_mem_mapping(); 1124 1125 /* 1126 * init_mem_mapping() relies on the early IDT page fault handling. 1127 * Now either enable FRED or install the real page fault handler 1128 * for 64-bit in the IDT. 1129 */ 1130 cpu_init_replace_early_idt(); 1131 1132 /* 1133 * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features) 1134 * with the current CR4 value. This may not be necessary, but 1135 * auditing all the early-boot CR4 manipulation would be needed to 1136 * rule it out. 1137 * 1138 * Mask off features that don't work outside long mode (just 1139 * PCIDE for now). 1140 */ 1141 mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE; 1142 1143 memblock_set_current_limit(get_max_mapped()); 1144 1145 /* 1146 * NOTE: On x86-32, only from this point on, fixmaps are ready for use. 1147 */ 1148 1149 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT 1150 if (init_ohci1394_dma_early) 1151 init_ohci1394_dma_on_all_controllers(); 1152 #endif 1153 /* Allocate bigger log buffer */ 1154 setup_log_buf(1); 1155 1156 if (efi_enabled(EFI_BOOT)) { 1157 switch (boot_params.secure_boot) { 1158 case efi_secureboot_mode_disabled: 1159 pr_info("Secure boot disabled\n"); 1160 break; 1161 case efi_secureboot_mode_enabled: 1162 pr_info("Secure boot enabled\n"); 1163 break; 1164 default: 1165 pr_info("Secure boot could not be determined\n"); 1166 break; 1167 } 1168 } 1169 1170 reserve_initrd(); 1171 1172 acpi_table_upgrade(); 1173 /* Look for ACPI tables and reserve memory occupied by them. */ 1174 acpi_boot_table_init(); 1175 1176 vsmp_init(); 1177 1178 io_delay_init(); 1179 1180 early_platform_quirks(); 1181 1182 /* Some platforms need the APIC registered for NUMA configuration */ 1183 early_acpi_boot_init(); 1184 x86_init.mpparse.early_parse_smp_cfg(); 1185 1186 x86_flattree_get_config(); 1187 1188 initmem_init(); 1189 dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); 1190 1191 if (boot_cpu_has(X86_FEATURE_GBPAGES)) { 1192 hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); 1193 hugetlb_bootmem_alloc(); 1194 } 1195 1196 /* 1197 * Reserve memory for crash kernel after SRAT is parsed so that it 1198 * won't consume hotpluggable memory. 1199 */ 1200 arch_reserve_crashkernel(); 1201 1202 if (!early_xdbc_setup_hardware()) 1203 early_xdbc_register_console(); 1204 1205 x86_init.paging.pagetable_init(); 1206 1207 kasan_init(); 1208 1209 /* 1210 * Sync back kernel address range. 1211 * 1212 * FIXME: Can the later sync in setup_cpu_entry_areas() replace 1213 * this call? 1214 */ 1215 sync_initial_page_table(); 1216 1217 tboot_probe(); 1218 1219 map_vsyscall(); 1220 1221 x86_32_probe_apic(); 1222 1223 early_quirks(); 1224 1225 topology_apply_cmdline_limits_early(); 1226 1227 /* 1228 * Parse SMP configuration. Try ACPI first and then the platform 1229 * specific parser. 1230 */ 1231 acpi_boot_init(); 1232 x86_init.mpparse.parse_smp_cfg(); 1233 1234 /* Last opportunity to detect and map the local APIC */ 1235 init_apic_mappings(); 1236 1237 topology_init_possible_cpus(); 1238 1239 init_cpu_to_node(); 1240 init_gi_nodes(); 1241 1242 io_apic_init_mappings(); 1243 1244 x86_init.hyper.guest_late_init(); 1245 1246 e820__reserve_resources(); 1247 e820__register_nosave_regions(max_pfn); 1248 1249 x86_init.resources.reserve_resources(); 1250 1251 e820__setup_pci_gap(); 1252 1253 #ifdef CONFIG_VT 1254 #if defined(CONFIG_VGA_CONSOLE) 1255 if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) 1256 vgacon_register_screen(&screen_info); 1257 #endif 1258 #endif 1259 x86_init.oem.banner(); 1260 1261 x86_init.timers.wallclock_init(); 1262 1263 /* 1264 * This needs to run before setup_local_APIC() which soft-disables the 1265 * local APIC temporarily and that masks the thermal LVT interrupt, 1266 * leading to softlockups on machines which have configured SMI 1267 * interrupt delivery. 1268 */ 1269 therm_lvt_init(); 1270 1271 mcheck_init(); 1272 1273 register_refined_jiffies(CLOCK_TICK_RATE); 1274 1275 #ifdef CONFIG_EFI 1276 if (efi_enabled(EFI_BOOT)) 1277 efi_apply_memmap_quirks(); 1278 #endif 1279 1280 unwind_init(); 1281 } 1282 1283 #ifdef CONFIG_X86_32 1284 1285 static struct resource video_ram_resource = { 1286 .name = "Video RAM area", 1287 .start = 0xa0000, 1288 .end = 0xbffff, 1289 .flags = IORESOURCE_BUSY | IORESOURCE_MEM 1290 }; 1291 1292 void __init i386_reserve_resources(void) 1293 { 1294 request_resource(&iomem_resource, &video_ram_resource); 1295 reserve_standard_io_resources(); 1296 } 1297 1298 #endif /* CONFIG_X86_32 */ 1299 1300 static struct notifier_block kernel_offset_notifier = { 1301 .notifier_call = dump_kernel_offset 1302 }; 1303 1304 static int __init register_kernel_offset_dumper(void) 1305 { 1306 atomic_notifier_chain_register(&panic_notifier_list, 1307 &kernel_offset_notifier); 1308 return 0; 1309 } 1310 __initcall(register_kernel_offset_dumper); 1311 1312 #ifdef CONFIG_HOTPLUG_CPU 1313 bool arch_cpu_is_hotpluggable(int cpu) 1314 { 1315 return cpu > 0; 1316 } 1317 #endif /* CONFIG_HOTPLUG_CPU */ 1318