// SPDX-License-Identifier: GPL-2.0-only
/*
 * Based on arch/arm/mm/init.c
 *
 * Copyright (C) 1995-2005 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/errno.h>
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/cache.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/initrd.h>
#include <linux/gfp.h>
#include <linux/math.h>
#include <linux/memblock.h>
#include <linux/sort.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/efi.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>
#include <linux/acpi_iort.h>
#include <linux/kmemleak.h>
#include <linux/execmem.h>

#include <asm/boot.h>
#include <asm/fixmap.h>
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
#include <asm/kvm_host.h>
#include <asm/memory.h>
#include <asm/numa.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <linux/sizes.h>
#include <asm/tlb.h>
#include <asm/alternative.h>
#include <asm/xen/swiotlb-xen.h>

/*
 * We need to be able to catch inadvertent references to memstart_addr
 * that occur (potentially in generic code) before arm64_memblock_init()
 * executes, which assigns it its actual value. So use a default value
 * that cannot be mistaken for a real physical address.
 */
s64 memstart_addr __ro_after_init = -1;
EXPORT_SYMBOL(memstart_addr);

/*
 * If the corresponding config options are enabled, we create both ZONE_DMA
 * and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory
 * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
 * In such a case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
 * otherwise it is empty.
 */
phys_addr_t __ro_after_init arm64_dma_phys_limit;

/*
 * To make optimal use of block mappings when laying out the linear
 * mapping, round down the base of physical memory to a size that can
 * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
 * (64k granule), or a multiple that can be mapped using contiguous bits
 * in the page tables: 32 * PMD_SIZE (16k granule)
 */
#if defined(CONFIG_ARM64_4K_PAGES)
#define ARM64_MEMSTART_SHIFT		PUD_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
#define ARM64_MEMSTART_SHIFT		CONT_PMD_SHIFT
#else
#define ARM64_MEMSTART_SHIFT		PMD_SHIFT
#endif

/*
 * sparsemem vmemmap imposes an additional requirement on the alignment of
 * memstart_addr, due to the fact that the base of the vmemmap region
 * has a direct correspondence, and needs to appear sufficiently aligned
 * in the virtual address space.
 */
#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
#define ARM64_MEMSTART_ALIGN	(1UL << SECTION_SIZE_BITS)
#else
#define ARM64_MEMSTART_ALIGN	(1UL << ARM64_MEMSTART_SHIFT)
#endif

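/*
 * Example usage (illustrative values, not from the original source): the
 * crash kernel region is requested on the command line, e.g. "crashkernel=512M"
 * or, for an explicit split, "crashkernel=512M,high crashkernel=128M,low".
 * Parsing and the reservation policy are handled by the generic
 * parse_crashkernel() and reserve_crashkernel_generic() helpers used below.
 */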
static void __init arch_reserve_crashkernel(void)
{
	unsigned long long low_size = 0;
	unsigned long long crash_base, crash_size;
	char *cmdline = boot_command_line;
	bool high = false;
	int ret;

	if (!IS_ENABLED(CONFIG_CRASH_RESERVE))
		return;

	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
				&crash_size, &crash_base,
				&low_size, &high);
	if (ret)
		return;

	reserve_crashkernel_generic(cmdline, crash_size, crash_base,
				    low_size, high);
}

/*
 * Return the maximum physical address for a zone accessible by the given bits
 * limit. If DRAM starts above 32-bit, expand the zone to the maximum
 * available memory, otherwise cap it at 32-bit.
 */
static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
{
	phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits);
	phys_addr_t phys_start = memblock_start_of_DRAM();

	if (phys_start > U32_MAX)
		zone_mask = PHYS_ADDR_MAX;
	else if (phys_start > zone_mask)
		zone_mask = U32_MAX;

	return min(zone_mask, memblock_end_of_DRAM() - 1) + 1;
}

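/*
 * Worked example for max_zone_phys() above (illustrative numbers only): with
 * zone_bits == 30 (e.g. the Raspberry Pi 4 ZONE_DMA), zone_mask is 1 GiB - 1.
 *  - DRAM starting at 0x0 leaves the mask untouched, so the zone ends at
 *    min(1 GiB, end of DRAM).
 *  - DRAM starting between 1 GiB and 4 GiB widens the mask to U32_MAX.
 *  - DRAM starting above 4 GiB widens it to PHYS_ADDR_MAX, i.e. the zone
 *    spans all of memory.
 */
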
static void __init zone_sizes_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
	unsigned int __maybe_unused acpi_zone_dma_bits;
	unsigned int __maybe_unused dt_zone_dma_bits;
	phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32);

#ifdef CONFIG_ZONE_DMA
	acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
	dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL));
	zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits);
	arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
	max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
#endif
#ifdef CONFIG_ZONE_DMA32
	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
	if (!arm64_dma_phys_limit)
		arm64_dma_phys_limit = dma32_phys_limit;
#endif
	if (!arm64_dma_phys_limit)
		arm64_dma_phys_limit = PHYS_MASK + 1;
	max_zone_pfns[ZONE_NORMAL] = max_pfn;

	free_area_init(max_zone_pfns);
}

int pfn_is_map_memory(unsigned long pfn)
{
	phys_addr_t addr = PFN_PHYS(pfn);

	/* avoid false positives for bogus PFNs, see comment in pfn_valid() */
	if (PHYS_PFN(addr) != pfn)
		return 0;

	return memblock_is_map_memory(addr);
}
EXPORT_SYMBOL(pfn_is_map_memory);

static phys_addr_t memory_limit __ro_after_init = PHYS_ADDR_MAX;

/*
 * Limit the memory size that was specified via FDT.
 */
static int __init early_mem(char *p)
{
	if (!p)
		return 1;

	memory_limit = memparse(p, &p) & PAGE_MASK;
	pr_notice("Memory limited to %lldMB\n", memory_limit >> 20);

	return 0;
}
early_param("mem", early_mem);

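/*
 * Example for the "mem=" parameter handled above (illustrative value):
 * booting with "mem=2G" caps the usable memory at 2 GiB. The value is
 * parsed with memparse(), so the usual K/M/G suffixes are accepted, and
 * the result is rounded down to a page boundary.
 */
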
void __init arm64_memblock_init(void)
{
	s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);

	/*
	 * Corner case: 52-bit VA capable systems running KVM in nVHE mode may
	 * be limited in their ability to support a linear map that exceeds 51
	 * bits of VA space, depending on the placement of the ID map. Given
	 * that the placement of the ID map may be randomized, let's simply
	 * limit the kernel's linear map to 51 bits as well if we detect this
	 * configuration.
	 */
	if (IS_ENABLED(CONFIG_KVM) && vabits_actual == 52 &&
	    is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
		pr_info("Capping linear region to 51 bits for KVM in nVHE mode on LVA capable hardware.\n");
		linear_region_size = min_t(u64, linear_region_size, BIT(51));
	}

	/* Remove memory above our supported physical address size */
	memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX);

	/*
	 * Select a suitable value for the base of physical memory.
	 */
	memstart_addr = round_down(memblock_start_of_DRAM(),
				   ARM64_MEMSTART_ALIGN);

	if ((memblock_end_of_DRAM() - memstart_addr) > linear_region_size)
		pr_warn("Memory doesn't fit in the linear mapping, VA_BITS too small\n");

	/*
	 * Remove the memory that we will not be able to cover with the
	 * linear mapping. Take care not to clip the kernel which may be
	 * high in memory.
	 */
	memblock_remove(max_t(u64, memstart_addr + linear_region_size,
			__pa_symbol(_end)), ULLONG_MAX);
	if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) {
		/* ensure that memstart_addr remains sufficiently aligned */
		memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size,
					 ARM64_MEMSTART_ALIGN);
		memblock_remove(0, memstart_addr);
	}

	/*
	 * If we are running with a 52-bit kernel VA config on a system that
	 * does not support it, we have to place the available physical
	 * memory in the 48-bit addressable part of the linear region, i.e.,
	 * we have to move it upward. Since memstart_addr represents the
	 * physical address of PAGE_OFFSET, we have to *subtract* from it.
	 */
	if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52))
		memstart_addr -= _PAGE_OFFSET(vabits_actual) - _PAGE_OFFSET(52);

	/*
	 * Apply the memory limit if it was set. Since the kernel may be loaded
	 * high up in memory, add back the kernel region that must be accessible
	 * via the linear mapping.
	 */
	if (memory_limit != PHYS_ADDR_MAX) {
		memblock_mem_limit_remove_map(memory_limit);
		memblock_add(__pa_symbol(_text), (u64)(_end - _text));
	}

	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
		/*
		 * Add back the memory we just removed if it would result in
		 * the initrd becoming inaccessible via the linear mapping.
		 * Otherwise, this is a no-op.
		 */
		u64 base = phys_initrd_start & PAGE_MASK;
		u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base;

		/*
		 * We can only add back the initrd memory if we don't end up
		 * with more memory than we can address via the linear mapping.
		 * It is up to the bootloader to position the kernel and the
		 * initrd reasonably close to each other (i.e., within 32 GB of
		 * each other) so that all granule/#levels combinations can
		 * always access both.
		 */
		if (WARN(base < memblock_start_of_DRAM() ||
			 base + size > memblock_start_of_DRAM() +
				       linear_region_size,
			"initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) {
			phys_initrd_size = 0;
		} else {
			memblock_add(base, size);
			memblock_clear_nomap(base, size);
			memblock_reserve(base, size);
		}
	}

	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
		extern u16 memstart_offset_seed;
		u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
		int parange = cpuid_feature_extract_unsigned_field(
					mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT);
		s64 range = linear_region_size -
			    BIT(id_aa64mmfr0_parange_to_phys_shift(parange));

		/*
		 * If the size of the linear region exceeds, by a sufficient
		 * margin, the size of the region that the physical memory can
		 * span, randomize the linear region as well.
		 */
		if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) {
			range /= ARM64_MEMSTART_ALIGN;
			memstart_addr -= ARM64_MEMSTART_ALIGN *
					 ((range * memstart_offset_seed) >> 16);
		}
	}

	/*
	 * Register the kernel text, kernel data, initrd, and initial
	 * pagetables with memblock.
	 */
	memblock_reserve(__pa_symbol(_stext), _end - _stext);
	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
		/* the generic initrd code expects virtual addresses */
		initrd_start = __phys_to_virt(phys_initrd_start);
		initrd_end = initrd_start + phys_initrd_size;
	}

	early_init_fdt_scan_reserved_mem();

	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
}

void __init bootmem_init(void)
{
	unsigned long min, max;

	min = PFN_UP(memblock_start_of_DRAM());
	max = PFN_DOWN(memblock_end_of_DRAM());

	early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);

	max_pfn = max_low_pfn = max;
	min_low_pfn = min;

	arch_numa_init();

	/*
	 * must be done after arch_numa_init() which calls numa_init() to
	 * initialize node_online_map that gets used in hugetlb_cma_reserve()
	 * while allocating required CMA size across online nodes.
	 */
#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
	arm64_hugetlb_cma_reserve();
#endif

	kvm_hyp_reserve();

	/*
	 * sparse_init() tries to allocate memory from memblock, so must be
	 * done after the fixed reservations
	 */
	sparse_init();
	zone_sizes_init();

	/*
	 * Reserve the CMA area after arm64_dma_phys_limit was initialised.
	 */
	dma_contiguous_reserve(arm64_dma_phys_limit);

	/*
	 * request_standard_resources() depends on crashkernel's memory being
	 * reserved, so do it here.
	 */
	arch_reserve_crashkernel();

	memblock_dump_all();
}

/*
 * mem_init() marks the free areas in the mem_map and tells us how much memory
 * is free. This is done after various parts of the system have claimed their
 * memory after the kernel image.
 */
void __init mem_init(void)
{
	bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit);

	if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb) {
		/*
		 * If no bouncing needed for ZONE_DMA, reduce the swiotlb
		 * buffer for kmalloc() bouncing to 1MB per 1GB of RAM.
		 */
		unsigned long size =
			DIV_ROUND_UP(memblock_phys_mem_size(), 1024);
		swiotlb_adjust_size(min(swiotlb_size_or_default(), size));
		swiotlb = true;
	}

	swiotlb_init(swiotlb, SWIOTLB_VERBOSE);

	/* this will put all unused low memory onto the freelists */
	memblock_free_all();

	/*
	 * Check boundaries twice: Some fundamental inconsistencies can be
	 * detected at build time already.
	 */
#ifdef CONFIG_COMPAT
	BUILD_BUG_ON(TASK_SIZE_32 > DEFAULT_MAP_WINDOW_64);
#endif

	/*
	 * Selected page table levels should match when derived from
	 * scratch using the virtual address range and page size.
	 */
	BUILD_BUG_ON(ARM64_HW_PGTABLE_LEVELS(CONFIG_ARM64_VA_BITS) !=
		     CONFIG_PGTABLE_LEVELS);

	if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
		extern int sysctl_overcommit_memory;
		/*
		 * On a machine this small we won't get anywhere without
		 * overcommit, so turn it on by default.
		 */
		sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
	}
}

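/*
 * Worked example for the swiotlb sizing in mem_init() above (illustrative
 * numbers): with 8 GiB of RAM, DMA_BOUNCE_UNALIGNED_KMALLOC enabled and no
 * ZONE_DMA bouncing required, size works out to 8 GiB / 1024 = 8 MiB, and
 * the bounce buffer is adjusted to min(swiotlb_size_or_default(), 8 MiB),
 * i.e. roughly 1 MiB per 1 GiB of RAM.
 */
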
void free_initmem(void)
{
	free_reserved_area(lm_alias(__init_begin),
			   lm_alias(__init_end),
			   POISON_FREE_INITMEM, "unused kernel");
	/*
	 * Unmap the __init region but leave the VM area in place. This
	 * prevents the region from being reused for kernel modules, which
	 * is not supported by kallsyms.
	 */
	vunmap_range((u64)__init_begin, (u64)__init_end);
}

void dump_mem_limit(void)
{
	if (memory_limit != PHYS_ADDR_MAX) {
		pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
	} else {
		pr_emerg("Memory Limit: none\n");
	}
}

#ifdef CONFIG_EXECMEM
static u64 module_direct_base __ro_after_init = 0;
static u64 module_plt_base __ro_after_init = 0;

/*
 * Choose a random page-aligned base address for a window of 'size' bytes which
 * entirely contains the interval [start, end - 1].
 */
static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
{
	u64 max_pgoff, pgoff;

	if ((end - start) >= size)
		return 0;

	max_pgoff = (size - (end - start)) / PAGE_SIZE;
	pgoff = get_random_u32_inclusive(0, max_pgoff);

	return start - pgoff * PAGE_SIZE;
}

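/*
 * Illustration for random_bounding_box() above (hypothetical numbers): for a
 * 40 MiB kernel image, random_bounding_box(SZ_128M, (u64)_text, (u64)_end)
 * returns a page-aligned base in [_end - SZ_128M, _text], i.e. one of
 * (SZ_128M - 40 MiB) / PAGE_SIZE + 1 candidate windows, each of which fully
 * contains the image. If the interval is already at least 'size' bytes, 0 is
 * returned instead.
 */
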
/*
 * Modules may directly reference data and text anywhere within the kernel
 * image and other modules. References using PREL32 relocations have a +/-2G
 * range, and so we need to ensure that the entire kernel image and all modules
 * fall within a 2G window such that these are always within range.
 *
 * Modules may directly branch to functions and code within the kernel text,
 * and to functions and code within other modules. These branches will use
 * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
 * that the entire kernel text and all module text falls within a 128M window
 * such that these are always within range. With PLTs, we can expand this to a
 * 2G window.
 *
 * We chose the 128M region to surround the entire kernel image (rather than
 * just the text) as using the same bounds for the 128M and 2G regions ensures
 * by construction that we never select a 128M region that is not a subset of
 * the 2G region. For very large and unusual kernel configurations this means
 * we may fall back to PLTs where they could have been avoided, but this keeps
 * the logic significantly simpler.
 */
static int __init module_init_limits(void)
{
	u64 kernel_end = (u64)_end;
	u64 kernel_start = (u64)_text;
	u64 kernel_size = kernel_end - kernel_start;

	/*
	 * The default modules region is placed immediately below the kernel
	 * image, and is large enough to use the full 2G relocation range.
	 */
	BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
	BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);

	if (!kaslr_enabled()) {
		if (kernel_size < SZ_128M)
			module_direct_base = kernel_end - SZ_128M;
		if (kernel_size < SZ_2G)
			module_plt_base = kernel_end - SZ_2G;
	} else {
		u64 min = kernel_start;
		u64 max = kernel_end;

		if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
			pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
		} else {
			module_direct_base = random_bounding_box(SZ_128M, min, max);
			if (module_direct_base) {
				min = module_direct_base;
				max = module_direct_base + SZ_128M;
			}
		}

		module_plt_base = random_bounding_box(SZ_2G, min, max);
	}

	pr_info("%llu pages in range for non-PLT usage",
		module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
	pr_info("%llu pages in range for PLT usage",
		module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);

	return 0;
}

static struct execmem_info execmem_info __ro_after_init;

struct execmem_info __init *execmem_arch_setup(void)
{
	unsigned long fallback_start = 0, fallback_end = 0;
	unsigned long start = 0, end = 0;

	module_init_limits();

	/*
	 * Where possible, prefer to allocate within direct branch range of the
	 * kernel such that no PLTs are necessary.
	 */
	if (module_direct_base) {
		start = module_direct_base;
		end = module_direct_base + SZ_128M;

		if (module_plt_base) {
			fallback_start = module_plt_base;
			fallback_end = module_plt_base + SZ_2G;
		}
	} else if (module_plt_base) {
		start = module_plt_base;
		end = module_plt_base + SZ_2G;
	}

	execmem_info = (struct execmem_info){
		.ranges = {
			[EXECMEM_DEFAULT] = {
				.start	= start,
				.end	= end,
				.pgprot	= PAGE_KERNEL,
				.alignment = 1,
				.fallback_start	= fallback_start,
				.fallback_end	= fallback_end,
			},
			[EXECMEM_KPROBES] = {
				.start	= VMALLOC_START,
				.end	= VMALLOC_END,
				.pgprot	= PAGE_KERNEL_ROX,
				.alignment = 1,
			},
			[EXECMEM_BPF] = {
				.start	= VMALLOC_START,
				.end	= VMALLOC_END,
				.pgprot	= PAGE_KERNEL,
				.alignment = 1,
			},
		},
	};

	return &execmem_info;
}
#endif /* CONFIG_EXECMEM */