// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Regents of the University of California
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 * Copyright (C) 2020 FORTH-ICS/CARV
 *  Nick Kossifidis <mick@ics.forth.gr>
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/initrd.h>
#include <linux/swap.h>
#include <linux/swiotlb.h>
#include <linux/sizes.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/libfdt.h>
#include <linux/set_memory.h>
#include <linux/dma-map-ops.h>
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>
#include <linux/kfence.h>
#include <linux/execmem.h>

#include <asm/alternative.h>
#include <asm/fixmap.h>
#include <asm/io.h>
#include <asm/kasan.h>
#include <asm/module.h>
#include <asm/numa.h>
#include <asm/pgtable.h>
#include <asm/sections.h>
#include <asm/soc.h>
#include <asm/sparsemem.h>
#include <asm/tlbflush.h>

#include "../kernel/head.h"

u64 new_vmalloc[NR_CPUS / sizeof(u64) + 1];

struct kernel_mapping kernel_map __ro_after_init;
EXPORT_SYMBOL(kernel_map);
#ifdef CONFIG_XIP_KERNEL
#define kernel_map	(*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
#endif

#ifdef CONFIG_64BIT
u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
#else
u64 satp_mode __ro_after_init = SATP_MODE_32;
#endif
EXPORT_SYMBOL(satp_mode);

#ifdef CONFIG_64BIT
bool pgtable_l4_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL);
bool pgtable_l5_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL);
EXPORT_SYMBOL(pgtable_l4_enabled);
EXPORT_SYMBOL(pgtable_l5_enabled);
#endif

phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);

#ifdef CONFIG_SPARSEMEM_VMEMMAP
#define VMEMMAP_ADDR_ALIGN	max(1ULL << SECTION_SIZE_BITS, \
				    MAX_FOLIO_VMEMMAP_ALIGN)

unsigned long vmemmap_start_pfn __ro_after_init;
EXPORT_SYMBOL(vmemmap_start_pfn);
#endif

extern char _start[];
void *_dtb_early_va __initdata;
uintptr_t _dtb_early_pa __initdata;

phys_addr_t dma32_phys_limit __initdata;

void __init arch_zone_limits_init(unsigned long *max_zone_pfns)
{
#ifdef CONFIG_ZONE_DMA32
	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
#endif
	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
}

#if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM)

#define LOG2_SZ_1K ilog2(SZ_1K)
#define LOG2_SZ_1M ilog2(SZ_1M)
#define LOG2_SZ_1G ilog2(SZ_1G)
#define LOG2_SZ_1T ilog2(SZ_1T)

static inline void print_mlk(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld kB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1K));
}

static inline void print_mlm(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld MB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1M));
}

static inline void print_mlg(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld GB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1G));
}

#ifdef CONFIG_64BIT
static inline void print_mlt(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld TB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1T));
}
#else
#define print_mlt(n, b, t) do {} while (0)
#endif

static inline void print_ml(char *name, unsigned long b, unsigned long t)
{
	unsigned long diff = t - b;

	if (IS_ENABLED(CONFIG_64BIT) && (diff >> LOG2_SZ_1T) >= 10)
		print_mlt(name, b, t);
	else if ((diff >> LOG2_SZ_1G) >= 10)
		print_mlg(name, b, t);
	else if ((diff >> LOG2_SZ_1M) >= 10)
		print_mlm(name, b, t);
	else
		print_mlk(name, b, t);
}

static void __init print_vm_layout(void)
{
	pr_notice("Virtual kernel memory layout:\n");
	print_ml("fixmap", (unsigned long)FIXADDR_START,
		 (unsigned long)FIXADDR_TOP);
	print_ml("pci io", (unsigned long)PCI_IO_START,
		 (unsigned long)PCI_IO_END);
	print_ml("vmemmap", (unsigned long)VMEMMAP_START,
		 (unsigned long)VMEMMAP_END);
	print_ml("vmalloc", (unsigned long)VMALLOC_START,
		 (unsigned long)VMALLOC_END);
#ifdef CONFIG_64BIT
	print_ml("modules", (unsigned long)MODULES_VADDR,
		 (unsigned long)MODULES_END);
#endif
	print_ml("lowmem", (unsigned long)PAGE_OFFSET,
		 (unsigned long)high_memory);
	if (IS_ENABLED(CONFIG_64BIT)) {
#ifdef CONFIG_KASAN
		print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
#endif

		print_ml("kernel", (unsigned long)kernel_map.virt_addr,
			 (unsigned long)ADDRESS_SPACE_END);
	}
}
#else
static void print_vm_layout(void) { }
#endif /* CONFIG_DEBUG_VM */

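/*
 * arch_mm_preinit() decides whether a swiotlb bounce buffer is needed:
 * either because some RAM lies above the 32-bit DMA limit, or because
 * kmalloc() bouncing is required on non-coherent platforms. It then prints
 * the virtual memory layout when CONFIG_DEBUG_VM is enabled.
 */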
void __init arch_mm_preinit(void)
{
	bool swiotlb = max_pfn > PFN_DOWN(dma32_phys_limit);
#ifdef CONFIG_FLATMEM
	BUG_ON(!mem_map);
#endif /* CONFIG_FLATMEM */

	if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb &&
	    dma_cache_alignment != 1) {
		/*
		 * If no bouncing needed for ZONE_DMA, allocate 1MB swiotlb
		 * buffer per 1GB of RAM for kmalloc() bouncing on
		 * non-coherent platforms.
		 */
		unsigned long size =
			DIV_ROUND_UP(memblock_phys_mem_size(), 1024);
		swiotlb_adjust_size(min(swiotlb_size_or_default(), size));
		swiotlb = true;
	}

	swiotlb_init(swiotlb, SWIOTLB_VERBOSE);

	print_vm_layout();
}

/* Limit the memory size via mem. */
static phys_addr_t memory_limit;
#ifdef CONFIG_XIP_KERNEL
#define memory_limit	(*(phys_addr_t *)XIP_FIXUP(&memory_limit))
#endif /* CONFIG_XIP_KERNEL */

static int __init early_mem(char *p)
{
	u64 size;

	if (!p)
		return 1;

	size = memparse(p, &p) & PAGE_MASK;
	memory_limit = min_t(u64, size, memory_limit);

	pr_notice("Memory limited to %lldMB\n", (u64)memory_limit >> 20);

	return 0;
}
early_param("mem", early_mem);

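/*
 * setup_bootmem() finalizes the memblock view of memory: it reserves the
 * kernel image, applies the "mem=" limit, caps RAM to what the linear
 * mapping can cover, reserves the initrd and the DTB, and computes the
 * 32-bit DMA limit used for ZONE_DMA32 and CMA.
 */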
static void __init setup_bootmem(void)
{
	phys_addr_t vmlinux_end = __pa_symbol(&_end);
	phys_addr_t max_mapped_addr;
	phys_addr_t phys_ram_end, vmlinux_start;

	if (IS_ENABLED(CONFIG_XIP_KERNEL))
		vmlinux_start = __pa_symbol(&_sdata);
	else
		vmlinux_start = __pa_symbol(&_start);

	memblock_enforce_memory_limit(memory_limit);

	/*
	 * Make sure we align the reservation on PMD_SIZE since we will
	 * map the kernel in the linear mapping as read-only: we do not want
	 * any allocation to happen between _end and the next pmd aligned page.
	 */
	if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
		vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
	/*
	 * Reserve from the start of the kernel to the end of the kernel
	 */
	memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);

	/*
	 * Make sure we align the start of the memory on a PMD boundary so that
	 * at worst, we map the linear mapping with PMD mappings.
	 */
	if (!IS_ENABLED(CONFIG_XIP_KERNEL)) {
		phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
		vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
#endif
	}

	/*
	 * In 64-bit, any use of __va/__pa before this point is wrong as we
	 * did not know the start of DRAM before.
	 */
	if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU))
		kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;

	/*
	 * The size of the linear page mapping may restrict the amount of
	 * usable RAM.
	 */
	if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) {
		max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE;
		if (memblock_end_of_DRAM() > max_mapped_addr) {
			memblock_cap_memory_range(phys_ram_base,
						  max_mapped_addr - phys_ram_base);
			pr_warn("Physical memory overflows the linear mapping size: region above %pa removed",
				&max_mapped_addr);
		}
	}

	/*
	 * Reserve physical address space that would be mapped to virtual
	 * addresses greater than (void *)(-PAGE_SIZE) because:
	 *  - This memory would overlap with ERR_PTR
	 *  - This memory belongs to high memory, which is not supported
	 *
	 * This is not applicable to 64-bit kernel, because virtual addresses
	 * after (void *)(-PAGE_SIZE) are not linearly mapped: they are
	 * occupied by kernel mapping. Also it is unrealistic for high memory
	 * to exist on 64-bit platforms.
	 */
	if (!IS_ENABLED(CONFIG_64BIT)) {
		max_mapped_addr = __va_to_pa_nodebug(-PAGE_SIZE);
		memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr);
	}

	phys_ram_end = memblock_end_of_DRAM();
	min_low_pfn = PFN_UP(phys_ram_base);
	max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);

	dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));

	reserve_initrd_mem();

	/*
	 * No allocation should be done before reserving the memory as defined
	 * in the device tree, otherwise the allocation could end up in a
	 * reserved region.
	 */
	early_init_fdt_scan_reserved_mem();

	/*
	 * If DTB is built in, no need to reserve its memblock.
	 * Otherwise, do reserve it but avoid using
	 * early_init_fdt_reserve_self() since __pa() does
	 * not work for DTB pointers that are fixmap addresses
	 */
	if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
		memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));

	dma_contiguous_reserve(dma32_phys_limit);
}

#ifdef CONFIG_RELOCATABLE
extern unsigned long __rela_dyn_start, __rela_dyn_end;

static void __init relocate_kernel(void)
{
	Elf_Rela *rela = (Elf_Rela *)&__rela_dyn_start;
	/*
	 * This holds the offset between the linked virtual address and the
	 * relocated virtual address.
	 */
	uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
	/*
	 * This holds the offset between kernel linked virtual address and
	 * physical address.
	 */
	uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;

	for ( ; rela < (Elf_Rela *)&__rela_dyn_end; rela++) {
		Elf_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
		Elf_Addr relocated_addr = rela->r_addend;

		if (rela->r_info != R_RISCV_RELATIVE)
			continue;

		/*
		 * Make sure to not relocate vdso symbols like rt_sigreturn
		 * which are linked from the address 0 in vmlinux since
		 * vdso symbol addresses are actually used as an offset from
		 * mm->context.vdso in VDSO_OFFSET macro.
		 */
		if (relocated_addr >= KERNEL_LINK_ADDR)
			relocated_addr += reloc_offset;

		*(Elf_Addr *)addr = relocated_addr;
	}
}
#endif /* CONFIG_RELOCATABLE */

#ifdef CONFIG_MMU
struct pt_alloc_ops pt_ops __meminitdata;

pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;

pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define pt_ops			(*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
#define trampoline_pg_dir	((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte		((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir		((pgd_t *)XIP_FIXUP(early_pg_dir))
#endif /* CONFIG_XIP_KERNEL */

static const pgprot_t protection_map[16] = {
	[VM_NONE]					= PAGE_NONE,
	[VM_READ]					= PAGE_READ,
	[VM_WRITE]					= PAGE_SHADOWSTACK,
	[VM_WRITE | VM_READ]				= PAGE_COPY,
	[VM_EXEC]					= PAGE_EXEC,
	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
	[VM_EXEC | VM_WRITE]				= PAGE_COPY_EXEC,
	[VM_EXEC | VM_WRITE | VM_READ]			= PAGE_COPY_EXEC,
	[VM_SHARED]					= PAGE_NONE,
	[VM_SHARED | VM_READ]				= PAGE_READ,
	[VM_SHARED | VM_WRITE]				= PAGE_SHARED,
	[VM_SHARED | VM_WRITE | VM_READ]		= PAGE_SHARED,
	[VM_SHARED | VM_EXEC]				= PAGE_EXEC,
	[VM_SHARED | VM_EXEC | VM_READ]			= PAGE_READ_EXEC,
	[VM_SHARED | VM_EXEC | VM_WRITE]		= PAGE_SHARED_EXEC,
	[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ]	= PAGE_SHARED_EXEC
};
DECLARE_VM_GET_PAGE_PROT

void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *ptep;

	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

	ptep = &fixmap_pte[pte_index(addr)];

	if (pgprot_val(prot))
		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
	else
		pte_clear(&init_mm, addr, ptep);
	local_flush_tlb_page(addr);
}

static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
{
	return (pte_t *)((uintptr_t)pa);
}

static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PTE);
	return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
}

static inline pte_t *__meminit get_pte_virt_late(phys_addr_t pa)
{
	return (pte_t *) __va(pa);
}

static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
{
	/*
	 * We only create PMD or PGD early mappings so we
	 * should never reach here with MMU disabled.
	 */
	BUG();
}

static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t __meminit alloc_pte_late(uintptr_t va)
{
	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0);

	/*
	 * We do not know which mm the PTE page is associated to at this point.
	 * Passing NULL to the ctor is the safe option, though it may result
	 * in unnecessary work (e.g. initialising the ptlock for init_mm).
	 */
	BUG_ON(!ptdesc || !pagetable_pte_ctor(NULL, ptdesc));
	return __pa((pte_t *)ptdesc_address(ptdesc));
}

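/*
 * Install a single 4K mapping at @va in the given PTE table. An already
 * present entry is left untouched.
 */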
static void __meminit create_pte_mapping(pte_t *ptep, uintptr_t va, phys_addr_t pa, phys_addr_t sz,
					 pgprot_t prot)
{
	uintptr_t pte_idx = pte_index(va);

	BUG_ON(sz != PAGE_SIZE);

	if (pte_none(ptep[pte_idx]))
		ptep[pte_idx] = pfn_pte(PFN_DOWN(pa), prot);
}

#ifndef __PAGETABLE_PMD_FOLDED

static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define trampoline_pmd	((pmd_t *)XIP_FIXUP(trampoline_pmd))
#define fixmap_pmd	((pmd_t *)XIP_FIXUP(fixmap_pmd))
#define early_pmd	((pmd_t *)XIP_FIXUP(early_pmd))
#endif /* CONFIG_XIP_KERNEL */

static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define trampoline_p4d	((p4d_t *)XIP_FIXUP(trampoline_p4d))
#define fixmap_p4d	((p4d_t *)XIP_FIXUP(fixmap_p4d))
#define early_p4d	((p4d_t *)XIP_FIXUP(early_p4d))
#endif /* CONFIG_XIP_KERNEL */

static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define trampoline_pud	((pud_t *)XIP_FIXUP(trampoline_pud))
#define fixmap_pud	((pud_t *)XIP_FIXUP(fixmap_pud))
#define early_pud	((pud_t *)XIP_FIXUP(early_pud))
#endif /* CONFIG_XIP_KERNEL */

static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
	/* Before MMU is enabled */
	return (pmd_t *)((uintptr_t)pa);
}

static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PMD);
	return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
}

static pmd_t *__meminit get_pmd_virt_late(phys_addr_t pa)
{
	return (pmd_t *) __va(pa);
}

static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
	BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);

	return (uintptr_t)early_pmd;
}

static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t __meminit alloc_pmd_late(uintptr_t va)
{
	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0);

	/* See comment in alloc_pte_late() regarding NULL passed to the ctor */
	BUG_ON(!ptdesc || !pagetable_pmd_ctor(NULL, ptdesc));
	return __pa((pmd_t *)ptdesc_address(ptdesc));
}

static void __meminit create_pmd_mapping(pmd_t *pmdp,
					 uintptr_t va, phys_addr_t pa,
					 phys_addr_t sz, pgprot_t prot)
{
	pte_t *ptep;
	phys_addr_t pte_phys;
	uintptr_t pmd_idx = pmd_index(va);

	if (sz == PMD_SIZE) {
		if (pmd_none(pmdp[pmd_idx]))
			pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pa), prot);
		return;
	}

	if (pmd_none(pmdp[pmd_idx])) {
		pte_phys = pt_ops.alloc_pte(va);
		pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
		ptep = pt_ops.get_pte_virt(pte_phys);
		memset(ptep, 0, PAGE_SIZE);
	} else {
		pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
		ptep = pt_ops.get_pte_virt(pte_phys);
	}

	create_pte_mapping(ptep, va, pa, sz, prot);
}

static pud_t *__init get_pud_virt_early(phys_addr_t pa)
{
	return (pud_t *)((uintptr_t)pa);
}

static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PUD);
	return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
}

static pud_t *__meminit get_pud_virt_late(phys_addr_t pa)
{
	return (pud_t *)__va(pa);
}

static phys_addr_t __init alloc_pud_early(uintptr_t va)
{
	/* Only one PUD is available for early mapping */
	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);

	return (uintptr_t)early_pud;
}

static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t __meminit alloc_pud_late(uintptr_t va)
{
	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0);

	BUG_ON(!ptdesc);
	pagetable_pud_ctor(ptdesc);
	return __pa((pud_t *)ptdesc_address(ptdesc));
}

static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
{
	return (p4d_t *)((uintptr_t)pa);
}

static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_P4D);
	return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
}

static p4d_t *__meminit get_p4d_virt_late(phys_addr_t pa)
{
	return (p4d_t *)__va(pa);
}

static phys_addr_t __init alloc_p4d_early(uintptr_t va)
{
	/* Only one P4D is available for early mapping */
	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);

	return (uintptr_t)early_p4d;
}

static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t __meminit alloc_p4d_late(uintptr_t va)
{
	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0);

	BUG_ON(!ptdesc);
	pagetable_p4d_ctor(ptdesc);
	return __pa((p4d_t *)ptdesc_address(ptdesc));
}

static void __meminit create_pud_mapping(pud_t *pudp, uintptr_t va, phys_addr_t pa, phys_addr_t sz,
					 pgprot_t prot)
{
	pmd_t *nextp;
	phys_addr_t next_phys;
	uintptr_t pud_index = pud_index(va);

	if (sz == PUD_SIZE) {
		if (pud_val(pudp[pud_index]) == 0)
			pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
		return;
	}

	if (pud_val(pudp[pud_index]) == 0) {
		next_phys = pt_ops.alloc_pmd(va);
		pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = pt_ops.get_pmd_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
		nextp = pt_ops.get_pmd_virt(next_phys);
	}

	create_pmd_mapping(nextp, va, pa, sz, prot);
}

static void __meminit create_p4d_mapping(p4d_t *p4dp, uintptr_t va, phys_addr_t pa, phys_addr_t sz,
					 pgprot_t prot)
{
	pud_t *nextp;
	phys_addr_t next_phys;
	uintptr_t p4d_index = p4d_index(va);

	if (sz == P4D_SIZE) {
		if (p4d_val(p4dp[p4d_index]) == 0)
			p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
		return;
	}

	if (p4d_val(p4dp[p4d_index]) == 0) {
		next_phys = pt_ops.alloc_pud(va);
		p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = pt_ops.get_pud_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
		nextp = pt_ops.get_pud_virt(next_phys);
	}

	create_pud_mapping(nextp, va, pa, sz, prot);
}

#define pgd_next_t		p4d_t
#define alloc_pgd_next(__va)	(pgtable_l5_enabled ?			\
		pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ?		\
		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
#define get_pgd_next_virt(__pa)	(pgtable_l5_enabled ?			\
		pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ?	\
		pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
				(pgtable_l5_enabled ?			\
		create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) :	\
				(pgtable_l4_enabled ?			\
		create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) :	\
		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
#define fixmap_pgd_next		(pgtable_l5_enabled ?			\
		(uintptr_t)fixmap_p4d : (pgtable_l4_enabled ?		\
		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
#define trampoline_pgd_next	(pgtable_l5_enabled ?			\
		(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ?	\
		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
#else
#define pgd_next_t		pte_t
#define alloc_pgd_next(__va)	pt_ops.alloc_pte(__va)
#define get_pgd_next_virt(__pa)	pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next		((uintptr_t)fixmap_pte)
#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#endif /* __PAGETABLE_PMD_FOLDED */

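/*
 * Create a mapping at @va in @pgdp: install a leaf entry if @sz is
 * PGDIR_SIZE, otherwise allocate (or reuse) the next-level table and
 * recurse down until the requested mapping size is reached.
 */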
void __meminit create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz,
				  pgprot_t prot)
{
	pgd_next_t *nextp;
	phys_addr_t next_phys;
	uintptr_t pgd_idx = pgd_index(va);

	if (sz == PGDIR_SIZE) {
		if (pgd_val(pgdp[pgd_idx]) == 0)
			pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(pa), prot);
		return;
	}

	if (pgd_val(pgdp[pgd_idx]) == 0) {
		next_phys = alloc_pgd_next(va);
		pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = get_pgd_next_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_idx]));
		nextp = get_pgd_next_virt(next_phys);
	}

	create_pgd_next_mapping(nextp, va, pa, sz, prot);
}

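/*
 * Pick the largest mapping size (P4D/PUD/PMD/PAGE) for which both the
 * physical and virtual addresses are suitably aligned and the remaining
 * size is large enough; fall back to 4K pages when debug_pagealloc is on.
 */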
static uintptr_t __meminit best_map_size(phys_addr_t pa, uintptr_t va, phys_addr_t size)
{
	if (debug_pagealloc_enabled())
		return PAGE_SIZE;

	if (pgtable_l5_enabled &&
	    !(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
		return P4D_SIZE;

	if (pgtable_l4_enabled &&
	    !(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
		return PUD_SIZE;

	if (IS_ENABLED(CONFIG_64BIT) &&
	    !(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
		return PMD_SIZE;

	return PAGE_SIZE;
}

#ifdef CONFIG_XIP_KERNEL
#define phys_ram_base	(*(phys_addr_t *)XIP_FIXUP(&phys_ram_base))
extern char _xiprom[], _exiprom[], __data_loc;

/* called from head.S with MMU off */
asmlinkage void __init __copy_data(void)
{
	void *from = (void *)(&__data_loc);
	void *to = (void *)CONFIG_PHYS_RAM_BASE;
	size_t sz = (size_t)((uintptr_t)(&_end) - (uintptr_t)(&_sdata));

	memcpy(to, from, sz);
}
#endif

#ifdef CONFIG_STRICT_KERNEL_RWX
static __meminit pgprot_t pgprot_from_va(uintptr_t va)
{
	if (is_va_kernel_text(va))
		return PAGE_KERNEL_READ_EXEC;

	/*
	 * In 64-bit kernel, the kernel mapping is outside the linear mapping so
	 * we must protect its linear mapping alias from being executed and
	 * written.
	 * And rodata section is marked readonly in mark_rodata_ro.
	 */
	if (IS_ENABLED(CONFIG_64BIT) && is_va_kernel_lm_alias_text(va))
		return PAGE_KERNEL_READ;

	return PAGE_KERNEL;
}

void mark_rodata_ro(void)
{
	set_kernel_memory(__start_rodata, _data, set_memory_ro);
	if (IS_ENABLED(CONFIG_64BIT))
		set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data),
				  set_memory_ro);
}
#else
static __meminit pgprot_t pgprot_from_va(uintptr_t va)
{
	if (IS_ENABLED(CONFIG_64BIT) && !is_kernel_mapping(va))
		return PAGE_KERNEL;

	return PAGE_KERNEL_EXEC;
}
#endif /* CONFIG_STRICT_KERNEL_RWX */

#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
u64 __pi_set_satp_mode_from_cmdline(uintptr_t dtb_pa);
u64 __pi_set_satp_mode_from_fdt(uintptr_t dtb_pa);

static void __init disable_pgtable_l5(void)
{
	pgtable_l5_enabled = false;
	kernel_map.page_offset = PAGE_OFFSET_L4;
	satp_mode = SATP_MODE_48;
}

static void __init disable_pgtable_l4(void)
{
	pgtable_l4_enabled = false;
	kernel_map.page_offset = PAGE_OFFSET_L3;
	satp_mode = SATP_MODE_39;
}

static int __init print_no4lvl(char *p)
{
	pr_info("Disabled 4-level and 5-level paging");
	return 0;
}
early_param("no4lvl", print_no4lvl);

static int __init print_no5lvl(char *p)
{
	pr_info("Disabled 5-level paging");
	return 0;
}
early_param("no5lvl", print_no5lvl);

static void __init set_mmap_rnd_bits_max(void)
{
	mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3;
}

/*
 * There is a simple way to determine if 4-level is supported by the
 * underlying hardware: establish 1:1 mapping in 4-level page table mode
 * then read SATP to see if the configuration was taken into account
 * meaning sv48 is supported.
 * The maximum SATP mode is limited by both the command line and the "mmu-type"
 * property in the device tree, since some platforms may hang if an unsupported
 * SATP mode is attempted.
 */
static __init void set_satp_mode(uintptr_t dtb_pa)
{
	u64 identity_satp, hw_satp;
	uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
	u64 satp_mode_limit = min_not_zero(__pi_set_satp_mode_from_cmdline(dtb_pa),
					   __pi_set_satp_mode_from_fdt(dtb_pa));

	kernel_map.page_offset = PAGE_OFFSET_L5;

	if (satp_mode_limit == SATP_MODE_48) {
		disable_pgtable_l5();
	} else if (satp_mode_limit == SATP_MODE_39) {
		disable_pgtable_l5();
		disable_pgtable_l4();
		return;
	}

	create_p4d_mapping(early_p4d,
			   set_satp_mode_pmd, (uintptr_t)early_pud,
			   P4D_SIZE, PAGE_TABLE);
	create_pud_mapping(early_pud,
			   set_satp_mode_pmd, (uintptr_t)early_pmd,
			   PUD_SIZE, PAGE_TABLE);
	/* Handle the case where set_satp_mode straddles 2 PMDs */
	create_pmd_mapping(early_pmd,
			   set_satp_mode_pmd, set_satp_mode_pmd,
			   PMD_SIZE, PAGE_KERNEL_EXEC);
	create_pmd_mapping(early_pmd,
			   set_satp_mode_pmd + PMD_SIZE,
			   set_satp_mode_pmd + PMD_SIZE,
			   PMD_SIZE, PAGE_KERNEL_EXEC);
retry:
	create_pgd_mapping(early_pg_dir,
			   set_satp_mode_pmd,
			   pgtable_l5_enabled ?
				(uintptr_t)early_p4d : (uintptr_t)early_pud,
			   PGDIR_SIZE, PAGE_TABLE);

	identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;

	local_flush_tlb_all();
	csr_write(CSR_SATP, identity_satp);
	hw_satp = csr_swap(CSR_SATP, 0ULL);
	local_flush_tlb_all();

	if (hw_satp != identity_satp) {
		if (pgtable_l5_enabled) {
			disable_pgtable_l5();
			memset(early_pg_dir, 0, PAGE_SIZE);
			goto retry;
		}
		disable_pgtable_l4();
	}

	memset(early_pg_dir, 0, PAGE_SIZE);
	memset(early_p4d, 0, PAGE_SIZE);
	memset(early_pud, 0, PAGE_SIZE);
	memset(early_pmd, 0, PAGE_SIZE);
}
#endif

/*
 * setup_vm() is called from head.S with MMU-off.
 *
 * Following requirements should be honoured for setup_vm() to work
 * correctly:
 * 1) It should use PC-relative addressing for accessing kernel symbols.
 *    To achieve this we always use GCC cmodel=medany.
 * 2) The compiler instrumentation for FTRACE will not work for setup_vm()
 *    so disable compiler instrumentation when FTRACE is enabled.
 *
 * Currently, the above requirements are honoured by using custom CFLAGS
 * for init.o in mm/Makefile.
 */

#ifndef __riscv_cmodel_medany
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif

#ifdef CONFIG_XIP_KERNEL
static void __init create_kernel_page_table(pgd_t *pgdir,
					    __always_unused bool early)
{
	uintptr_t va, start_va, end_va;

	/* Map the flash resident part */
	end_va = kernel_map.virt_addr + kernel_map.xiprom_sz;
	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.xiprom + (va - kernel_map.virt_addr),
				   PMD_SIZE, PAGE_KERNEL_EXEC);

	/* Map the data in RAM */
	start_va = kernel_map.virt_addr + (uintptr_t)&_sdata - (uintptr_t)&_start;
	end_va = kernel_map.virt_addr + kernel_map.size;
	for (va = start_va; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.phys_addr + (va - start_va),
				   PMD_SIZE, PAGE_KERNEL);
}
#else
static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
{
	uintptr_t va, end_va;

	end_va = kernel_map.virt_addr + kernel_map.size;
	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.phys_addr + (va - kernel_map.virt_addr),
				   PMD_SIZE,
				   early ?
					PAGE_KERNEL_EXEC : pgprot_from_va(va));
}
#endif

/*
 * Setup a 4MB mapping that encompasses the device tree: for 64-bit kernel,
 * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
 * entry.
 */
static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va,
					       uintptr_t dtb_pa)
{
#ifndef CONFIG_BUILTIN_DTB
	uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);

	/* Make sure the fdt fixmap address is always aligned on PMD size */
	BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE));

	/* In 32-bit only, the fdt lies in its own PGD */
	if (!IS_ENABLED(CONFIG_64BIT)) {
		create_pgd_mapping(early_pg_dir, fix_fdt_va,
				   pa, MAX_FDT_SIZE, PAGE_KERNEL);
	} else {
		create_pmd_mapping(fixmap_pmd, fix_fdt_va,
				   pa, PMD_SIZE, PAGE_KERNEL);
		create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE,
				   pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
	}

	dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1));
#else
	/*
	 * For 64-bit kernel, __va can't be used since it would return a linear
	 * mapping address whereas dtb_early_va will be used before
	 * setup_vm_final installs the linear mapping. For 32-bit kernel, as the
	 * kernel is mapped in the linear mapping, that makes no difference.
	 */
	dtb_early_va = kernel_mapping_pa_to_va(dtb_pa);
#endif

	dtb_early_pa = dtb_pa;
}

/*
 * MMU is not enabled, the page tables are allocated directly using
 * early_pmd/pud/p4d and the address returned is the physical one.
 */
static void __init pt_ops_set_early(void)
{
	pt_ops.alloc_pte = alloc_pte_early;
	pt_ops.get_pte_virt = get_pte_virt_early;
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = alloc_pmd_early;
	pt_ops.get_pmd_virt = get_pmd_virt_early;
	pt_ops.alloc_pud = alloc_pud_early;
	pt_ops.get_pud_virt = get_pud_virt_early;
	pt_ops.alloc_p4d = alloc_p4d_early;
	pt_ops.get_p4d_virt = get_p4d_virt_early;
#endif
}

/*
 * MMU is enabled but page table setup is not complete yet.
 * fixmap page table alloc functions must be used as a means to temporarily
 * map the allocated physical pages since the linear mapping does not exist yet.
 *
 * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va,
 * but it will be used as described above.
 */
static void __init pt_ops_set_fixmap(void)
{
	pt_ops.alloc_pte = kernel_mapping_pa_to_va(alloc_pte_fixmap);
	pt_ops.get_pte_virt = kernel_mapping_pa_to_va(get_pte_virt_fixmap);
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = kernel_mapping_pa_to_va(alloc_pmd_fixmap);
	pt_ops.get_pmd_virt = kernel_mapping_pa_to_va(get_pmd_virt_fixmap);
	pt_ops.alloc_pud = kernel_mapping_pa_to_va(alloc_pud_fixmap);
	pt_ops.get_pud_virt = kernel_mapping_pa_to_va(get_pud_virt_fixmap);
	pt_ops.alloc_p4d = kernel_mapping_pa_to_va(alloc_p4d_fixmap);
	pt_ops.get_p4d_virt = kernel_mapping_pa_to_va(get_p4d_virt_fixmap);
#endif
}

/*
 * MMU is enabled and page table setup is complete, so from now, we can use
 * generic page allocation functions to setup page table.
 */
static void __init pt_ops_set_late(void)
{
	pt_ops.alloc_pte = alloc_pte_late;
	pt_ops.get_pte_virt = get_pte_virt_late;
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = alloc_pmd_late;
	pt_ops.get_pmd_virt = get_pmd_virt_late;
	pt_ops.alloc_pud = alloc_pud_late;
	pt_ops.get_pud_virt = get_pud_virt_late;
	pt_ops.alloc_p4d = alloc_p4d_late;
	pt_ops.get_p4d_virt = get_p4d_virt_late;
#endif
}

#ifdef CONFIG_RANDOMIZE_BASE
extern bool __init __pi_set_nokaslr_from_cmdline(uintptr_t dtb_pa);
extern u64 __init __pi_get_kaslr_seed(uintptr_t dtb_pa);
extern u64 __init __pi_get_kaslr_seed_zkr(const uintptr_t dtb_pa);

static int __init print_nokaslr(char *p)
{
	pr_info("Disabled KASLR");
	return 0;
}
early_param("nokaslr", print_nokaslr);

unsigned long kaslr_offset(void)
{
	return kernel_map.virt_offset;
}
#endif

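/*
 * setup_vm() runs from head.S with the MMU off: it computes the kernel
 * mapping (including the KASLR offset), probes the satp mode, and builds
 * the early page tables for the trampoline, the fixmap, the kernel image
 * and the FDT. The full linear mapping is created later in setup_vm_final().
 */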
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
	pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;

#ifdef CONFIG_RANDOMIZE_BASE
	if (!__pi_set_nokaslr_from_cmdline(dtb_pa)) {
		u64 kaslr_seed = __pi_get_kaslr_seed_zkr(dtb_pa);
		u32 kernel_size = (uintptr_t)(&_end) - (uintptr_t)(&_start);
		u32 nr_pos;

		if (kaslr_seed == 0)
			kaslr_seed = __pi_get_kaslr_seed(dtb_pa);
		/*
		 * Compute the number of positions available: we are limited
		 * by the early page table that only has one PUD and we must
		 * be aligned on PMD_SIZE.
		 */
		nr_pos = (PUD_SIZE - kernel_size) / PMD_SIZE;

		kernel_map.virt_offset = (kaslr_seed % nr_pos) * PMD_SIZE;
	}
#endif

	kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset;

#ifdef CONFIG_XIP_KERNEL
	kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
	kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);

	phys_ram_base = CONFIG_PHYS_RAM_BASE;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
#endif
	kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
	kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start);

	kernel_map.va_kernel_xip_text_pa_offset = kernel_map.virt_addr - kernel_map.xiprom;
	kernel_map.va_kernel_xip_data_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr
						  + (uintptr_t)&_sdata - (uintptr_t)&_start;
#else
	kernel_map.phys_addr = (uintptr_t)(&_start);
	kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
	kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
#endif

#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
	set_satp_mode(dtb_pa);
	set_mmap_rnd_bits_max();
#endif

	/*
	 * In 64-bit, we defer the setup of va_pa_offset to setup_bootmem,
	 * where we have the system memory layout: this allows us to align
	 * the physical and virtual mappings and then make use of PUD/P4D/PGD
	 * for the linear mapping. This is only possible because the kernel
	 * mapping lies outside the linear mapping.
	 * In 32-bit however, as the kernel resides in the linear mapping,
	 * setup_vm_final can not change the mapping established here,
	 * otherwise the same kernel addresses would get mapped to different
	 * physical addresses (if the start of dram is different from the
	 * kernel physical address start).
	 */
	kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ?
				  0UL : PAGE_OFFSET - kernel_map.phys_addr;

	memory_limit = KERN_VIRT_SIZE;

	/* Sanity check alignment and size */
	BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
	BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);

#ifdef CONFIG_64BIT
	/*
	 * The last 4K bytes of the addressable memory can not be mapped because
	 * of IS_ERR_VALUE macro.
	 */
	BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
#endif

#ifdef CONFIG_RELOCATABLE
	/*
	 * Early page table uses only one PUD, which makes it possible
	 * to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset
	 * makes the kernel cross over a PUD_SIZE boundary, raise a bug
	 * since a part of the kernel would not get mapped.
	 */
	if (IS_ENABLED(CONFIG_64BIT))
		BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
	relocate_kernel();
#endif

	apply_early_boot_alternatives();
	pt_ops_set_early();

	/* Setup early PGD for fixmap */
	create_pgd_mapping(early_pg_dir, FIXADDR_START,
			   fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);

#ifndef __PAGETABLE_PMD_FOLDED
	/* Setup fixmap P4D and PUD */
	if (pgtable_l5_enabled)
		create_p4d_mapping(fixmap_p4d, FIXADDR_START,
				   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
	/* Setup fixmap PUD and PMD */
	if (pgtable_l4_enabled)
		create_pud_mapping(fixmap_pud, FIXADDR_START,
				   (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
	create_pmd_mapping(fixmap_pmd, FIXADDR_START,
			   (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
	/* Setup trampoline PGD and PMD */
	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
			   trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
	if (pgtable_l5_enabled)
		create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
				   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
	if (pgtable_l4_enabled)
		create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
				   (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
#ifdef CONFIG_XIP_KERNEL
	create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
			   kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
#else
	create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
			   kernel_map.phys_addr, PMD_SIZE, PAGE_KERNEL_EXEC);
#endif
#else
	/* Setup trampoline PGD */
	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
			   kernel_map.phys_addr, PGDIR_SIZE, PAGE_KERNEL_EXEC);
#endif

	/*
	 * Setup early PGD covering entire kernel which will allow
	 * us to reach paging_init(). We map all memory banks later
	 * in setup_vm_final() below.
	 */
	create_kernel_page_table(early_pg_dir, true);

	/* Setup early mapping for FDT early scan */
	create_fdt_early_page_table(__fix_to_virt(FIX_FDT), dtb_pa);

	/*
	 * Boot-time fixmap can only handle PMD_SIZE mappings. Thus, the
	 * boot-ioremap range can not span multiple PMDs.
	 */
	BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
	       != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

#ifndef __PAGETABLE_PMD_FOLDED
	/*
	 * Early ioremap fixmap is already created as it lies within first 2MB
	 * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END
	 * and FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn
	 * the user if not.
	 */
	fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
	fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
	if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
		WARN_ON(1);
		pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
			pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
		pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
	}
#endif

	pt_ops_set_fixmap();
}

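/*
 * Map the physical range [start, end) into the linear mapping of
 * swapper_pg_dir, either with a caller-provided fixed mapping size and
 * protection or with the best mapping size/protection for each chunk.
 */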
static void __meminit create_linear_mapping_range(phys_addr_t start, phys_addr_t end,
						  uintptr_t fixed_map_size, const pgprot_t *pgprot)
{
	phys_addr_t pa;
	uintptr_t va, map_size;

	for (pa = start; pa < end; pa += map_size) {
		va = (uintptr_t)__va(pa);
		map_size = fixed_map_size ? fixed_map_size :
					    best_map_size(pa, va, end - pa);

		create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
				   pgprot ? *pgprot : pgprot_from_va(va));
	}
}

static void __init create_linear_mapping_page_table(void)
{
	phys_addr_t start, end;
	phys_addr_t kfence_pool __maybe_unused;
	u64 i;

#ifdef CONFIG_STRICT_KERNEL_RWX
	phys_addr_t ktext_start = __pa_symbol(_start);
	phys_addr_t ktext_size = __init_data_begin - _start;
	phys_addr_t krodata_start = __pa_symbol(__start_rodata);
	phys_addr_t krodata_size = _data - __start_rodata;

	/* Isolate kernel text and rodata so they don't get mapped with a PUD */
	memblock_mark_nomap(ktext_start, ktext_size);
	memblock_mark_nomap(krodata_start, krodata_size);
#endif

#ifdef CONFIG_KFENCE
	/*
	 * kfence pool must be backed by PAGE_SIZE mappings, so allocate it
	 * before we setup the linear mapping so that we avoid using hugepages
	 * for this region.
	 */
	kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
	BUG_ON(!kfence_pool);

	memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
	__kfence_pool = __va(kfence_pool);
#endif

	/* Map all memory banks in the linear mapping */
	for_each_mem_range(i, &start, &end) {
		if (start >= end)
			break;
		if (start <= __pa(PAGE_OFFSET) &&
		    __pa(PAGE_OFFSET) < end)
			start = __pa(PAGE_OFFSET);

		create_linear_mapping_range(start, end, 0, NULL);
	}

#ifdef CONFIG_STRICT_KERNEL_RWX
	create_linear_mapping_range(ktext_start, ktext_start + ktext_size, 0, NULL);
	create_linear_mapping_range(krodata_start, krodata_start + krodata_size, 0, NULL);

	memblock_clear_nomap(ktext_start, ktext_size);
	memblock_clear_nomap(krodata_start, krodata_size);
#endif

#ifdef CONFIG_KFENCE
	create_linear_mapping_range(kfence_pool, kfence_pool + KFENCE_POOL_SIZE, PAGE_SIZE, NULL);

	memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
#endif
}

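/*
 * setup_vm_final() builds swapper_pg_dir: the fixmap, the linear mapping of
 * all memory banks, and (on 64-bit) the kernel mapping with its final
 * permissions, then switches satp over to it.
 */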
static void __init setup_vm_final(void)
{
	/* Setup swapper PGD for fixmap */
#if !defined(CONFIG_64BIT)
	/*
	 * In 32-bit, the device tree lies in a pgd entry, so it must be copied
	 * directly in swapper_pg_dir in addition to the pgd entry that points
	 * to fixmap_pte.
	 */
	unsigned long idx = pgd_index(__fix_to_virt(FIX_FDT));

	set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]);
#endif
	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
			   __pa_symbol(fixmap_pgd_next),
			   PGDIR_SIZE, PAGE_TABLE);

	/* Map the linear mapping */
	create_linear_mapping_page_table();

	/* Map the kernel */
	if (IS_ENABLED(CONFIG_64BIT))
		create_kernel_page_table(swapper_pg_dir, false);

#ifdef CONFIG_KASAN
	kasan_swapper_init();
#endif

	/* Clear fixmap PTE and PMD mappings */
	clear_fixmap(FIX_PTE);
	clear_fixmap(FIX_PMD);
	clear_fixmap(FIX_PUD);
	clear_fixmap(FIX_P4D);

	/* Move to swapper page table */
	csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
	local_flush_tlb_all();

	pt_ops_set_late();
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
	dtb_early_va = (void *)dtb_pa;
	dtb_early_pa = dtb_pa;

#ifdef CONFIG_RELOCATABLE
	kernel_map.virt_addr = (uintptr_t)_start;
	kernel_map.phys_addr = (uintptr_t)_start;
	relocate_kernel();
#endif
}

static inline void setup_vm_final(void)
{
}
#endif /* CONFIG_MMU */

/*
 * reserve_crashkernel() - reserves memory for crash kernel
 *
 * This function reserves memory area given in "crashkernel=" kernel command
 * line parameter. The memory reserved is used by dump capture kernel when
 * primary kernel is crashing.
 */
static void __init arch_reserve_crashkernel(void)
{
	unsigned long long low_size = 0;
	unsigned long long crash_base, crash_size;
	bool high = false;
	int ret;

	if (!IS_ENABLED(CONFIG_CRASH_RESERVE))
		return;

	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&crash_size, &crash_base,
				&low_size, NULL, &high);
	if (ret)
		return;

	reserve_crashkernel_generic(crash_size, crash_base, low_size, high);
}

void __init paging_init(void)
{
	setup_bootmem();
	setup_vm_final();

	/* Depend on that Linear Mapping is ready */
	memblock_allow_resize();
}

void __init misc_mem_init(void)
{
	early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
	arch_numa_init();
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	/* The entire VMEMMAP region has been populated. Flush TLB for this region */
	local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END);
#endif
	arch_reserve_crashkernel();
	memblock_dump_all();
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
			       unsigned long addr, unsigned long next)
{
	pmd_set_huge(pmd, virt_to_phys(p), PAGE_KERNEL);
}

int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
				unsigned long addr, unsigned long next)
{
	vmemmap_verify((pte_t *)pmdp, node, addr, next);
	return 1;
}

int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	/*
	 * Note that SPARSEMEM_VMEMMAP is only selected for rv64 and that we
	 * can't use hugepage mappings for 2-level page table because in case of
	 * memory hotplug, we are not able to update all the page tables with
	 * the new PMDs.
	 */
	return vmemmap_populate_hugepages(start, end, node, altmap);
}
#endif

#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
/*
 * Pre-allocates page-table pages for a specific area in the kernel
 * page-table. Only the level which needs to be synchronized between
 * all page-tables is allocated because the synchronization can be
 * expensive.
 */
static void __init preallocate_pgd_pages_range(unsigned long start, unsigned long end,
					       const char *area)
{
	unsigned long addr;
	const char *lvl;

	for (addr = start; addr < end && addr >= start; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
		pgd_t *pgd = pgd_offset_k(addr);
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;

		lvl = "p4d";
		p4d = p4d_alloc(&init_mm, pgd, addr);
		if (!p4d)
			goto failed;

		if (pgtable_l5_enabled)
			continue;

		lvl = "pud";
		pud = pud_alloc(&init_mm, p4d, addr);
		if (!pud)
			goto failed;

		if (pgtable_l4_enabled)
			continue;

		lvl = "pmd";
		pmd = pmd_alloc(&init_mm, pud, addr);
		if (!pmd)
			goto failed;
	}
	return;

failed:
	/*
	 * The pages have to be there now or they will be missing in
	 * process page-tables later.
	 */
	panic("Failed to pre-allocate %s pages for %s area\n", lvl, area);
}

#define PAGE_END KASAN_SHADOW_START

void __init pgtable_cache_init(void)
{
	preallocate_pgd_pages_range(VMALLOC_START, VMALLOC_END, "vmalloc");
	if (IS_ENABLED(CONFIG_MODULES))
		preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "bpf/modules");
	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) {
		preallocate_pgd_pages_range(VMEMMAP_START, VMEMMAP_END, "vmemmap");
		preallocate_pgd_pages_range(PAGE_OFFSET, PAGE_END, "direct map");
		if (IS_ENABLED(CONFIG_KASAN))
			preallocate_pgd_pages_range(KASAN_SHADOW_START, KASAN_SHADOW_END, "kasan");
	}
}
#endif

#ifdef CONFIG_EXECMEM
#ifdef CONFIG_MMU
static struct execmem_info execmem_info __ro_after_init;

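/*
 * Describe where the generic execmem allocator may place module, kprobe
 * and BPF JIT allocations, and with which protections.
 */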
struct execmem_info __init *execmem_arch_setup(void)
{
	execmem_info = (struct execmem_info){
		.ranges = {
			[EXECMEM_DEFAULT] = {
				.start = MODULES_VADDR,
				.end = MODULES_END,
				.pgprot = PAGE_KERNEL,
				.alignment = 1,
			},
			[EXECMEM_KPROBES] = {
				.start = VMALLOC_START,
				.end = VMALLOC_END,
				.pgprot = PAGE_KERNEL_READ_EXEC,
				.alignment = 1,
			},
			[EXECMEM_BPF] = {
				.start = BPF_JIT_REGION_START,
				.end = BPF_JIT_REGION_END,
				.pgprot = PAGE_KERNEL,
				.alignment = PAGE_SIZE,
			},
		},
	};

	return &execmem_info;
}
#endif /* CONFIG_MMU */
#endif /* CONFIG_EXECMEM */

#ifdef CONFIG_MEMORY_HOTPLUG
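/*
 * Teardown helpers for memory hotplug: each free_*_table() releases a
 * page-table page only once every entry in it is empty, and the
 * remove_*_mapping() walkers clear leaf entries (freeing the backing
 * vmemmap storage when requested) before pruning empty tables.
 */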
static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	struct page *page = pmd_page(*pmd);
	struct ptdesc *ptdesc = page_ptdesc(page);
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	pagetable_dtor(ptdesc);
	if (PageReserved(page))
		free_reserved_page(page);
	else
		pagetable_free(ptdesc);
	pmd_clear(pmd);
}

static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, bool is_vmemmap)
{
	struct page *page = pud_page(*pud);
	struct ptdesc *ptdesc = page_ptdesc(page);
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	if (!is_vmemmap)
		pagetable_dtor(ptdesc);
	if (PageReserved(page))
		free_reserved_page(page);
	else
		pagetable_free(ptdesc);
	pud_clear(pud);
}

static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
	struct page *page = p4d_page(*p4d);
	pud_t *pud;
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++) {
		pud = pud_start + i;
		if (!pud_none(*pud))
			return;
	}

	if (PageReserved(page))
		free_reserved_page(page);
	else
		__free_pages(page, 0);
	p4d_clear(p4d);
}

static void __meminit free_vmemmap_storage(struct page *page, size_t size,
					   struct vmem_altmap *altmap)
{
	int order = get_order(size);

	if (altmap) {
		vmem_altmap_free(altmap, size >> PAGE_SHIFT);
		return;
	}

	if (PageReserved(page)) {
		unsigned int nr_pages = 1 << order;

		while (nr_pages--)
			free_reserved_page(page++);
		return;
	}

	__free_pages(page, order);
}

static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pte_t *ptep, pte;

	for (; addr < end; addr = next) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		ptep = pte_base + pte_index(addr);
		pte = ptep_get(ptep);
		if (!pte_present(*ptep))
			continue;

		pte_clear(&init_mm, addr, ptep);
		if (is_vmemmap)
			free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap);
	}
}

static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmdp, pmd;

	for (; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);
		pmdp = pmd_base + pmd_index(addr);
		pmd = pmdp_get(pmdp);
		if (!pmd_present(pmd))
			continue;

		if (pmd_leaf(pmd)) {
			pmd_clear(pmdp);
			if (is_vmemmap)
				free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmdp);
		remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap);
		free_pte_table(pte_base, pmdp);
	}
}

static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pud_t *pudp, pud;
	pmd_t *pmd_base;

	for (; addr < end; addr = next) {
		next = pud_addr_end(addr, end);
		pudp = pud_base + pud_index(addr);
		pud = pudp_get(pudp);
		if (!pud_present(pud))
			continue;

		if (pud_leaf(pud)) {
			if (pgtable_l4_enabled) {
				pud_clear(pudp);
				if (is_vmemmap)
					free_vmemmap_storage(pud_page(pud), PUD_SIZE, altmap);
			}
			continue;
		}

		pmd_base = pmd_offset(pudp, 0);
		remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap);

		if (pgtable_l4_enabled)
			free_pmd_table(pmd_base, pudp, is_vmemmap);
	}
}

static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	p4d_t *p4dp, p4d;
	pud_t *pud_base;

	for (; addr < end; addr = next) {
		next = p4d_addr_end(addr, end);
		p4dp = p4d_base + p4d_index(addr);
		p4d = p4dp_get(p4dp);
		if (!p4d_present(p4d))
			continue;

		if (p4d_leaf(p4d)) {
			if (pgtable_l5_enabled) {
				p4d_clear(p4dp);
				if (is_vmemmap)
					free_vmemmap_storage(p4d_page(p4d), P4D_SIZE, altmap);
			}
			continue;
		}

		pud_base = pud_offset(p4dp, 0);
		remove_pud_mapping(pud_base, addr, next, is_vmemmap, altmap);

		if (pgtable_l5_enabled)
			free_pud_table(pud_base, p4dp);
	}
}

static void __meminit remove_pgd_mapping(unsigned long va, unsigned long end, bool is_vmemmap,
					 struct vmem_altmap *altmap)
{
	unsigned long addr, next;
	p4d_t *p4d_base;
	pgd_t *pgd;

	for (addr = va; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset_k(addr);

		if (!pgd_present(*pgd))
			continue;

		if (pgd_leaf(*pgd))
			continue;

		p4d_base = p4d_offset(pgd, 0);
		remove_p4d_mapping(p4d_base, addr, next, is_vmemmap, altmap);
	}

	flush_tlb_all();
}

static void __meminit remove_linear_mapping(phys_addr_t start, u64 size)
{
	unsigned long va = (unsigned long)__va(start);
	unsigned long end = (unsigned long)__va(start + size);

	remove_pgd_mapping(va, end, false, NULL);
}

struct range arch_get_mappable_range(void)
{
	struct range mhp_range;

	mhp_range.start = __pa(PAGE_OFFSET);
	mhp_range.end = __pa(PAGE_END - 1);
	return mhp_range;
}

int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
{
	int ret = 0;

	create_linear_mapping_range(start, start + size, 0, &params->pgprot);
	ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params);
	if (ret) {
		remove_linear_mapping(start, size);
		goto out;
	}

	max_pfn = PFN_UP(start + size);
	max_low_pfn = max_pfn;

out:
	flush_tlb_all();
	return ret;
}

void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
	__remove_pages(start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap);
	remove_linear_mapping(start, size);
	flush_tlb_all();
}

void __ref vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap)
{
	remove_pgd_mapping(start, end, true, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */