// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/memremap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */
static void * __ref __earlyonly_bootmem_alloc(int node,
                                unsigned long size,
                                unsigned long align,
                                unsigned long goal)
{
        return memblock_virt_alloc_try_nid_raw(size, align, goal,
                                               BOOTMEM_ALLOC_ACCESSIBLE, node);
}

static void *vmemmap_buf;
static void *vmemmap_buf_end;
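/*
 * vmemmap_buf and vmemmap_buf_end delimit an optional, physically
 * contiguous scratch buffer that sparse_mem_maps_populate_node() below
 * reserves up front, sized for all present sections of one node.
 * alloc_block_buf() carves allocations out of this window and falls
 * back to vmemmap_alloc_block() once the window is exhausted (or was
 * never set up).
 */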
void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
        /* If the main allocator is up use that, fallback to bootmem. */
        if (slab_is_available()) {
                gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
                int order = get_order(size);
                static bool warned;
                struct page *page;

                page = alloc_pages_node(node, gfp_mask, order);
                if (page)
                        return page_address(page);

                if (!warned) {
                        warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
                                   "vmemmap alloc failure: order:%u", order);
                        warned = true;
                }
                return NULL;
        } else
                return __earlyonly_bootmem_alloc(node, size, size,
                                __pa(MAX_DMA_ADDRESS));
}

/* during the early stage all callers must request the same size */
static void * __meminit alloc_block_buf(unsigned long size, int node)
{
        void *ptr;

        if (!vmemmap_buf)
                return vmemmap_alloc_block(size, node);

        /* take an allocation from the buffer */
        ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
        if (ptr + size > vmemmap_buf_end)
                return vmemmap_alloc_block(size, node);

        vmemmap_buf = ptr + size;

        return ptr;
}

static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
        return altmap->base_pfn + altmap->reserve + altmap->alloc
                + altmap->align;
}

static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
        unsigned long allocated = altmap->alloc + altmap->align;

        if (altmap->free > allocated)
                return altmap->free - allocated;
        return 0;
}
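/*
 * Worked example for the alignment logic in vmem_altmap_alloc() below
 * (illustrative numbers, assuming x86_64 defaults of 128MB sections
 * and a 64-byte struct page): one section map needs 32768 * 64 bytes
 * = 2MB, i.e. nr_pfns = 512. find_first_bit() yields 9 (512 == 1 << 9),
 * so the allocation is rounded up to the next 512-pfn boundary and the
 * pfns skipped as padding are accounted in altmap->align.
 */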
/**
 * vmem_altmap_alloc - allocate pages from the vmem_altmap reservation
 * @altmap: reserved page pool for the allocation
 * @nr_pfns: size (in pages) of the allocation
 *
 * Allocations are aligned to the size of the request.
 */
static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap,
                unsigned long nr_pfns)
{
        unsigned long pfn = vmem_altmap_next_pfn(altmap);
        unsigned long nr_align;

        nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
        nr_align = ALIGN(pfn, nr_align) - pfn;

        if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
                return ULONG_MAX;
        altmap->alloc += nr_pfns;
        altmap->align += nr_align;
        return pfn + nr_align;
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
                struct vmem_altmap *altmap)
{
        unsigned long pfn, nr_pfns;
        void *ptr;

        if (size & ~PAGE_MASK) {
                pr_warn_once("%s: allocations must be a multiple of PAGE_SIZE (%ld)\n",
                                __func__, size);
                return NULL;
        }

        nr_pfns = size >> PAGE_SHIFT;
        pfn = vmem_altmap_alloc(altmap, nr_pfns);
        if (pfn < ULONG_MAX)
                ptr = __va(__pfn_to_phys(pfn));
        else
                ptr = NULL;
        pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
                        __func__, pfn, altmap->alloc, altmap->align, nr_pfns);

        return ptr;
}

/* during the early stage all callers must request the same size */
void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node,
                struct vmem_altmap *altmap)
{
        if (altmap)
                return altmap_alloc_block_buf(size, altmap);
        return alloc_block_buf(size, node);
}

void __meminit vmemmap_verify(pte_t *pte, int node,
                                unsigned long start, unsigned long end)
{
        unsigned long pfn = pte_pfn(*pte);
        int actual_node = early_pfn_to_nid(pfn);

        if (node_distance(actual_node, node) > LOCAL_DISTANCE)
                pr_warn("[%lx-%lx] potential offnode page_structs\n",
                        start, end - 1);
}

pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
{
        pte_t *pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte)) {
                pte_t entry;
                void *p = alloc_block_buf(PAGE_SIZE, node);
                if (!p)
                        return NULL;
                entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
                set_pte_at(&init_mm, addr, pte, entry);
        }
        return pte;
}

static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
        void *p = vmemmap_alloc_block(size, node);

        if (!p)
                return NULL;
        memset(p, 0, size);

        return p;
}
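/*
 * The vmemmap_*_populate() helpers below each fill in one level of the
 * kernel page table on demand. The upper levels (pgd through pmd) hold
 * page tables and therefore get zeroed pages via
 * vmemmap_alloc_block_zero(); only vmemmap_pte_populate() above maps a
 * data page, which may come from the node-local buffer and is left
 * uninitialized here because the struct page contents are written later
 * by the memmap initialization code.
 */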
pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
        pmd_t *pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd)) {
                void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
                if (!p)
                        return NULL;
                pmd_populate_kernel(&init_mm, pmd, p);
        }
        return pmd;
}

pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
        pud_t *pud = pud_offset(p4d, addr);
        if (pud_none(*pud)) {
                void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
                if (!p)
                        return NULL;
                pud_populate(&init_mm, pud, p);
        }
        return pud;
}

p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
        p4d_t *p4d = p4d_offset(pgd, addr);
        if (p4d_none(*p4d)) {
                void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
                if (!p)
                        return NULL;
                p4d_populate(&init_mm, p4d, p);
        }
        return p4d;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
        pgd_t *pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd)) {
                void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
                if (!p)
                        return NULL;
                pgd_populate(&init_mm, pgd, p);
        }
        return pgd;
}
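/*
 * A minimal sketch of how an architecture without special (e.g. huge
 * page) vmemmap mappings can provide the required vmemmap_populate()
 * hook on top of vmemmap_populate_basepages() below; the function body
 * is for illustration only and is not part of this file:
 *
 *	int __meminit vmemmap_populate(unsigned long start,
 *				       unsigned long end, int node)
 *	{
 *		return vmemmap_populate_basepages(start, end, node);
 *	}
 */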
int __meminit vmemmap_populate_basepages(unsigned long start,
                                         unsigned long end, int node)
{
        unsigned long addr = start;
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        for (; addr < end; addr += PAGE_SIZE) {
                pgd = vmemmap_pgd_populate(addr, node);
                if (!pgd)
                        return -ENOMEM;
                p4d = vmemmap_p4d_populate(pgd, addr, node);
                if (!p4d)
                        return -ENOMEM;
                pud = vmemmap_pud_populate(p4d, addr, node);
                if (!pud)
                        return -ENOMEM;
                pmd = vmemmap_pmd_populate(pud, addr, node);
                if (!pmd)
                        return -ENOMEM;
                pte = vmemmap_pte_populate(pmd, addr, node);
                if (!pte)
                        return -ENOMEM;
                vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
        }

        return 0;
}

struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
{
        unsigned long start;
        unsigned long end;
        struct page *map;

        map = pfn_to_page(pnum * PAGES_PER_SECTION);
        start = (unsigned long)map;
        end = (unsigned long)(map + PAGES_PER_SECTION);

        if (vmemmap_populate(start, end, nid))
                return NULL;

        return map;
}

void __init sparse_mem_maps_populate_node(struct page **map_map,
                                          unsigned long pnum_begin,
                                          unsigned long pnum_end,
                                          unsigned long map_count, int nodeid)
{
        unsigned long pnum;
        unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
        void *vmemmap_buf_start;

        size = ALIGN(size, PMD_SIZE);
        vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
                                PMD_SIZE, __pa(MAX_DMA_ADDRESS));

        if (vmemmap_buf_start) {
                vmemmap_buf = vmemmap_buf_start;
                vmemmap_buf_end = vmemmap_buf_start + size * map_count;
        }

        for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
                struct mem_section *ms;

                if (!present_section_nr(pnum))
                        continue;

                map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
                if (map_map[pnum])
                        continue;
                ms = __nr_to_section(pnum);
                pr_err("%s: sparsemem memory map backing failed, some memory will not be available\n",
                       __func__);
                ms->section_mem_map = 0;
        }

        if (vmemmap_buf_start) {
                /* free the leftover part of the buffer */
                memblock_free_early(__pa(vmemmap_buf),
                                    vmemmap_buf_end - vmemmap_buf);
                vmemmap_buf = NULL;
                vmemmap_buf_end = NULL;
        }
}
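/*
 * Call flow sketch for the early boot path (caller names taken from
 * mm/sparse.c and may differ across kernel versions):
 *
 *	sparse_init()
 *	  sparse_mem_maps_populate_node()	reserve one node-sized buffer
 *	    sparse_mem_map_populate()		for each present section
 *	      vmemmap_populate()		arch hook, may use helpers above
 */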