// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/memremap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>

#include <asm/dma.h>
#include <asm/pgalloc.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */

static void * __ref __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return memblock_alloc_try_nid_raw(size, align, goal,
					  MEMBLOCK_ALLOC_ACCESSIBLE, node);
}

void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	/* If the main allocator is up use that, fall back to bootmem. */
	if (slab_is_available()) {
		gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
		int order = get_order(size);
		static bool warned;
		struct page *page;

		page = alloc_pages_node(node, gfp_mask, order);
		if (page)
			return page_address(page);

		if (!warned) {
			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
				   "vmemmap alloc failure: order:%u", order);
			warned = true;
		}
		return NULL;
	} else
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap);
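/*
 * Buffer variant used while populating the memmap: a device-provided
 * altmap takes priority, then the per-section sparse buffer set up by
 * sparse_buffer_init(), and finally a plain vmemmap_alloc_block().
 */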
/* need to make sure size is all the same during early stage */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
					 struct vmem_altmap *altmap)
{
	void *ptr;

	if (altmap)
		return altmap_alloc_block_buf(size, altmap);

	ptr = sparse_buffer_alloc(size);
	if (!ptr)
		ptr = vmemmap_alloc_block(size, node);
	return ptr;
}

static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	return altmap->base_pfn + altmap->reserve + altmap->alloc
		+ altmap->align;
}

static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long allocated = altmap->alloc + altmap->align;

	if (altmap->free > allocated)
		return altmap->free - allocated;
	return 0;
}
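/*
 * Carve pages out of the device-reserved region described by @altmap.
 * The allocation is aligned to the largest power-of-two factor of its
 * size in pfns; pfns skipped for that alignment are accounted in
 * altmap->align so that vmem_altmap_next_pfn() stays consistent.
 */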
static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns, nr_align;

	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
				__func__, size);
		return NULL;
	}

	pfn = vmem_altmap_next_pfn(altmap);
	nr_pfns = size >> PAGE_SHIFT;
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
	return __va(__pfn_to_phys(pfn));
}

void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	unsigned long pfn = pte_pfn(*pte);
	int actual_node = early_pfn_to_nid(pfn);

	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
		pr_warn_once("[%lx-%lx] potential offnode page_structs\n",
			start, end - 1);
}
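/*
 * Install the PTE for one vmemmap page at @addr. With @reuse set, no
 * new page is allocated: the caller-supplied page (already holding
 * tail struct pages) is mapped at one more virtual address instead,
 * with its refcount raised accordingly.
 */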
pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
				       struct vmem_altmap *altmap,
				       struct page *reuse)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte)) {
		pte_t entry;
		void *p;

		if (!reuse) {
			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
			if (!p)
				return NULL;
		} else {
			/*
			 * When a PTE/PMD entry is freed from the init_mm
			 * there's a free_pages() call to this page allocated
			 * above. Thus this get_page() is paired with the
			 * put_page_testzero() on the freeing path.
			 * This can only be called by certain ZONE_DEVICE paths,
			 * and through vmemmap_populate_compound_pages() when
			 * slab is available.
			 */
			get_page(reuse);
			p = page_to_virt(reuse);
		}
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}

static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
	void *p = vmemmap_alloc_block(size, node);

	if (!p)
		return NULL;
	memset(p, 0, size);

	return p;
}

pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_populate_kernel(&init_mm, pmd, p);
	}
	return pmd;
}

void __weak __meminit pmd_init(void *addr)
{
}

pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_init(p);
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}

void __weak __meminit pud_init(void *addr)
{
}

p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
	p4d_t *p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pud_init(p);
		p4d_populate(&init_mm, p4d, p);
	}
	return p4d;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pgd_populate(&init_mm, pgd, p);
	}
	return pgd;
}
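/*
 * Populate the page tables for a single vmemmap page: walk, allocating
 * where missing, pgd -> p4d -> pud -> pmd, then install the PTE.
 * Returns the PTE, or NULL if any level could not be allocated.
 */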
static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node,
						  struct vmem_altmap *altmap,
						  struct page *reuse)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = vmemmap_pgd_populate(addr, node);
	if (!pgd)
		return NULL;
	p4d = vmemmap_p4d_populate(pgd, addr, node);
	if (!p4d)
		return NULL;
	pud = vmemmap_pud_populate(p4d, addr, node);
	if (!pud)
		return NULL;
	pmd = vmemmap_pmd_populate(pud, addr, node);
	if (!pmd)
		return NULL;
	pte = vmemmap_pte_populate(pmd, addr, node, altmap, reuse);
	if (!pte)
		return NULL;
	vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);

	return pte;
}

static int __meminit vmemmap_populate_range(unsigned long start,
					    unsigned long end, int node,
					    struct vmem_altmap *altmap,
					    struct page *reuse)
{
	unsigned long addr = start;
	pte_t *pte;

	for (; addr < end; addr += PAGE_SIZE) {
		pte = vmemmap_populate_address(addr, node, altmap, reuse);
		if (!pte)
			return -ENOMEM;
	}

	return 0;
}

int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	return vmemmap_populate_range(start, end, node, altmap, NULL);
}

void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
				      unsigned long addr, unsigned long next)
{
}

int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
				       unsigned long addr, unsigned long next)
{
	return 0;
}
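/*
 * PMD-mapped variant: back each PMD-sized chunk of the vmemmap with one
 * PMD_SIZE block via the architecture's vmemmap_set_pmd() hook, falling
 * back to base pages where no block can be allocated (unless an altmap
 * is in use, see the comment in the error path below).
 */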
int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	unsigned long addr;
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	for (addr = start; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;

		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = pmd_offset(pud, addr);
		if (pmd_none(READ_ONCE(*pmd))) {
			void *p;

			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
			if (p) {
				vmemmap_set_pmd(pmd, p, node, addr, next);
				continue;
			} else if (altmap) {
				/*
				 * No fallback: In any case we care about, the
				 * altmap should be reasonably sized and aligned
				 * such that vmemmap_alloc_block_buf() will always
				 * succeed. For consistency with the PTE case,
				 * return an error here as failure could indicate
				 * a configuration issue with the size of the altmap.
				 */
				return -ENOMEM;
			}
		} else if (vmemmap_check_pmd(pmd, node, addr, next))
			continue;
		if (vmemmap_populate_basepages(addr, next, node, altmap))
			return -ENOMEM;
	}
	return 0;
}

/*
 * For compound pages bigger than section size (e.g. x86 1G compound
 * pages with 2M subsection size) fill the rest of sections as tail
 * pages.
 *
 * Note that memremap_pages() resets the @nr_range value and will
 * increment it after each range is successfully onlined. Thus the value
 * of @nr_range at section memmap populate corresponds to the
 * in-progress range being onlined here.
 */
static bool __meminit reuse_compound_section(unsigned long start_pfn,
					     struct dev_pagemap *pgmap)
{
	unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
	unsigned long offset = start_pfn -
		PHYS_PFN(pgmap->ranges[pgmap->nr_range].start);

	return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION;
}
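/*
 * Look up the PTE mapping the last vmemmap page of the previous
 * section; its backing page holds nothing but tail struct pages and
 * can therefore be shared by this section as well.
 */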
static pte_t * __meminit compound_section_tail_page(unsigned long addr)
{
	pte_t *pte;

	addr -= PAGE_SIZE;

	/*
	 * Assuming sections are populated sequentially, the previous section's
	 * page data can be reused.
	 */
	pte = pte_offset_kernel(pmd_off_k(addr), addr);
	if (!pte)
		return NULL;

	return pte;
}
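/*
 * Populate the memmap for a compound (ZONE_DEVICE) range: map a fresh
 * page for each head vmemmap page and the first tail vmemmap page,
 * then map that same tail page at every remaining vmemmap address of
 * the compound page (see Documentation/mm/vmemmap_dedup.rst).
 */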
static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
						     unsigned long start,
						     unsigned long end, int node,
						     struct dev_pagemap *pgmap)
{
	unsigned long size, addr;
	pte_t *pte;
	int rc;

	if (reuse_compound_section(start_pfn, pgmap)) {
		pte = compound_section_tail_page(start);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the page that was populated in the prior iteration
		 * with just tail struct pages.
		 */
		return vmemmap_populate_range(start, end, node, NULL,
					      pte_page(*pte));
	}

	size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
	for (addr = start; addr < end; addr += size) {
		unsigned long next, last = addr + size;

		/* Populate the head page vmemmap page */
		pte = vmemmap_populate_address(addr, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/* Populate the tail pages vmemmap page */
		next = addr + PAGE_SIZE;
		pte = vmemmap_populate_address(next, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the previous page for the rest of tail pages
		 * See layout diagram in Documentation/mm/vmemmap_dedup.rst
		 */
		next += PAGE_SIZE;
		rc = vmemmap_populate_range(next, last, node, NULL,
					    pte_page(*pte));
		if (rc)
			return -ENOMEM;
	}

	return 0;
}

struct page * __meminit __populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);
	int r;

	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
		return NULL;

	if (is_power_of_2(sizeof(struct page)) &&
	    pgmap && pgmap_vmemmap_nr(pgmap) > 1 && !altmap)
		r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
	else
		r = vmemmap_populate(start, end, nid, altmap);

	if (r < 0)
		return NULL;

	return pfn_to_page(pfn);
}