1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
28f6aac41SChristoph Lameter /*
38f6aac41SChristoph Lameter * Virtual Memory Map support
48f6aac41SChristoph Lameter *
5cde53535SChristoph Lameter * (C) 2007 sgi. Christoph Lameter.
68f6aac41SChristoph Lameter *
78f6aac41SChristoph Lameter * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
88f6aac41SChristoph Lameter * virt_to_page, page_address() to be implemented as a base offset
98f6aac41SChristoph Lameter * calculation without memory access.
108f6aac41SChristoph Lameter *
118f6aac41SChristoph Lameter * However, virtual mappings need a page table and TLBs. Many Linux
128f6aac41SChristoph Lameter * architectures already map their physical space using 1-1 mappings
13b595076aSUwe Kleine-König * via TLBs. For those arches the virtual memory map is essentially
148f6aac41SChristoph Lameter * for free if we use the same page size as the 1-1 mappings. In that
158f6aac41SChristoph Lameter * case the overhead consists of a few additional pages that are
168f6aac41SChristoph Lameter * allocated to create a view of memory for vmemmap.
178f6aac41SChristoph Lameter *
1829c71111SAndy Whitcroft * The architecture is expected to provide a vmemmap_populate() function
1929c71111SAndy Whitcroft * to instantiate the mapping.
208f6aac41SChristoph Lameter */
218f6aac41SChristoph Lameter #include <linux/mm.h>
228f6aac41SChristoph Lameter #include <linux/mmzone.h>
2397ad1087SMike Rapoport #include <linux/memblock.h>
244b94ffdcSDan Williams #include <linux/memremap.h>
258f6aac41SChristoph Lameter #include <linux/highmem.h>
265a0e3ad6STejun Heo #include <linux/slab.h>
278f6aac41SChristoph Lameter #include <linux/spinlock.h>
288f6aac41SChristoph Lameter #include <linux/vmalloc.h>
298bca44bbSGlauber de Oliveira Costa #include <linux/sched.h>
30f41f2ed4SMuchun Song
318f6aac41SChristoph Lameter #include <asm/dma.h>
328f6aac41SChristoph Lameter #include <asm/pgalloc.h>
33ad2fa371SMuchun Song
348f6aac41SChristoph Lameter /*
358f6aac41SChristoph Lameter * Allocate a block of memory to be used to back the virtual memory map
368f6aac41SChristoph Lameter * or to back the page tables that are used to create the mapping.
378f6aac41SChristoph Lameter * Uses the main allocators if they are available, else bootmem.
388f6aac41SChristoph Lameter */
39e0dc3a53SKAMEZAWA Hiroyuki
/*
 * Early-boot allocator: before the slab/page allocators are up, get raw
 * (uninitialized) memory from memblock, preferring @node and physical
 * addresses at or above @goal.
 */
static void * __ref __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return memblock_alloc_try_nid_raw(size, align, goal,
					  MEMBLOCK_ALLOC_ACCESSIBLE, node);
}
48e0dc3a53SKAMEZAWA Hiroyuki
vmemmap_alloc_block(unsigned long size,int node)498f6aac41SChristoph Lameter void * __meminit vmemmap_alloc_block(unsigned long size, int node)
508f6aac41SChristoph Lameter {
518f6aac41SChristoph Lameter /* If the main allocator is up use that, fallback to bootmem. */
528f6aac41SChristoph Lameter if (slab_is_available()) {
53fcdaf842SMichal Hocko gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
54fcdaf842SMichal Hocko int order = get_order(size);
55fcdaf842SMichal Hocko static bool warned;
56f52407ceSShaohua Li struct page *page;
57f52407ceSShaohua Li
58fcdaf842SMichal Hocko page = alloc_pages_node(node, gfp_mask, order);
598f6aac41SChristoph Lameter if (page)
608f6aac41SChristoph Lameter return page_address(page);
61fcdaf842SMichal Hocko
62fcdaf842SMichal Hocko if (!warned) {
63fcdaf842SMichal Hocko warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
64fcdaf842SMichal Hocko "vmemmap alloc failure: order:%u", order);
65fcdaf842SMichal Hocko warned = true;
66fcdaf842SMichal Hocko }
678f6aac41SChristoph Lameter return NULL;
688f6aac41SChristoph Lameter } else
69e0dc3a53SKAMEZAWA Hiroyuki return __earlyonly_bootmem_alloc(node, size, size,
708f6aac41SChristoph Lameter __pa(MAX_DMA_ADDRESS));
718f6aac41SChristoph Lameter }
728f6aac41SChristoph Lameter
7356993b4eSAnshuman Khandual static void * __meminit altmap_alloc_block_buf(unsigned long size,
7456993b4eSAnshuman Khandual struct vmem_altmap *altmap);
759bdac914SYinghai Lu
7656993b4eSAnshuman Khandual /* need to make sure size is all the same during early stage */
vmemmap_alloc_block_buf(unsigned long size,int node,struct vmem_altmap * altmap)7756993b4eSAnshuman Khandual void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
7856993b4eSAnshuman Khandual struct vmem_altmap *altmap)
7956993b4eSAnshuman Khandual {
8056993b4eSAnshuman Khandual void *ptr;
8156993b4eSAnshuman Khandual
8256993b4eSAnshuman Khandual if (altmap)
8356993b4eSAnshuman Khandual return altmap_alloc_block_buf(size, altmap);
8456993b4eSAnshuman Khandual
8556993b4eSAnshuman Khandual ptr = sparse_buffer_alloc(size);
8635fd1eb1SPavel Tatashin if (!ptr)
8735fd1eb1SPavel Tatashin ptr = vmemmap_alloc_block(size, node);
889bdac914SYinghai Lu return ptr;
899bdac914SYinghai Lu }
909bdac914SYinghai Lu
vmem_altmap_next_pfn(struct vmem_altmap * altmap)914b94ffdcSDan Williams static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
924b94ffdcSDan Williams {
934b94ffdcSDan Williams return altmap->base_pfn + altmap->reserve + altmap->alloc
944b94ffdcSDan Williams + altmap->align;
954b94ffdcSDan Williams }
964b94ffdcSDan Williams
vmem_altmap_nr_free(struct vmem_altmap * altmap)974b94ffdcSDan Williams static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
984b94ffdcSDan Williams {
994b94ffdcSDan Williams unsigned long allocated = altmap->alloc + altmap->align;
1004b94ffdcSDan Williams
1014b94ffdcSDan Williams if (altmap->free > allocated)
1024b94ffdcSDan Williams return altmap->free - allocated;
1034b94ffdcSDan Williams return 0;
1044b94ffdcSDan Williams }
1054b94ffdcSDan Williams
/*
 * Carve a page-multiple block of @size bytes out of the device-provided
 * altmap reservation.  Returns the kernel virtual address of the block,
 * or NULL when the size is not page-aligned or the reservation is
 * exhausted.
 */
static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns, nr_align;

	/* Only whole pages can be carved out of the altmap. */
	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
				__func__, size);
		return NULL;
	}

	pfn = vmem_altmap_next_pfn(altmap);
	nr_pfns = size >> PAGE_SHIFT;
	/*
	 * Align the start pfn to the largest power-of-two factor of the
	 * requested pfn count (find_first_bit yields the lowest set bit),
	 * then express the alignment gap in pfns.
	 */
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	/* Account both the allocation and the pfns lost to alignment. */
	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
	return __va(__pfn_to_phys(pfn));
}
1324b94ffdcSDan Williams
vmemmap_verify(pte_t * pte,int node,unsigned long start,unsigned long end)1338f6aac41SChristoph Lameter void __meminit vmemmap_verify(pte_t *pte, int node,
1348f6aac41SChristoph Lameter unsigned long start, unsigned long end)
1358f6aac41SChristoph Lameter {
136c33c7948SRyan Roberts unsigned long pfn = pte_pfn(ptep_get(pte));
1378f6aac41SChristoph Lameter int actual_node = early_pfn_to_nid(pfn);
1388f6aac41SChristoph Lameter
139b41ad14cSDavid Rientjes if (node_distance(actual_node, node) > LOCAL_DISTANCE)
140abd62377SMa Wupeng pr_warn_once("[%lx-%lx] potential offnode page_structs\n",
141756a025fSJoe Perches start, end - 1);
1428f6aac41SChristoph Lameter }
1438f6aac41SChristoph Lameter
/*
 * Make sure a pte backs the vmemmap page at @addr.  The backing memory
 * comes either from a fresh (altmap-aware) allocation or, when @reuse is
 * non-NULL, from an already-populated vmemmap page whose refcount is
 * raised.  Returns the pte, or NULL on allocation failure.
 */
pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
				       struct vmem_altmap *altmap,
				       struct page *reuse)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(ptep_get(pte))) {
		pte_t entry;
		void *p;

		if (!reuse) {
			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
			if (!p)
				return NULL;
		} else {
			/*
			 * When a PTE/PMD entry is freed from the init_mm
			 * there's a free_pages() call to this page allocated
			 * above. Thus this get_page() is paired with the
			 * put_page_testzero() on the freeing path.
			 * This can only called by certain ZONE_DEVICE path,
			 * and through vmemmap_populate_compound_pages() when
			 * slab is available.
			 */
			get_page(reuse);
			p = page_to_virt(reuse);
		}
		/* Install a kernel mapping for the (new or shared) page. */
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}
1758f6aac41SChristoph Lameter
vmemmap_alloc_block_zero(unsigned long size,int node)176f7f99100SPavel Tatashin static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
177f7f99100SPavel Tatashin {
178f7f99100SPavel Tatashin void *p = vmemmap_alloc_block(size, node);
179f7f99100SPavel Tatashin
180f7f99100SPavel Tatashin if (!p)
181f7f99100SPavel Tatashin return NULL;
182f7f99100SPavel Tatashin memset(p, 0, size);
183f7f99100SPavel Tatashin
184f7f99100SPavel Tatashin return p;
185f7f99100SPavel Tatashin }
186f7f99100SPavel Tatashin
vmemmap_pmd_populate(pud_t * pud,unsigned long addr,int node)18729c71111SAndy Whitcroft pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
1888f6aac41SChristoph Lameter {
18929c71111SAndy Whitcroft pmd_t *pmd = pmd_offset(pud, addr);
1908f6aac41SChristoph Lameter if (pmd_none(*pmd)) {
191f7f99100SPavel Tatashin void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
1928f6aac41SChristoph Lameter if (!p)
1939dce07f1SAl Viro return NULL;
1948f6aac41SChristoph Lameter pmd_populate_kernel(&init_mm, pmd, p);
1958f6aac41SChristoph Lameter }
19629c71111SAndy Whitcroft return pmd;
1978f6aac41SChristoph Lameter }
1988f6aac41SChristoph Lameter
/*
 * Default no-op.  Architectures that need to initialise a freshly
 * allocated pmd-level page table page override this weak symbol
 * (called from vmemmap_pud_populate() before pud_populate()).
 */
void __weak __meminit pmd_init(void *addr)
{
}
2027b09f5afSFeiyang Chen
vmemmap_pud_populate(p4d_t * p4d,unsigned long addr,int node)203c2febafcSKirill A. Shutemov pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
2048f6aac41SChristoph Lameter {
205c2febafcSKirill A. Shutemov pud_t *pud = pud_offset(p4d, addr);
2068f6aac41SChristoph Lameter if (pud_none(*pud)) {
207f7f99100SPavel Tatashin void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
2088f6aac41SChristoph Lameter if (!p)
2099dce07f1SAl Viro return NULL;
2107b09f5afSFeiyang Chen pmd_init(p);
2118f6aac41SChristoph Lameter pud_populate(&init_mm, pud, p);
2128f6aac41SChristoph Lameter }
21329c71111SAndy Whitcroft return pud;
2148f6aac41SChristoph Lameter }
2158f6aac41SChristoph Lameter
/*
 * Default no-op.  Architectures that need to initialise a freshly
 * allocated pud-level page table page override this weak symbol
 * (called from vmemmap_p4d_populate() before p4d_populate()).
 */
void __weak __meminit pud_init(void *addr)
{
}
2197b09f5afSFeiyang Chen
vmemmap_p4d_populate(pgd_t * pgd,unsigned long addr,int node)220c2febafcSKirill A. Shutemov p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
221c2febafcSKirill A. Shutemov {
222c2febafcSKirill A. Shutemov p4d_t *p4d = p4d_offset(pgd, addr);
223c2febafcSKirill A. Shutemov if (p4d_none(*p4d)) {
224f7f99100SPavel Tatashin void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
225c2febafcSKirill A. Shutemov if (!p)
226c2febafcSKirill A. Shutemov return NULL;
2277b09f5afSFeiyang Chen pud_init(p);
228c2febafcSKirill A. Shutemov p4d_populate(&init_mm, p4d, p);
229c2febafcSKirill A. Shutemov }
230c2febafcSKirill A. Shutemov return p4d;
231c2febafcSKirill A. Shutemov }
232c2febafcSKirill A. Shutemov
vmemmap_pgd_populate(unsigned long addr,int node)23329c71111SAndy Whitcroft pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
2348f6aac41SChristoph Lameter {
23529c71111SAndy Whitcroft pgd_t *pgd = pgd_offset_k(addr);
2368f6aac41SChristoph Lameter if (pgd_none(*pgd)) {
237f7f99100SPavel Tatashin void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
2388f6aac41SChristoph Lameter if (!p)
2399dce07f1SAl Viro return NULL;
2408f6aac41SChristoph Lameter pgd_populate(&init_mm, pgd, p);
2418f6aac41SChristoph Lameter }
24229c71111SAndy Whitcroft return pgd;
2438f6aac41SChristoph Lameter }
24429c71111SAndy Whitcroft
/*
 * Populate every page-table level for a single vmemmap page at @addr,
 * top (pgd) to bottom (pte).  @altmap and @reuse are forwarded to
 * vmemmap_pte_populate().  Returns the pte, or NULL if any level's
 * allocation failed.
 */
static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node,
						  struct vmem_altmap *altmap,
						  struct page *reuse)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = vmemmap_pgd_populate(addr, node);
	if (!pgd)
		return NULL;
	p4d = vmemmap_p4d_populate(pgd, addr, node);
	if (!p4d)
		return NULL;
	pud = vmemmap_pud_populate(p4d, addr, node);
	if (!pud)
		return NULL;
	pmd = vmemmap_pmd_populate(pud, addr, node);
	if (!pmd)
		return NULL;
	pte = vmemmap_pte_populate(pmd, addr, node, altmap, reuse);
	if (!pte)
		return NULL;
	/* Complain if the backing page ended up far from @node. */
	vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);

	return pte;
}
2742beea70aSJoao Martins
vmemmap_populate_range(unsigned long start,unsigned long end,int node,struct vmem_altmap * altmap,struct page * reuse)2752beea70aSJoao Martins static int __meminit vmemmap_populate_range(unsigned long start,
2762beea70aSJoao Martins unsigned long end, int node,
2774917f55bSJoao Martins struct vmem_altmap *altmap,
2784917f55bSJoao Martins struct page *reuse)
2792beea70aSJoao Martins {
2802beea70aSJoao Martins unsigned long addr = start;
2812beea70aSJoao Martins pte_t *pte;
2822beea70aSJoao Martins
2832beea70aSJoao Martins for (; addr < end; addr += PAGE_SIZE) {
2844917f55bSJoao Martins pte = vmemmap_populate_address(addr, node, altmap, reuse);
2852beea70aSJoao Martins if (!pte)
2862beea70aSJoao Martins return -ENOMEM;
2878f6aac41SChristoph Lameter }
28829c71111SAndy Whitcroft
28929c71111SAndy Whitcroft return 0;
29029c71111SAndy Whitcroft }
2918f6aac41SChristoph Lameter
/*
 * Populate [start, end) of the vmemmap with individually allocated base
 * pages (no page reuse).  Thin wrapper around vmemmap_populate_range().
 */
int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	return vmemmap_populate_range(start, end, node, altmap, NULL);
}
2974917f55bSJoao Martins
/*
 * Default no-op.  Architectures supporting huge vmemmap mappings
 * override this weak symbol to map the PMD_SIZE block @p at @addr via
 * *@pmd (called from vmemmap_populate_hugepages()).
 */
void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
				      unsigned long addr, unsigned long next)
{
}
3022045a3b8SFeiyang Chen
/*
 * Default: report that *@pmd holds no usable huge vmemmap mapping
 * (return 0), which makes the caller fall back to base pages.
 * Architectures override this weak symbol to recognise an
 * already-populated pmd-level mapping (non-zero return skips it).
 */
int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
				       unsigned long addr, unsigned long next)
{
	return 0;
}
3082045a3b8SFeiyang Chen
/*
 * Populate [start, end) of the vmemmap preferring pmd-sized (huge)
 * mappings, falling back to base pages when a PMD_SIZE block cannot be
 * allocated (except from an altmap — see below).  Returns 0 on success,
 * -ENOMEM on failure.
 */
int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	unsigned long addr;
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	/* Walk the range one pmd-covered chunk at a time. */
	for (addr = start; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;

		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = pmd_offset(pud, addr);
		if (pmd_none(READ_ONCE(*pmd))) {
			void *p;

			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
			if (p) {
				/* Got a PMD_SIZE block: map it as one huge entry. */
				vmemmap_set_pmd(pmd, p, node, addr, next);
				continue;
			} else if (altmap) {
				/*
				 * No fallback: In any case we care about, the
				 * altmap should be reasonably sized and aligned
				 * such that vmemmap_alloc_block_buf() will always
				 * succeed. For consistency with the PTE case,
				 * return an error here as failure could indicate
				 * a configuration issue with the size of the altmap.
				 */
				return -ENOMEM;
			}
		} else if (vmemmap_check_pmd(pmd, node, addr, next))
			/* Arch says a usable huge mapping already exists. */
			continue;
		/* No huge mapping possible: populate with base pages. */
		if (vmemmap_populate_basepages(addr, next, node, altmap))
			return -ENOMEM;
	}
	return 0;
}
3602045a3b8SFeiyang Chen
361*40135fc7SAneesh Kumar K.V #ifndef vmemmap_populate_compound_pages
3624917f55bSJoao Martins /*
3634917f55bSJoao Martins * For compound pages bigger than section size (e.g. x86 1G compound
3644917f55bSJoao Martins * pages with 2M subsection size) fill the rest of sections as tail
3654917f55bSJoao Martins * pages.
3664917f55bSJoao Martins *
3674917f55bSJoao Martins * Note that memremap_pages() resets @nr_range value and will increment
3684917f55bSJoao Martins * it after each range successful onlining. Thus the value or @nr_range
3694917f55bSJoao Martins * at section memmap populate corresponds to the in-progress range
3704917f55bSJoao Martins * being onlined here.
3714917f55bSJoao Martins */
reuse_compound_section(unsigned long start_pfn,struct dev_pagemap * pgmap)3724917f55bSJoao Martins static bool __meminit reuse_compound_section(unsigned long start_pfn,
3734917f55bSJoao Martins struct dev_pagemap *pgmap)
3744917f55bSJoao Martins {
3754917f55bSJoao Martins unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
3764917f55bSJoao Martins unsigned long offset = start_pfn -
3774917f55bSJoao Martins PHYS_PFN(pgmap->ranges[pgmap->nr_range].start);
3784917f55bSJoao Martins
3794917f55bSJoao Martins return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION;
3804917f55bSJoao Martins }
3814917f55bSJoao Martins
compound_section_tail_page(unsigned long addr)3824917f55bSJoao Martins static pte_t * __meminit compound_section_tail_page(unsigned long addr)
3834917f55bSJoao Martins {
3844917f55bSJoao Martins pte_t *pte;
3854917f55bSJoao Martins
3864917f55bSJoao Martins addr -= PAGE_SIZE;
3874917f55bSJoao Martins
3884917f55bSJoao Martins /*
3894917f55bSJoao Martins * Assuming sections are populated sequentially, the previous section's
3904917f55bSJoao Martins * page data can be reused.
3914917f55bSJoao Martins */
3924917f55bSJoao Martins pte = pte_offset_kernel(pmd_off_k(addr), addr);
3934917f55bSJoao Martins if (!pte)
3944917f55bSJoao Martins return NULL;
3954917f55bSJoao Martins
3964917f55bSJoao Martins return pte;
3974917f55bSJoao Martins }
3984917f55bSJoao Martins
/*
 * Populate a section's vmemmap for compound devmap pages with tail-page
 * deduplication: per compound page, one vmemmap page for the head and
 * one shared vmemmap page mapped repeatedly for all remaining tails.
 * Returns 0 on success, -ENOMEM on failure.
 */
static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
						     unsigned long start,
						     unsigned long end, int node,
						     struct dev_pagemap *pgmap)
{
	unsigned long size, addr;
	pte_t *pte;
	int rc;

	if (reuse_compound_section(start_pfn, pgmap)) {
		pte = compound_section_tail_page(start);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the page that was populated in the prior iteration
		 * with just tail struct pages.
		 */
		return vmemmap_populate_range(start, end, node, NULL,
					      pte_page(ptep_get(pte)));
	}

	/* Span of one compound page's memmap, capped to the section. */
	size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
	for (addr = start; addr < end; addr += size) {
		unsigned long next, last = addr + size;

		/* Populate the head page vmemmap page */
		pte = vmemmap_populate_address(addr, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/* Populate the tail pages vmemmap page */
		next = addr + PAGE_SIZE;
		pte = vmemmap_populate_address(next, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the previous page for the rest of tail pages
		 * See layout diagram in Documentation/mm/vmemmap_dedup.rst
		 */
		next += PAGE_SIZE;
		rc = vmemmap_populate_range(next, last, node, NULL,
					    pte_page(ptep_get(pte)));
		if (rc)
			return -ENOMEM;
	}

	return 0;
}
4492beea70aSJoao Martins
450*40135fc7SAneesh Kumar K.V #endif
451*40135fc7SAneesh Kumar K.V
/*
 * Populate the memmap (struct page array) for a subsection-aligned pfn
 * range on node @nid.  Uses the deduplicated compound path when
 * vmemmap_can_optimize() allows it, otherwise the architecture's
 * vmemmap_populate().  Returns the first struct page of the range, or
 * NULL on misalignment (with a one-shot WARN) or allocation failure.
 */
struct page * __meminit __populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);
	int r;

	/* Both the start pfn and the page count must be subsection-aligned. */
	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
		return NULL;

	if (vmemmap_can_optimize(altmap, pgmap))
		r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
	else
		r = vmemmap_populate(start, end, nid, altmap);

	if (r < 0)
		return NULL;

	return pfn_to_page(pfn);
}
474