// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
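/*
 * Illustrative sketch, not part of this file: with a virtually contiguous
 * memmap the pfn/page conversions collapse to pointer arithmetic against a
 * fixed, arch-chosen base, roughly as the generic SPARSEMEM_VMEMMAP
 * definitions do:
 *
 *	#define vmemmap			((struct page *)VMEMMAP_START)
 *	#define __pfn_to_page(pfn)	(vmemmap + (pfn))
 *	#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
 */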
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/memremap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>

#include <asm/dma.h>
#include <asm/pgalloc.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */

static void * __ref __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return memblock_alloc_try_nid_raw(size, align, goal,
					  MEMBLOCK_ALLOC_ACCESSIBLE, node);
}

void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	/* If the main allocator is up use that, fallback to bootmem. */
	if (slab_is_available()) {
		gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
		int order = get_order(size);
		static bool warned;
		struct page *page;

		page = alloc_pages_node(node, gfp_mask, order);
		if (page)
			return page_address(page);

		if (!warned) {
			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
				   "vmemmap alloc failure: order:%u", order);
			warned = true;
		}
		return NULL;
	} else
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap);

/* need to make sure size is all the same during early stage */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
					 struct vmem_altmap *altmap)
{
	void *ptr;

	if (altmap)
		return altmap_alloc_block_buf(size, altmap);

	ptr = sparse_buffer_alloc(size);
	if (!ptr)
		ptr = vmemmap_alloc_block(size, node);
	return ptr;
}

static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	return altmap->base_pfn + altmap->reserve + altmap->alloc
		+ altmap->align;
}

static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long allocated = altmap->alloc + altmap->align;

	if (altmap->free > allocated)
		return altmap->free - allocated;
	return 0;
}

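/*
 * Illustrative view of the vmem_altmap accounting used below: the device
 * range starts at @base_pfn, the first @reserve pfns are never handed out,
 * and up to @free pfns after that may back the memmap; @alloc and @align
 * track how many of those have already been consumed.
 *
 *	base_pfn
 *	   |<- reserve ->|<- alloc + align ->|<-- still free -->|
 *	                                     ^
 *	                        vmem_altmap_next_pfn()
 */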
static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns, nr_align;

	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
				__func__, size);
		return NULL;
	}

	pfn = vmem_altmap_next_pfn(altmap);
	nr_pfns = size >> PAGE_SHIFT;
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
	return __va(__pfn_to_phys(pfn));
}

void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	unsigned long pfn = pte_pfn(ptep_get(pte));
	int actual_node = early_pfn_to_nid(pfn);

	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
		pr_warn_once("[%lx-%lx] potential offnode page_structs\n",
			start, end - 1);
}

pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
				       struct vmem_altmap *altmap,
				       struct page *reuse)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(ptep_get(pte))) {
		pte_t entry;
		void *p;

		if (!reuse) {
			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
			if (!p)
				return NULL;
		} else {
			/*
			 * When a PTE/PMD entry is freed from the init_mm
			 * there's a free_pages() call to this page allocated
			 * above. Thus this get_page() is paired with the
			 * put_page_testzero() on the freeing path.
			 * This can only be called by certain ZONE_DEVICE paths,
			 * and through vmemmap_populate_compound_pages() when
			 * slab is available.
			 */
			get_page(reuse);
			p = page_to_virt(reuse);
		}
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}

static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
	void *p = vmemmap_alloc_block(size, node);

	if (!p)
		return NULL;
	memset(p, 0, size);

	return p;
}

pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_populate_kernel(&init_mm, pmd, p);
	}
	return pmd;
}

void __weak __meminit pmd_init(void *addr)
{
}

pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_init(p);
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}

void __weak __meminit pud_init(void *addr)
{
}

p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
	p4d_t *p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pud_init(p);
		p4d_populate(&init_mm, p4d, p);
	}
	return p4d;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pgd_populate(&init_mm, pgd, p);
	}
	return pgd;
}

static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node,
					      struct vmem_altmap *altmap,
					      struct page *reuse)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = vmemmap_pgd_populate(addr, node);
	if (!pgd)
		return NULL;
	p4d = vmemmap_p4d_populate(pgd, addr, node);
	if (!p4d)
		return NULL;
	pud = vmemmap_pud_populate(p4d, addr, node);
	if (!pud)
		return NULL;
	pmd = vmemmap_pmd_populate(pud, addr, node);
	if (!pmd)
		return NULL;
	pte = vmemmap_pte_populate(pmd, addr, node, altmap, reuse);
	if (!pte)
		return NULL;
	vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);

	return pte;
}

static int __meminit vmemmap_populate_range(unsigned long start,
					    unsigned long end, int node,
					    struct vmem_altmap *altmap,
					    struct page *reuse)
{
	unsigned long addr = start;
	pte_t *pte;

	for (; addr < end; addr += PAGE_SIZE) {
		pte = vmemmap_populate_address(addr, node, altmap, reuse);
		if (!pte)
			return -ENOMEM;
	}

	return 0;
}

int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	return vmemmap_populate_range(start, end, node, altmap, NULL);
}

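/*
 * Illustrative sketch, not part of this file: an architecture without
 * special large-page handling for the memmap can implement its required
 * vmemmap_populate() hook as a thin wrapper around the helper above:
 *
 *	int __meminit vmemmap_populate(unsigned long start, unsigned long end,
 *				       int node, struct vmem_altmap *altmap)
 *	{
 *		return vmemmap_populate_basepages(start, end, node, altmap);
 *	}
 */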
void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
				      unsigned long addr, unsigned long next)
{
}

int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
				       unsigned long addr, unsigned long next)
{
	return 0;
}

int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	unsigned long addr;
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	for (addr = start; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;

		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = pmd_offset(pud, addr);
		if (pmd_none(READ_ONCE(*pmd))) {
			void *p;

			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
			if (p) {
				vmemmap_set_pmd(pmd, p, node, addr, next);
				continue;
			} else if (altmap) {
				/*
				 * No fallback: In any case we care about, the
				 * altmap should be reasonably sized and aligned
				 * such that vmemmap_alloc_block_buf() will always
				 * succeed. For consistency with the PTE case,
				 * return an error here as failure could indicate
				 * a configuration issue with the size of the altmap.
				 */
				return -ENOMEM;
			}
		} else if (vmemmap_check_pmd(pmd, node, addr, next))
			continue;
		if (vmemmap_populate_basepages(addr, next, node, altmap))
			return -ENOMEM;
	}
	return 0;
}

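/*
 * Illustrative sketch, not part of this file: architectures that can back
 * the memmap with PMD-sized pages override vmemmap_set_pmd() and
 * vmemmap_check_pmd() and call vmemmap_populate_hugepages() from their
 * vmemmap_populate(). A minimal vmemmap_set_pmd() might look roughly like
 * this (the exact page-table flags are arch specific):
 *
 *	void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
 *				       unsigned long addr, unsigned long next)
 *	{
 *		pmd_t entry = pmd_mkhuge(pfn_pmd(__pa(p) >> PAGE_SHIFT,
 *						 PAGE_KERNEL));
 *
 *		set_pmd_at(&init_mm, addr, pmd, entry);
 *	}
 */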
#ifndef vmemmap_populate_compound_pages
/*
 * For compound pages bigger than section size (e.g. x86 1G compound
 * pages with 2M subsection size) fill the rest of sections as tail
 * pages.
 *
 * Note that memremap_pages() resets @nr_range value and will increment
 * it after each range is successfully onlined. Thus the value of @nr_range
 * at section memmap populate corresponds to the in-progress range
 * being onlined here.
 */
static bool __meminit reuse_compound_section(unsigned long start_pfn,
					     struct dev_pagemap *pgmap)
{
	unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
	unsigned long offset = start_pfn -
		PHYS_PFN(pgmap->ranges[pgmap->nr_range].start);

	return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION;
}

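/*
 * Worked example, assuming x86_64 geometry: a 1G device-dax compound page
 * spans pgmap_vmemmap_nr() == 262144 pfns while a subsection covers 512
 * pfns. Any section whose offset from the range start is not a multiple of
 * the compound page size contains only tail pages, so reuse_compound_section()
 * returns true and its memmap reuses the tail page of the previous section.
 */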
static pte_t * __meminit compound_section_tail_page(unsigned long addr)
{
	pte_t *pte;

	addr -= PAGE_SIZE;

	/*
	 * Assuming sections are populated sequentially, the previous section's
	 * page data can be reused.
	 */
	pte = pte_offset_kernel(pmd_off_k(addr), addr);
	if (!pte)
		return NULL;

	return pte;
}

static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
						     unsigned long start,
						     unsigned long end, int node,
						     struct dev_pagemap *pgmap)
{
	unsigned long size, addr;
	pte_t *pte;
	int rc;

	if (reuse_compound_section(start_pfn, pgmap)) {
		pte = compound_section_tail_page(start);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the page that was populated in the prior iteration
		 * with just tail struct pages.
		 */
		return vmemmap_populate_range(start, end, node, NULL,
					      pte_page(ptep_get(pte)));
	}

	size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
	for (addr = start; addr < end; addr += size) {
		unsigned long next, last = addr + size;

		/* Populate the head page vmemmap page */
		pte = vmemmap_populate_address(addr, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/* Populate the tail pages vmemmap page */
		next = addr + PAGE_SIZE;
		pte = vmemmap_populate_address(next, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the previous page for the rest of tail pages
		 * See layout diagram in Documentation/mm/vmemmap_dedup.rst
		 */
		next += PAGE_SIZE;
		rc = vmemmap_populate_range(next, last, node, NULL,
					    pte_page(ptep_get(pte)));
		if (rc)
			return -ENOMEM;
	}

	return 0;
}

#endif

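/*
 * __populate_section_memmap - instantiate the memmap for one (sub)section
 * worth of pfns. Both @pfn and @nr_pages must be subsection aligned. The
 * backing pages come from @altmap when one is supplied, otherwise from
 * regular allocations, and compound devmap mappings are deduplicated via
 * vmemmap_populate_compound_pages() when possible. Returns the first
 * struct page of the populated range, or NULL on failure.
 */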
struct page * __meminit __populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);
	int r;

	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
		return NULL;

	if (vmemmap_can_optimize(altmap, pgmap))
		r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
	else
		r = vmemmap_populate(start, end, nid, altmap);

	if (r < 0)
		return NULL;

	return pfn_to_page(pfn);
}