// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
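/*
 * Illustrative sketch (not code from this file): with SPARSEMEM_VMEMMAP,
 * the generic memory model reduces the pfn <-> struct page conversions to
 * pure pointer arithmetic against the fixed vmemmap base, roughly:
 *
 *	#define __pfn_to_page(pfn)	(vmemmap + (pfn))
 *	#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
 *
 * The page tables populated below are what make dereferencing the result
 * legal; the conversion itself touches no memory.
 */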
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/memremap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/pgtable.h>
#include <linux/bootmem_info.h>

#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
/**
 * struct vmemmap_remap_walk - walk vmemmap page table
 *
 * @remap_pte:		called for each lowest-level entry (PTE).
 * @nr_walked:		the number of walked PTEs.
 * @reuse_page:		the page which is reused for the tail vmemmap pages.
 * @reuse_addr:		the virtual address of the @reuse_page page.
 * @vmemmap_pages:	the list head of the vmemmap pages that can be freed
 *			or are mapped from.
 */
struct vmemmap_remap_walk {
	void			(*remap_pte)(pte_t *pte, unsigned long addr,
					     struct vmemmap_remap_walk *walk);
	unsigned long		nr_walked;
	struct page		*reuse_page;
	unsigned long		reuse_addr;
	struct list_head	*vmemmap_pages;
};

static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
{
	pmd_t __pmd;
	int i;
	unsigned long addr = start;
	struct page *page = pmd_page(*pmd);
	pte_t *pgtable = pte_alloc_one_kernel(&init_mm);

	if (!pgtable)
		return -ENOMEM;

	pmd_populate_kernel(&init_mm, &__pmd, pgtable);

	for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, addr += PAGE_SIZE) {
		pte_t entry, *pte;
		pgprot_t pgprot = PAGE_KERNEL;

		entry = mk_pte(page + i, pgprot);
		pte = pte_offset_kernel(&__pmd, addr);
		set_pte_at(&init_mm, addr, pte, entry);
	}

	spin_lock(&init_mm.page_table_lock);
	if (likely(pmd_leaf(*pmd))) {
		/* Make pte visible before pmd. See comment in pmd_install(). */
		smp_wmb();
		pmd_populate_kernel(&init_mm, pmd, pgtable);
		flush_tlb_kernel_range(start, start + PMD_SIZE);
	} else {
		pte_free_kernel(&init_mm, pgtable);
	}
	spin_unlock(&init_mm.page_table_lock);

	return 0;
}
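/*
 * Worked example for the split above (illustrative numbers, assuming x86-64
 * defaults): with 4 KB base pages and 2 MB PMD mappings,
 * PMD_SIZE / PAGE_SIZE == 512, so the loop installs 512 PTEs that map
 * exactly the same 2 MB of vmemmap memory as the huge PMD they replace.
 * The split costs one extra 4 KB page table page; the mapped contents are
 * unchanged, which is why no copying is needed and flushing the TLB for
 * the 2 MB range suffices.
 */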
static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
{
	int leaf;

	spin_lock(&init_mm.page_table_lock);
	leaf = pmd_leaf(*pmd);
	spin_unlock(&init_mm.page_table_lock);

	if (!leaf)
		return 0;

	return __split_vmemmap_huge_pmd(pmd, start);
}

static void vmemmap_pte_range(pmd_t *pmd, unsigned long addr,
			      unsigned long end,
			      struct vmemmap_remap_walk *walk)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);

	/*
	 * The reuse_page is found 'first' in the page table walk, before we
	 * start remapping (i.e. before calling @walk->remap_pte).
	 */
	if (!walk->reuse_page) {
		walk->reuse_page = pte_page(*pte);
		/*
		 * Because the reuse address is part of the range that we are
		 * walking, skip the reuse address range.
		 */
		addr += PAGE_SIZE;
		pte++;
		walk->nr_walked++;
	}

	for (; addr != end; addr += PAGE_SIZE, pte++) {
		walk->remap_pte(pte, addr, walk);
		walk->nr_walked++;
	}
}
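/*
 * Sketch of the walk above for a caller passing [reuse, end) (illustrative
 * addresses, assuming 4 KB pages):
 *
 *	reuse = V		-> captured as walk->reuse_page, then skipped
 *	V + 1 * PAGE_SIZE	-> walk->remap_pte(pte, addr, walk)
 *	V + 2 * PAGE_SIZE	-> walk->remap_pte(pte, addr, walk)
 *	...
 *
 * This is why vmemmap_remap_free()/vmemmap_remap_alloc() below require
 * @reuse to be the first page of the walked range: the first PTE visited
 * must be the one whose page gets reused.
 */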
static int vmemmap_pmd_range(pud_t *pud, unsigned long addr,
			     unsigned long end,
			     struct vmemmap_remap_walk *walk)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		int ret;

		ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK);
		if (ret)
			return ret;

		next = pmd_addr_end(addr, end);
		vmemmap_pte_range(pmd, addr, next, walk);
	} while (pmd++, addr = next, addr != end);

	return 0;
}

static int vmemmap_pud_range(p4d_t *p4d, unsigned long addr,
			     unsigned long end,
			     struct vmemmap_remap_walk *walk)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(p4d, addr);
	do {
		int ret;

		next = pud_addr_end(addr, end);
		ret = vmemmap_pmd_range(pud, addr, next, walk);
		if (ret)
			return ret;
	} while (pud++, addr = next, addr != end);

	return 0;
}

static int vmemmap_p4d_range(pgd_t *pgd, unsigned long addr,
			     unsigned long end,
			     struct vmemmap_remap_walk *walk)
{
	p4d_t *p4d;
	unsigned long next;

	p4d = p4d_offset(pgd, addr);
	do {
		int ret;

		next = p4d_addr_end(addr, end);
		ret = vmemmap_pud_range(p4d, addr, next, walk);
		if (ret)
			return ret;
	} while (p4d++, addr = next, addr != end);

	return 0;
}

static int vmemmap_remap_range(unsigned long start, unsigned long end,
			       struct vmemmap_remap_walk *walk)
{
	unsigned long addr = start;
	unsigned long next;
	pgd_t *pgd;

	VM_BUG_ON(!PAGE_ALIGNED(start));
	VM_BUG_ON(!PAGE_ALIGNED(end));

	pgd = pgd_offset_k(addr);
	do {
		int ret;

		next = pgd_addr_end(addr, end);
		ret = vmemmap_p4d_range(pgd, addr, next, walk);
		if (ret)
			return ret;
	} while (pgd++, addr = next, addr != end);

	/*
	 * We only change the mapping of the vmemmap virtual address range
	 * [@start + PAGE_SIZE, end), so we only need to flush the TLB which
	 * belongs to the range.
	 */
	flush_tlb_kernel_range(start + PAGE_SIZE, end);

	return 0;
}
/*
 * Free a vmemmap page. A vmemmap page can be allocated from the memblock
 * allocator or buddy allocator. If the PG_reserved flag is set, it means
 * that it was allocated from the memblock allocator, so free it via
 * free_bootmem_page(). Otherwise, use __free_page().
 */
static inline void free_vmemmap_page(struct page *page)
{
	if (PageReserved(page))
		free_bootmem_page(page);
	else
		__free_page(page);
}

/* Free a list of the vmemmap pages */
static void free_vmemmap_page_list(struct list_head *list)
{
	struct page *page, *next;

	list_for_each_entry_safe(page, next, list, lru) {
		list_del(&page->lru);
		free_vmemmap_page(page);
	}
}

static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
			      struct vmemmap_remap_walk *walk)
{
	/*
	 * Remap the tail pages as read-only to catch illegal write operations
	 * to the tail pages.
	 */
	pgprot_t pgprot = PAGE_KERNEL_RO;
	pte_t entry = mk_pte(walk->reuse_page, pgprot);
	struct page *page = pte_page(*pte);

	list_add_tail(&page->lru, walk->vmemmap_pages);
	set_pte_at(&init_mm, addr, pte, entry);
}

/*
 * How many struct page structs need to be reset. When we reuse the head
 * struct page, the special metadata (e.g. page->flags or page->mapping)
 * cannot be copied to the tail struct page structs. The invalid value will
 * be checked in free_tail_pages_check(). In order to avoid the message of
 * "corrupted mapping in tail page", we need to reset at least 3 struct
 * page structs (one head struct page and two tail struct pages).
 */
#define NR_RESET_STRUCT_PAGE	3

static inline void reset_struct_pages(struct page *start)
{
	int i;
	struct page *from = start + NR_RESET_STRUCT_PAGE;

	for (i = 0; i < NR_RESET_STRUCT_PAGE; i++)
		memcpy(start + i, from, sizeof(*from));
}
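/*
 * Example of the effect (illustrative, assuming a 64-byte struct page so a
 * 4 KB vmemmap page holds s[0..63]): for a freshly restored vmemmap page,
 * the copy loop above takes s[3], which carries only tail-page state, and
 * replicates it into s[0], s[1] and s[2], wiping the head-style metadata
 * (page->flags, page->mapping, ...) that vmemmap_restore_pte() below
 * duplicated from the reused page via copy_page().
 */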
static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
				struct vmemmap_remap_walk *walk)
{
	pgprot_t pgprot = PAGE_KERNEL;
	struct page *page;
	void *to;

	BUG_ON(pte_page(*pte) != walk->reuse_page);

	page = list_first_entry(walk->vmemmap_pages, struct page, lru);
	list_del(&page->lru);
	to = page_to_virt(page);
	copy_page(to, (void *)walk->reuse_addr);
	reset_struct_pages(to);

	set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot));
}

/**
 * vmemmap_remap_free - remap the vmemmap virtual address range [@start, @end)
 *			to the page which @reuse is mapped to, then free the
 *			vmemmap pages which the range was mapped to.
 * @start:	start address of the vmemmap virtual address range that we want
 *		to remap.
 * @end:	end address of the vmemmap virtual address range that we want to
 *		remap.
 * @reuse:	reuse address.
 *
 * Return: %0 on success, negative error code otherwise.
 */
int vmemmap_remap_free(unsigned long start, unsigned long end,
		       unsigned long reuse)
{
	int ret;
	LIST_HEAD(vmemmap_pages);
	struct vmemmap_remap_walk walk = {
		.remap_pte	= vmemmap_remap_pte,
		.reuse_addr	= reuse,
		.vmemmap_pages	= &vmemmap_pages,
	};

	/*
	 * To make the remapping routine as efficient as possible for huge
	 * pages, the vmemmap page table walk obeys the following rules
	 * (see more details in vmemmap_pte_range()):
	 *
	 * - The range [@start, @end) and the range [@reuse, @reuse + PAGE_SIZE)
	 *   must be contiguous.
	 * - The @reuse address is part of the range [@reuse, @end) that we are
	 *   walking, which is passed to vmemmap_remap_range().
	 * - The @reuse address is the first in the complete range.
	 *
	 * So we need to make sure that @start and @reuse meet the above rules.
	 */
	BUG_ON(start - reuse != PAGE_SIZE);

	mmap_read_lock(&init_mm);
	ret = vmemmap_remap_range(reuse, end, &walk);
	if (ret && walk.nr_walked) {
		end = reuse + walk.nr_walked * PAGE_SIZE;
		/*
		 * vmemmap_pages contains pages from the previous
		 * vmemmap_remap_range call which failed. These
		 * are pages which were removed from the vmemmap.
		 * They will be restored in the following call.
		 */
		walk = (struct vmemmap_remap_walk) {
			.remap_pte	= vmemmap_restore_pte,
			.reuse_addr	= reuse,
			.vmemmap_pages	= &vmemmap_pages,
		};

		vmemmap_remap_range(reuse, end, &walk);
	}
	mmap_read_unlock(&init_mm);

	free_vmemmap_page_list(&vmemmap_pages);

	return ret;
}
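/*
 * Worked example (hypothetical numbers, assuming x86-64 with 4 KB pages and
 * a 64-byte struct page): a 2 MB HugeTLB page has 512 struct pages, i.e.
 * 32 KB == 8 pages of vmemmap. The HugeTLB code keeps the vmemmap page
 * holding the head struct page, passes the rest as [@start, @end) with
 * @reuse == @start - PAGE_SIZE, and this function remaps every tail page
 * read-only onto the reuse page before freeing them, e.g.:
 *
 *	vmemmap_remap_free(vmemmap_start + PAGE_SIZE,
 *			   vmemmap_start + 8 * PAGE_SIZE,
 *			   vmemmap_start);
 *
 * On success, most of the 32 KB of vmemmap per 2 MB HugeTLB page is
 * returned to the system.
 */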
static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
				   gfp_t gfp_mask, struct list_head *list)
{
	unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
	int nid = page_to_nid((struct page *)start);
	struct page *page, *next;

	while (nr_pages--) {
		page = alloc_pages_node(nid, gfp_mask, 0);
		if (!page)
			goto out;
		list_add_tail(&page->lru, list);
	}

	return 0;
out:
	list_for_each_entry_safe(page, next, list, lru)
		__free_pages(page, 0);
	return -ENOMEM;
}

/**
 * vmemmap_remap_alloc - remap the vmemmap virtual address range [@start, end)
 *			 to the pages which are taken from @vmemmap_pages,
 *			 respectively.
 * @start:	start address of the vmemmap virtual address range that we want
 *		to remap.
 * @end:	end address of the vmemmap virtual address range that we want to
 *		remap.
 * @reuse:	reuse address.
 * @gfp_mask:	GFP flag for allocating vmemmap pages.
 *
 * Return: %0 on success, negative error code otherwise.
 */
int vmemmap_remap_alloc(unsigned long start, unsigned long end,
			unsigned long reuse, gfp_t gfp_mask)
{
	LIST_HEAD(vmemmap_pages);
	struct vmemmap_remap_walk walk = {
		.remap_pte	= vmemmap_restore_pte,
		.reuse_addr	= reuse,
		.vmemmap_pages	= &vmemmap_pages,
	};

	/* See the comment in vmemmap_remap_free(). */
	BUG_ON(start - reuse != PAGE_SIZE);

	if (alloc_vmemmap_page_list(start, end, gfp_mask, &vmemmap_pages))
		return -ENOMEM;

	mmap_read_lock(&init_mm);
	vmemmap_remap_range(reuse, end, &walk);
	mmap_read_unlock(&init_mm);

	return 0;
}
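/*
 * Illustrative pairing (not a call site in this file): vmemmap_remap_alloc()
 * is the inverse of vmemmap_remap_free(). Continuing the hypothetical
 * example above, a caller that previously freed the range restores it with
 * something like:
 *
 *	ret = vmemmap_remap_alloc(vmemmap_start + PAGE_SIZE,
 *				  vmemmap_start + 8 * PAGE_SIZE,
 *				  vmemmap_start, GFP_KERNEL);
 *
 * which allocates fresh pages on the reuse page's node, copies the reused
 * page's contents into each, resets the head-style metadata in the first
 * struct pages, and remaps the range writable again.
 */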
#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */

static void * __ref __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return memblock_alloc_try_nid_raw(size, align, goal,
					  MEMBLOCK_ALLOC_ACCESSIBLE, node);
}

void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	/* If the main allocator is up, use that; fall back to bootmem. */
	if (slab_is_available()) {
		gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
		int order = get_order(size);
		static bool warned;
		struct page *page;

		page = alloc_pages_node(node, gfp_mask, order);
		if (page)
			return page_address(page);

		if (!warned) {
			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
				   "vmemmap alloc failure: order:%u", order);
			warned = true;
		}
		return NULL;
	} else
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap);

/* The allocation size needs to be consistent during the early boot stage. */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
					 struct vmem_altmap *altmap)
{
	void *ptr;

	if (altmap)
		return altmap_alloc_block_buf(size, altmap);

	ptr = sparse_buffer_alloc(size);
	if (!ptr)
		ptr = vmemmap_alloc_block(size, node);
	return ptr;
}

static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	return altmap->base_pfn + altmap->reserve + altmap->alloc
		+ altmap->align;
}

static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long allocated = altmap->alloc + altmap->align;

	if (altmap->free > allocated)
		return altmap->free - allocated;
	return 0;
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns, nr_align;

	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
			     __func__, size);
		return NULL;
	}

	pfn = vmem_altmap_next_pfn(altmap);
	nr_pfns = size >> PAGE_SHIFT;
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
		 __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
	return __va(__pfn_to_phys(pfn));
}
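/*
 * Worked example of the altmap accounting above (illustrative numbers):
 * suppose an altmap with base_pfn == 0x100000, reserve == 0, and 16 pages
 * already handed out (alloc + align == 16). A 16 KB request gives
 * nr_pfns == 4; find_first_bit() on 4 yields bit 2, so the pfn is rounded
 * up to a multiple of 4. The next pfn, 0x100010, is already 4-aligned, so
 * no padding is charged to @align: the helper hands out pfns
 * 0x100010..0x100013, bumps @alloc by 4, and returns
 * __va(__pfn_to_phys(0x100010)). Each block is thus aligned to the largest
 * power-of-two factor of its own size (up to BITS_PER_LONG pages), keeping
 * huge-mapping-sized vmemmap blocks naturally aligned inside the device's
 * reserved region.
 */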
void __meminit vmemmap_verify(pte_t *pte, int node,
			      unsigned long start, unsigned long end)
{
	unsigned long pfn = pte_pfn(*pte);
	int actual_node = early_pfn_to_nid(pfn);

	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
		pr_warn("[%lx-%lx] potential offnode page_structs\n",
			start, end - 1);
}

pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
				       struct vmem_altmap *altmap,
				       struct page *reuse)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte)) {
		pte_t entry;
		void *p;

		if (!reuse) {
			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
			if (!p)
				return NULL;
		} else {
			/*
			 * When a PTE/PMD entry is freed from the init_mm
			 * there's a free_pages() call on the page allocated
			 * above. Thus this get_page() is paired with the
			 * put_page_testzero() on the freeing path.
			 * This can only be called by certain ZONE_DEVICE
			 * paths, and through vmemmap_populate_compound_pages()
			 * when slab is available.
			 */
			get_page(reuse);
			p = page_to_virt(reuse);
		}
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}
static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
	void *p = vmemmap_alloc_block(size, node);

	if (!p)
		return NULL;
	memset(p, 0, size);

	return p;
}

pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_populate_kernel(&init_mm, pmd, p);
	}
	return pmd;
}

pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}

p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
	p4d_t *p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		p4d_populate(&init_mm, p4d, p);
	}
	return p4d;
}
pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pgd_populate(&init_mm, pgd, p);
	}
	return pgd;
}

static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node,
						  struct vmem_altmap *altmap,
						  struct page *reuse)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = vmemmap_pgd_populate(addr, node);
	if (!pgd)
		return NULL;
	p4d = vmemmap_p4d_populate(pgd, addr, node);
	if (!p4d)
		return NULL;
	pud = vmemmap_pud_populate(p4d, addr, node);
	if (!pud)
		return NULL;
	pmd = vmemmap_pmd_populate(pud, addr, node);
	if (!pmd)
		return NULL;
	pte = vmemmap_pte_populate(pmd, addr, node, altmap, reuse);
	if (!pte)
		return NULL;
	vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);

	return pte;
}

static int __meminit vmemmap_populate_range(unsigned long start,
					    unsigned long end, int node,
					    struct vmem_altmap *altmap,
					    struct page *reuse)
{
	unsigned long addr = start;
	pte_t *pte;

	for (; addr < end; addr += PAGE_SIZE) {
		pte = vmemmap_populate_address(addr, node, altmap, reuse);
		if (!pte)
			return -ENOMEM;
	}

	return 0;
}

int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	return vmemmap_populate_range(start, end, node, altmap, NULL);
}

/*
 * For compound pages bigger than section size (e.g. x86 1G compound
 * pages with 2M subsection size) fill the rest of sections as tail
 * pages.
 *
 * Note that memremap_pages() resets @nr_range value and will increment
 * it after each successful range onlining. Thus the value of @nr_range
 * at section memmap populate corresponds to the in-progress range
 * being onlined here.
 */
static bool __meminit reuse_compound_section(unsigned long start_pfn,
					     struct dev_pagemap *pgmap)
{
	unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
	unsigned long offset = start_pfn -
		PHYS_PFN(pgmap->ranges[pgmap->nr_range].start);

	return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION;
}
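/*
 * Worked example (illustrative, assuming 4 KB pages): for a device pgmap of
 * 1 GB compound pages, pgmap_vmemmap_nr() == 262144 pfns, while a
 * subsection covers PAGES_PER_SUBSECTION == 512 pfns. Only the chunk at the
 * start of each 1 GB page has an offset aligned to 262144; every later
 * chunk of that page returns true here and reuses the tail page populated
 * for the first chunk instead of allocating fresh vmemmap memory.
 */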
static pte_t * __meminit compound_section_tail_page(unsigned long addr)
{
	pte_t *pte;

	addr -= PAGE_SIZE;

	/*
	 * Assuming sections are populated sequentially, the previous section's
	 * page data can be reused.
	 */
	pte = pte_offset_kernel(pmd_off_k(addr), addr);
	if (!pte)
		return NULL;

	return pte;
}

static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
						     unsigned long start,
						     unsigned long end, int node,
						     struct dev_pagemap *pgmap)
{
	unsigned long size, addr;
	pte_t *pte;
	int rc;

	if (reuse_compound_section(start_pfn, pgmap)) {
		pte = compound_section_tail_page(start);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the page that was populated in the prior iteration
		 * with just tail struct pages.
		 */
		return vmemmap_populate_range(start, end, node, NULL,
					      pte_page(*pte));
	}

	size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
	for (addr = start; addr < end; addr += size) {
		unsigned long next, last = addr + size;

		/* Populate the head page vmemmap page */
		pte = vmemmap_populate_address(addr, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/* Populate the tail pages vmemmap page */
		next = addr + PAGE_SIZE;
		pte = vmemmap_populate_address(next, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the previous page for the rest of tail pages
		 * See layout diagram in Documentation/vm/vmemmap_dedup.rst
		 */
		next += PAGE_SIZE;
		rc = vmemmap_populate_range(next, last, node, NULL,
					    pte_page(*pte));
		if (rc)
			return -ENOMEM;
	}

	return 0;
}
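/*
 * Worked example for the loop above (illustrative, assuming 4 KB pages and
 * a 64-byte struct page): with 2 MB compound pages, size == 512 * 64 ==
 * 32 KB, so each iteration covers the vmemmap of one compound page: one
 * freshly populated page for the head struct page, one for the first tail
 * struct pages, and the remaining six vmemmap pages all mapped to that
 * second page. See Documentation/vm/vmemmap_dedup.rst for the layout.
 */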
struct page * __meminit __populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);
	int r;

	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
		return NULL;

	if (is_power_of_2(sizeof(struct page)) &&
	    pgmap && pgmap_vmemmap_nr(pgmap) > 1 && !altmap)
		r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
	else
		r = vmemmap_populate(start, end, nid, altmap);

	if (r < 0)
		return NULL;

	return pfn_to_page(pfn);
}
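/*
 * Worked example (illustrative, x86-64 defaults): a memory section spans
 * PAGES_PER_SECTION == 32768 pfns (128 MB), so populating a full section's
 * memmap covers 32768 * sizeof(struct page) == 2 MB of vmemmap, which is
 * why architectures commonly back it with a single PMD-sized mapping.
 * The alignment checks above also allow populating at subsection
 * granularity: 2 MB of memory, i.e. 512 pfns and 32 KB of memmap.
 */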