18f6aac41SChristoph Lameter /* 28f6aac41SChristoph Lameter * Virtual Memory Map support 38f6aac41SChristoph Lameter * 4cde53535SChristoph Lameter * (C) 2007 sgi. Christoph Lameter. 58f6aac41SChristoph Lameter * 68f6aac41SChristoph Lameter * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn, 78f6aac41SChristoph Lameter * virt_to_page, page_address() to be implemented as a base offset 88f6aac41SChristoph Lameter * calculation without memory access. 98f6aac41SChristoph Lameter * 108f6aac41SChristoph Lameter * However, virtual mappings need a page table and TLBs. Many Linux 118f6aac41SChristoph Lameter * architectures already map their physical space using 1-1 mappings 128f6aac41SChristoph Lameter * via TLBs. For those arches the virtual memmory map is essentially 138f6aac41SChristoph Lameter * for free if we use the same page size as the 1-1 mappings. In that 148f6aac41SChristoph Lameter * case the overhead consists of a few additional pages that are 158f6aac41SChristoph Lameter * allocated to create a view of memory for vmemmap. 168f6aac41SChristoph Lameter * 1729c71111SAndy Whitcroft * The architecture is expected to provide a vmemmap_populate() function 1829c71111SAndy Whitcroft * to instantiate the mapping. 198f6aac41SChristoph Lameter */ 208f6aac41SChristoph Lameter #include <linux/mm.h> 218f6aac41SChristoph Lameter #include <linux/mmzone.h> 228f6aac41SChristoph Lameter #include <linux/bootmem.h> 238f6aac41SChristoph Lameter #include <linux/highmem.h> 248f6aac41SChristoph Lameter #include <linux/module.h> 25*5a0e3ad6STejun Heo #include <linux/slab.h> 268f6aac41SChristoph Lameter #include <linux/spinlock.h> 278f6aac41SChristoph Lameter #include <linux/vmalloc.h> 288bca44bbSGlauber de Oliveira Costa #include <linux/sched.h> 298f6aac41SChristoph Lameter #include <asm/dma.h> 308f6aac41SChristoph Lameter #include <asm/pgalloc.h> 318f6aac41SChristoph Lameter #include <asm/pgtable.h> 328f6aac41SChristoph Lameter 338f6aac41SChristoph Lameter /* 348f6aac41SChristoph Lameter * Allocate a block of memory to be used to back the virtual memory map 358f6aac41SChristoph Lameter * or to back the page tables that are used to create the mapping. 368f6aac41SChristoph Lameter * Uses the main allocators if they are available, else bootmem. 378f6aac41SChristoph Lameter */ 38e0dc3a53SKAMEZAWA Hiroyuki 39e0dc3a53SKAMEZAWA Hiroyuki static void * __init_refok __earlyonly_bootmem_alloc(int node, 40e0dc3a53SKAMEZAWA Hiroyuki unsigned long size, 41e0dc3a53SKAMEZAWA Hiroyuki unsigned long align, 42e0dc3a53SKAMEZAWA Hiroyuki unsigned long goal) 43e0dc3a53SKAMEZAWA Hiroyuki { 4408677214SYinghai Lu return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal); 45e0dc3a53SKAMEZAWA Hiroyuki } 46e0dc3a53SKAMEZAWA Hiroyuki 479bdac914SYinghai Lu static void *vmemmap_buf; 489bdac914SYinghai Lu static void *vmemmap_buf_end; 49e0dc3a53SKAMEZAWA Hiroyuki 508f6aac41SChristoph Lameter void * __meminit vmemmap_alloc_block(unsigned long size, int node) 518f6aac41SChristoph Lameter { 528f6aac41SChristoph Lameter /* If the main allocator is up use that, fallback to bootmem. */ 538f6aac41SChristoph Lameter if (slab_is_available()) { 54f52407ceSShaohua Li struct page *page; 55f52407ceSShaohua Li 56f52407ceSShaohua Li if (node_state(node, N_HIGH_MEMORY)) 57f52407ceSShaohua Li page = alloc_pages_node(node, 588f6aac41SChristoph Lameter GFP_KERNEL | __GFP_ZERO, get_order(size)); 59f52407ceSShaohua Li else 60f52407ceSShaohua Li page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 61f52407ceSShaohua Li get_order(size)); 628f6aac41SChristoph Lameter if (page) 638f6aac41SChristoph Lameter return page_address(page); 648f6aac41SChristoph Lameter return NULL; 658f6aac41SChristoph Lameter } else 66e0dc3a53SKAMEZAWA Hiroyuki return __earlyonly_bootmem_alloc(node, size, size, 678f6aac41SChristoph Lameter __pa(MAX_DMA_ADDRESS)); 688f6aac41SChristoph Lameter } 698f6aac41SChristoph Lameter 709bdac914SYinghai Lu /* need to make sure size is all the same during early stage */ 719bdac914SYinghai Lu void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) 729bdac914SYinghai Lu { 739bdac914SYinghai Lu void *ptr; 749bdac914SYinghai Lu 759bdac914SYinghai Lu if (!vmemmap_buf) 769bdac914SYinghai Lu return vmemmap_alloc_block(size, node); 779bdac914SYinghai Lu 789bdac914SYinghai Lu /* take the from buf */ 799bdac914SYinghai Lu ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size); 809bdac914SYinghai Lu if (ptr + size > vmemmap_buf_end) 819bdac914SYinghai Lu return vmemmap_alloc_block(size, node); 829bdac914SYinghai Lu 839bdac914SYinghai Lu vmemmap_buf = ptr + size; 849bdac914SYinghai Lu 859bdac914SYinghai Lu return ptr; 869bdac914SYinghai Lu } 879bdac914SYinghai Lu 888f6aac41SChristoph Lameter void __meminit vmemmap_verify(pte_t *pte, int node, 898f6aac41SChristoph Lameter unsigned long start, unsigned long end) 908f6aac41SChristoph Lameter { 918f6aac41SChristoph Lameter unsigned long pfn = pte_pfn(*pte); 928f6aac41SChristoph Lameter int actual_node = early_pfn_to_nid(pfn); 938f6aac41SChristoph Lameter 94b41ad14cSDavid Rientjes if (node_distance(actual_node, node) > LOCAL_DISTANCE) 958f6aac41SChristoph Lameter printk(KERN_WARNING "[%lx-%lx] potential offnode " 968f6aac41SChristoph Lameter "page_structs\n", start, end - 1); 978f6aac41SChristoph Lameter } 988f6aac41SChristoph Lameter 9929c71111SAndy Whitcroft pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) 1008f6aac41SChristoph Lameter { 10129c71111SAndy Whitcroft pte_t *pte = pte_offset_kernel(pmd, addr); 1028f6aac41SChristoph Lameter if (pte_none(*pte)) { 1038f6aac41SChristoph Lameter pte_t entry; 1049bdac914SYinghai Lu void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); 1058f6aac41SChristoph Lameter if (!p) 1069dce07f1SAl Viro return NULL; 10729c71111SAndy Whitcroft entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); 10829c71111SAndy Whitcroft set_pte_at(&init_mm, addr, pte, entry); 10929c71111SAndy Whitcroft } 11029c71111SAndy Whitcroft return pte; 1118f6aac41SChristoph Lameter } 1128f6aac41SChristoph Lameter 11329c71111SAndy Whitcroft pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) 1148f6aac41SChristoph Lameter { 11529c71111SAndy Whitcroft pmd_t *pmd = pmd_offset(pud, addr); 1168f6aac41SChristoph Lameter if (pmd_none(*pmd)) { 1178f6aac41SChristoph Lameter void *p = vmemmap_alloc_block(PAGE_SIZE, node); 1188f6aac41SChristoph Lameter if (!p) 1199dce07f1SAl Viro return NULL; 1208f6aac41SChristoph Lameter pmd_populate_kernel(&init_mm, pmd, p); 1218f6aac41SChristoph Lameter } 12229c71111SAndy Whitcroft return pmd; 1238f6aac41SChristoph Lameter } 1248f6aac41SChristoph Lameter 12529c71111SAndy Whitcroft pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node) 1268f6aac41SChristoph Lameter { 12729c71111SAndy Whitcroft pud_t *pud = pud_offset(pgd, addr); 1288f6aac41SChristoph Lameter if (pud_none(*pud)) { 1298f6aac41SChristoph Lameter void *p = vmemmap_alloc_block(PAGE_SIZE, node); 1308f6aac41SChristoph Lameter if (!p) 1319dce07f1SAl Viro return NULL; 1328f6aac41SChristoph Lameter pud_populate(&init_mm, pud, p); 1338f6aac41SChristoph Lameter } 13429c71111SAndy Whitcroft return pud; 1358f6aac41SChristoph Lameter } 1368f6aac41SChristoph Lameter 13729c71111SAndy Whitcroft pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) 1388f6aac41SChristoph Lameter { 13929c71111SAndy Whitcroft pgd_t *pgd = pgd_offset_k(addr); 1408f6aac41SChristoph Lameter if (pgd_none(*pgd)) { 1418f6aac41SChristoph Lameter void *p = vmemmap_alloc_block(PAGE_SIZE, node); 1428f6aac41SChristoph Lameter if (!p) 1439dce07f1SAl Viro return NULL; 1448f6aac41SChristoph Lameter pgd_populate(&init_mm, pgd, p); 1458f6aac41SChristoph Lameter } 14629c71111SAndy Whitcroft return pgd; 1478f6aac41SChristoph Lameter } 14829c71111SAndy Whitcroft 14929c71111SAndy Whitcroft int __meminit vmemmap_populate_basepages(struct page *start_page, 15029c71111SAndy Whitcroft unsigned long size, int node) 15129c71111SAndy Whitcroft { 15229c71111SAndy Whitcroft unsigned long addr = (unsigned long)start_page; 15329c71111SAndy Whitcroft unsigned long end = (unsigned long)(start_page + size); 15429c71111SAndy Whitcroft pgd_t *pgd; 15529c71111SAndy Whitcroft pud_t *pud; 15629c71111SAndy Whitcroft pmd_t *pmd; 15729c71111SAndy Whitcroft pte_t *pte; 15829c71111SAndy Whitcroft 15929c71111SAndy Whitcroft for (; addr < end; addr += PAGE_SIZE) { 16029c71111SAndy Whitcroft pgd = vmemmap_pgd_populate(addr, node); 16129c71111SAndy Whitcroft if (!pgd) 16229c71111SAndy Whitcroft return -ENOMEM; 16329c71111SAndy Whitcroft pud = vmemmap_pud_populate(pgd, addr, node); 16429c71111SAndy Whitcroft if (!pud) 16529c71111SAndy Whitcroft return -ENOMEM; 16629c71111SAndy Whitcroft pmd = vmemmap_pmd_populate(pud, addr, node); 16729c71111SAndy Whitcroft if (!pmd) 16829c71111SAndy Whitcroft return -ENOMEM; 16929c71111SAndy Whitcroft pte = vmemmap_pte_populate(pmd, addr, node); 17029c71111SAndy Whitcroft if (!pte) 17129c71111SAndy Whitcroft return -ENOMEM; 17229c71111SAndy Whitcroft vmemmap_verify(pte, node, addr, addr + PAGE_SIZE); 1738f6aac41SChristoph Lameter } 17429c71111SAndy Whitcroft 17529c71111SAndy Whitcroft return 0; 17629c71111SAndy Whitcroft } 1778f6aac41SChristoph Lameter 17898f3cfc1SYasunori Goto struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) 1798f6aac41SChristoph Lameter { 1808f6aac41SChristoph Lameter struct page *map = pfn_to_page(pnum * PAGES_PER_SECTION); 1818f6aac41SChristoph Lameter int error = vmemmap_populate(map, PAGES_PER_SECTION, nid); 1828f6aac41SChristoph Lameter if (error) 1838f6aac41SChristoph Lameter return NULL; 1848f6aac41SChristoph Lameter 1858f6aac41SChristoph Lameter return map; 1868f6aac41SChristoph Lameter } 1879bdac914SYinghai Lu 1889bdac914SYinghai Lu void __init sparse_mem_maps_populate_node(struct page **map_map, 1899bdac914SYinghai Lu unsigned long pnum_begin, 1909bdac914SYinghai Lu unsigned long pnum_end, 1919bdac914SYinghai Lu unsigned long map_count, int nodeid) 1929bdac914SYinghai Lu { 1939bdac914SYinghai Lu unsigned long pnum; 1949bdac914SYinghai Lu unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; 1959bdac914SYinghai Lu void *vmemmap_buf_start; 1969bdac914SYinghai Lu 1979bdac914SYinghai Lu size = ALIGN(size, PMD_SIZE); 1989bdac914SYinghai Lu vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count, 1999bdac914SYinghai Lu PMD_SIZE, __pa(MAX_DMA_ADDRESS)); 2009bdac914SYinghai Lu 2019bdac914SYinghai Lu if (vmemmap_buf_start) { 2029bdac914SYinghai Lu vmemmap_buf = vmemmap_buf_start; 2039bdac914SYinghai Lu vmemmap_buf_end = vmemmap_buf_start + size * map_count; 2049bdac914SYinghai Lu } 2059bdac914SYinghai Lu 2069bdac914SYinghai Lu for (pnum = pnum_begin; pnum < pnum_end; pnum++) { 2079bdac914SYinghai Lu struct mem_section *ms; 2089bdac914SYinghai Lu 2099bdac914SYinghai Lu if (!present_section_nr(pnum)) 2109bdac914SYinghai Lu continue; 2119bdac914SYinghai Lu 2129bdac914SYinghai Lu map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); 2139bdac914SYinghai Lu if (map_map[pnum]) 2149bdac914SYinghai Lu continue; 2159bdac914SYinghai Lu ms = __nr_to_section(pnum); 2169bdac914SYinghai Lu printk(KERN_ERR "%s: sparsemem memory map backing failed " 2179bdac914SYinghai Lu "some memory will not be available.\n", __func__); 2189bdac914SYinghai Lu ms->section_mem_map = 0; 2199bdac914SYinghai Lu } 2209bdac914SYinghai Lu 2219bdac914SYinghai Lu if (vmemmap_buf_start) { 2229bdac914SYinghai Lu /* need to free left buf */ 2239bdac914SYinghai Lu #ifdef CONFIG_NO_BOOTMEM 2249bdac914SYinghai Lu free_early(__pa(vmemmap_buf_start), __pa(vmemmap_buf_end)); 2259bdac914SYinghai Lu if (vmemmap_buf_start < vmemmap_buf) { 2269bdac914SYinghai Lu char name[15]; 2279bdac914SYinghai Lu 2289bdac914SYinghai Lu snprintf(name, sizeof(name), "MEMMAP %d", nodeid); 2299bdac914SYinghai Lu reserve_early_without_check(__pa(vmemmap_buf_start), 2309bdac914SYinghai Lu __pa(vmemmap_buf), name); 2319bdac914SYinghai Lu } 2329bdac914SYinghai Lu #else 2339bdac914SYinghai Lu free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf); 2349bdac914SYinghai Lu #endif 2359bdac914SYinghai Lu vmemmap_buf = NULL; 2369bdac914SYinghai Lu vmemmap_buf_end = NULL; 2379bdac914SYinghai Lu } 2389bdac914SYinghai Lu } 239