1d41dee36SAndy Whitcroft /* 2d41dee36SAndy Whitcroft * sparse memory mappings. 3d41dee36SAndy Whitcroft */ 4d41dee36SAndy Whitcroft #include <linux/config.h> 5d41dee36SAndy Whitcroft #include <linux/mm.h> 6d41dee36SAndy Whitcroft #include <linux/mmzone.h> 7d41dee36SAndy Whitcroft #include <linux/bootmem.h> 8*0b0acbecSDave Hansen #include <linux/highmem.h> 9d41dee36SAndy Whitcroft #include <linux/module.h> 1028ae55c9SDave Hansen #include <linux/spinlock.h> 11*0b0acbecSDave Hansen #include <linux/vmalloc.h> 12d41dee36SAndy Whitcroft #include <asm/dma.h> 13d41dee36SAndy Whitcroft 14d41dee36SAndy Whitcroft /* 15d41dee36SAndy Whitcroft * Permanent SPARSEMEM data: 16d41dee36SAndy Whitcroft * 17d41dee36SAndy Whitcroft * 1) mem_section - memory sections, mem_map's for valid memory 18d41dee36SAndy Whitcroft */ 193e347261SBob Picco #ifdef CONFIG_SPARSEMEM_EXTREME 20802f192eSBob Picco struct mem_section *mem_section[NR_SECTION_ROOTS] 21802f192eSBob Picco ____cacheline_maxaligned_in_smp; 223e347261SBob Picco #else 233e347261SBob Picco struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] 243e347261SBob Picco ____cacheline_maxaligned_in_smp; 253e347261SBob Picco #endif 263e347261SBob Picco EXPORT_SYMBOL(mem_section); 273e347261SBob Picco 283e347261SBob Picco #ifdef CONFIG_SPARSEMEM_EXTREME 2928ae55c9SDave Hansen static struct mem_section *sparse_index_alloc(int nid) 30802f192eSBob Picco { 3128ae55c9SDave Hansen struct mem_section *section = NULL; 3228ae55c9SDave Hansen unsigned long array_size = SECTIONS_PER_ROOT * 3328ae55c9SDave Hansen sizeof(struct mem_section); 34802f192eSBob Picco 3528ae55c9SDave Hansen section = alloc_bootmem_node(NODE_DATA(nid), array_size); 363e347261SBob Picco 3728ae55c9SDave Hansen if (section) 3828ae55c9SDave Hansen memset(section, 0, array_size); 393e347261SBob Picco 4028ae55c9SDave Hansen return section; 41802f192eSBob Picco } 4228ae55c9SDave Hansen 4328ae55c9SDave Hansen static int sparse_index_init(unsigned long section_nr, int nid) 4428ae55c9SDave Hansen { 4528ae55c9SDave Hansen static spinlock_t index_init_lock = SPIN_LOCK_UNLOCKED; 4628ae55c9SDave Hansen unsigned long root = SECTION_NR_TO_ROOT(section_nr); 4728ae55c9SDave Hansen struct mem_section *section; 4828ae55c9SDave Hansen int ret = 0; 4928ae55c9SDave Hansen 5028ae55c9SDave Hansen if (mem_section[root]) 5128ae55c9SDave Hansen return -EEXIST; 5228ae55c9SDave Hansen 5328ae55c9SDave Hansen section = sparse_index_alloc(nid); 5428ae55c9SDave Hansen /* 5528ae55c9SDave Hansen * This lock keeps two different sections from 5628ae55c9SDave Hansen * reallocating for the same index 5728ae55c9SDave Hansen */ 5828ae55c9SDave Hansen spin_lock(&index_init_lock); 5928ae55c9SDave Hansen 6028ae55c9SDave Hansen if (mem_section[root]) { 6128ae55c9SDave Hansen ret = -EEXIST; 6228ae55c9SDave Hansen goto out; 6328ae55c9SDave Hansen } 6428ae55c9SDave Hansen 6528ae55c9SDave Hansen mem_section[root] = section; 6628ae55c9SDave Hansen out: 6728ae55c9SDave Hansen spin_unlock(&index_init_lock); 6828ae55c9SDave Hansen return ret; 6928ae55c9SDave Hansen } 7028ae55c9SDave Hansen #else /* !SPARSEMEM_EXTREME */ 7128ae55c9SDave Hansen static inline int sparse_index_init(unsigned long section_nr, int nid) 7228ae55c9SDave Hansen { 7328ae55c9SDave Hansen return 0; 7428ae55c9SDave Hansen } 7528ae55c9SDave Hansen #endif 7628ae55c9SDave Hansen 774ca644d9SDave Hansen /* 784ca644d9SDave Hansen * Although written for the SPARSEMEM_EXTREME case, this happens 794ca644d9SDave Hansen * to also work for the flat array case becase 804ca644d9SDave Hansen * NR_SECTION_ROOTS==NR_MEM_SECTIONS. 814ca644d9SDave Hansen */ 824ca644d9SDave Hansen int __section_nr(struct mem_section* ms) 834ca644d9SDave Hansen { 844ca644d9SDave Hansen unsigned long root_nr; 854ca644d9SDave Hansen struct mem_section* root; 864ca644d9SDave Hansen 874ca644d9SDave Hansen for (root_nr = 0; 884ca644d9SDave Hansen root_nr < NR_MEM_SECTIONS; 894ca644d9SDave Hansen root_nr += SECTIONS_PER_ROOT) { 904ca644d9SDave Hansen root = __nr_to_section(root_nr); 914ca644d9SDave Hansen 924ca644d9SDave Hansen if (!root) 934ca644d9SDave Hansen continue; 944ca644d9SDave Hansen 954ca644d9SDave Hansen if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT))) 964ca644d9SDave Hansen break; 974ca644d9SDave Hansen } 984ca644d9SDave Hansen 994ca644d9SDave Hansen return (root_nr * SECTIONS_PER_ROOT) + (ms - root); 1004ca644d9SDave Hansen } 1014ca644d9SDave Hansen 102d41dee36SAndy Whitcroft /* Record a memory area against a node. */ 103d41dee36SAndy Whitcroft void memory_present(int nid, unsigned long start, unsigned long end) 104d41dee36SAndy Whitcroft { 105d41dee36SAndy Whitcroft unsigned long pfn; 106d41dee36SAndy Whitcroft 107d41dee36SAndy Whitcroft start &= PAGE_SECTION_MASK; 108d41dee36SAndy Whitcroft for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { 109d41dee36SAndy Whitcroft unsigned long section = pfn_to_section_nr(pfn); 110802f192eSBob Picco struct mem_section *ms; 111802f192eSBob Picco 112802f192eSBob Picco sparse_index_init(section, nid); 113802f192eSBob Picco 114802f192eSBob Picco ms = __nr_to_section(section); 115802f192eSBob Picco if (!ms->section_mem_map) 116802f192eSBob Picco ms->section_mem_map = SECTION_MARKED_PRESENT; 117d41dee36SAndy Whitcroft } 118d41dee36SAndy Whitcroft } 119d41dee36SAndy Whitcroft 120d41dee36SAndy Whitcroft /* 121d41dee36SAndy Whitcroft * Only used by the i386 NUMA architecures, but relatively 122d41dee36SAndy Whitcroft * generic code. 123d41dee36SAndy Whitcroft */ 124d41dee36SAndy Whitcroft unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn, 125d41dee36SAndy Whitcroft unsigned long end_pfn) 126d41dee36SAndy Whitcroft { 127d41dee36SAndy Whitcroft unsigned long pfn; 128d41dee36SAndy Whitcroft unsigned long nr_pages = 0; 129d41dee36SAndy Whitcroft 130d41dee36SAndy Whitcroft for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 131d41dee36SAndy Whitcroft if (nid != early_pfn_to_nid(pfn)) 132d41dee36SAndy Whitcroft continue; 133d41dee36SAndy Whitcroft 134d41dee36SAndy Whitcroft if (pfn_valid(pfn)) 135d41dee36SAndy Whitcroft nr_pages += PAGES_PER_SECTION; 136d41dee36SAndy Whitcroft } 137d41dee36SAndy Whitcroft 138d41dee36SAndy Whitcroft return nr_pages * sizeof(struct page); 139d41dee36SAndy Whitcroft } 140d41dee36SAndy Whitcroft 141d41dee36SAndy Whitcroft /* 14229751f69SAndy Whitcroft * Subtle, we encode the real pfn into the mem_map such that 14329751f69SAndy Whitcroft * the identity pfn - section_mem_map will return the actual 14429751f69SAndy Whitcroft * physical page frame number. 14529751f69SAndy Whitcroft */ 14629751f69SAndy Whitcroft static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum) 14729751f69SAndy Whitcroft { 14829751f69SAndy Whitcroft return (unsigned long)(mem_map - (section_nr_to_pfn(pnum))); 14929751f69SAndy Whitcroft } 15029751f69SAndy Whitcroft 15129751f69SAndy Whitcroft /* 15229751f69SAndy Whitcroft * We need this if we ever free the mem_maps. While not implemented yet, 15329751f69SAndy Whitcroft * this function is included for parity with its sibling. 15429751f69SAndy Whitcroft */ 15529751f69SAndy Whitcroft static __attribute((unused)) 15629751f69SAndy Whitcroft struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum) 15729751f69SAndy Whitcroft { 15829751f69SAndy Whitcroft return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum); 15929751f69SAndy Whitcroft } 16029751f69SAndy Whitcroft 16129751f69SAndy Whitcroft static int sparse_init_one_section(struct mem_section *ms, 16229751f69SAndy Whitcroft unsigned long pnum, struct page *mem_map) 16329751f69SAndy Whitcroft { 16429751f69SAndy Whitcroft if (!valid_section(ms)) 16529751f69SAndy Whitcroft return -EINVAL; 16629751f69SAndy Whitcroft 16729751f69SAndy Whitcroft ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum); 16829751f69SAndy Whitcroft 16929751f69SAndy Whitcroft return 1; 17029751f69SAndy Whitcroft } 17129751f69SAndy Whitcroft 17229751f69SAndy Whitcroft static struct page *sparse_early_mem_map_alloc(unsigned long pnum) 17329751f69SAndy Whitcroft { 17429751f69SAndy Whitcroft struct page *map; 17529751f69SAndy Whitcroft int nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); 176802f192eSBob Picco struct mem_section *ms = __nr_to_section(pnum); 17729751f69SAndy Whitcroft 17829751f69SAndy Whitcroft map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); 17929751f69SAndy Whitcroft if (map) 18029751f69SAndy Whitcroft return map; 18129751f69SAndy Whitcroft 18229751f69SAndy Whitcroft map = alloc_bootmem_node(NODE_DATA(nid), 18329751f69SAndy Whitcroft sizeof(struct page) * PAGES_PER_SECTION); 18429751f69SAndy Whitcroft if (map) 18529751f69SAndy Whitcroft return map; 18629751f69SAndy Whitcroft 18729751f69SAndy Whitcroft printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__); 188802f192eSBob Picco ms->section_mem_map = 0; 18929751f69SAndy Whitcroft return NULL; 19029751f69SAndy Whitcroft } 19129751f69SAndy Whitcroft 192*0b0acbecSDave Hansen static struct page *__kmalloc_section_memmap(unsigned long nr_pages) 193*0b0acbecSDave Hansen { 194*0b0acbecSDave Hansen struct page *page, *ret; 195*0b0acbecSDave Hansen unsigned long memmap_size = sizeof(struct page) * nr_pages; 196*0b0acbecSDave Hansen 197*0b0acbecSDave Hansen page = alloc_pages(GFP_KERNEL, get_order(memmap_size)); 198*0b0acbecSDave Hansen if (page) 199*0b0acbecSDave Hansen goto got_map_page; 200*0b0acbecSDave Hansen 201*0b0acbecSDave Hansen ret = vmalloc(memmap_size); 202*0b0acbecSDave Hansen if (ret) 203*0b0acbecSDave Hansen goto got_map_ptr; 204*0b0acbecSDave Hansen 205*0b0acbecSDave Hansen return NULL; 206*0b0acbecSDave Hansen got_map_page: 207*0b0acbecSDave Hansen ret = (struct page *)pfn_to_kaddr(page_to_pfn(page)); 208*0b0acbecSDave Hansen got_map_ptr: 209*0b0acbecSDave Hansen memset(ret, 0, memmap_size); 210*0b0acbecSDave Hansen 211*0b0acbecSDave Hansen return ret; 212*0b0acbecSDave Hansen } 213*0b0acbecSDave Hansen 214*0b0acbecSDave Hansen static int vaddr_in_vmalloc_area(void *addr) 215*0b0acbecSDave Hansen { 216*0b0acbecSDave Hansen if (addr >= (void *)VMALLOC_START && 217*0b0acbecSDave Hansen addr < (void *)VMALLOC_END) 218*0b0acbecSDave Hansen return 1; 219*0b0acbecSDave Hansen return 0; 220*0b0acbecSDave Hansen } 221*0b0acbecSDave Hansen 222*0b0acbecSDave Hansen static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) 223*0b0acbecSDave Hansen { 224*0b0acbecSDave Hansen if (vaddr_in_vmalloc_area(memmap)) 225*0b0acbecSDave Hansen vfree(memmap); 226*0b0acbecSDave Hansen else 227*0b0acbecSDave Hansen free_pages((unsigned long)memmap, 228*0b0acbecSDave Hansen get_order(sizeof(struct page) * nr_pages)); 229*0b0acbecSDave Hansen } 230*0b0acbecSDave Hansen 23129751f69SAndy Whitcroft /* 232d41dee36SAndy Whitcroft * Allocate the accumulated non-linear sections, allocate a mem_map 233d41dee36SAndy Whitcroft * for each and record the physical to section mapping. 234d41dee36SAndy Whitcroft */ 235d41dee36SAndy Whitcroft void sparse_init(void) 236d41dee36SAndy Whitcroft { 237d41dee36SAndy Whitcroft unsigned long pnum; 238d41dee36SAndy Whitcroft struct page *map; 239d41dee36SAndy Whitcroft 240d41dee36SAndy Whitcroft for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { 24129751f69SAndy Whitcroft if (!valid_section_nr(pnum)) 242d41dee36SAndy Whitcroft continue; 243d41dee36SAndy Whitcroft 24429751f69SAndy Whitcroft map = sparse_early_mem_map_alloc(pnum); 245802f192eSBob Picco if (!map) 246802f192eSBob Picco continue; 247802f192eSBob Picco sparse_init_one_section(__nr_to_section(pnum), pnum, map); 24829751f69SAndy Whitcroft } 249d41dee36SAndy Whitcroft } 250d41dee36SAndy Whitcroft 251d41dee36SAndy Whitcroft /* 25229751f69SAndy Whitcroft * returns the number of sections whose mem_maps were properly 25329751f69SAndy Whitcroft * set. If this is <=0, then that means that the passed-in 25429751f69SAndy Whitcroft * map was not consumed and must be freed. 255d41dee36SAndy Whitcroft */ 256*0b0acbecSDave Hansen int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, 257*0b0acbecSDave Hansen int nr_pages) 25829751f69SAndy Whitcroft { 259*0b0acbecSDave Hansen unsigned long section_nr = pfn_to_section_nr(start_pfn); 260*0b0acbecSDave Hansen struct pglist_data *pgdat = zone->zone_pgdat; 261*0b0acbecSDave Hansen struct mem_section *ms; 262*0b0acbecSDave Hansen struct page *memmap; 263*0b0acbecSDave Hansen unsigned long flags; 264*0b0acbecSDave Hansen int ret; 26529751f69SAndy Whitcroft 266*0b0acbecSDave Hansen /* 267*0b0acbecSDave Hansen * no locking for this, because it does its own 268*0b0acbecSDave Hansen * plus, it does a kmalloc 269*0b0acbecSDave Hansen */ 270*0b0acbecSDave Hansen sparse_index_init(section_nr, pgdat->node_id); 271*0b0acbecSDave Hansen memmap = __kmalloc_section_memmap(nr_pages); 27229751f69SAndy Whitcroft 273*0b0acbecSDave Hansen pgdat_resize_lock(pgdat, &flags); 274*0b0acbecSDave Hansen 275*0b0acbecSDave Hansen ms = __pfn_to_section(start_pfn); 276*0b0acbecSDave Hansen if (ms->section_mem_map & SECTION_MARKED_PRESENT) { 277*0b0acbecSDave Hansen ret = -EEXIST; 278*0b0acbecSDave Hansen goto out; 279*0b0acbecSDave Hansen } 28029751f69SAndy Whitcroft ms->section_mem_map |= SECTION_MARKED_PRESENT; 28129751f69SAndy Whitcroft 282*0b0acbecSDave Hansen ret = sparse_init_one_section(ms, section_nr, memmap); 283*0b0acbecSDave Hansen 284*0b0acbecSDave Hansen if (ret <= 0) 285*0b0acbecSDave Hansen __kfree_section_memmap(memmap, nr_pages); 286*0b0acbecSDave Hansen out: 287*0b0acbecSDave Hansen pgdat_resize_unlock(pgdat, &flags); 288*0b0acbecSDave Hansen return ret; 289d41dee36SAndy Whitcroft } 290