/*
 * sparse memory mappings.
 */
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <asm/dma.h>

/*
 * Permanent SPARSEMEM data:
 *
 * 1) mem_section	- memory sections, mem_map's for valid memory
 */
#ifdef CONFIG_SPARSEMEM_EXTREME
/* EXTREME: two-level table; each root is allocated on demand. */
struct mem_section *mem_section[NR_SECTION_ROOTS]
	____cacheline_internodealigned_in_smp;
#else
/* Flat case: one static array holding every section. */
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
	____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);

#ifdef NODE_NOT_IN_PAGE_FLAGS
/*
 * If we did not store the node number in the page then we have to
 * do a lookup in the section_to_node_table in order to find which
 * node the page belongs to.
 */
#if MAX_NUMNODES <= 256
static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#else
static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#endif

/* Look up the NUMA node of @page via its section number. */
int page_to_nid(struct page *page)
{
	return section_to_node_table[page_to_section(page)];
}
EXPORT_SYMBOL(page_to_nid);

/* Record which node owns @section_nr, for later page_to_nid() lookups. */
static void set_section_nid(unsigned long section_nr, int nid)
{
	section_to_node_table[section_nr] = nid;
}
#else /* !NODE_NOT_IN_PAGE_FLAGS */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
}
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
/*
 * Allocate one root's worth of mem_section structures on node @nid.
 * Uses the slab once it is up, otherwise falls back to bootmem
 * (early boot).  Returns a zeroed array, or NULL on failure.
 */
static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
{
	struct mem_section *section = NULL;
	unsigned long array_size = SECTIONS_PER_ROOT *
				   sizeof(struct mem_section);

	if (slab_is_available())
		section = kmalloc_node(array_size, GFP_KERNEL, nid);
	else
		section = alloc_bootmem_node(NODE_DATA(nid), array_size);

	if (section)
		memset(section, 0, array_size);

	return section;
}
/*
 * Populate mem_section[] root for @section_nr, allocating the backing
 * array on node @nid.  Returns 0 on success, -EEXIST if the root was
 * already (or concurrently) populated.
 */
static int __meminit sparse_index_init(unsigned long section_nr, int nid)
{
	static DEFINE_SPINLOCK(index_init_lock);
	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
	struct mem_section *section;
	int ret = 0;

	/* Fast path: somebody already populated this root. */
	if (mem_section[root])
		return -EEXIST;

	section = sparse_index_alloc(nid);
	/*
	 * This lock keeps two different sections from
	 * reallocating for the same index
	 */
	spin_lock(&index_init_lock);

	if (mem_section[root]) {
		/*
		 * NOTE(review): losing the race leaks @section here; it
		 * may come from either bootmem or slab, so freeing is
		 * not straightforward - confirm this is acceptable.
		 */
		ret = -EEXIST;
		goto out;
	}

	mem_section[root] = section;
out:
	spin_unlock(&index_init_lock);
	return ret;
}
#else /* !SPARSEMEM_EXTREME */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
	return 0;
}
#endif
1114ca644d9SDave Hansen */ 1124ca644d9SDave Hansen int __section_nr(struct mem_section* ms) 1134ca644d9SDave Hansen { 1144ca644d9SDave Hansen unsigned long root_nr; 1154ca644d9SDave Hansen struct mem_section* root; 1164ca644d9SDave Hansen 11712783b00SMike Kravetz for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) { 11812783b00SMike Kravetz root = __nr_to_section(root_nr * SECTIONS_PER_ROOT); 1194ca644d9SDave Hansen if (!root) 1204ca644d9SDave Hansen continue; 1214ca644d9SDave Hansen 1224ca644d9SDave Hansen if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT))) 1234ca644d9SDave Hansen break; 1244ca644d9SDave Hansen } 1254ca644d9SDave Hansen 1264ca644d9SDave Hansen return (root_nr * SECTIONS_PER_ROOT) + (ms - root); 1274ca644d9SDave Hansen } 1284ca644d9SDave Hansen 12930c253e6SAndy Whitcroft /* 13030c253e6SAndy Whitcroft * During early boot, before section_mem_map is used for an actual 13130c253e6SAndy Whitcroft * mem_map, we use section_mem_map to store the section's NUMA 13230c253e6SAndy Whitcroft * node. This keeps us from having to use another data structure. The 13330c253e6SAndy Whitcroft * node information is cleared just before we store the real mem_map. 13430c253e6SAndy Whitcroft */ 13530c253e6SAndy Whitcroft static inline unsigned long sparse_encode_early_nid(int nid) 13630c253e6SAndy Whitcroft { 13730c253e6SAndy Whitcroft return (nid << SECTION_NID_SHIFT); 13830c253e6SAndy Whitcroft } 13930c253e6SAndy Whitcroft 14030c253e6SAndy Whitcroft static inline int sparse_early_nid(struct mem_section *section) 14130c253e6SAndy Whitcroft { 14230c253e6SAndy Whitcroft return (section->section_mem_map >> SECTION_NID_SHIFT); 14330c253e6SAndy Whitcroft } 14430c253e6SAndy Whitcroft 145d41dee36SAndy Whitcroft /* Record a memory area against a node. 
*/ 146a3142c8eSYasunori Goto void __init memory_present(int nid, unsigned long start, unsigned long end) 147d41dee36SAndy Whitcroft { 148d41dee36SAndy Whitcroft unsigned long pfn; 149d41dee36SAndy Whitcroft 150d41dee36SAndy Whitcroft start &= PAGE_SECTION_MASK; 151d41dee36SAndy Whitcroft for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { 152d41dee36SAndy Whitcroft unsigned long section = pfn_to_section_nr(pfn); 153802f192eSBob Picco struct mem_section *ms; 154802f192eSBob Picco 155802f192eSBob Picco sparse_index_init(section, nid); 156*85770ffeSAndy Whitcroft set_section_nid(section, nid); 157802f192eSBob Picco 158802f192eSBob Picco ms = __nr_to_section(section); 159802f192eSBob Picco if (!ms->section_mem_map) 16030c253e6SAndy Whitcroft ms->section_mem_map = sparse_encode_early_nid(nid) | 16130c253e6SAndy Whitcroft SECTION_MARKED_PRESENT; 162d41dee36SAndy Whitcroft } 163d41dee36SAndy Whitcroft } 164d41dee36SAndy Whitcroft 165d41dee36SAndy Whitcroft /* 166d41dee36SAndy Whitcroft * Only used by the i386 NUMA architecures, but relatively 167d41dee36SAndy Whitcroft * generic code. 
168d41dee36SAndy Whitcroft */ 169d41dee36SAndy Whitcroft unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn, 170d41dee36SAndy Whitcroft unsigned long end_pfn) 171d41dee36SAndy Whitcroft { 172d41dee36SAndy Whitcroft unsigned long pfn; 173d41dee36SAndy Whitcroft unsigned long nr_pages = 0; 174d41dee36SAndy Whitcroft 175d41dee36SAndy Whitcroft for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 176d41dee36SAndy Whitcroft if (nid != early_pfn_to_nid(pfn)) 177d41dee36SAndy Whitcroft continue; 178d41dee36SAndy Whitcroft 179d41dee36SAndy Whitcroft if (pfn_valid(pfn)) 180d41dee36SAndy Whitcroft nr_pages += PAGES_PER_SECTION; 181d41dee36SAndy Whitcroft } 182d41dee36SAndy Whitcroft 183d41dee36SAndy Whitcroft return nr_pages * sizeof(struct page); 184d41dee36SAndy Whitcroft } 185d41dee36SAndy Whitcroft 186d41dee36SAndy Whitcroft /* 18729751f69SAndy Whitcroft * Subtle, we encode the real pfn into the mem_map such that 18829751f69SAndy Whitcroft * the identity pfn - section_mem_map will return the actual 18929751f69SAndy Whitcroft * physical page frame number. 19029751f69SAndy Whitcroft */ 19129751f69SAndy Whitcroft static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum) 19229751f69SAndy Whitcroft { 19329751f69SAndy Whitcroft return (unsigned long)(mem_map - (section_nr_to_pfn(pnum))); 19429751f69SAndy Whitcroft } 19529751f69SAndy Whitcroft 19629751f69SAndy Whitcroft /* 19729751f69SAndy Whitcroft * We need this if we ever free the mem_maps. While not implemented yet, 19829751f69SAndy Whitcroft * this function is included for parity with its sibling. 
/*
 * We need this if we ever free the mem_maps.  While not implemented yet,
 * this function is included for parity with its sibling.
 */
static __attribute((unused))
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}

/*
 * Install @mem_map for section @pnum.  Returns 1 on success, -EINVAL if
 * the section was never marked present.  The early-boot nid encoding is
 * cleared before the real (encoded) mem_map pointer is stored.
 */
static int __meminit sparse_init_one_section(struct mem_section *ms,
		unsigned long pnum, struct page *mem_map)
{
	if (!valid_section(ms))
		return -EINVAL;

	ms->section_mem_map &= ~SECTION_MAP_MASK;
	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum);

	return 1;
}

/*
 * Weak default; architectures that need to place mem_map in highmem
 * override this.
 */
__attribute__((weak)) __init
void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
{
	return NULL;
}

/*
 * Allocate the boot-time mem_map for section @pnum on its early node:
 * try alloc_remap(), then the high-node hook, then plain bootmem.  On
 * total failure the section is un-marked (section_mem_map = 0) and
 * NULL is returned.
 */
static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
{
	struct page *map;
	struct mem_section *ms = __nr_to_section(pnum);
	int nid = sparse_early_nid(ms);

	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
	if (map)
		return map;

	map = alloc_bootmem_high_node(NODE_DATA(nid),
		       sizeof(struct page) * PAGES_PER_SECTION);
	if (map)
		return map;

	map = alloc_bootmem_node(NODE_DATA(nid),
			sizeof(struct page) * PAGES_PER_SECTION);
	if (map)
		return map;

	printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
	ms->section_mem_map = 0;
	return NULL;
}

/*
 * Allocate the accumulated non-linear sections, allocate a mem_map
 * for each and record the physical to section mapping.
 */
void __init sparse_init(void)
{
	unsigned long pnum;
	struct page *map;

	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
		if (!valid_section_nr(pnum))
			continue;

		map = sparse_early_mem_map_alloc(pnum);
		if (!map)
			continue;
		sparse_init_one_section(__nr_to_section(pnum), pnum, map);
	}
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Hot-add path: allocate a zeroed mem_map for @nr_pages pages,
 * preferring physically contiguous pages and falling back to vmalloc.
 * Returns NULL if both allocators fail.
 */
static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
{
	struct page *page, *ret;
	unsigned long memmap_size = sizeof(struct page) * nr_pages;

	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
	if (page)
		goto got_map_page;

	ret = vmalloc(memmap_size);
	if (ret)
		goto got_map_ptr;

	return NULL;
got_map_page:
	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
got_map_ptr:
	memset(ret, 0, memmap_size);

	return ret;
}

/* Was @addr handed out by vmalloc (as opposed to alloc_pages)? */
static int vaddr_in_vmalloc_area(void *addr)
{
	if (addr >= (void *)VMALLOC_START &&
	    addr < (void *)VMALLOC_END)
		return 1;
	return 0;
}

/* Free a mem_map obtained from __kmalloc_section_memmap(). */
static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
{
	if (vaddr_in_vmalloc_area(memmap))
		vfree(memmap);
	else
		free_pages((unsigned long)memmap,
			   get_order(sizeof(struct page) * nr_pages));
}
313d41dee36SAndy Whitcroft */ 3140b0acbecSDave Hansen int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, 3150b0acbecSDave Hansen int nr_pages) 31629751f69SAndy Whitcroft { 3170b0acbecSDave Hansen unsigned long section_nr = pfn_to_section_nr(start_pfn); 3180b0acbecSDave Hansen struct pglist_data *pgdat = zone->zone_pgdat; 3190b0acbecSDave Hansen struct mem_section *ms; 3200b0acbecSDave Hansen struct page *memmap; 3210b0acbecSDave Hansen unsigned long flags; 3220b0acbecSDave Hansen int ret; 32329751f69SAndy Whitcroft 3240b0acbecSDave Hansen /* 3250b0acbecSDave Hansen * no locking for this, because it does its own 3260b0acbecSDave Hansen * plus, it does a kmalloc 3270b0acbecSDave Hansen */ 3280b0acbecSDave Hansen sparse_index_init(section_nr, pgdat->node_id); 3290b0acbecSDave Hansen memmap = __kmalloc_section_memmap(nr_pages); 33029751f69SAndy Whitcroft 3310b0acbecSDave Hansen pgdat_resize_lock(pgdat, &flags); 3320b0acbecSDave Hansen 3330b0acbecSDave Hansen ms = __pfn_to_section(start_pfn); 3340b0acbecSDave Hansen if (ms->section_mem_map & SECTION_MARKED_PRESENT) { 3350b0acbecSDave Hansen ret = -EEXIST; 3360b0acbecSDave Hansen goto out; 3370b0acbecSDave Hansen } 33829751f69SAndy Whitcroft ms->section_mem_map |= SECTION_MARKED_PRESENT; 33929751f69SAndy Whitcroft 3400b0acbecSDave Hansen ret = sparse_init_one_section(ms, section_nr, memmap); 3410b0acbecSDave Hansen 3420b0acbecSDave Hansen out: 3430b0acbecSDave Hansen pgdat_resize_unlock(pgdat, &flags); 34446a66eecSMike Kravetz if (ret <= 0) 34546a66eecSMike Kravetz __kfree_section_memmap(memmap, nr_pages); 3460b0acbecSDave Hansen return ret; 347d41dee36SAndy Whitcroft } 348a3142c8eSYasunori Goto #endif 349