xref: /linux/mm/sparse.c (revision 85770ffe4f0cdd4396b17f14762adc25a571a348)
1d41dee36SAndy Whitcroft /*
2d41dee36SAndy Whitcroft  * sparse memory mappings.
3d41dee36SAndy Whitcroft  */
4d41dee36SAndy Whitcroft #include <linux/mm.h>
5d41dee36SAndy Whitcroft #include <linux/mmzone.h>
6d41dee36SAndy Whitcroft #include <linux/bootmem.h>
70b0acbecSDave Hansen #include <linux/highmem.h>
8d41dee36SAndy Whitcroft #include <linux/module.h>
928ae55c9SDave Hansen #include <linux/spinlock.h>
100b0acbecSDave Hansen #include <linux/vmalloc.h>
11d41dee36SAndy Whitcroft #include <asm/dma.h>
12d41dee36SAndy Whitcroft 
13d41dee36SAndy Whitcroft /*
14d41dee36SAndy Whitcroft  * Permanent SPARSEMEM data:
15d41dee36SAndy Whitcroft  *
16d41dee36SAndy Whitcroft  * 1) mem_section	- memory sections, mem_map's for valid memory
17d41dee36SAndy Whitcroft  */
183e347261SBob Picco #ifdef CONFIG_SPARSEMEM_EXTREME
19802f192eSBob Picco struct mem_section *mem_section[NR_SECTION_ROOTS]
2022fc6eccSRavikiran G Thirumalai 	____cacheline_internodealigned_in_smp;
213e347261SBob Picco #else
223e347261SBob Picco struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
2322fc6eccSRavikiran G Thirumalai 	____cacheline_internodealigned_in_smp;
243e347261SBob Picco #endif
253e347261SBob Picco EXPORT_SYMBOL(mem_section);
263e347261SBob Picco 
2789689ae7SChristoph Lameter #ifdef NODE_NOT_IN_PAGE_FLAGS
2889689ae7SChristoph Lameter /*
2989689ae7SChristoph Lameter  * If we did not store the node number in the page then we have to
3089689ae7SChristoph Lameter  * do a lookup in the section_to_node_table in order to find which
3189689ae7SChristoph Lameter  * node the page belongs to.
3289689ae7SChristoph Lameter  */
3389689ae7SChristoph Lameter #if MAX_NUMNODES <= 256
3489689ae7SChristoph Lameter static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
3589689ae7SChristoph Lameter #else
3689689ae7SChristoph Lameter static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
3789689ae7SChristoph Lameter #endif
3889689ae7SChristoph Lameter 
3925ba77c1SAndy Whitcroft int page_to_nid(struct page *page)
4089689ae7SChristoph Lameter {
4189689ae7SChristoph Lameter 	return section_to_node_table[page_to_section(page)];
4289689ae7SChristoph Lameter }
4389689ae7SChristoph Lameter EXPORT_SYMBOL(page_to_nid);
44*85770ffeSAndy Whitcroft 
45*85770ffeSAndy Whitcroft static void set_section_nid(unsigned long section_nr, int nid)
46*85770ffeSAndy Whitcroft {
47*85770ffeSAndy Whitcroft 	section_to_node_table[section_nr] = nid;
48*85770ffeSAndy Whitcroft }
49*85770ffeSAndy Whitcroft #else /* !NODE_NOT_IN_PAGE_FLAGS */
/*
 * set_section_nid - no-op variant used when NODE_NOT_IN_PAGE_FLAGS is not
 * defined, i.e. when no section_to_node_table exists in this file.
 */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
	/* nothing to record */
}
5389689ae7SChristoph Lameter #endif
5489689ae7SChristoph Lameter 
553e347261SBob Picco #ifdef CONFIG_SPARSEMEM_EXTREME
56577a32f6SSam Ravnborg static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
57802f192eSBob Picco {
5828ae55c9SDave Hansen 	struct mem_section *section = NULL;
5928ae55c9SDave Hansen 	unsigned long array_size = SECTIONS_PER_ROOT *
6028ae55c9SDave Hansen 				   sizeof(struct mem_section);
61802f192eSBob Picco 
6239d24e64SMike Kravetz 	if (slab_is_available())
6346a66eecSMike Kravetz 		section = kmalloc_node(array_size, GFP_KERNEL, nid);
6446a66eecSMike Kravetz 	else
6528ae55c9SDave Hansen 		section = alloc_bootmem_node(NODE_DATA(nid), array_size);
663e347261SBob Picco 
6728ae55c9SDave Hansen 	if (section)
6828ae55c9SDave Hansen 		memset(section, 0, array_size);
693e347261SBob Picco 
7028ae55c9SDave Hansen 	return section;
71802f192eSBob Picco }
7228ae55c9SDave Hansen 
73a3142c8eSYasunori Goto static int __meminit sparse_index_init(unsigned long section_nr, int nid)
7428ae55c9SDave Hansen {
7534af946aSIngo Molnar 	static DEFINE_SPINLOCK(index_init_lock);
7628ae55c9SDave Hansen 	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
7728ae55c9SDave Hansen 	struct mem_section *section;
7828ae55c9SDave Hansen 	int ret = 0;
7928ae55c9SDave Hansen 
8028ae55c9SDave Hansen 	if (mem_section[root])
8128ae55c9SDave Hansen 		return -EEXIST;
8228ae55c9SDave Hansen 
8328ae55c9SDave Hansen 	section = sparse_index_alloc(nid);
8428ae55c9SDave Hansen 	/*
8528ae55c9SDave Hansen 	 * This lock keeps two different sections from
8628ae55c9SDave Hansen 	 * reallocating for the same index
8728ae55c9SDave Hansen 	 */
8828ae55c9SDave Hansen 	spin_lock(&index_init_lock);
8928ae55c9SDave Hansen 
9028ae55c9SDave Hansen 	if (mem_section[root]) {
9128ae55c9SDave Hansen 		ret = -EEXIST;
9228ae55c9SDave Hansen 		goto out;
9328ae55c9SDave Hansen 	}
9428ae55c9SDave Hansen 
9528ae55c9SDave Hansen 	mem_section[root] = section;
9628ae55c9SDave Hansen out:
9728ae55c9SDave Hansen 	spin_unlock(&index_init_lock);
9828ae55c9SDave Hansen 	return ret;
9928ae55c9SDave Hansen }
10028ae55c9SDave Hansen #else /* !SPARSEMEM_EXTREME */
/*
 * sparse_index_init - variant for builds without CONFIG_SPARSEMEM_EXTREME.
 * mem_section is a statically sized two-dimensional array there, so no
 * root has to be allocated; always reports success (0).
 */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
	/* static mem_section[][]: nothing to set up */
	return 0;
}
10528ae55c9SDave Hansen #endif
10628ae55c9SDave Hansen 
1074ca644d9SDave Hansen /*
1084ca644d9SDave Hansen  * Although written for the SPARSEMEM_EXTREME case, this happens
1094ca644d9SDave Hansen  * to also work for the flat array case becase
1104ca644d9SDave Hansen  * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
1114ca644d9SDave Hansen  */
1124ca644d9SDave Hansen int __section_nr(struct mem_section* ms)
1134ca644d9SDave Hansen {
1144ca644d9SDave Hansen 	unsigned long root_nr;
1154ca644d9SDave Hansen 	struct mem_section* root;
1164ca644d9SDave Hansen 
11712783b00SMike Kravetz 	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
11812783b00SMike Kravetz 		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
1194ca644d9SDave Hansen 		if (!root)
1204ca644d9SDave Hansen 			continue;
1214ca644d9SDave Hansen 
1224ca644d9SDave Hansen 		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
1234ca644d9SDave Hansen 		     break;
1244ca644d9SDave Hansen 	}
1254ca644d9SDave Hansen 
1264ca644d9SDave Hansen 	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
1274ca644d9SDave Hansen }
1284ca644d9SDave Hansen 
12930c253e6SAndy Whitcroft /*
13030c253e6SAndy Whitcroft  * During early boot, before section_mem_map is used for an actual
13130c253e6SAndy Whitcroft  * mem_map, we use section_mem_map to store the section's NUMA
13230c253e6SAndy Whitcroft  * node.  This keeps us from having to use another data structure.  The
13330c253e6SAndy Whitcroft  * node information is cleared just before we store the real mem_map.
13430c253e6SAndy Whitcroft  */
13530c253e6SAndy Whitcroft static inline unsigned long sparse_encode_early_nid(int nid)
13630c253e6SAndy Whitcroft {
13730c253e6SAndy Whitcroft 	return (nid << SECTION_NID_SHIFT);
13830c253e6SAndy Whitcroft }
13930c253e6SAndy Whitcroft 
14030c253e6SAndy Whitcroft static inline int sparse_early_nid(struct mem_section *section)
14130c253e6SAndy Whitcroft {
14230c253e6SAndy Whitcroft 	return (section->section_mem_map >> SECTION_NID_SHIFT);
14330c253e6SAndy Whitcroft }
14430c253e6SAndy Whitcroft 
145d41dee36SAndy Whitcroft /* Record a memory area against a node. */
146a3142c8eSYasunori Goto void __init memory_present(int nid, unsigned long start, unsigned long end)
147d41dee36SAndy Whitcroft {
148d41dee36SAndy Whitcroft 	unsigned long pfn;
149d41dee36SAndy Whitcroft 
150d41dee36SAndy Whitcroft 	start &= PAGE_SECTION_MASK;
151d41dee36SAndy Whitcroft 	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
152d41dee36SAndy Whitcroft 		unsigned long section = pfn_to_section_nr(pfn);
153802f192eSBob Picco 		struct mem_section *ms;
154802f192eSBob Picco 
155802f192eSBob Picco 		sparse_index_init(section, nid);
156*85770ffeSAndy Whitcroft 		set_section_nid(section, nid);
157802f192eSBob Picco 
158802f192eSBob Picco 		ms = __nr_to_section(section);
159802f192eSBob Picco 		if (!ms->section_mem_map)
16030c253e6SAndy Whitcroft 			ms->section_mem_map = sparse_encode_early_nid(nid) |
16130c253e6SAndy Whitcroft 							SECTION_MARKED_PRESENT;
162d41dee36SAndy Whitcroft 	}
163d41dee36SAndy Whitcroft }
164d41dee36SAndy Whitcroft 
165d41dee36SAndy Whitcroft /*
166d41dee36SAndy Whitcroft  * Only used by the i386 NUMA architecures, but relatively
167d41dee36SAndy Whitcroft  * generic code.
168d41dee36SAndy Whitcroft  */
169d41dee36SAndy Whitcroft unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
170d41dee36SAndy Whitcroft 						     unsigned long end_pfn)
171d41dee36SAndy Whitcroft {
172d41dee36SAndy Whitcroft 	unsigned long pfn;
173d41dee36SAndy Whitcroft 	unsigned long nr_pages = 0;
174d41dee36SAndy Whitcroft 
175d41dee36SAndy Whitcroft 	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
176d41dee36SAndy Whitcroft 		if (nid != early_pfn_to_nid(pfn))
177d41dee36SAndy Whitcroft 			continue;
178d41dee36SAndy Whitcroft 
179d41dee36SAndy Whitcroft 		if (pfn_valid(pfn))
180d41dee36SAndy Whitcroft 			nr_pages += PAGES_PER_SECTION;
181d41dee36SAndy Whitcroft 	}
182d41dee36SAndy Whitcroft 
183d41dee36SAndy Whitcroft 	return nr_pages * sizeof(struct page);
184d41dee36SAndy Whitcroft }
185d41dee36SAndy Whitcroft 
186d41dee36SAndy Whitcroft /*
18729751f69SAndy Whitcroft  * Subtle, we encode the real pfn into the mem_map such that
18829751f69SAndy Whitcroft  * the identity pfn - section_mem_map will return the actual
18929751f69SAndy Whitcroft  * physical page frame number.
19029751f69SAndy Whitcroft  */
19129751f69SAndy Whitcroft static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
19229751f69SAndy Whitcroft {
19329751f69SAndy Whitcroft 	return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
19429751f69SAndy Whitcroft }
19529751f69SAndy Whitcroft 
19629751f69SAndy Whitcroft /*
19729751f69SAndy Whitcroft  * We need this if we ever free the mem_maps.  While not implemented yet,
19829751f69SAndy Whitcroft  * this function is included for parity with its sibling.
19929751f69SAndy Whitcroft  */
20029751f69SAndy Whitcroft static __attribute((unused))
20129751f69SAndy Whitcroft struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
20229751f69SAndy Whitcroft {
20329751f69SAndy Whitcroft 	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
20429751f69SAndy Whitcroft }
20529751f69SAndy Whitcroft 
206a3142c8eSYasunori Goto static int __meminit sparse_init_one_section(struct mem_section *ms,
20729751f69SAndy Whitcroft 		unsigned long pnum, struct page *mem_map)
20829751f69SAndy Whitcroft {
20929751f69SAndy Whitcroft 	if (!valid_section(ms))
21029751f69SAndy Whitcroft 		return -EINVAL;
21129751f69SAndy Whitcroft 
21230c253e6SAndy Whitcroft 	ms->section_mem_map &= ~SECTION_MAP_MASK;
21329751f69SAndy Whitcroft 	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum);
21429751f69SAndy Whitcroft 
21529751f69SAndy Whitcroft 	return 1;
21629751f69SAndy Whitcroft }
21729751f69SAndy Whitcroft 
218dec2e6b7SSam Ravnborg __attribute__((weak)) __init
2192e1c49dbSZou Nan hai void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
2202e1c49dbSZou Nan hai {
2212e1c49dbSZou Nan hai 	return NULL;
2222e1c49dbSZou Nan hai }
2232e1c49dbSZou Nan hai 
224a3142c8eSYasunori Goto static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
22529751f69SAndy Whitcroft {
22629751f69SAndy Whitcroft 	struct page *map;
227802f192eSBob Picco 	struct mem_section *ms = __nr_to_section(pnum);
22830c253e6SAndy Whitcroft 	int nid = sparse_early_nid(ms);
22929751f69SAndy Whitcroft 
23029751f69SAndy Whitcroft 	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
23129751f69SAndy Whitcroft 	if (map)
23229751f69SAndy Whitcroft 		return map;
23329751f69SAndy Whitcroft 
2342e1c49dbSZou Nan hai   	map = alloc_bootmem_high_node(NODE_DATA(nid),
2352e1c49dbSZou Nan hai                        sizeof(struct page) * PAGES_PER_SECTION);
2362e1c49dbSZou Nan hai 	if (map)
2372e1c49dbSZou Nan hai 		return map;
2382e1c49dbSZou Nan hai 
23929751f69SAndy Whitcroft 	map = alloc_bootmem_node(NODE_DATA(nid),
24029751f69SAndy Whitcroft 			sizeof(struct page) * PAGES_PER_SECTION);
24129751f69SAndy Whitcroft 	if (map)
24229751f69SAndy Whitcroft 		return map;
24329751f69SAndy Whitcroft 
24429751f69SAndy Whitcroft 	printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
245802f192eSBob Picco 	ms->section_mem_map = 0;
24629751f69SAndy Whitcroft 	return NULL;
24729751f69SAndy Whitcroft }
24829751f69SAndy Whitcroft 
249193faea9SStephen Rothwell /*
250193faea9SStephen Rothwell  * Allocate the accumulated non-linear sections, allocate a mem_map
251193faea9SStephen Rothwell  * for each and record the physical to section mapping.
252193faea9SStephen Rothwell  */
253193faea9SStephen Rothwell void __init sparse_init(void)
254193faea9SStephen Rothwell {
255193faea9SStephen Rothwell 	unsigned long pnum;
256193faea9SStephen Rothwell 	struct page *map;
257193faea9SStephen Rothwell 
258193faea9SStephen Rothwell 	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
259193faea9SStephen Rothwell 		if (!valid_section_nr(pnum))
260193faea9SStephen Rothwell 			continue;
261193faea9SStephen Rothwell 
262193faea9SStephen Rothwell 		map = sparse_early_mem_map_alloc(pnum);
263193faea9SStephen Rothwell 		if (!map)
264193faea9SStephen Rothwell 			continue;
265193faea9SStephen Rothwell 		sparse_init_one_section(__nr_to_section(pnum), pnum, map);
266193faea9SStephen Rothwell 	}
267193faea9SStephen Rothwell }
268193faea9SStephen Rothwell 
269193faea9SStephen Rothwell #ifdef CONFIG_MEMORY_HOTPLUG
2700b0acbecSDave Hansen static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
2710b0acbecSDave Hansen {
2720b0acbecSDave Hansen 	struct page *page, *ret;
2730b0acbecSDave Hansen 	unsigned long memmap_size = sizeof(struct page) * nr_pages;
2740b0acbecSDave Hansen 
275f2d0aa5bSYasunori Goto 	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
2760b0acbecSDave Hansen 	if (page)
2770b0acbecSDave Hansen 		goto got_map_page;
2780b0acbecSDave Hansen 
2790b0acbecSDave Hansen 	ret = vmalloc(memmap_size);
2800b0acbecSDave Hansen 	if (ret)
2810b0acbecSDave Hansen 		goto got_map_ptr;
2820b0acbecSDave Hansen 
2830b0acbecSDave Hansen 	return NULL;
2840b0acbecSDave Hansen got_map_page:
2850b0acbecSDave Hansen 	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
2860b0acbecSDave Hansen got_map_ptr:
2870b0acbecSDave Hansen 	memset(ret, 0, memmap_size);
2880b0acbecSDave Hansen 
2890b0acbecSDave Hansen 	return ret;
2900b0acbecSDave Hansen }
2910b0acbecSDave Hansen 
2920b0acbecSDave Hansen static int vaddr_in_vmalloc_area(void *addr)
2930b0acbecSDave Hansen {
2940b0acbecSDave Hansen 	if (addr >= (void *)VMALLOC_START &&
2950b0acbecSDave Hansen 	    addr < (void *)VMALLOC_END)
2960b0acbecSDave Hansen 		return 1;
2970b0acbecSDave Hansen 	return 0;
2980b0acbecSDave Hansen }
2990b0acbecSDave Hansen 
3000b0acbecSDave Hansen static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
3010b0acbecSDave Hansen {
3020b0acbecSDave Hansen 	if (vaddr_in_vmalloc_area(memmap))
3030b0acbecSDave Hansen 		vfree(memmap);
3040b0acbecSDave Hansen 	else
3050b0acbecSDave Hansen 		free_pages((unsigned long)memmap,
3060b0acbecSDave Hansen 			   get_order(sizeof(struct page) * nr_pages));
3070b0acbecSDave Hansen }
3080b0acbecSDave Hansen 
30929751f69SAndy Whitcroft /*
31029751f69SAndy Whitcroft  * returns the number of sections whose mem_maps were properly
31129751f69SAndy Whitcroft  * set.  If this is <=0, then that means that the passed-in
31229751f69SAndy Whitcroft  * map was not consumed and must be freed.
313d41dee36SAndy Whitcroft  */
3140b0acbecSDave Hansen int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
3150b0acbecSDave Hansen 			   int nr_pages)
31629751f69SAndy Whitcroft {
3170b0acbecSDave Hansen 	unsigned long section_nr = pfn_to_section_nr(start_pfn);
3180b0acbecSDave Hansen 	struct pglist_data *pgdat = zone->zone_pgdat;
3190b0acbecSDave Hansen 	struct mem_section *ms;
3200b0acbecSDave Hansen 	struct page *memmap;
3210b0acbecSDave Hansen 	unsigned long flags;
3220b0acbecSDave Hansen 	int ret;
32329751f69SAndy Whitcroft 
3240b0acbecSDave Hansen 	/*
3250b0acbecSDave Hansen 	 * no locking for this, because it does its own
3260b0acbecSDave Hansen 	 * plus, it does a kmalloc
3270b0acbecSDave Hansen 	 */
3280b0acbecSDave Hansen 	sparse_index_init(section_nr, pgdat->node_id);
3290b0acbecSDave Hansen 	memmap = __kmalloc_section_memmap(nr_pages);
33029751f69SAndy Whitcroft 
3310b0acbecSDave Hansen 	pgdat_resize_lock(pgdat, &flags);
3320b0acbecSDave Hansen 
3330b0acbecSDave Hansen 	ms = __pfn_to_section(start_pfn);
3340b0acbecSDave Hansen 	if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
3350b0acbecSDave Hansen 		ret = -EEXIST;
3360b0acbecSDave Hansen 		goto out;
3370b0acbecSDave Hansen 	}
33829751f69SAndy Whitcroft 	ms->section_mem_map |= SECTION_MARKED_PRESENT;
33929751f69SAndy Whitcroft 
3400b0acbecSDave Hansen 	ret = sparse_init_one_section(ms, section_nr, memmap);
3410b0acbecSDave Hansen 
3420b0acbecSDave Hansen out:
3430b0acbecSDave Hansen 	pgdat_resize_unlock(pgdat, &flags);
34446a66eecSMike Kravetz 	if (ret <= 0)
34546a66eecSMike Kravetz 		__kfree_section_memmap(memmap, nr_pages);
3460b0acbecSDave Hansen 	return ret;
347d41dee36SAndy Whitcroft }
348a3142c8eSYasunori Goto #endif
349