xref: /linux/arch/powerpc/mm/numa.c (revision bc16a75926941094db6b42d76014abb5e8d3a910)
1ab1f9dacSPaul Mackerras /*
2ab1f9dacSPaul Mackerras  * pSeries NUMA support
3ab1f9dacSPaul Mackerras  *
4ab1f9dacSPaul Mackerras  * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
5ab1f9dacSPaul Mackerras  *
6ab1f9dacSPaul Mackerras  * This program is free software; you can redistribute it and/or
7ab1f9dacSPaul Mackerras  * modify it under the terms of the GNU General Public License
8ab1f9dacSPaul Mackerras  * as published by the Free Software Foundation; either version
9ab1f9dacSPaul Mackerras  * 2 of the License, or (at your option) any later version.
10ab1f9dacSPaul Mackerras  */
11ab1f9dacSPaul Mackerras #include <linux/threads.h>
12ab1f9dacSPaul Mackerras #include <linux/bootmem.h>
13ab1f9dacSPaul Mackerras #include <linux/init.h>
14ab1f9dacSPaul Mackerras #include <linux/mm.h>
15ab1f9dacSPaul Mackerras #include <linux/mmzone.h>
16ab1f9dacSPaul Mackerras #include <linux/module.h>
17ab1f9dacSPaul Mackerras #include <linux/nodemask.h>
18ab1f9dacSPaul Mackerras #include <linux/cpu.h>
19ab1f9dacSPaul Mackerras #include <linux/notifier.h>
2045fb6ceaSAnton Blanchard #include <asm/sparsemem.h>
21ab1f9dacSPaul Mackerras #include <asm/lmb.h>
22cf00a8d1SPaul Mackerras #include <asm/system.h>
232249ca9dSPaul Mackerras #include <asm/smp.h>
24ab1f9dacSPaul Mackerras 
25ab1f9dacSPaul Mackerras static int numa_enabled = 1;
26ab1f9dacSPaul Mackerras 
27ab1f9dacSPaul Mackerras static int numa_debug;
28ab1f9dacSPaul Mackerras #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
29ab1f9dacSPaul Mackerras 
3045fb6ceaSAnton Blanchard int numa_cpu_lookup_table[NR_CPUS];
31ab1f9dacSPaul Mackerras cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
32ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES];
3345fb6ceaSAnton Blanchard 
3445fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpu_lookup_table);
3545fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpumask_lookup_table);
3645fb6ceaSAnton Blanchard EXPORT_SYMBOL(node_data);
3745fb6ceaSAnton Blanchard 
3845fb6ceaSAnton Blanchard static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
39ab1f9dacSPaul Mackerras static int min_common_depth;
40237a0989SMike Kravetz static int n_mem_addr_cells, n_mem_size_cells;
41ab1f9dacSPaul Mackerras 
42ab1f9dacSPaul Mackerras /*
4345fb6ceaSAnton Blanchard  * We need somewhere to store start/end/node for each region until we have
44ab1f9dacSPaul Mackerras  * allocated the real node_data structures.
45ab1f9dacSPaul Mackerras  */
4645fb6ceaSAnton Blanchard #define MAX_REGIONS	(MAX_LMB_REGIONS*2)
47ab1f9dacSPaul Mackerras static struct {
4845fb6ceaSAnton Blanchard 	unsigned long start_pfn;
4945fb6ceaSAnton Blanchard 	unsigned long end_pfn;
5045fb6ceaSAnton Blanchard 	int nid;
5145fb6ceaSAnton Blanchard } init_node_data[MAX_REGIONS] __initdata;
52ab1f9dacSPaul Mackerras 
5345fb6ceaSAnton Blanchard int __init early_pfn_to_nid(unsigned long pfn)
5445fb6ceaSAnton Blanchard {
5545fb6ceaSAnton Blanchard 	unsigned int i;
5645fb6ceaSAnton Blanchard 
5745fb6ceaSAnton Blanchard 	for (i = 0; init_node_data[i].end_pfn; i++) {
5845fb6ceaSAnton Blanchard 		unsigned long start_pfn = init_node_data[i].start_pfn;
5945fb6ceaSAnton Blanchard 		unsigned long end_pfn = init_node_data[i].end_pfn;
6045fb6ceaSAnton Blanchard 
6145fb6ceaSAnton Blanchard 		if ((start_pfn <= pfn) && (pfn < end_pfn))
6245fb6ceaSAnton Blanchard 			return init_node_data[i].nid;
6345fb6ceaSAnton Blanchard 	}
6445fb6ceaSAnton Blanchard 
6545fb6ceaSAnton Blanchard 	return -1;
6645fb6ceaSAnton Blanchard }
6745fb6ceaSAnton Blanchard 
/*
 * Record an early memory region [start_pfn, start_pfn + pages) for node
 * @nid in init_node_data, coalescing with an adjacent existing entry for
 * the same node when possible.  If the table is full the region is
 * dropped with a warning (memory is truncated, not corrupted).
 */
void __init add_region(unsigned int nid, unsigned long start_pfn,
		       unsigned long pages)
{
	unsigned int i;

	dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
		nid, start_pfn, pages);

	/* First try to extend an existing entry for this node. */
	for (i = 0; init_node_data[i].end_pfn; i++) {
		if (init_node_data[i].nid != nid)
			continue;
		/* New region begins exactly where this entry ends: append. */
		if (init_node_data[i].end_pfn == start_pfn) {
			init_node_data[i].end_pfn += pages;
			return;
		}
		/* New region ends exactly where this entry begins: prepend. */
		if (init_node_data[i].start_pfn == (start_pfn + pages)) {
			init_node_data[i].start_pfn -= pages;
			return;
		}
	}

	/*
	 * Leave last entry NULL so we dont iterate off the end (we use
	 * entry.end_pfn to terminate the walk).
	 */
	if (i >= (MAX_REGIONS - 1)) {
		printk(KERN_ERR "WARNING: too many memory regions in "
				"numa code, truncating\n");
		return;
	}

	/* No merge possible: append a fresh entry at slot i. */
	init_node_data[i].start_pfn = start_pfn;
	init_node_data[i].end_pfn = start_pfn + pages;
	init_node_data[i].nid = nid;
}
10345fb6ceaSAnton Blanchard 
/* We assume init_node_data has no overlapping regions */
/*
 * Compute the spanned pfn range and the number of actually-present pages
 * for node @nid from the early region table.  *start_pfn/*end_pfn bound
 * the node's span (which may contain holes); *pages_present is the sum
 * of pages in the node's recorded regions.  All three are 0 when the
 * node has no regions.
 */
void __init get_region(unsigned int nid, unsigned long *start_pfn,
		       unsigned long *end_pfn, unsigned long *pages_present)
{
	unsigned int i;

	/* Start high so the min-tracking below works; -1UL doubles as
	 * the "nothing found" sentinel checked at the end. */
	*start_pfn = -1UL;
	*end_pfn = *pages_present = 0;

	for (i = 0; init_node_data[i].end_pfn; i++) {
		if (init_node_data[i].nid != nid)
			continue;

		/* Count only pages in recorded regions, not hole pages. */
		*pages_present += init_node_data[i].end_pfn -
			init_node_data[i].start_pfn;

		if (init_node_data[i].start_pfn < *start_pfn)
			*start_pfn = init_node_data[i].start_pfn;

		if (init_node_data[i].end_pfn > *end_pfn)
			*end_pfn = init_node_data[i].end_pfn;
	}

	/* We didn't find a matching region, return start/end as 0 */
	if (*start_pfn == -1UL)
		*start_pfn = 0;
}
131ab1f9dacSPaul Mackerras 
/*
 * Record cpu -> node in both lookup directions: the per-cpu node id
 * table and the per-node cpumask.
 */
static void __cpuinit map_cpu_to_node(int cpu, int node)
{
	numa_cpu_lookup_table[cpu] = node;

	dbg("adding cpu %d to node %d\n", cpu, node);

	/* Avoid a redundant (atomic) set if the bit is already there. */
	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
		cpu_set(cpu, numa_cpumask_lookup_table[node]);
}
141ab1f9dacSPaul Mackerras 
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Remove a departing cpu from its node's cpumask (the reverse of
 * map_cpu_to_node()); warn if the cpu was not in the mask.  Note the
 * numa_cpu_lookup_table entry itself is left as-is.
 */
static void unmap_cpu_from_node(unsigned long cpu)
{
	int node = numa_cpu_lookup_table[cpu];

	dbg("removing cpu %lu from node %d\n", cpu, node);

	if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
		cpu_clear(cpu, numa_cpumask_lookup_table[node]);
	} else {
		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
		       cpu, node);
	}
}
#endif /* CONFIG_HOTPLUG_CPU */
157ab1f9dacSPaul Mackerras 
/*
 * Find the device tree "cpu" node for logical cpu @cpu by matching its
 * hardware id against each node's "ibm,ppc-interrupt-server#s" list or,
 * failing that, the node's "reg" property.  On success returns the node
 * with its refcount elevated (caller must of_node_put()); returns NULL
 * if no match is found.
 */
static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
{
	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
	struct device_node *cpu_node = NULL;
	unsigned int *interrupt_server, *reg;
	int len;

	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
		/* Try interrupt server first */
		interrupt_server = (unsigned int *)get_property(cpu_node,
					"ibm,ppc-interrupt-server#s", &len);

		/* len is in bytes; convert to a cell count. */
		len = len / sizeof(u32);

		if (interrupt_server && (len > 0)) {
			/* Any listed server id may match (SMT threads). */
			while (len--) {
				if (interrupt_server[len] == hw_cpuid)
					return cpu_node;
			}
		} else {
			reg = (unsigned int *)get_property(cpu_node,
							   "reg", &len);
			if (reg && (len > 0) && (reg[0] == hw_cpuid))
				return cpu_node;
		}
	}

	return NULL;
}
187ab1f9dacSPaul Mackerras 
188ab1f9dacSPaul Mackerras /* must hold reference to node during call */
189ab1f9dacSPaul Mackerras static int *of_get_associativity(struct device_node *dev)
190ab1f9dacSPaul Mackerras {
191ab1f9dacSPaul Mackerras 	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
192ab1f9dacSPaul Mackerras }
193ab1f9dacSPaul Mackerras 
/*
 * Derive the NUMA node id for device tree node @device from its
 * "ibm,associativity" property, taking the entry at min_common_depth.
 * Falls back to node 0 when NUMA is unconfigured, the property is
 * missing/too short, or the platform reports the invalid-node marker.
 */
static int of_node_to_nid(struct device_node *device)
{
	int nid;
	unsigned int *tmp;

	/* min_common_depth == -1 means no NUMA information at all. */
	if (min_common_depth == -1)
		return 0;

	/* tmp[0] is the cell count of the associativity list. */
	tmp = of_get_associativity(device);
	if (tmp && (tmp[0] >= min_common_depth)) {
		nid = tmp[min_common_depth];
	} else {
		dbg("WARNING: no NUMA information for %s\n",
		    device->full_name);
		nid = 0;
	}

	/* POWER4 LPAR uses 0xffff as invalid node */
	if (nid == 0xffff)
		nid = 0;

	return nid;
}
217ab1f9dacSPaul Mackerras 
218ab1f9dacSPaul Mackerras /*
219ab1f9dacSPaul Mackerras  * In theory, the "ibm,associativity" property may contain multiple
220ab1f9dacSPaul Mackerras  * associativity lists because a resource may be multiply connected
221ab1f9dacSPaul Mackerras  * into the machine.  This resource then has different associativity
222ab1f9dacSPaul Mackerras  * characteristics relative to its multiple connections.  We ignore
223ab1f9dacSPaul Mackerras  * this for now.  We also assume that all cpu and memory sets have
224ab1f9dacSPaul Mackerras  * their distances represented at a common level.  This won't be
 * true for hierarchical NUMA.
226ab1f9dacSPaul Mackerras  *
227ab1f9dacSPaul Mackerras  * In any case the ibm,associativity-reference-points should give
228ab1f9dacSPaul Mackerras  * the correct depth for a normal NUMA system.
229ab1f9dacSPaul Mackerras  *
230ab1f9dacSPaul Mackerras  * - Dave Hansen <haveblue@us.ibm.com>
231ab1f9dacSPaul Mackerras  */
232ab1f9dacSPaul Mackerras static int __init find_min_common_depth(void)
233ab1f9dacSPaul Mackerras {
234ab1f9dacSPaul Mackerras 	int depth;
235ab1f9dacSPaul Mackerras 	unsigned int *ref_points;
236ab1f9dacSPaul Mackerras 	struct device_node *rtas_root;
237ab1f9dacSPaul Mackerras 	unsigned int len;
238ab1f9dacSPaul Mackerras 
239ab1f9dacSPaul Mackerras 	rtas_root = of_find_node_by_path("/rtas");
240ab1f9dacSPaul Mackerras 
241ab1f9dacSPaul Mackerras 	if (!rtas_root)
242ab1f9dacSPaul Mackerras 		return -1;
243ab1f9dacSPaul Mackerras 
244ab1f9dacSPaul Mackerras 	/*
245ab1f9dacSPaul Mackerras 	 * this property is 2 32-bit integers, each representing a level of
246ab1f9dacSPaul Mackerras 	 * depth in the associativity nodes.  The first is for an SMP
247ab1f9dacSPaul Mackerras 	 * configuration (should be all 0's) and the second is for a normal
248ab1f9dacSPaul Mackerras 	 * NUMA configuration.
249ab1f9dacSPaul Mackerras 	 */
250ab1f9dacSPaul Mackerras 	ref_points = (unsigned int *)get_property(rtas_root,
251ab1f9dacSPaul Mackerras 			"ibm,associativity-reference-points", &len);
252ab1f9dacSPaul Mackerras 
253ab1f9dacSPaul Mackerras 	if ((len >= 1) && ref_points) {
254ab1f9dacSPaul Mackerras 		depth = ref_points[1];
255ab1f9dacSPaul Mackerras 	} else {
256bf4b85b0SNathan Lynch 		dbg("NUMA: ibm,associativity-reference-points not found.\n");
257ab1f9dacSPaul Mackerras 		depth = -1;
258ab1f9dacSPaul Mackerras 	}
259ab1f9dacSPaul Mackerras 	of_node_put(rtas_root);
260ab1f9dacSPaul Mackerras 
261ab1f9dacSPaul Mackerras 	return depth;
262ab1f9dacSPaul Mackerras }
263ab1f9dacSPaul Mackerras 
/*
 * Fetch #address-cells and #size-cells as they apply to the first
 * "memory" node in the device tree; these sizes are needed to decode
 * memory "reg" properties.  Panics if no memory node exists, since the
 * machine cannot boot without one.
 */
static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
	struct device_node *memory = NULL;

	memory = of_find_node_by_type(memory, "memory");
	if (!memory)
		panic("numa.c: No memory nodes found!");

	*n_addr_cells = prom_n_addr_cells(memory);
	*n_size_cells = prom_n_size_cells(memory);
	of_node_put(memory);
}
276ab1f9dacSPaul Mackerras 
/*
 * Assemble @n consecutive 32-bit cells from *@buf into one value
 * (big-endian cell order) and advance *@buf past the cells consumed.
 * Returns 0 when n == 0.
 */
static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
{
	unsigned long value;

	for (value = 0; n > 0; n--, (*buf)++)
		value = (value << 32) | **buf;

	return value;
}
287ab1f9dacSPaul Mackerras 
/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int __cpuinit numa_setup_cpu(unsigned long lcpu)
{
	int nid = 0;
	struct device_node *cpu = find_cpu_node(lcpu);

	/* No device tree node for this cpu: fall back to node 0. */
	if (!cpu) {
		WARN_ON(1);
		goto out;
	}

	nid = of_node_to_nid(cpu);

	/* A hotplugged cpu may report a node we never set up; force it
	 * onto node 0 rather than reference a missing NODE_DATA. */
	if (nid >= num_online_nodes()) {
		printk(KERN_ERR "WARNING: cpu %ld "
		       "maps to invalid NUMA node %d\n",
		       lcpu, nid);
		nid = 0;
	}
out:
	node_set_online(nid);

	map_cpu_to_node(lcpu, nid);

	/* Drop the reference taken by find_cpu_node(); cpu may be NULL
	 * here on the error path — presumably of_node_put(NULL) is a
	 * no-op, TODO confirm. */
	of_node_put(cpu);

	return nid;
}
319ab1f9dacSPaul Mackerras 
320ab1f9dacSPaul Mackerras static int cpu_numa_callback(struct notifier_block *nfb,
321ab1f9dacSPaul Mackerras 			     unsigned long action,
322ab1f9dacSPaul Mackerras 			     void *hcpu)
323ab1f9dacSPaul Mackerras {
324ab1f9dacSPaul Mackerras 	unsigned long lcpu = (unsigned long)hcpu;
325ab1f9dacSPaul Mackerras 	int ret = NOTIFY_DONE;
326ab1f9dacSPaul Mackerras 
327ab1f9dacSPaul Mackerras 	switch (action) {
328ab1f9dacSPaul Mackerras 	case CPU_UP_PREPARE:
329ab1f9dacSPaul Mackerras 		if (min_common_depth == -1 || !numa_enabled)
330ab1f9dacSPaul Mackerras 			map_cpu_to_node(lcpu, 0);
331ab1f9dacSPaul Mackerras 		else
332ab1f9dacSPaul Mackerras 			numa_setup_cpu(lcpu);
333ab1f9dacSPaul Mackerras 		ret = NOTIFY_OK;
334ab1f9dacSPaul Mackerras 		break;
335ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU
336ab1f9dacSPaul Mackerras 	case CPU_DEAD:
337ab1f9dacSPaul Mackerras 	case CPU_UP_CANCELED:
338ab1f9dacSPaul Mackerras 		unmap_cpu_from_node(lcpu);
339ab1f9dacSPaul Mackerras 		break;
340ab1f9dacSPaul Mackerras 		ret = NOTIFY_OK;
341ab1f9dacSPaul Mackerras #endif
342ab1f9dacSPaul Mackerras 	}
343ab1f9dacSPaul Mackerras 	return ret;
344ab1f9dacSPaul Mackerras }
345ab1f9dacSPaul Mackerras 
346ab1f9dacSPaul Mackerras /*
347ab1f9dacSPaul Mackerras  * Check and possibly modify a memory region to enforce the memory limit.
348ab1f9dacSPaul Mackerras  *
349ab1f9dacSPaul Mackerras  * Returns the size the region should have to enforce the memory limit.
350ab1f9dacSPaul Mackerras  * This will either be the original value of size, a truncated value,
351ab1f9dacSPaul Mackerras  * or zero. If the returned value of size is 0 the region should be
352ab1f9dacSPaul Mackerras  * discarded as it lies wholy above the memory limit.
353ab1f9dacSPaul Mackerras  */
35445fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start,
35545fb6ceaSAnton Blanchard 						      unsigned long size)
356ab1f9dacSPaul Mackerras {
357ab1f9dacSPaul Mackerras 	/*
358ab1f9dacSPaul Mackerras 	 * We use lmb_end_of_DRAM() in here instead of memory_limit because
359ab1f9dacSPaul Mackerras 	 * we've already adjusted it for the limit and it takes care of
360ab1f9dacSPaul Mackerras 	 * having memory holes below the limit.
361ab1f9dacSPaul Mackerras 	 */
362ab1f9dacSPaul Mackerras 
363ab1f9dacSPaul Mackerras 	if (! memory_limit)
364ab1f9dacSPaul Mackerras 		return size;
365ab1f9dacSPaul Mackerras 
366ab1f9dacSPaul Mackerras 	if (start + size <= lmb_end_of_DRAM())
367ab1f9dacSPaul Mackerras 		return size;
368ab1f9dacSPaul Mackerras 
369ab1f9dacSPaul Mackerras 	if (start >= lmb_end_of_DRAM())
370ab1f9dacSPaul Mackerras 		return 0;
371ab1f9dacSPaul Mackerras 
372ab1f9dacSPaul Mackerras 	return lmb_end_of_DRAM() - start;
373ab1f9dacSPaul Mackerras }
374ab1f9dacSPaul Mackerras 
/*
 * Build the early NUMA picture from the device tree: determine
 * min_common_depth, find the largest node id referenced by any cpu,
 * record every memory range with add_region(), and mark nodes online.
 * Returns 0 on success, negative when NUMA is disabled or no
 * associativity information exists (caller falls back to
 * setup_nonnuma()).
 */
static int __init parse_numa_properties(void)
{
	struct device_node *cpu = NULL;
	struct device_node *memory = NULL;
	int max_domain = 0;
	unsigned long i;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

	min_common_depth = find_min_common_depth();

	if (min_common_depth < 0)
		return min_common_depth;

	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);

	/*
	 * Even though we connect cpus to numa domains later in SMP init,
	 * we need to know the maximum node id now. This is because each
	 * node id must have NODE_DATA etc backing it.
	 * As a result of hotplug we could still have cpus appear later on
	 * with larger node ids. In that case we force the cpu into node 0.
	 */
	for_each_cpu(i) {
		int nid;

		cpu = find_cpu_node(i);

		if (cpu) {
			nid = of_node_to_nid(cpu);
			of_node_put(cpu);

			/* Ignore out-of-range ids; the cpu will be forced
			 * to node 0 later (see comment above). */
			if (nid < MAX_NUMNODES &&
			    max_domain < nid)
				max_domain = nid;
		}
	}

	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
	memory = NULL;
	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start;
		unsigned long size;
		int nid;
		int ranges;
		unsigned int *memcell_buf;
		unsigned int len;

		/* Prefer "linux,usable-memory" when present; otherwise
		 * fall back to the standard "reg" property. */
		memcell_buf = (unsigned int *)get_property(memory,
			"linux,usable-memory", &len);
		if (!memcell_buf || len <= 0)
			memcell_buf =
				(unsigned int *)get_property(memory, "reg",
					&len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
new_range:
		/* these are order-sensitive, and modify the buffer pointer */
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);

		nid = of_node_to_nid(memory);

		if (nid >= MAX_NUMNODES) {
			printk(KERN_ERR "WARNING: memory at %lx maps "
			       "to invalid NUMA node %d\n", start,
			       nid);
			nid = 0;
		}

		if (max_domain < nid)
			max_domain = nid;

		/* Clip against the memory limit; a zero result means the
		 * range lies wholly above the limit and is skipped. */
		if (!(size = numa_enforce_memory_limit(start, size))) {
			if (--ranges)
				goto new_range;
			else
				continue;
		}

		add_region(nid, start >> PAGE_SHIFT,
			   size >> PAGE_SHIFT);

		if (--ranges)
			goto new_range;
	}

	for (i = 0; i <= max_domain; i++)
		node_set_online(i);

	/* NOTE(review): the value assigned to max_domain here is never
	 * used again; only numa_setup_cpu()'s side effects matter. */
	max_domain = numa_setup_cpu(boot_cpuid);

	return 0;
}
475ab1f9dacSPaul Mackerras 
/*
 * Fallback when no usable NUMA information exists: map the boot cpu and
 * every LMB memory region onto node 0 and bring node 0 online.
 */
static void __init setup_nonnuma(void)
{
	unsigned long top_of_ram = lmb_end_of_DRAM();
	unsigned long total_ram = lmb_phys_mem_size();
	unsigned int i;

	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
	       top_of_ram, total_ram);
	/* Span minus actual size gives the total hole size. */
	printk(KERN_INFO "Memory hole size: %ldMB\n",
	       (top_of_ram - total_ram) >> 20);

	map_cpu_to_node(boot_cpuid, 0);
	/* Record every LMB region as belonging to node 0. */
	for (i = 0; i < lmb.memory.cnt; ++i)
		add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
			   lmb_size_pages(&lmb.memory, i));
	node_set_online(0);
}
493ab1f9dacSPaul Mackerras 
/*
 * Print, one line per online node, the cpus on that node as compact
 * "first-last" ranges (single cpus print with no range suffix).
 * No-op when NUMA is disabled or unavailable.
 */
void __init dump_numa_cpu_topology(void)
{
	unsigned int node;
	unsigned int cpu, count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		printk(KERN_INFO "Node %d CPUs:", node);

		/* count tracks the length of the current run of cpus. */
		count = 0;
		/*
		 * If we used a CPU iterator here we would miss printing
		 * the holes in the cpumap.
		 */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
				if (count == 0)
					printk(" %u", cpu);
				++count;
			} else {
				/* Run ended: close the range (if >1 cpu). */
				if (count > 1)
					printk("-%u", cpu - 1);
				count = 0;
			}
		}

		/* Close a run extending to the last possible cpu. */
		if (count > 1)
			printk("-%u", NR_CPUS - 1);
		printk("\n");
	}
}
5274b703a23SAnton Blanchard 
/*
 * Print, one line per online node, the memory on that node as address
 * ranges at SECTION_SIZE_BITS granularity.  No-op when NUMA is
 * disabled or unavailable.
 */
static void __init dump_numa_memory_topology(void)
{
	unsigned int node;
	unsigned int count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		unsigned long i;

		printk(KERN_INFO "Node %d Memory:", node);

		/* count tracks the length of the current run of sections. */
		count = 0;

		/* Walk physical memory one section at a time. */
		for (i = 0; i < lmb_end_of_DRAM();
		     i += (1 << SECTION_SIZE_BITS)) {
			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
				if (count == 0)
					printk(" 0x%lx", i);
				++count;
			} else {
				/* NOTE(review): uses count > 0 here, unlike
				 * the cpu dump's count > 1, so even a
				 * single section prints as a range —
				 * confirm this is intended. */
				if (count > 0)
					printk("-0x%lx", i);
				count = 0;
			}
		}

		if (count > 0)
			printk("-0x%lx", i);
		printk("\n");
	}
}
561ab1f9dacSPaul Mackerras 
/*
 * Allocate some memory, satisfying the lmb or bootmem allocator where
 * required. nid is the preferred node and end_pfn is one past the
 * highest pfn of that node (the lmb allocation is bounded below it).
 *
 * Returns the physical address of the memory (cast to void *).
 */
static void __init *careful_allocation(int nid, unsigned long size,
				       unsigned long align,
				       unsigned long end_pfn)
{
	int new_nid;
	unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);

	/* retry over all memory */
	if (!ret)
		ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());

	if (!ret)
		panic("numa.c: cannot allocate %lu bytes on node %d",
		      size, nid);

	/*
	 * If the memory came from a previously allocated node, we must
	 * retry with the bootmem allocator.
	 * (new_nid < nid implies that node's bootmem is already set up —
	 * presumably because nodes are initialized in ascending nid
	 * order; TODO confirm against do_init_bootmem's loop order.)
	 */
	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
	if (new_nid < nid) {
		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
				size, align, 0);

		if (!ret)
			panic("numa.c: cannot allocate %lu bytes on node %d",
			      size, new_nid);

		/* bootmem returned a virtual address; convert back to
		 * physical so both paths return the same kind. */
		ret = __pa(ret);

		dbg("alloc_bootmem %lx %lx\n", ret, size);
	}

	return (void *)ret;
}
604ab1f9dacSPaul Mackerras 
/*
 * Set up the bootmem allocator on every online node and register each
 * node's memory with sparsemem.  Runs once at boot, after the NUMA
 * topology has been read from the device tree (or faked when absent).
 */
void __init do_init_bootmem(void)
{
	int nid;
	unsigned int i;
	static struct notifier_block ppc64_numa_nb = {
		.notifier_call = cpu_numa_callback,
		.priority = 1 /* Must run before sched domains notifier. */
	};

	/* All of DRAM counts as "low" memory on 64-bit powerpc. */
	min_low_pfn = 0;
	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
	max_pfn = max_low_pfn;

	/* Fall back to a single-node layout when the device tree carries
	 * no usable NUMA information. */
	if (parse_numa_properties())
		setup_nonnuma();
	else
		dump_numa_memory_topology();

	register_cpu_notifier(&ppc64_numa_nb);

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn, pages_present;
		unsigned long bootmem_paddr;
		unsigned long bootmap_pages;

		get_region(nid, &start_pfn, &end_pfn, &pages_present);

		/* Allocate the node structure node local if possible.
		 * careful_allocation() hands back a *physical* address,
		 * hence the __va() before first use. */
		NODE_DATA(nid) = careful_allocation(nid,
					sizeof(struct pglist_data),
					SMP_CACHE_BYTES, end_pfn);
		NODE_DATA(nid) = __va(NODE_DATA(nid));
		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));

  		dbg("node %d\n", nid);
		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));

		NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
		NODE_DATA(nid)->node_start_pfn = start_pfn;
		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;

		/* Nothing further to set up on a memoryless node. */
		if (NODE_DATA(nid)->node_spanned_pages == 0)
  			continue;

  		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
  		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);

		/* The bootmem bitmap is also allocated node-local when
		 * possible; again a physical address comes back. */
		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
		bootmem_paddr = (unsigned long)careful_allocation(nid,
					bootmap_pages << PAGE_SHIFT,
					PAGE_SIZE, end_pfn);
		memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);

		dbg("bootmap_paddr = %lx\n", bootmem_paddr);

		init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
				  start_pfn, end_pfn);

		/* Add free regions on this node */
		for (i = 0; init_node_data[i].end_pfn; i++) {
			unsigned long start, end;

			if (init_node_data[i].nid != nid)
				continue;

			/* free_bootmem_node() takes physical addresses,
			 * so convert the pfn-based region bounds. */
			start = init_node_data[i].start_pfn << PAGE_SHIFT;
			end = init_node_data[i].end_pfn << PAGE_SHIFT;

			dbg("free_bootmem %lx %lx\n", start, end - start);
  			free_bootmem_node(NODE_DATA(nid), start, end - start);
		}

		/* Mark reserved regions on this node */
		for (i = 0; i < lmb.reserved.cnt; i++) {
			unsigned long physbase = lmb.reserved.region[i].base;
			unsigned long size = lmb.reserved.region[i].size;
			unsigned long start_paddr = start_pfn << PAGE_SHIFT;
			unsigned long end_paddr = end_pfn << PAGE_SHIFT;

			/* Skip reservations whose endpoints both lie on
			 * other nodes; one that straddles this node is
			 * clamped to the node boundaries below. */
			if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid &&
			    early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid)
				continue;

			if (physbase < end_paddr &&
			    (physbase+size) > start_paddr) {
				/* overlaps */
				if (physbase < start_paddr) {
					size -= start_paddr - physbase;
					physbase = start_paddr;
				}

				if (size > end_paddr - physbase)
					size = end_paddr - physbase;

				dbg("reserve_bootmem %lx %lx\n", physbase,
				    size);
				reserve_bootmem_node(NODE_DATA(nid), physbase,
						     size);
			}
		}

		/* Add regions into sparsemem */
		for (i = 0; init_node_data[i].end_pfn; i++) {
			unsigned long start, end;

			if (init_node_data[i].nid != nid)
				continue;

			/* memory_present() takes pfns, unlike the
			 * paddr-based bootmem calls above. */
			start = init_node_data[i].start_pfn;
			end = init_node_data[i].end_pfn;

			memory_present(nid, start, end);
		}
	}
}
720ab1f9dacSPaul Mackerras 
721ab1f9dacSPaul Mackerras void __init paging_init(void)
722ab1f9dacSPaul Mackerras {
723ab1f9dacSPaul Mackerras 	unsigned long zones_size[MAX_NR_ZONES];
724ab1f9dacSPaul Mackerras 	unsigned long zholes_size[MAX_NR_ZONES];
725ab1f9dacSPaul Mackerras 	int nid;
726ab1f9dacSPaul Mackerras 
727ab1f9dacSPaul Mackerras 	memset(zones_size, 0, sizeof(zones_size));
728ab1f9dacSPaul Mackerras 	memset(zholes_size, 0, sizeof(zholes_size));
729ab1f9dacSPaul Mackerras 
730ab1f9dacSPaul Mackerras 	for_each_online_node(nid) {
73145fb6ceaSAnton Blanchard 		unsigned long start_pfn, end_pfn, pages_present;
732ab1f9dacSPaul Mackerras 
73345fb6ceaSAnton Blanchard 		get_region(nid, &start_pfn, &end_pfn, &pages_present);
734ab1f9dacSPaul Mackerras 
735ab1f9dacSPaul Mackerras 		zones_size[ZONE_DMA] = end_pfn - start_pfn;
73645fb6ceaSAnton Blanchard 		zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;
737ab1f9dacSPaul Mackerras 
738ab1f9dacSPaul Mackerras 		dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
739ab1f9dacSPaul Mackerras 		    zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
740ab1f9dacSPaul Mackerras 
74145fb6ceaSAnton Blanchard 		free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
74245fb6ceaSAnton Blanchard 				    zholes_size);
743ab1f9dacSPaul Mackerras 	}
744ab1f9dacSPaul Mackerras }
745ab1f9dacSPaul Mackerras 
746ab1f9dacSPaul Mackerras static int __init early_numa(char *p)
747ab1f9dacSPaul Mackerras {
748ab1f9dacSPaul Mackerras 	if (!p)
749ab1f9dacSPaul Mackerras 		return 0;
750ab1f9dacSPaul Mackerras 
751ab1f9dacSPaul Mackerras 	if (strstr(p, "off"))
752ab1f9dacSPaul Mackerras 		numa_enabled = 0;
753ab1f9dacSPaul Mackerras 
754ab1f9dacSPaul Mackerras 	if (strstr(p, "debug"))
755ab1f9dacSPaul Mackerras 		numa_debug = 1;
756ab1f9dacSPaul Mackerras 
757ab1f9dacSPaul Mackerras 	return 0;
758ab1f9dacSPaul Mackerras }
759ab1f9dacSPaul Mackerras early_param("numa", early_numa);
760237a0989SMike Kravetz 
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Find the node associated with a hot added memory section.  Section
 * corresponds to a SPARSEMEM section, not an LMB.  It is assumed that
 * sections are fully contained within a single LMB.
 *
 * Returns a node id that is both online and non-empty; BUG()s if the
 * section address is not covered by any "memory" device-tree node.
 */
int hot_add_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory = NULL;
	nodemask_t nodes;
	int nid = 0;

	/* Without NUMA information everything lives on node 0. */
	if (!numa_enabled || (min_common_depth < 0))
		return nid;

	/* Walk every "memory" node and scan its "reg" ranges for the
	 * one containing scn_addr. */
	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start, size;
		int ranges;
		unsigned int *memcell_buf;
		unsigned int len;

		memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
ha_new_range:
		/* read_n_cells() advances memcell_buf to the next range. */
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);
		nid = of_node_to_nid(memory);

		/* Domains not present at boot default to 0 */
		if (!node_online(nid))
			nid = any_online_node(NODE_MASK_ALL);

		if ((scn_addr >= start) && (scn_addr < (start + size))) {
			/* Drop the reference taken by the iterator before
			 * leaving the loop early. */
			of_node_put(memory);
			goto got_nid;
		}

		if (--ranges)		/* process all ranges in cell */
			goto ha_new_range;
	}
	BUG();	/* section address should be found above */

	/* Temporary code to ensure that returned node is not empty */
got_nid:
	/* Keep picking a different online node until one with spanned
	 * pages is found, excluding each empty candidate from `nodes`. */
	nodes_setall(nodes);
	while (NODE_DATA(nid)->node_spanned_pages == 0) {
		node_clear(nid, nodes);
		nid = any_online_node(nodes);
	}
	return nid;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
817