xref: /linux/arch/powerpc/mm/numa.c (revision c08888cf3c80fe07bfd176113c390ca31d3ba5c2)
1ab1f9dacSPaul Mackerras /*
2ab1f9dacSPaul Mackerras  * pSeries NUMA support
3ab1f9dacSPaul Mackerras  *
4ab1f9dacSPaul Mackerras  * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
5ab1f9dacSPaul Mackerras  *
6ab1f9dacSPaul Mackerras  * This program is free software; you can redistribute it and/or
7ab1f9dacSPaul Mackerras  * modify it under the terms of the GNU General Public License
8ab1f9dacSPaul Mackerras  * as published by the Free Software Foundation; either version
9ab1f9dacSPaul Mackerras  * 2 of the License, or (at your option) any later version.
10ab1f9dacSPaul Mackerras  */
11ab1f9dacSPaul Mackerras #include <linux/threads.h>
12ab1f9dacSPaul Mackerras #include <linux/bootmem.h>
13ab1f9dacSPaul Mackerras #include <linux/init.h>
14ab1f9dacSPaul Mackerras #include <linux/mm.h>
15ab1f9dacSPaul Mackerras #include <linux/mmzone.h>
16ab1f9dacSPaul Mackerras #include <linux/module.h>
17ab1f9dacSPaul Mackerras #include <linux/nodemask.h>
18ab1f9dacSPaul Mackerras #include <linux/cpu.h>
19ab1f9dacSPaul Mackerras #include <linux/notifier.h>
2045fb6ceaSAnton Blanchard #include <asm/sparsemem.h>
21ab1f9dacSPaul Mackerras #include <asm/lmb.h>
22cf00a8d1SPaul Mackerras #include <asm/system.h>
232249ca9dSPaul Mackerras #include <asm/smp.h>
24ab1f9dacSPaul Mackerras 
25ab1f9dacSPaul Mackerras static int numa_enabled = 1;
26ab1f9dacSPaul Mackerras 
27ab1f9dacSPaul Mackerras static int numa_debug;
/*
 * Debug print helper; emits only when numa_debug is set.  Wrapped in
 * do { } while (0) so that "dbg(...);" is a single statement: the old
 * bare "if (numa_debug) { ... }" form left a stray empty statement and
 * could capture an else clause when used in an unbraced if/else.
 */
#define dbg(args...) do { if (numa_debug) { printk(KERN_INFO args); } } while (0)
29ab1f9dacSPaul Mackerras 
3045fb6ceaSAnton Blanchard int numa_cpu_lookup_table[NR_CPUS];
31ab1f9dacSPaul Mackerras cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
32ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES];
3345fb6ceaSAnton Blanchard 
3445fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpu_lookup_table);
3545fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpumask_lookup_table);
3645fb6ceaSAnton Blanchard EXPORT_SYMBOL(node_data);
3745fb6ceaSAnton Blanchard 
3845fb6ceaSAnton Blanchard static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
39ab1f9dacSPaul Mackerras static int min_common_depth;
40237a0989SMike Kravetz static int n_mem_addr_cells, n_mem_size_cells;
41ab1f9dacSPaul Mackerras 
42ab1f9dacSPaul Mackerras /*
4345fb6ceaSAnton Blanchard  * We need somewhere to store start/end/node for each region until we have
44ab1f9dacSPaul Mackerras  * allocated the real node_data structures.
45ab1f9dacSPaul Mackerras  */
4645fb6ceaSAnton Blanchard #define MAX_REGIONS	(MAX_LMB_REGIONS*2)
47ab1f9dacSPaul Mackerras static struct {
4845fb6ceaSAnton Blanchard 	unsigned long start_pfn;
4945fb6ceaSAnton Blanchard 	unsigned long end_pfn;
5045fb6ceaSAnton Blanchard 	int nid;
5145fb6ceaSAnton Blanchard } init_node_data[MAX_REGIONS] __initdata;
52ab1f9dacSPaul Mackerras 
5345fb6ceaSAnton Blanchard int __init early_pfn_to_nid(unsigned long pfn)
5445fb6ceaSAnton Blanchard {
5545fb6ceaSAnton Blanchard 	unsigned int i;
5645fb6ceaSAnton Blanchard 
5745fb6ceaSAnton Blanchard 	for (i = 0; init_node_data[i].end_pfn; i++) {
5845fb6ceaSAnton Blanchard 		unsigned long start_pfn = init_node_data[i].start_pfn;
5945fb6ceaSAnton Blanchard 		unsigned long end_pfn = init_node_data[i].end_pfn;
6045fb6ceaSAnton Blanchard 
6145fb6ceaSAnton Blanchard 		if ((start_pfn <= pfn) && (pfn < end_pfn))
6245fb6ceaSAnton Blanchard 			return init_node_data[i].nid;
6345fb6ceaSAnton Blanchard 	}
6445fb6ceaSAnton Blanchard 
6545fb6ceaSAnton Blanchard 	return -1;
6645fb6ceaSAnton Blanchard }
6745fb6ceaSAnton Blanchard 
6845fb6ceaSAnton Blanchard void __init add_region(unsigned int nid, unsigned long start_pfn,
6945fb6ceaSAnton Blanchard 		       unsigned long pages)
7045fb6ceaSAnton Blanchard {
7145fb6ceaSAnton Blanchard 	unsigned int i;
7245fb6ceaSAnton Blanchard 
7345fb6ceaSAnton Blanchard 	dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
7445fb6ceaSAnton Blanchard 		nid, start_pfn, pages);
7545fb6ceaSAnton Blanchard 
7645fb6ceaSAnton Blanchard 	for (i = 0; init_node_data[i].end_pfn; i++) {
7745fb6ceaSAnton Blanchard 		if (init_node_data[i].nid != nid)
7845fb6ceaSAnton Blanchard 			continue;
7945fb6ceaSAnton Blanchard 		if (init_node_data[i].end_pfn == start_pfn) {
8045fb6ceaSAnton Blanchard 			init_node_data[i].end_pfn += pages;
8145fb6ceaSAnton Blanchard 			return;
8245fb6ceaSAnton Blanchard 		}
8345fb6ceaSAnton Blanchard 		if (init_node_data[i].start_pfn == (start_pfn + pages)) {
8445fb6ceaSAnton Blanchard 			init_node_data[i].start_pfn -= pages;
8545fb6ceaSAnton Blanchard 			return;
8645fb6ceaSAnton Blanchard 		}
8745fb6ceaSAnton Blanchard 	}
8845fb6ceaSAnton Blanchard 
8945fb6ceaSAnton Blanchard 	/*
9045fb6ceaSAnton Blanchard 	 * Leave last entry NULL so we dont iterate off the end (we use
9145fb6ceaSAnton Blanchard 	 * entry.end_pfn to terminate the walk).
9245fb6ceaSAnton Blanchard 	 */
9345fb6ceaSAnton Blanchard 	if (i >= (MAX_REGIONS - 1)) {
9445fb6ceaSAnton Blanchard 		printk(KERN_ERR "WARNING: too many memory regions in "
9545fb6ceaSAnton Blanchard 				"numa code, truncating\n");
9645fb6ceaSAnton Blanchard 		return;
9745fb6ceaSAnton Blanchard 	}
9845fb6ceaSAnton Blanchard 
9945fb6ceaSAnton Blanchard 	init_node_data[i].start_pfn = start_pfn;
10045fb6ceaSAnton Blanchard 	init_node_data[i].end_pfn = start_pfn + pages;
10145fb6ceaSAnton Blanchard 	init_node_data[i].nid = nid;
10245fb6ceaSAnton Blanchard }
10345fb6ceaSAnton Blanchard 
10445fb6ceaSAnton Blanchard /* We assume init_node_data has no overlapping regions */
10545fb6ceaSAnton Blanchard void __init get_region(unsigned int nid, unsigned long *start_pfn,
10645fb6ceaSAnton Blanchard 		       unsigned long *end_pfn, unsigned long *pages_present)
10745fb6ceaSAnton Blanchard {
10845fb6ceaSAnton Blanchard 	unsigned int i;
10945fb6ceaSAnton Blanchard 
11045fb6ceaSAnton Blanchard 	*start_pfn = -1UL;
11145fb6ceaSAnton Blanchard 	*end_pfn = *pages_present = 0;
11245fb6ceaSAnton Blanchard 
11345fb6ceaSAnton Blanchard 	for (i = 0; init_node_data[i].end_pfn; i++) {
11445fb6ceaSAnton Blanchard 		if (init_node_data[i].nid != nid)
11545fb6ceaSAnton Blanchard 			continue;
11645fb6ceaSAnton Blanchard 
11745fb6ceaSAnton Blanchard 		*pages_present += init_node_data[i].end_pfn -
11845fb6ceaSAnton Blanchard 			init_node_data[i].start_pfn;
11945fb6ceaSAnton Blanchard 
12045fb6ceaSAnton Blanchard 		if (init_node_data[i].start_pfn < *start_pfn)
12145fb6ceaSAnton Blanchard 			*start_pfn = init_node_data[i].start_pfn;
12245fb6ceaSAnton Blanchard 
12345fb6ceaSAnton Blanchard 		if (init_node_data[i].end_pfn > *end_pfn)
12445fb6ceaSAnton Blanchard 			*end_pfn = init_node_data[i].end_pfn;
12545fb6ceaSAnton Blanchard 	}
12645fb6ceaSAnton Blanchard 
12745fb6ceaSAnton Blanchard 	/* We didnt find a matching region, return start/end as 0 */
12845fb6ceaSAnton Blanchard 	if (*start_pfn == -1UL)
1296d91bb93SMike Kravetz 		*start_pfn = 0;
13045fb6ceaSAnton Blanchard }
131ab1f9dacSPaul Mackerras 
132ab1f9dacSPaul Mackerras static inline void map_cpu_to_node(int cpu, int node)
133ab1f9dacSPaul Mackerras {
134ab1f9dacSPaul Mackerras 	numa_cpu_lookup_table[cpu] = node;
13545fb6ceaSAnton Blanchard 
13645fb6ceaSAnton Blanchard 	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
137ab1f9dacSPaul Mackerras 		cpu_set(cpu, numa_cpumask_lookup_table[node]);
138ab1f9dacSPaul Mackerras }
139ab1f9dacSPaul Mackerras 
140ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU
141ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu)
142ab1f9dacSPaul Mackerras {
143ab1f9dacSPaul Mackerras 	int node = numa_cpu_lookup_table[cpu];
144ab1f9dacSPaul Mackerras 
145ab1f9dacSPaul Mackerras 	dbg("removing cpu %lu from node %d\n", cpu, node);
146ab1f9dacSPaul Mackerras 
147ab1f9dacSPaul Mackerras 	if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
148ab1f9dacSPaul Mackerras 		cpu_clear(cpu, numa_cpumask_lookup_table[node]);
149ab1f9dacSPaul Mackerras 	} else {
150ab1f9dacSPaul Mackerras 		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
151ab1f9dacSPaul Mackerras 		       cpu, node);
152ab1f9dacSPaul Mackerras 	}
153ab1f9dacSPaul Mackerras }
154ab1f9dacSPaul Mackerras #endif /* CONFIG_HOTPLUG_CPU */
155ab1f9dacSPaul Mackerras 
15645fb6ceaSAnton Blanchard static struct device_node *find_cpu_node(unsigned int cpu)
157ab1f9dacSPaul Mackerras {
158ab1f9dacSPaul Mackerras 	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
159ab1f9dacSPaul Mackerras 	struct device_node *cpu_node = NULL;
160ab1f9dacSPaul Mackerras 	unsigned int *interrupt_server, *reg;
161ab1f9dacSPaul Mackerras 	int len;
162ab1f9dacSPaul Mackerras 
163ab1f9dacSPaul Mackerras 	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
164ab1f9dacSPaul Mackerras 		/* Try interrupt server first */
165ab1f9dacSPaul Mackerras 		interrupt_server = (unsigned int *)get_property(cpu_node,
166ab1f9dacSPaul Mackerras 					"ibm,ppc-interrupt-server#s", &len);
167ab1f9dacSPaul Mackerras 
168ab1f9dacSPaul Mackerras 		len = len / sizeof(u32);
169ab1f9dacSPaul Mackerras 
170ab1f9dacSPaul Mackerras 		if (interrupt_server && (len > 0)) {
171ab1f9dacSPaul Mackerras 			while (len--) {
172ab1f9dacSPaul Mackerras 				if (interrupt_server[len] == hw_cpuid)
173ab1f9dacSPaul Mackerras 					return cpu_node;
174ab1f9dacSPaul Mackerras 			}
175ab1f9dacSPaul Mackerras 		} else {
176ab1f9dacSPaul Mackerras 			reg = (unsigned int *)get_property(cpu_node,
177ab1f9dacSPaul Mackerras 							   "reg", &len);
178ab1f9dacSPaul Mackerras 			if (reg && (len > 0) && (reg[0] == hw_cpuid))
179ab1f9dacSPaul Mackerras 				return cpu_node;
180ab1f9dacSPaul Mackerras 		}
181ab1f9dacSPaul Mackerras 	}
182ab1f9dacSPaul Mackerras 
183ab1f9dacSPaul Mackerras 	return NULL;
184ab1f9dacSPaul Mackerras }
185ab1f9dacSPaul Mackerras 
186ab1f9dacSPaul Mackerras /* must hold reference to node during call */
187ab1f9dacSPaul Mackerras static int *of_get_associativity(struct device_node *dev)
188ab1f9dacSPaul Mackerras {
189ab1f9dacSPaul Mackerras 	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
190ab1f9dacSPaul Mackerras }
191ab1f9dacSPaul Mackerras 
192ab1f9dacSPaul Mackerras static int of_node_numa_domain(struct device_node *device)
193ab1f9dacSPaul Mackerras {
194ab1f9dacSPaul Mackerras 	int numa_domain;
195ab1f9dacSPaul Mackerras 	unsigned int *tmp;
196ab1f9dacSPaul Mackerras 
197ab1f9dacSPaul Mackerras 	if (min_common_depth == -1)
198ab1f9dacSPaul Mackerras 		return 0;
199ab1f9dacSPaul Mackerras 
200ab1f9dacSPaul Mackerras 	tmp = of_get_associativity(device);
201ab1f9dacSPaul Mackerras 	if (tmp && (tmp[0] >= min_common_depth)) {
202ab1f9dacSPaul Mackerras 		numa_domain = tmp[min_common_depth];
203ab1f9dacSPaul Mackerras 	} else {
204ab1f9dacSPaul Mackerras 		dbg("WARNING: no NUMA information for %s\n",
205ab1f9dacSPaul Mackerras 		    device->full_name);
206ab1f9dacSPaul Mackerras 		numa_domain = 0;
207ab1f9dacSPaul Mackerras 	}
208ab1f9dacSPaul Mackerras 	return numa_domain;
209ab1f9dacSPaul Mackerras }
210ab1f9dacSPaul Mackerras 
211ab1f9dacSPaul Mackerras /*
212ab1f9dacSPaul Mackerras  * In theory, the "ibm,associativity" property may contain multiple
213ab1f9dacSPaul Mackerras  * associativity lists because a resource may be multiply connected
214ab1f9dacSPaul Mackerras  * into the machine.  This resource then has different associativity
215ab1f9dacSPaul Mackerras  * characteristics relative to its multiple connections.  We ignore
216ab1f9dacSPaul Mackerras  * this for now.  We also assume that all cpu and memory sets have
217ab1f9dacSPaul Mackerras  * their distances represented at a common level.  This won't be
218ab1f9dacSPaul Mackerras  * true for heirarchical NUMA.
219ab1f9dacSPaul Mackerras  *
220ab1f9dacSPaul Mackerras  * In any case the ibm,associativity-reference-points should give
221ab1f9dacSPaul Mackerras  * the correct depth for a normal NUMA system.
222ab1f9dacSPaul Mackerras  *
223ab1f9dacSPaul Mackerras  * - Dave Hansen <haveblue@us.ibm.com>
224ab1f9dacSPaul Mackerras  */
225ab1f9dacSPaul Mackerras static int __init find_min_common_depth(void)
226ab1f9dacSPaul Mackerras {
227ab1f9dacSPaul Mackerras 	int depth;
228ab1f9dacSPaul Mackerras 	unsigned int *ref_points;
229ab1f9dacSPaul Mackerras 	struct device_node *rtas_root;
230ab1f9dacSPaul Mackerras 	unsigned int len;
231ab1f9dacSPaul Mackerras 
232ab1f9dacSPaul Mackerras 	rtas_root = of_find_node_by_path("/rtas");
233ab1f9dacSPaul Mackerras 
234ab1f9dacSPaul Mackerras 	if (!rtas_root)
235ab1f9dacSPaul Mackerras 		return -1;
236ab1f9dacSPaul Mackerras 
237ab1f9dacSPaul Mackerras 	/*
238ab1f9dacSPaul Mackerras 	 * this property is 2 32-bit integers, each representing a level of
239ab1f9dacSPaul Mackerras 	 * depth in the associativity nodes.  The first is for an SMP
240ab1f9dacSPaul Mackerras 	 * configuration (should be all 0's) and the second is for a normal
241ab1f9dacSPaul Mackerras 	 * NUMA configuration.
242ab1f9dacSPaul Mackerras 	 */
243ab1f9dacSPaul Mackerras 	ref_points = (unsigned int *)get_property(rtas_root,
244ab1f9dacSPaul Mackerras 			"ibm,associativity-reference-points", &len);
245ab1f9dacSPaul Mackerras 
246ab1f9dacSPaul Mackerras 	if ((len >= 1) && ref_points) {
247ab1f9dacSPaul Mackerras 		depth = ref_points[1];
248ab1f9dacSPaul Mackerras 	} else {
249ab1f9dacSPaul Mackerras 		dbg("WARNING: could not find NUMA "
250ab1f9dacSPaul Mackerras 		    "associativity reference point\n");
251ab1f9dacSPaul Mackerras 		depth = -1;
252ab1f9dacSPaul Mackerras 	}
253ab1f9dacSPaul Mackerras 	of_node_put(rtas_root);
254ab1f9dacSPaul Mackerras 
255ab1f9dacSPaul Mackerras 	return depth;
256ab1f9dacSPaul Mackerras }
257ab1f9dacSPaul Mackerras 
25884c9fdd1SMike Kravetz static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
259ab1f9dacSPaul Mackerras {
260ab1f9dacSPaul Mackerras 	struct device_node *memory = NULL;
261ab1f9dacSPaul Mackerras 
262ab1f9dacSPaul Mackerras 	memory = of_find_node_by_type(memory, "memory");
26354c23310SPaul Mackerras 	if (!memory)
26484c9fdd1SMike Kravetz 		panic("numa.c: No memory nodes found!");
26554c23310SPaul Mackerras 
26684c9fdd1SMike Kravetz 	*n_addr_cells = prom_n_addr_cells(memory);
26784c9fdd1SMike Kravetz 	*n_size_cells = prom_n_size_cells(memory);
26884c9fdd1SMike Kravetz 	of_node_put(memory);
269ab1f9dacSPaul Mackerras }
270ab1f9dacSPaul Mackerras 
271237a0989SMike Kravetz static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
272ab1f9dacSPaul Mackerras {
273ab1f9dacSPaul Mackerras 	unsigned long result = 0;
274ab1f9dacSPaul Mackerras 
275ab1f9dacSPaul Mackerras 	while (n--) {
276ab1f9dacSPaul Mackerras 		result = (result << 32) | **buf;
277ab1f9dacSPaul Mackerras 		(*buf)++;
278ab1f9dacSPaul Mackerras 	}
279ab1f9dacSPaul Mackerras 	return result;
280ab1f9dacSPaul Mackerras }
281ab1f9dacSPaul Mackerras 
282ab1f9dacSPaul Mackerras /*
283ab1f9dacSPaul Mackerras  * Figure out to which domain a cpu belongs and stick it there.
284ab1f9dacSPaul Mackerras  * Return the id of the domain used.
285ab1f9dacSPaul Mackerras  */
/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int numa_setup_cpu(unsigned long lcpu)
{
	int numa_domain = 0;
	struct device_node *cpu = find_cpu_node(lcpu);

	if (!cpu) {
		/* No device tree node for this cpu: fall back to node 0. */
		WARN_ON(1);
		goto out;
	}

	numa_domain = of_node_numa_domain(cpu);

	/* Reject domains beyond what parse_numa_properties onlined. */
	if (numa_domain >= num_online_nodes()) {
		/*
		 * POWER4 LPAR uses 0xffff as invalid node,
		 * dont warn in this case.
		 */
		if (numa_domain != 0xffff)
			printk(KERN_ERR "WARNING: cpu %ld "
			       "maps to invalid NUMA node %d\n",
			       lcpu, numa_domain);
		numa_domain = 0;
	}
out:
	/* Make sure the chosen node is online before mapping the cpu. */
	node_set_online(numa_domain);

	map_cpu_to_node(lcpu, numa_domain);

	/* NOTE(review): on the !cpu path this passes NULL; presumably
	 * of_node_put(NULL) is a no-op — confirm against its definition. */
	of_node_put(cpu);

	return numa_domain;
}
318ab1f9dacSPaul Mackerras 
319ab1f9dacSPaul Mackerras static int cpu_numa_callback(struct notifier_block *nfb,
320ab1f9dacSPaul Mackerras 			     unsigned long action,
321ab1f9dacSPaul Mackerras 			     void *hcpu)
322ab1f9dacSPaul Mackerras {
323ab1f9dacSPaul Mackerras 	unsigned long lcpu = (unsigned long)hcpu;
324ab1f9dacSPaul Mackerras 	int ret = NOTIFY_DONE;
325ab1f9dacSPaul Mackerras 
326ab1f9dacSPaul Mackerras 	switch (action) {
327ab1f9dacSPaul Mackerras 	case CPU_UP_PREPARE:
328ab1f9dacSPaul Mackerras 		if (min_common_depth == -1 || !numa_enabled)
329ab1f9dacSPaul Mackerras 			map_cpu_to_node(lcpu, 0);
330ab1f9dacSPaul Mackerras 		else
331ab1f9dacSPaul Mackerras 			numa_setup_cpu(lcpu);
332ab1f9dacSPaul Mackerras 		ret = NOTIFY_OK;
333ab1f9dacSPaul Mackerras 		break;
334ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU
335ab1f9dacSPaul Mackerras 	case CPU_DEAD:
336ab1f9dacSPaul Mackerras 	case CPU_UP_CANCELED:
337ab1f9dacSPaul Mackerras 		unmap_cpu_from_node(lcpu);
338ab1f9dacSPaul Mackerras 		break;
339ab1f9dacSPaul Mackerras 		ret = NOTIFY_OK;
340ab1f9dacSPaul Mackerras #endif
341ab1f9dacSPaul Mackerras 	}
342ab1f9dacSPaul Mackerras 	return ret;
343ab1f9dacSPaul Mackerras }
344ab1f9dacSPaul Mackerras 
345ab1f9dacSPaul Mackerras /*
346ab1f9dacSPaul Mackerras  * Check and possibly modify a memory region to enforce the memory limit.
347ab1f9dacSPaul Mackerras  *
348ab1f9dacSPaul Mackerras  * Returns the size the region should have to enforce the memory limit.
349ab1f9dacSPaul Mackerras  * This will either be the original value of size, a truncated value,
350ab1f9dacSPaul Mackerras  * or zero. If the returned value of size is 0 the region should be
351ab1f9dacSPaul Mackerras  * discarded as it lies wholy above the memory limit.
352ab1f9dacSPaul Mackerras  */
35345fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start,
35445fb6ceaSAnton Blanchard 						      unsigned long size)
355ab1f9dacSPaul Mackerras {
356ab1f9dacSPaul Mackerras 	/*
357ab1f9dacSPaul Mackerras 	 * We use lmb_end_of_DRAM() in here instead of memory_limit because
358ab1f9dacSPaul Mackerras 	 * we've already adjusted it for the limit and it takes care of
359ab1f9dacSPaul Mackerras 	 * having memory holes below the limit.
360ab1f9dacSPaul Mackerras 	 */
361ab1f9dacSPaul Mackerras 
362ab1f9dacSPaul Mackerras 	if (! memory_limit)
363ab1f9dacSPaul Mackerras 		return size;
364ab1f9dacSPaul Mackerras 
365ab1f9dacSPaul Mackerras 	if (start + size <= lmb_end_of_DRAM())
366ab1f9dacSPaul Mackerras 		return size;
367ab1f9dacSPaul Mackerras 
368ab1f9dacSPaul Mackerras 	if (start >= lmb_end_of_DRAM())
369ab1f9dacSPaul Mackerras 		return 0;
370ab1f9dacSPaul Mackerras 
371ab1f9dacSPaul Mackerras 	return lmb_end_of_DRAM() - start;
372ab1f9dacSPaul Mackerras }
373ab1f9dacSPaul Mackerras 
/*
 * Walk the device tree to build the boot-time NUMA layout: determine
 * the associativity depth, find the highest node id used by any cpu,
 * record every memory range in init_node_data, and online all nodes.
 * Returns 0 on success, negative when NUMA is disabled or unusable
 * (the caller then falls back to setup_nonnuma()).
 */
static int __init parse_numa_properties(void)
{
	struct device_node *cpu = NULL;
	struct device_node *memory = NULL;
	int max_domain = 0;
	unsigned long i;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

	min_common_depth = find_min_common_depth();

	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
	if (min_common_depth < 0)
		return min_common_depth;

	/*
	 * Even though we connect cpus to numa domains later in SMP init,
	 * we need to know the maximum node id now. This is because each
	 * node id must have NODE_DATA etc backing it.
	 * As a result of hotplug we could still have cpus appear later on
	 * with larger node ids. In that case we force the cpu into node 0.
	 */
	for_each_cpu(i) {
		int numa_domain;

		cpu = find_cpu_node(i);

		if (cpu) {
			numa_domain = of_node_numa_domain(cpu);
			of_node_put(cpu);

			/* Domains >= MAX_NUMNODES (e.g. 0xffff) are ignored. */
			if (numa_domain < MAX_NUMNODES &&
			    max_domain < numa_domain)
				max_domain = numa_domain;
		}
	}

	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
	memory = NULL;
	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start;
		unsigned long size;
		int numa_domain;
		int ranges;
		unsigned int *memcell_buf;
		unsigned int len;

		/* Prefer "linux,usable-memory" (kdump) over plain "reg". */
		memcell_buf = (unsigned int *)get_property(memory,
			"linux,usable-memory", &len);
		if (!memcell_buf || len <= 0)
			memcell_buf =
				(unsigned int *)get_property(memory, "reg",
					&len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
new_range:
		/* these are order-sensitive, and modify the buffer pointer */
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);

		numa_domain = of_node_numa_domain(memory);

		if (numa_domain >= MAX_NUMNODES) {
			/* 0xffff is the POWER4 LPAR "invalid node" marker. */
			if (numa_domain != 0xffff)
				printk(KERN_ERR "WARNING: memory at %lx maps "
				       "to invalid NUMA node %d\n", start,
				       numa_domain);
			numa_domain = 0;
		}

		if (max_domain < numa_domain)
			max_domain = numa_domain;

		/* Drop or truncate ranges above the mem= limit. */
		if (!(size = numa_enforce_memory_limit(start, size))) {
			if (--ranges)
				goto new_range;
			else
				continue;
		}

		add_region(numa_domain, start >> PAGE_SHIFT,
			   size >> PAGE_SHIFT);

		if (--ranges)
			goto new_range;
	}

	for (i = 0; i <= max_domain; i++)
		node_set_online(i);

	/* Map the boot cpu now; the rest are mapped via the cpu notifier. */
	max_domain = numa_setup_cpu(boot_cpuid);

	return 0;
}
474ab1f9dacSPaul Mackerras 
475ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void)
476ab1f9dacSPaul Mackerras {
477ab1f9dacSPaul Mackerras 	unsigned long top_of_ram = lmb_end_of_DRAM();
478ab1f9dacSPaul Mackerras 	unsigned long total_ram = lmb_phys_mem_size();
479fb6d73d3SPaul Mackerras 	unsigned int i;
480ab1f9dacSPaul Mackerras 
481ab1f9dacSPaul Mackerras 	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
482ab1f9dacSPaul Mackerras 	       top_of_ram, total_ram);
483ab1f9dacSPaul Mackerras 	printk(KERN_INFO "Memory hole size: %ldMB\n",
484ab1f9dacSPaul Mackerras 	       (top_of_ram - total_ram) >> 20);
485ab1f9dacSPaul Mackerras 
486ab1f9dacSPaul Mackerras 	map_cpu_to_node(boot_cpuid, 0);
487fb6d73d3SPaul Mackerras 	for (i = 0; i < lmb.memory.cnt; ++i)
488fb6d73d3SPaul Mackerras 		add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
489fb6d73d3SPaul Mackerras 			   lmb_size_pages(&lmb.memory, i));
490ab1f9dacSPaul Mackerras 	node_set_online(0);
491ab1f9dacSPaul Mackerras }
492ab1f9dacSPaul Mackerras 
/*
 * Print each online node's cpus as compressed ranges, e.g.
 * "Node 0 CPUs: 0-3 8".  Silently does nothing when NUMA is off.
 */
void __init dump_numa_cpu_topology(void)
{
	unsigned int node;
	unsigned int cpu, count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		printk(KERN_INFO "Node %d CPUs:", node);

		/* count = length of the current run of consecutive cpus */
		count = 0;
		/*
		 * If we used a CPU iterator here we would miss printing
		 * the holes in the cpumap.
		 */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
				/* print only the first cpu of a run here */
				if (count == 0)
					printk(" %u", cpu);
				++count;
			} else {
				/* run ended: append "-last" for runs > 1 */
				if (count > 1)
					printk("-%u", cpu - 1);
				count = 0;
			}
		}

		/* close a run still open at the end of the cpu space */
		if (count > 1)
			printk("-%u", NR_CPUS - 1);
		printk("\n");
	}
}
5264b703a23SAnton Blanchard 
/*
 * Print each online node's memory as address ranges, scanning physical
 * memory in SECTION_SIZE_BITS steps and mapping each section to a node
 * with early_pfn_to_nid().  Silently does nothing when NUMA is off.
 */
static void __init dump_numa_memory_topology(void)
{
	unsigned int node;
	unsigned int count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		unsigned long i;

		printk(KERN_INFO "Node %d Memory:", node);

		/* count = number of consecutive sections on this node */
		count = 0;

		for (i = 0; i < lmb_end_of_DRAM();
		     i += (1 << SECTION_SIZE_BITS)) {
			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
				/* print the start address of a new run */
				if (count == 0)
					printk(" 0x%lx", i);
				++count;
			} else {
				/* run ended: i is the exclusive end address */
				if (count > 0)
					printk("-0x%lx", i);
				count = 0;
			}
		}

		/* close a run still open at the end of DRAM */
		if (count > 0)
			printk("-0x%lx", i);
		printk("\n");
	}
}
560ab1f9dacSPaul Mackerras 
561ab1f9dacSPaul Mackerras /*
562ab1f9dacSPaul Mackerras  * Allocate some memory, satisfying the lmb or bootmem allocator where
563ab1f9dacSPaul Mackerras  * required. nid is the preferred node and end is the physical address of
564ab1f9dacSPaul Mackerras  * the highest address in the node.
565ab1f9dacSPaul Mackerras  *
566ab1f9dacSPaul Mackerras  * Returns the physical address of the memory.
567ab1f9dacSPaul Mackerras  */
static void __init *careful_allocation(int nid, unsigned long size,
				       unsigned long align,
				       unsigned long end_pfn)
{
	int new_nid;
	/* First try to allocate below end_pfn, i.e. node-local memory. */
	unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);

	/* retry over all memory */
	if (!ret)
		ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());

	if (!ret)
		panic("numa.c: cannot allocate %lu bytes on node %d",
		      size, nid);

	/*
	 * If the memory came from a previously allocated node, we must
	 * retry with the bootmem allocator.
	 */
	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
	if (new_nid < nid) {
		/* new_nid < nid means its bootmem was already set up. */
		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
				size, align, 0);

		if (!ret)
			panic("numa.c: cannot allocate %lu bytes on node %d",
			      size, new_nid);

		/* bootmem returns a virtual address; convert back to
		 * physical so both paths return the same kind of value. */
		ret = __pa(ret);

		dbg("alloc_bootmem %lx %lx\n", ret, size);
	}

	/* NOTE(review): despite the void * type this is a PHYSICAL
	 * address; callers __va() it themselves (see do_init_bootmem). */
	return (void *)ret;
}
603ab1f9dacSPaul Mackerras 
604ab1f9dacSPaul Mackerras void __init do_init_bootmem(void)
605ab1f9dacSPaul Mackerras {
606ab1f9dacSPaul Mackerras 	int nid;
60745fb6ceaSAnton Blanchard 	unsigned int i;
608ab1f9dacSPaul Mackerras 	static struct notifier_block ppc64_numa_nb = {
609ab1f9dacSPaul Mackerras 		.notifier_call = cpu_numa_callback,
610ab1f9dacSPaul Mackerras 		.priority = 1 /* Must run before sched domains notifier. */
611ab1f9dacSPaul Mackerras 	};
612ab1f9dacSPaul Mackerras 
613ab1f9dacSPaul Mackerras 	min_low_pfn = 0;
614ab1f9dacSPaul Mackerras 	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
615ab1f9dacSPaul Mackerras 	max_pfn = max_low_pfn;
616ab1f9dacSPaul Mackerras 
617ab1f9dacSPaul Mackerras 	if (parse_numa_properties())
618ab1f9dacSPaul Mackerras 		setup_nonnuma();
619ab1f9dacSPaul Mackerras 	else
6204b703a23SAnton Blanchard 		dump_numa_memory_topology();
621ab1f9dacSPaul Mackerras 
622ab1f9dacSPaul Mackerras 	register_cpu_notifier(&ppc64_numa_nb);
623ab1f9dacSPaul Mackerras 
624ab1f9dacSPaul Mackerras 	for_each_online_node(nid) {
62545fb6ceaSAnton Blanchard 		unsigned long start_pfn, end_pfn, pages_present;
626ab1f9dacSPaul Mackerras 		unsigned long bootmem_paddr;
627ab1f9dacSPaul Mackerras 		unsigned long bootmap_pages;
628ab1f9dacSPaul Mackerras 
62945fb6ceaSAnton Blanchard 		get_region(nid, &start_pfn, &end_pfn, &pages_present);
630ab1f9dacSPaul Mackerras 
631ab1f9dacSPaul Mackerras 		/* Allocate the node structure node local if possible */
63245fb6ceaSAnton Blanchard 		NODE_DATA(nid) = careful_allocation(nid,
633ab1f9dacSPaul Mackerras 					sizeof(struct pglist_data),
63445fb6ceaSAnton Blanchard 					SMP_CACHE_BYTES, end_pfn);
63545fb6ceaSAnton Blanchard 		NODE_DATA(nid) = __va(NODE_DATA(nid));
636ab1f9dacSPaul Mackerras 		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
637ab1f9dacSPaul Mackerras 
638ab1f9dacSPaul Mackerras   		dbg("node %d\n", nid);
639ab1f9dacSPaul Mackerras 		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
640ab1f9dacSPaul Mackerras 
641ab1f9dacSPaul Mackerras 		NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
64245fb6ceaSAnton Blanchard 		NODE_DATA(nid)->node_start_pfn = start_pfn;
64345fb6ceaSAnton Blanchard 		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
644ab1f9dacSPaul Mackerras 
645ab1f9dacSPaul Mackerras 		if (NODE_DATA(nid)->node_spanned_pages == 0)
646ab1f9dacSPaul Mackerras   			continue;
647ab1f9dacSPaul Mackerras 
64845fb6ceaSAnton Blanchard   		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
64945fb6ceaSAnton Blanchard   		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
650ab1f9dacSPaul Mackerras 
65145fb6ceaSAnton Blanchard 		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
65245fb6ceaSAnton Blanchard 		bootmem_paddr = (unsigned long)careful_allocation(nid,
653ab1f9dacSPaul Mackerras 					bootmap_pages << PAGE_SHIFT,
65445fb6ceaSAnton Blanchard 					PAGE_SIZE, end_pfn);
65545fb6ceaSAnton Blanchard 		memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);
65645fb6ceaSAnton Blanchard 
657ab1f9dacSPaul Mackerras 		dbg("bootmap_paddr = %lx\n", bootmem_paddr);
658ab1f9dacSPaul Mackerras 
659ab1f9dacSPaul Mackerras 		init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
66045fb6ceaSAnton Blanchard 				  start_pfn, end_pfn);
661ab1f9dacSPaul Mackerras 
66245fb6ceaSAnton Blanchard 		/* Add free regions on this node */
66345fb6ceaSAnton Blanchard 		for (i = 0; init_node_data[i].end_pfn; i++) {
66445fb6ceaSAnton Blanchard 			unsigned long start, end;
665ab1f9dacSPaul Mackerras 
66645fb6ceaSAnton Blanchard 			if (init_node_data[i].nid != nid)
667ab1f9dacSPaul Mackerras 				continue;
668ab1f9dacSPaul Mackerras 
66945fb6ceaSAnton Blanchard 			start = init_node_data[i].start_pfn << PAGE_SHIFT;
67045fb6ceaSAnton Blanchard 			end = init_node_data[i].end_pfn << PAGE_SHIFT;
671ab1f9dacSPaul Mackerras 
67245fb6ceaSAnton Blanchard 			dbg("free_bootmem %lx %lx\n", start, end - start);
67345fb6ceaSAnton Blanchard   			free_bootmem_node(NODE_DATA(nid), start, end - start);
674ab1f9dacSPaul Mackerras 		}
675ab1f9dacSPaul Mackerras 
67645fb6ceaSAnton Blanchard 		/* Mark reserved regions on this node */
677ab1f9dacSPaul Mackerras 		for (i = 0; i < lmb.reserved.cnt; i++) {
678ab1f9dacSPaul Mackerras 			unsigned long physbase = lmb.reserved.region[i].base;
679ab1f9dacSPaul Mackerras 			unsigned long size = lmb.reserved.region[i].size;
68045fb6ceaSAnton Blanchard 			unsigned long start_paddr = start_pfn << PAGE_SHIFT;
68145fb6ceaSAnton Blanchard 			unsigned long end_paddr = end_pfn << PAGE_SHIFT;
682ab1f9dacSPaul Mackerras 
68345fb6ceaSAnton Blanchard 			if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid &&
68445fb6ceaSAnton Blanchard 			    early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid)
685ab1f9dacSPaul Mackerras 				continue;
686ab1f9dacSPaul Mackerras 
687ab1f9dacSPaul Mackerras 			if (physbase < end_paddr &&
688ab1f9dacSPaul Mackerras 			    (physbase+size) > start_paddr) {
689ab1f9dacSPaul Mackerras 				/* overlaps */
690ab1f9dacSPaul Mackerras 				if (physbase < start_paddr) {
691ab1f9dacSPaul Mackerras 					size -= start_paddr - physbase;
692ab1f9dacSPaul Mackerras 					physbase = start_paddr;
693ab1f9dacSPaul Mackerras 				}
694ab1f9dacSPaul Mackerras 
695ab1f9dacSPaul Mackerras 				if (size > end_paddr - physbase)
696ab1f9dacSPaul Mackerras 					size = end_paddr - physbase;
697ab1f9dacSPaul Mackerras 
698ab1f9dacSPaul Mackerras 				dbg("reserve_bootmem %lx %lx\n", physbase,
699ab1f9dacSPaul Mackerras 				    size);
700ab1f9dacSPaul Mackerras 				reserve_bootmem_node(NODE_DATA(nid), physbase,
701ab1f9dacSPaul Mackerras 						     size);
702ab1f9dacSPaul Mackerras 			}
703ab1f9dacSPaul Mackerras 		}
704ab1f9dacSPaul Mackerras 
70545fb6ceaSAnton Blanchard 		/* Add regions into sparsemem */
70645fb6ceaSAnton Blanchard 		for (i = 0; init_node_data[i].end_pfn; i++) {
70745fb6ceaSAnton Blanchard 			unsigned long start, end;
70845fb6ceaSAnton Blanchard 
70945fb6ceaSAnton Blanchard 			if (init_node_data[i].nid != nid)
710ab1f9dacSPaul Mackerras 				continue;
711ab1f9dacSPaul Mackerras 
71245fb6ceaSAnton Blanchard 			start = init_node_data[i].start_pfn;
71345fb6ceaSAnton Blanchard 			end = init_node_data[i].end_pfn;
714ab1f9dacSPaul Mackerras 
71545fb6ceaSAnton Blanchard 			memory_present(nid, start, end);
716ab1f9dacSPaul Mackerras 		}
717ab1f9dacSPaul Mackerras 	}
718ab1f9dacSPaul Mackerras }
719ab1f9dacSPaul Mackerras 
720ab1f9dacSPaul Mackerras void __init paging_init(void)
721ab1f9dacSPaul Mackerras {
722ab1f9dacSPaul Mackerras 	unsigned long zones_size[MAX_NR_ZONES];
723ab1f9dacSPaul Mackerras 	unsigned long zholes_size[MAX_NR_ZONES];
724ab1f9dacSPaul Mackerras 	int nid;
725ab1f9dacSPaul Mackerras 
726ab1f9dacSPaul Mackerras 	memset(zones_size, 0, sizeof(zones_size));
727ab1f9dacSPaul Mackerras 	memset(zholes_size, 0, sizeof(zholes_size));
728ab1f9dacSPaul Mackerras 
729ab1f9dacSPaul Mackerras 	for_each_online_node(nid) {
73045fb6ceaSAnton Blanchard 		unsigned long start_pfn, end_pfn, pages_present;
731ab1f9dacSPaul Mackerras 
73245fb6ceaSAnton Blanchard 		get_region(nid, &start_pfn, &end_pfn, &pages_present);
733ab1f9dacSPaul Mackerras 
734ab1f9dacSPaul Mackerras 		zones_size[ZONE_DMA] = end_pfn - start_pfn;
73545fb6ceaSAnton Blanchard 		zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;
736ab1f9dacSPaul Mackerras 
737ab1f9dacSPaul Mackerras 		dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
738ab1f9dacSPaul Mackerras 		    zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
739ab1f9dacSPaul Mackerras 
74045fb6ceaSAnton Blanchard 		free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
74145fb6ceaSAnton Blanchard 				    zholes_size);
742ab1f9dacSPaul Mackerras 	}
743ab1f9dacSPaul Mackerras }
744ab1f9dacSPaul Mackerras 
745ab1f9dacSPaul Mackerras static int __init early_numa(char *p)
746ab1f9dacSPaul Mackerras {
747ab1f9dacSPaul Mackerras 	if (!p)
748ab1f9dacSPaul Mackerras 		return 0;
749ab1f9dacSPaul Mackerras 
750ab1f9dacSPaul Mackerras 	if (strstr(p, "off"))
751ab1f9dacSPaul Mackerras 		numa_enabled = 0;
752ab1f9dacSPaul Mackerras 
753ab1f9dacSPaul Mackerras 	if (strstr(p, "debug"))
754ab1f9dacSPaul Mackerras 		numa_debug = 1;
755ab1f9dacSPaul Mackerras 
756ab1f9dacSPaul Mackerras 	return 0;
757ab1f9dacSPaul Mackerras }
758ab1f9dacSPaul Mackerras early_param("numa", early_numa);
759237a0989SMike Kravetz 
760237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG
761237a0989SMike Kravetz /*
762237a0989SMike Kravetz  * Find the node associated with a hot added memory section.  Section
763237a0989SMike Kravetz  * corresponds to a SPARSEMEM section, not an LMB.  It is assumed that
764237a0989SMike Kravetz  * sections are fully contained within a single LMB.
765237a0989SMike Kravetz  */
766237a0989SMike Kravetz int hot_add_scn_to_nid(unsigned long scn_addr)
767237a0989SMike Kravetz {
768237a0989SMike Kravetz 	struct device_node *memory = NULL;
769b226e462SMike Kravetz 	nodemask_t nodes;
770b226e462SMike Kravetz 	int numa_domain = 0;
771237a0989SMike Kravetz 
772237a0989SMike Kravetz 	if (!numa_enabled || (min_common_depth < 0))
773b226e462SMike Kravetz 		return numa_domain;
774237a0989SMike Kravetz 
775237a0989SMike Kravetz 	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
776237a0989SMike Kravetz 		unsigned long start, size;
777b226e462SMike Kravetz 		int ranges;
778237a0989SMike Kravetz 		unsigned int *memcell_buf;
779237a0989SMike Kravetz 		unsigned int len;
780237a0989SMike Kravetz 
781237a0989SMike Kravetz 		memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
782237a0989SMike Kravetz 		if (!memcell_buf || len <= 0)
783237a0989SMike Kravetz 			continue;
784237a0989SMike Kravetz 
785cc5d0189SBenjamin Herrenschmidt 		/* ranges in cell */
786cc5d0189SBenjamin Herrenschmidt 		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
787237a0989SMike Kravetz ha_new_range:
788237a0989SMike Kravetz 		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
789237a0989SMike Kravetz 		size = read_n_cells(n_mem_size_cells, &memcell_buf);
790237a0989SMike Kravetz 		numa_domain = of_node_numa_domain(memory);
791237a0989SMike Kravetz 
792237a0989SMike Kravetz 		/* Domains not present at boot default to 0 */
793237a0989SMike Kravetz 		if (!node_online(numa_domain))
794237a0989SMike Kravetz 			numa_domain = any_online_node(NODE_MASK_ALL);
795237a0989SMike Kravetz 
796237a0989SMike Kravetz 		if ((scn_addr >= start) && (scn_addr < (start + size))) {
797237a0989SMike Kravetz 			of_node_put(memory);
798b226e462SMike Kravetz 			goto got_numa_domain;
799237a0989SMike Kravetz 		}
800237a0989SMike Kravetz 
801237a0989SMike Kravetz 		if (--ranges)		/* process all ranges in cell */
802237a0989SMike Kravetz 			goto ha_new_range;
803237a0989SMike Kravetz 	}
804237a0989SMike Kravetz 	BUG();	/* section address should be found above */
805b226e462SMike Kravetz 
806b226e462SMike Kravetz 	/* Temporary code to ensure that returned node is not empty */
807b226e462SMike Kravetz got_numa_domain:
808b226e462SMike Kravetz 	nodes_setall(nodes);
809b226e462SMike Kravetz 	while (NODE_DATA(numa_domain)->node_spanned_pages == 0) {
810b226e462SMike Kravetz 		node_clear(numa_domain, nodes);
811b226e462SMike Kravetz 		numa_domain = any_online_node(nodes);
812b226e462SMike Kravetz 	}
813b226e462SMike Kravetz 	return numa_domain;
814237a0989SMike Kravetz }
815237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */
816