xref: /linux/arch/powerpc/mm/numa.c (revision ba7594852f4e7121b3f037d59f983637b795f0dd)
1ab1f9dacSPaul Mackerras /*
2ab1f9dacSPaul Mackerras  * pSeries NUMA support
3ab1f9dacSPaul Mackerras  *
4ab1f9dacSPaul Mackerras  * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
5ab1f9dacSPaul Mackerras  *
6ab1f9dacSPaul Mackerras  * This program is free software; you can redistribute it and/or
7ab1f9dacSPaul Mackerras  * modify it under the terms of the GNU General Public License
8ab1f9dacSPaul Mackerras  * as published by the Free Software Foundation; either version
9ab1f9dacSPaul Mackerras  * 2 of the License, or (at your option) any later version.
10ab1f9dacSPaul Mackerras  */
11ab1f9dacSPaul Mackerras #include <linux/threads.h>
12ab1f9dacSPaul Mackerras #include <linux/bootmem.h>
13ab1f9dacSPaul Mackerras #include <linux/init.h>
14ab1f9dacSPaul Mackerras #include <linux/mm.h>
15ab1f9dacSPaul Mackerras #include <linux/mmzone.h>
16ab1f9dacSPaul Mackerras #include <linux/module.h>
17ab1f9dacSPaul Mackerras #include <linux/nodemask.h>
18ab1f9dacSPaul Mackerras #include <linux/cpu.h>
19ab1f9dacSPaul Mackerras #include <linux/notifier.h>
2045fb6ceaSAnton Blanchard #include <asm/sparsemem.h>
21ab1f9dacSPaul Mackerras #include <asm/lmb.h>
22cf00a8d1SPaul Mackerras #include <asm/system.h>
232249ca9dSPaul Mackerras #include <asm/smp.h>
24ab1f9dacSPaul Mackerras 
/* NUMA is enabled unless the user turns it off on the command line. */
static int numa_enabled = 1;

static int numa_debug;
/*
 * Debug printk helper.  Wrapped in do { } while (0) so it expands to a
 * single statement: the original bare "if { }" form breaks unbraced
 * if/else callers (the macro's if would capture a following else).
 */
#define dbg(args...) do { if (numa_debug) printk(KERN_INFO args); } while (0)
29ab1f9dacSPaul Mackerras 
/* NUMA node id for each logical cpu. */
int numa_cpu_lookup_table[NR_CPUS];
/* For each node, the mask of cpus currently mapped to it. */
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
/* Per-node pglist_data pointers (NODE_DATA), allocated at boot. */
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);

/* Bootmem descriptors handed out to each node's NODE_DATA->bdata. */
static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
/* Associativity depth at which NUMA domains are read; -1 = no NUMA info. */
static int min_common_depth;
/* #address-cells / #size-cells of the memory nodes (set at parse time). */
static int n_mem_addr_cells, n_mem_size_cells;

/*
 * We need somewhere to store start/end/node for each region until we have
 * allocated the real node_data structures.
 */
#define MAX_REGIONS	(MAX_LMB_REGIONS*2)
static struct {
	unsigned long start_pfn;	/* first pfn of the region */
	unsigned long end_pfn;		/* one past the last pfn; 0 marks an unused slot */
	int nid;			/* owning NUMA node */
} init_node_data[MAX_REGIONS] __initdata;
52ab1f9dacSPaul Mackerras 
5345fb6ceaSAnton Blanchard int __init early_pfn_to_nid(unsigned long pfn)
5445fb6ceaSAnton Blanchard {
5545fb6ceaSAnton Blanchard 	unsigned int i;
5645fb6ceaSAnton Blanchard 
5745fb6ceaSAnton Blanchard 	for (i = 0; init_node_data[i].end_pfn; i++) {
5845fb6ceaSAnton Blanchard 		unsigned long start_pfn = init_node_data[i].start_pfn;
5945fb6ceaSAnton Blanchard 		unsigned long end_pfn = init_node_data[i].end_pfn;
6045fb6ceaSAnton Blanchard 
6145fb6ceaSAnton Blanchard 		if ((start_pfn <= pfn) && (pfn < end_pfn))
6245fb6ceaSAnton Blanchard 			return init_node_data[i].nid;
6345fb6ceaSAnton Blanchard 	}
6445fb6ceaSAnton Blanchard 
6545fb6ceaSAnton Blanchard 	return -1;
6645fb6ceaSAnton Blanchard }
6745fb6ceaSAnton Blanchard 
6845fb6ceaSAnton Blanchard void __init add_region(unsigned int nid, unsigned long start_pfn,
6945fb6ceaSAnton Blanchard 		       unsigned long pages)
7045fb6ceaSAnton Blanchard {
7145fb6ceaSAnton Blanchard 	unsigned int i;
7245fb6ceaSAnton Blanchard 
7345fb6ceaSAnton Blanchard 	dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
7445fb6ceaSAnton Blanchard 		nid, start_pfn, pages);
7545fb6ceaSAnton Blanchard 
7645fb6ceaSAnton Blanchard 	for (i = 0; init_node_data[i].end_pfn; i++) {
7745fb6ceaSAnton Blanchard 		if (init_node_data[i].nid != nid)
7845fb6ceaSAnton Blanchard 			continue;
7945fb6ceaSAnton Blanchard 		if (init_node_data[i].end_pfn == start_pfn) {
8045fb6ceaSAnton Blanchard 			init_node_data[i].end_pfn += pages;
8145fb6ceaSAnton Blanchard 			return;
8245fb6ceaSAnton Blanchard 		}
8345fb6ceaSAnton Blanchard 		if (init_node_data[i].start_pfn == (start_pfn + pages)) {
8445fb6ceaSAnton Blanchard 			init_node_data[i].start_pfn -= pages;
8545fb6ceaSAnton Blanchard 			return;
8645fb6ceaSAnton Blanchard 		}
8745fb6ceaSAnton Blanchard 	}
8845fb6ceaSAnton Blanchard 
8945fb6ceaSAnton Blanchard 	/*
9045fb6ceaSAnton Blanchard 	 * Leave last entry NULL so we dont iterate off the end (we use
9145fb6ceaSAnton Blanchard 	 * entry.end_pfn to terminate the walk).
9245fb6ceaSAnton Blanchard 	 */
9345fb6ceaSAnton Blanchard 	if (i >= (MAX_REGIONS - 1)) {
9445fb6ceaSAnton Blanchard 		printk(KERN_ERR "WARNING: too many memory regions in "
9545fb6ceaSAnton Blanchard 				"numa code, truncating\n");
9645fb6ceaSAnton Blanchard 		return;
9745fb6ceaSAnton Blanchard 	}
9845fb6ceaSAnton Blanchard 
9945fb6ceaSAnton Blanchard 	init_node_data[i].start_pfn = start_pfn;
10045fb6ceaSAnton Blanchard 	init_node_data[i].end_pfn = start_pfn + pages;
10145fb6ceaSAnton Blanchard 	init_node_data[i].nid = nid;
10245fb6ceaSAnton Blanchard }
10345fb6ceaSAnton Blanchard 
10445fb6ceaSAnton Blanchard /* We assume init_node_data has no overlapping regions */
10545fb6ceaSAnton Blanchard void __init get_region(unsigned int nid, unsigned long *start_pfn,
10645fb6ceaSAnton Blanchard 		       unsigned long *end_pfn, unsigned long *pages_present)
10745fb6ceaSAnton Blanchard {
10845fb6ceaSAnton Blanchard 	unsigned int i;
10945fb6ceaSAnton Blanchard 
11045fb6ceaSAnton Blanchard 	*start_pfn = -1UL;
11145fb6ceaSAnton Blanchard 	*end_pfn = *pages_present = 0;
11245fb6ceaSAnton Blanchard 
11345fb6ceaSAnton Blanchard 	for (i = 0; init_node_data[i].end_pfn; i++) {
11445fb6ceaSAnton Blanchard 		if (init_node_data[i].nid != nid)
11545fb6ceaSAnton Blanchard 			continue;
11645fb6ceaSAnton Blanchard 
11745fb6ceaSAnton Blanchard 		*pages_present += init_node_data[i].end_pfn -
11845fb6ceaSAnton Blanchard 			init_node_data[i].start_pfn;
11945fb6ceaSAnton Blanchard 
12045fb6ceaSAnton Blanchard 		if (init_node_data[i].start_pfn < *start_pfn)
12145fb6ceaSAnton Blanchard 			*start_pfn = init_node_data[i].start_pfn;
12245fb6ceaSAnton Blanchard 
12345fb6ceaSAnton Blanchard 		if (init_node_data[i].end_pfn > *end_pfn)
12445fb6ceaSAnton Blanchard 			*end_pfn = init_node_data[i].end_pfn;
12545fb6ceaSAnton Blanchard 	}
12645fb6ceaSAnton Blanchard 
12745fb6ceaSAnton Blanchard 	/* We didnt find a matching region, return start/end as 0 */
12845fb6ceaSAnton Blanchard 	if (*start_pfn == -1UL)
1296d91bb93SMike Kravetz 		*start_pfn = 0;
13045fb6ceaSAnton Blanchard }
131ab1f9dacSPaul Mackerras 
/* Bind a cpu to a node: record the node id and add the cpu to the
 * node's cpumask (skipping the set when it is already a member). */
static inline void map_cpu_to_node(int cpu, int node)
{
	numa_cpu_lookup_table[cpu] = node;

	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
		cpu_set(cpu, numa_cpumask_lookup_table[node]);
}
139ab1f9dacSPaul Mackerras 
140ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU
141ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu)
142ab1f9dacSPaul Mackerras {
143ab1f9dacSPaul Mackerras 	int node = numa_cpu_lookup_table[cpu];
144ab1f9dacSPaul Mackerras 
145ab1f9dacSPaul Mackerras 	dbg("removing cpu %lu from node %d\n", cpu, node);
146ab1f9dacSPaul Mackerras 
147ab1f9dacSPaul Mackerras 	if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
148ab1f9dacSPaul Mackerras 		cpu_clear(cpu, numa_cpumask_lookup_table[node]);
149ab1f9dacSPaul Mackerras 	} else {
150ab1f9dacSPaul Mackerras 		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
151ab1f9dacSPaul Mackerras 		       cpu, node);
152ab1f9dacSPaul Mackerras 	}
153ab1f9dacSPaul Mackerras }
154ab1f9dacSPaul Mackerras #endif /* CONFIG_HOTPLUG_CPU */
155ab1f9dacSPaul Mackerras 
/*
 * Find the device tree "cpu" node whose hardware id matches logical cpu
 * "cpu".  The returned node carries a reference taken by
 * of_find_node_by_type(); the caller must of_node_put() it.
 * Returns NULL if no node matches.
 */
static struct device_node *find_cpu_node(unsigned int cpu)
{
	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
	struct device_node *cpu_node = NULL;
	unsigned int *interrupt_server, *reg;
	int len;

	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
		/* Try interrupt server first */
		interrupt_server = (unsigned int *)get_property(cpu_node,
					"ibm,ppc-interrupt-server#s", &len);

		/* NOTE(review): len is converted from bytes to cell count
		 * here even when the property was absent; the NULL check
		 * below guards the use, but len may hold a stale value. */
		len = len / sizeof(u32);

		if (interrupt_server && (len > 0)) {
			/* A match on any listed server (thread) id wins. */
			while (len--) {
				if (interrupt_server[len] == hw_cpuid)
					return cpu_node;
			}
		} else {
			/* Fall back to the first cell of "reg". */
			reg = (unsigned int *)get_property(cpu_node,
							   "reg", &len);
			if (reg && (len > 0) && (reg[0] == hw_cpuid))
				return cpu_node;
		}
	}

	return NULL;
}
185ab1f9dacSPaul Mackerras 
186ab1f9dacSPaul Mackerras /* must hold reference to node during call */
187ab1f9dacSPaul Mackerras static int *of_get_associativity(struct device_node *dev)
188ab1f9dacSPaul Mackerras {
189ab1f9dacSPaul Mackerras 	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
190ab1f9dacSPaul Mackerras }
191ab1f9dacSPaul Mackerras 
/*
 * Return the NUMA domain of a device tree node: the associativity entry
 * at min_common_depth, or 0 when no usable information exists.
 */
static int of_node_numa_domain(struct device_node *device)
{
	int numa_domain;
	unsigned int *tmp;

	/* min_common_depth == -1 means the machine exposes no NUMA info. */
	if (min_common_depth == -1)
		return 0;

	tmp = of_get_associativity(device);
	/* tmp[0] is presumably the count of entries that follow (OF
	 * associativity list format) — hence this bounds check. */
	if (tmp && (tmp[0] >= min_common_depth)) {
		numa_domain = tmp[min_common_depth];
	} else {
		dbg("WARNING: no NUMA information for %s\n",
		    device->full_name);
		numa_domain = 0;
	}
	return numa_domain;
}
210ab1f9dacSPaul Mackerras 
211ab1f9dacSPaul Mackerras /*
212ab1f9dacSPaul Mackerras  * In theory, the "ibm,associativity" property may contain multiple
213ab1f9dacSPaul Mackerras  * associativity lists because a resource may be multiply connected
214ab1f9dacSPaul Mackerras  * into the machine.  This resource then has different associativity
215ab1f9dacSPaul Mackerras  * characteristics relative to its multiple connections.  We ignore
216ab1f9dacSPaul Mackerras  * this for now.  We also assume that all cpu and memory sets have
217ab1f9dacSPaul Mackerras  * their distances represented at a common level.  This won't be
218ab1f9dacSPaul Mackerras  * true for hierarchical NUMA.
219ab1f9dacSPaul Mackerras  *
220ab1f9dacSPaul Mackerras  * In any case the ibm,associativity-reference-points should give
221ab1f9dacSPaul Mackerras  * the correct depth for a normal NUMA system.
222ab1f9dacSPaul Mackerras  *
223ab1f9dacSPaul Mackerras  * - Dave Hansen <haveblue@us.ibm.com>
224ab1f9dacSPaul Mackerras  */
225ab1f9dacSPaul Mackerras static int __init find_min_common_depth(void)
226ab1f9dacSPaul Mackerras {
227ab1f9dacSPaul Mackerras 	int depth;
228ab1f9dacSPaul Mackerras 	unsigned int *ref_points;
229ab1f9dacSPaul Mackerras 	struct device_node *rtas_root;
230ab1f9dacSPaul Mackerras 	unsigned int len;
231ab1f9dacSPaul Mackerras 
232ab1f9dacSPaul Mackerras 	rtas_root = of_find_node_by_path("/rtas");
233ab1f9dacSPaul Mackerras 
234ab1f9dacSPaul Mackerras 	if (!rtas_root)
235ab1f9dacSPaul Mackerras 		return -1;
236ab1f9dacSPaul Mackerras 
237ab1f9dacSPaul Mackerras 	/*
238ab1f9dacSPaul Mackerras 	 * this property is 2 32-bit integers, each representing a level of
239ab1f9dacSPaul Mackerras 	 * depth in the associativity nodes.  The first is for an SMP
240ab1f9dacSPaul Mackerras 	 * configuration (should be all 0's) and the second is for a normal
241ab1f9dacSPaul Mackerras 	 * NUMA configuration.
242ab1f9dacSPaul Mackerras 	 */
243ab1f9dacSPaul Mackerras 	ref_points = (unsigned int *)get_property(rtas_root,
244ab1f9dacSPaul Mackerras 			"ibm,associativity-reference-points", &len);
245ab1f9dacSPaul Mackerras 
246ab1f9dacSPaul Mackerras 	if ((len >= 1) && ref_points) {
247ab1f9dacSPaul Mackerras 		depth = ref_points[1];
248ab1f9dacSPaul Mackerras 	} else {
249ab1f9dacSPaul Mackerras 		dbg("WARNING: could not find NUMA "
250ab1f9dacSPaul Mackerras 		    "associativity reference point\n");
251ab1f9dacSPaul Mackerras 		depth = -1;
252ab1f9dacSPaul Mackerras 	}
253ab1f9dacSPaul Mackerras 	of_node_put(rtas_root);
254ab1f9dacSPaul Mackerras 
255ab1f9dacSPaul Mackerras 	return depth;
256ab1f9dacSPaul Mackerras }
257ab1f9dacSPaul Mackerras 
25884c9fdd1SMike Kravetz static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
259ab1f9dacSPaul Mackerras {
260ab1f9dacSPaul Mackerras 	struct device_node *memory = NULL;
261ab1f9dacSPaul Mackerras 
262ab1f9dacSPaul Mackerras 	memory = of_find_node_by_type(memory, "memory");
26354c23310SPaul Mackerras 	if (!memory)
26484c9fdd1SMike Kravetz 		panic("numa.c: No memory nodes found!");
26554c23310SPaul Mackerras 
26684c9fdd1SMike Kravetz 	*n_addr_cells = prom_n_addr_cells(memory);
26784c9fdd1SMike Kravetz 	*n_size_cells = prom_n_size_cells(memory);
26884c9fdd1SMike Kravetz 	of_node_put(memory);
269ab1f9dacSPaul Mackerras }
270ab1f9dacSPaul Mackerras 
271237a0989SMike Kravetz static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
272ab1f9dacSPaul Mackerras {
273ab1f9dacSPaul Mackerras 	unsigned long result = 0;
274ab1f9dacSPaul Mackerras 
275ab1f9dacSPaul Mackerras 	while (n--) {
276ab1f9dacSPaul Mackerras 		result = (result << 32) | **buf;
277ab1f9dacSPaul Mackerras 		(*buf)++;
278ab1f9dacSPaul Mackerras 	}
279ab1f9dacSPaul Mackerras 	return result;
280ab1f9dacSPaul Mackerras }
281ab1f9dacSPaul Mackerras 
282ab1f9dacSPaul Mackerras /*
283ab1f9dacSPaul Mackerras  * Figure out to which domain a cpu belongs and stick it there.
284ab1f9dacSPaul Mackerras  * Return the id of the domain used.
285ab1f9dacSPaul Mackerras  */
/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int numa_setup_cpu(unsigned long lcpu)
{
	int numa_domain = 0;
	struct device_node *cpu = find_cpu_node(lcpu);

	/* No device tree node for this cpu: fall through and put it on
	 * node 0. */
	if (!cpu) {
		WARN_ON(1);
		goto out;
	}

	numa_domain = of_node_numa_domain(cpu);

	/* Domains at or beyond the online node count have no NODE_DATA
	 * backing, so they are forced to node 0. */
	if (numa_domain >= num_online_nodes()) {
		/*
		 * POWER4 LPAR uses 0xffff as invalid node,
		 * dont warn in this case.
		 */
		if (numa_domain != 0xffff)
			printk(KERN_ERR "WARNING: cpu %ld "
			       "maps to invalid NUMA node %d\n",
			       lcpu, numa_domain);
		numa_domain = 0;
	}
out:
	node_set_online(numa_domain);

	map_cpu_to_node(lcpu, numa_domain);

	/* NOTE(review): on the !cpu path this relies on of_node_put(NULL)
	 * being a no-op — confirm against the OF implementation. */
	of_node_put(cpu);

	return numa_domain;
}
318ab1f9dacSPaul Mackerras 
319ab1f9dacSPaul Mackerras static int cpu_numa_callback(struct notifier_block *nfb,
320ab1f9dacSPaul Mackerras 			     unsigned long action,
321ab1f9dacSPaul Mackerras 			     void *hcpu)
322ab1f9dacSPaul Mackerras {
323ab1f9dacSPaul Mackerras 	unsigned long lcpu = (unsigned long)hcpu;
324ab1f9dacSPaul Mackerras 	int ret = NOTIFY_DONE;
325ab1f9dacSPaul Mackerras 
326ab1f9dacSPaul Mackerras 	switch (action) {
327ab1f9dacSPaul Mackerras 	case CPU_UP_PREPARE:
328ab1f9dacSPaul Mackerras 		if (min_common_depth == -1 || !numa_enabled)
329ab1f9dacSPaul Mackerras 			map_cpu_to_node(lcpu, 0);
330ab1f9dacSPaul Mackerras 		else
331ab1f9dacSPaul Mackerras 			numa_setup_cpu(lcpu);
332ab1f9dacSPaul Mackerras 		ret = NOTIFY_OK;
333ab1f9dacSPaul Mackerras 		break;
334ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU
335ab1f9dacSPaul Mackerras 	case CPU_DEAD:
336ab1f9dacSPaul Mackerras 	case CPU_UP_CANCELED:
337ab1f9dacSPaul Mackerras 		unmap_cpu_from_node(lcpu);
338ab1f9dacSPaul Mackerras 		break;
339ab1f9dacSPaul Mackerras 		ret = NOTIFY_OK;
340ab1f9dacSPaul Mackerras #endif
341ab1f9dacSPaul Mackerras 	}
342ab1f9dacSPaul Mackerras 	return ret;
343ab1f9dacSPaul Mackerras }
344ab1f9dacSPaul Mackerras 
345ab1f9dacSPaul Mackerras /*
346ab1f9dacSPaul Mackerras  * Check and possibly modify a memory region to enforce the memory limit.
347ab1f9dacSPaul Mackerras  *
348ab1f9dacSPaul Mackerras  * Returns the size the region should have to enforce the memory limit.
349ab1f9dacSPaul Mackerras  * This will either be the original value of size, a truncated value,
350ab1f9dacSPaul Mackerras  * or zero. If the returned value of size is 0 the region should be
351ab1f9dacSPaul Mackerras  * discarded as it lies wholly above the memory limit.
352ab1f9dacSPaul Mackerras  */
35345fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start,
35445fb6ceaSAnton Blanchard 						      unsigned long size)
355ab1f9dacSPaul Mackerras {
356ab1f9dacSPaul Mackerras 	/*
357ab1f9dacSPaul Mackerras 	 * We use lmb_end_of_DRAM() in here instead of memory_limit because
358ab1f9dacSPaul Mackerras 	 * we've already adjusted it for the limit and it takes care of
359ab1f9dacSPaul Mackerras 	 * having memory holes below the limit.
360ab1f9dacSPaul Mackerras 	 */
361ab1f9dacSPaul Mackerras 
362ab1f9dacSPaul Mackerras 	if (! memory_limit)
363ab1f9dacSPaul Mackerras 		return size;
364ab1f9dacSPaul Mackerras 
365ab1f9dacSPaul Mackerras 	if (start + size <= lmb_end_of_DRAM())
366ab1f9dacSPaul Mackerras 		return size;
367ab1f9dacSPaul Mackerras 
368ab1f9dacSPaul Mackerras 	if (start >= lmb_end_of_DRAM())
369ab1f9dacSPaul Mackerras 		return 0;
370ab1f9dacSPaul Mackerras 
371ab1f9dacSPaul Mackerras 	return lmb_end_of_DRAM() - start;
372ab1f9dacSPaul Mackerras }
373ab1f9dacSPaul Mackerras 
/*
 * Walk the device tree and build the boot-time NUMA picture: establish
 * min_common_depth, assign the boot cpu to its node, find the largest
 * node id used by any cpu, and record every memory range (clipped to
 * any memory limit) via add_region().
 * Returns 0 on success; -1 when NUMA is disabled by the user; a
 * negative depth when no associativity info exists.  On failure the
 * caller falls back to setup_nonnuma().
 */
static int __init parse_numa_properties(void)
{
	struct device_node *cpu = NULL;
	struct device_node *memory = NULL;
	int max_domain;
	unsigned long i;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

	min_common_depth = find_min_common_depth();

	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
	if (min_common_depth < 0)
		return min_common_depth;

	max_domain = numa_setup_cpu(boot_cpuid);

	/*
	 * Even though we connect cpus to numa domains later in SMP init,
	 * we need to know the maximum node id now. This is because each
	 * node id must have NODE_DATA etc backing it.
	 * As a result of hotplug we could still have cpus appear later on
	 * with larger node ids. In that case we force the cpu into node 0.
	 */
	for_each_cpu(i) {
		int numa_domain;

		cpu = find_cpu_node(i);

		if (cpu) {
			numa_domain = of_node_numa_domain(cpu);
			of_node_put(cpu);

			if (numa_domain < MAX_NUMNODES &&
			    max_domain < numa_domain)
				max_domain = numa_domain;
		}
	}

	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
	memory = NULL;
	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start;
		unsigned long size;
		int numa_domain;
		int ranges;
		unsigned int *memcell_buf;
		unsigned int len;

		/* Prefer "linux,usable-memory" (presumably the restricted
		 * kdump view — confirm), falling back to "reg". */
		memcell_buf = (unsigned int *)get_property(memory,
			"linux,usable-memory", &len);
		if (!memcell_buf || len <= 0)
			memcell_buf =
				(unsigned int *)get_property(memory, "reg",
					&len);
		if (!memcell_buf || len <= 0)
			continue;

		ranges = memory->n_addrs;
new_range:
		/* these are order-sensitive, and modify the buffer pointer */
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);

		numa_domain = of_node_numa_domain(memory);

		if (numa_domain >= MAX_NUMNODES) {
			/* POWER4 LPAR uses 0xffff as "no node": stay quiet. */
			if (numa_domain != 0xffff)
				printk(KERN_ERR "WARNING: memory at %lx maps "
				       "to invalid NUMA node %d\n", start,
				       numa_domain);
			numa_domain = 0;
		}

		if (max_domain < numa_domain)
			max_domain = numa_domain;

		/* Clip to the memory limit; size 0 means the range lies
		 * entirely above it and is skipped. */
		if (!(size = numa_enforce_memory_limit(start, size))) {
			if (--ranges)
				goto new_range;
			else
				continue;
		}

		add_region(numa_domain, start >> PAGE_SHIFT,
			   size >> PAGE_SHIFT);

		if (--ranges)
			goto new_range;
	}

	/* Every id up to max_domain gets NODE_DATA, so mark them online. */
	for (i = 0; i <= max_domain; i++)
		node_set_online(i);

	return 0;
}
473ab1f9dacSPaul Mackerras 
474ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void)
475ab1f9dacSPaul Mackerras {
476ab1f9dacSPaul Mackerras 	unsigned long top_of_ram = lmb_end_of_DRAM();
477ab1f9dacSPaul Mackerras 	unsigned long total_ram = lmb_phys_mem_size();
478fb6d73d3SPaul Mackerras 	unsigned int i;
479ab1f9dacSPaul Mackerras 
480ab1f9dacSPaul Mackerras 	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
481ab1f9dacSPaul Mackerras 	       top_of_ram, total_ram);
482ab1f9dacSPaul Mackerras 	printk(KERN_INFO "Memory hole size: %ldMB\n",
483ab1f9dacSPaul Mackerras 	       (top_of_ram - total_ram) >> 20);
484ab1f9dacSPaul Mackerras 
485ab1f9dacSPaul Mackerras 	map_cpu_to_node(boot_cpuid, 0);
486fb6d73d3SPaul Mackerras 	for (i = 0; i < lmb.memory.cnt; ++i)
487fb6d73d3SPaul Mackerras 		add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
488fb6d73d3SPaul Mackerras 			   lmb_size_pages(&lmb.memory, i));
489ab1f9dacSPaul Mackerras 	node_set_online(0);
490ab1f9dacSPaul Mackerras }
491ab1f9dacSPaul Mackerras 
/*
 * Print, for each online node, the physical address ranges it owns.
 * DRAM is scanned in section-sized steps and contiguous runs owned by
 * the node are printed as "start-end" pairs.
 */
static void __init dump_numa_topology(void)
{
	unsigned int node;
	unsigned int count;

	/* Nothing to show on non-NUMA configurations. */
	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		unsigned long i;

		printk(KERN_INFO "Node %d Memory:", node);

		/* count is the length of the current run of sections
		 * belonging to this node; 0 means we are between runs. */
		count = 0;

		for (i = 0; i < lmb_end_of_DRAM();
		     i += (1 << SECTION_SIZE_BITS)) {
			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
				/* Opening a new run: print its start. */
				if (count == 0)
					printk(" 0x%lx", i);
				++count;
			} else {
				/* Run just ended: print its end address. */
				if (count > 0)
					printk("-0x%lx", i);
				count = 0;
			}
		}

		/* Close a run that extends to the very end of DRAM. */
		if (count > 0)
			printk("-0x%lx", i);
		printk("\n");
	}
	return;
}
526ab1f9dacSPaul Mackerras 
527ab1f9dacSPaul Mackerras /*
528ab1f9dacSPaul Mackerras  * Allocate some memory, satisfying the lmb or bootmem allocator where
529ab1f9dacSPaul Mackerras  * required. nid is the preferred node and end is the physical address of
530ab1f9dacSPaul Mackerras  * the highest address in the node.
531ab1f9dacSPaul Mackerras  *
532ab1f9dacSPaul Mackerras  * Returns the physical address of the memory.
533ab1f9dacSPaul Mackerras  */
static void __init *careful_allocation(int nid, unsigned long size,
				       unsigned long align,
				       unsigned long end_pfn)
{
	int new_nid;
	/* First try LMB memory below this node's end address. */
	unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);

	/* retry over all memory */
	if (!ret)
		ret = lmb_alloc_base(size, align, lmb_end_of_DRAM());

	if (!ret)
		panic("numa.c: cannot allocate %lu bytes on node %d",
		      size, nid);

	/*
	 * If the memory came from a previously allocated node, we must
	 * retry with the bootmem allocator.
	 */
	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
	if (new_nid < nid) {
		/* Lower-numbered nodes are set up earlier, so their memory
		 * is owned by bootmem; allocate from there instead. */
		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
				size, align, 0);

		if (!ret)
			panic("numa.c: cannot allocate %lu bytes on node %d",
			      size, new_nid);

		/* Convert the bootmem virtual address back to physical so
		 * both paths return the same kind of address. */
		ret = __pa(ret);

		dbg("alloc_bootmem %lx %lx\n", ret, size);
	}

	/* NOTE(review): despite the void * return type this is a PHYSICAL
	 * address; callers __va() it before use. */
	return (void *)ret;
}
569ab1f9dacSPaul Mackerras 
570ab1f9dacSPaul Mackerras void __init do_init_bootmem(void)
571ab1f9dacSPaul Mackerras {
572ab1f9dacSPaul Mackerras 	int nid;
57345fb6ceaSAnton Blanchard 	unsigned int i;
574ab1f9dacSPaul Mackerras 	static struct notifier_block ppc64_numa_nb = {
575ab1f9dacSPaul Mackerras 		.notifier_call = cpu_numa_callback,
576ab1f9dacSPaul Mackerras 		.priority = 1 /* Must run before sched domains notifier. */
577ab1f9dacSPaul Mackerras 	};
578ab1f9dacSPaul Mackerras 
579ab1f9dacSPaul Mackerras 	min_low_pfn = 0;
580ab1f9dacSPaul Mackerras 	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
581ab1f9dacSPaul Mackerras 	max_pfn = max_low_pfn;
582ab1f9dacSPaul Mackerras 
583ab1f9dacSPaul Mackerras 	if (parse_numa_properties())
584ab1f9dacSPaul Mackerras 		setup_nonnuma();
585ab1f9dacSPaul Mackerras 	else
586ab1f9dacSPaul Mackerras 		dump_numa_topology();
587ab1f9dacSPaul Mackerras 
588ab1f9dacSPaul Mackerras 	register_cpu_notifier(&ppc64_numa_nb);
589ab1f9dacSPaul Mackerras 
590ab1f9dacSPaul Mackerras 	for_each_online_node(nid) {
59145fb6ceaSAnton Blanchard 		unsigned long start_pfn, end_pfn, pages_present;
592ab1f9dacSPaul Mackerras 		unsigned long bootmem_paddr;
593ab1f9dacSPaul Mackerras 		unsigned long bootmap_pages;
594ab1f9dacSPaul Mackerras 
59545fb6ceaSAnton Blanchard 		get_region(nid, &start_pfn, &end_pfn, &pages_present);
596ab1f9dacSPaul Mackerras 
597ab1f9dacSPaul Mackerras 		/* Allocate the node structure node local if possible */
59845fb6ceaSAnton Blanchard 		NODE_DATA(nid) = careful_allocation(nid,
599ab1f9dacSPaul Mackerras 					sizeof(struct pglist_data),
60045fb6ceaSAnton Blanchard 					SMP_CACHE_BYTES, end_pfn);
60145fb6ceaSAnton Blanchard 		NODE_DATA(nid) = __va(NODE_DATA(nid));
602ab1f9dacSPaul Mackerras 		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
603ab1f9dacSPaul Mackerras 
604ab1f9dacSPaul Mackerras   		dbg("node %d\n", nid);
605ab1f9dacSPaul Mackerras 		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
606ab1f9dacSPaul Mackerras 
607ab1f9dacSPaul Mackerras 		NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
60845fb6ceaSAnton Blanchard 		NODE_DATA(nid)->node_start_pfn = start_pfn;
60945fb6ceaSAnton Blanchard 		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
610ab1f9dacSPaul Mackerras 
611ab1f9dacSPaul Mackerras 		if (NODE_DATA(nid)->node_spanned_pages == 0)
612ab1f9dacSPaul Mackerras   			continue;
613ab1f9dacSPaul Mackerras 
61445fb6ceaSAnton Blanchard   		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
61545fb6ceaSAnton Blanchard   		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
616ab1f9dacSPaul Mackerras 
61745fb6ceaSAnton Blanchard 		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
61845fb6ceaSAnton Blanchard 		bootmem_paddr = (unsigned long)careful_allocation(nid,
619ab1f9dacSPaul Mackerras 					bootmap_pages << PAGE_SHIFT,
62045fb6ceaSAnton Blanchard 					PAGE_SIZE, end_pfn);
62145fb6ceaSAnton Blanchard 		memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);
62245fb6ceaSAnton Blanchard 
623ab1f9dacSPaul Mackerras 		dbg("bootmap_paddr = %lx\n", bootmem_paddr);
624ab1f9dacSPaul Mackerras 
625ab1f9dacSPaul Mackerras 		init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
62645fb6ceaSAnton Blanchard 				  start_pfn, end_pfn);
627ab1f9dacSPaul Mackerras 
62845fb6ceaSAnton Blanchard 		/* Add free regions on this node */
62945fb6ceaSAnton Blanchard 		for (i = 0; init_node_data[i].end_pfn; i++) {
63045fb6ceaSAnton Blanchard 			unsigned long start, end;
631ab1f9dacSPaul Mackerras 
63245fb6ceaSAnton Blanchard 			if (init_node_data[i].nid != nid)
633ab1f9dacSPaul Mackerras 				continue;
634ab1f9dacSPaul Mackerras 
63545fb6ceaSAnton Blanchard 			start = init_node_data[i].start_pfn << PAGE_SHIFT;
63645fb6ceaSAnton Blanchard 			end = init_node_data[i].end_pfn << PAGE_SHIFT;
637ab1f9dacSPaul Mackerras 
63845fb6ceaSAnton Blanchard 			dbg("free_bootmem %lx %lx\n", start, end - start);
63945fb6ceaSAnton Blanchard   			free_bootmem_node(NODE_DATA(nid), start, end - start);
640ab1f9dacSPaul Mackerras 		}
641ab1f9dacSPaul Mackerras 
64245fb6ceaSAnton Blanchard 		/* Mark reserved regions on this node */
643ab1f9dacSPaul Mackerras 		for (i = 0; i < lmb.reserved.cnt; i++) {
644ab1f9dacSPaul Mackerras 			unsigned long physbase = lmb.reserved.region[i].base;
645ab1f9dacSPaul Mackerras 			unsigned long size = lmb.reserved.region[i].size;
64645fb6ceaSAnton Blanchard 			unsigned long start_paddr = start_pfn << PAGE_SHIFT;
64745fb6ceaSAnton Blanchard 			unsigned long end_paddr = end_pfn << PAGE_SHIFT;
648ab1f9dacSPaul Mackerras 
64945fb6ceaSAnton Blanchard 			if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid &&
65045fb6ceaSAnton Blanchard 			    early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid)
651ab1f9dacSPaul Mackerras 				continue;
652ab1f9dacSPaul Mackerras 
653ab1f9dacSPaul Mackerras 			if (physbase < end_paddr &&
654ab1f9dacSPaul Mackerras 			    (physbase+size) > start_paddr) {
655ab1f9dacSPaul Mackerras 				/* overlaps */
656ab1f9dacSPaul Mackerras 				if (physbase < start_paddr) {
657ab1f9dacSPaul Mackerras 					size -= start_paddr - physbase;
658ab1f9dacSPaul Mackerras 					physbase = start_paddr;
659ab1f9dacSPaul Mackerras 				}
660ab1f9dacSPaul Mackerras 
661ab1f9dacSPaul Mackerras 				if (size > end_paddr - physbase)
662ab1f9dacSPaul Mackerras 					size = end_paddr - physbase;
663ab1f9dacSPaul Mackerras 
664ab1f9dacSPaul Mackerras 				dbg("reserve_bootmem %lx %lx\n", physbase,
665ab1f9dacSPaul Mackerras 				    size);
666ab1f9dacSPaul Mackerras 				reserve_bootmem_node(NODE_DATA(nid), physbase,
667ab1f9dacSPaul Mackerras 						     size);
668ab1f9dacSPaul Mackerras 			}
669ab1f9dacSPaul Mackerras 		}
670ab1f9dacSPaul Mackerras 
67145fb6ceaSAnton Blanchard 		/* Add regions into sparsemem */
67245fb6ceaSAnton Blanchard 		for (i = 0; init_node_data[i].end_pfn; i++) {
67345fb6ceaSAnton Blanchard 			unsigned long start, end;
67445fb6ceaSAnton Blanchard 
67545fb6ceaSAnton Blanchard 			if (init_node_data[i].nid != nid)
676ab1f9dacSPaul Mackerras 				continue;
677ab1f9dacSPaul Mackerras 
67845fb6ceaSAnton Blanchard 			start = init_node_data[i].start_pfn;
67945fb6ceaSAnton Blanchard 			end = init_node_data[i].end_pfn;
680ab1f9dacSPaul Mackerras 
68145fb6ceaSAnton Blanchard 			memory_present(nid, start, end);
682ab1f9dacSPaul Mackerras 		}
683ab1f9dacSPaul Mackerras 	}
684ab1f9dacSPaul Mackerras }
685ab1f9dacSPaul Mackerras 
686ab1f9dacSPaul Mackerras void __init paging_init(void)
687ab1f9dacSPaul Mackerras {
688ab1f9dacSPaul Mackerras 	unsigned long zones_size[MAX_NR_ZONES];
689ab1f9dacSPaul Mackerras 	unsigned long zholes_size[MAX_NR_ZONES];
690ab1f9dacSPaul Mackerras 	int nid;
691ab1f9dacSPaul Mackerras 
692ab1f9dacSPaul Mackerras 	memset(zones_size, 0, sizeof(zones_size));
693ab1f9dacSPaul Mackerras 	memset(zholes_size, 0, sizeof(zholes_size));
694ab1f9dacSPaul Mackerras 
695ab1f9dacSPaul Mackerras 	for_each_online_node(nid) {
69645fb6ceaSAnton Blanchard 		unsigned long start_pfn, end_pfn, pages_present;
697ab1f9dacSPaul Mackerras 
69845fb6ceaSAnton Blanchard 		get_region(nid, &start_pfn, &end_pfn, &pages_present);
699ab1f9dacSPaul Mackerras 
700ab1f9dacSPaul Mackerras 		zones_size[ZONE_DMA] = end_pfn - start_pfn;
70145fb6ceaSAnton Blanchard 		zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;
702ab1f9dacSPaul Mackerras 
703ab1f9dacSPaul Mackerras 		dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
704ab1f9dacSPaul Mackerras 		    zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
705ab1f9dacSPaul Mackerras 
70645fb6ceaSAnton Blanchard 		free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
70745fb6ceaSAnton Blanchard 				    zholes_size);
708ab1f9dacSPaul Mackerras 	}
709ab1f9dacSPaul Mackerras }
710ab1f9dacSPaul Mackerras 
711ab1f9dacSPaul Mackerras static int __init early_numa(char *p)
712ab1f9dacSPaul Mackerras {
713ab1f9dacSPaul Mackerras 	if (!p)
714ab1f9dacSPaul Mackerras 		return 0;
715ab1f9dacSPaul Mackerras 
716ab1f9dacSPaul Mackerras 	if (strstr(p, "off"))
717ab1f9dacSPaul Mackerras 		numa_enabled = 0;
718ab1f9dacSPaul Mackerras 
719ab1f9dacSPaul Mackerras 	if (strstr(p, "debug"))
720ab1f9dacSPaul Mackerras 		numa_debug = 1;
721ab1f9dacSPaul Mackerras 
722ab1f9dacSPaul Mackerras 	return 0;
723ab1f9dacSPaul Mackerras }
724ab1f9dacSPaul Mackerras early_param("numa", early_numa);
725237a0989SMike Kravetz 
726237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG
727237a0989SMike Kravetz /*
728237a0989SMike Kravetz  * Find the node associated with a hot added memory section.  Section
729237a0989SMike Kravetz  * corresponds to a SPARSEMEM section, not an LMB.  It is assumed that
730237a0989SMike Kravetz  * sections are fully contained within a single LMB.
731237a0989SMike Kravetz  */
732237a0989SMike Kravetz int hot_add_scn_to_nid(unsigned long scn_addr)
733237a0989SMike Kravetz {
734237a0989SMike Kravetz 	struct device_node *memory = NULL;
735237a0989SMike Kravetz 
736237a0989SMike Kravetz 	if (!numa_enabled || (min_common_depth < 0))
737237a0989SMike Kravetz 		return 0;
738237a0989SMike Kravetz 
739237a0989SMike Kravetz 	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
740237a0989SMike Kravetz 		unsigned long start, size;
741237a0989SMike Kravetz 		int numa_domain, ranges;
742237a0989SMike Kravetz 		unsigned int *memcell_buf;
743237a0989SMike Kravetz 		unsigned int len;
744237a0989SMike Kravetz 
745237a0989SMike Kravetz 		memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
746237a0989SMike Kravetz 		if (!memcell_buf || len <= 0)
747237a0989SMike Kravetz 			continue;
748237a0989SMike Kravetz 
749237a0989SMike Kravetz 		ranges = memory->n_addrs;	/* ranges in cell */
750237a0989SMike Kravetz ha_new_range:
751237a0989SMike Kravetz 		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
752237a0989SMike Kravetz 		size = read_n_cells(n_mem_size_cells, &memcell_buf);
753237a0989SMike Kravetz 		numa_domain = of_node_numa_domain(memory);
754237a0989SMike Kravetz 
755237a0989SMike Kravetz 		/* Domains not present at boot default to 0 */
756237a0989SMike Kravetz 		if (!node_online(numa_domain))
757237a0989SMike Kravetz 			numa_domain = any_online_node(NODE_MASK_ALL);
758237a0989SMike Kravetz 
759237a0989SMike Kravetz 		if ((scn_addr >= start) && (scn_addr < (start + size))) {
760237a0989SMike Kravetz 			of_node_put(memory);
761237a0989SMike Kravetz 			return numa_domain;
762237a0989SMike Kravetz 		}
763237a0989SMike Kravetz 
764237a0989SMike Kravetz 		if (--ranges)		/* process all ranges in cell */
765237a0989SMike Kravetz 			goto ha_new_range;
766237a0989SMike Kravetz 	}
767237a0989SMike Kravetz 
768237a0989SMike Kravetz 	BUG();	/* section address should be found above */
769237a0989SMike Kravetz 	return 0;
770237a0989SMike Kravetz }
771237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */
772