xref: /linux/arch/powerpc/mm/numa.c (revision 3fdfd99051fbc210464378cd44a4b8914282bac3)
1ab1f9dacSPaul Mackerras /*
2ab1f9dacSPaul Mackerras  * pSeries NUMA support
3ab1f9dacSPaul Mackerras  *
4ab1f9dacSPaul Mackerras  * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
5ab1f9dacSPaul Mackerras  *
6ab1f9dacSPaul Mackerras  * This program is free software; you can redistribute it and/or
7ab1f9dacSPaul Mackerras  * modify it under the terms of the GNU General Public License
8ab1f9dacSPaul Mackerras  * as published by the Free Software Foundation; either version
9ab1f9dacSPaul Mackerras  * 2 of the License, or (at your option) any later version.
10ab1f9dacSPaul Mackerras  */
11ab1f9dacSPaul Mackerras #include <linux/threads.h>
12ab1f9dacSPaul Mackerras #include <linux/bootmem.h>
13ab1f9dacSPaul Mackerras #include <linux/init.h>
14ab1f9dacSPaul Mackerras #include <linux/mm.h>
15ab1f9dacSPaul Mackerras #include <linux/mmzone.h>
16ab1f9dacSPaul Mackerras #include <linux/module.h>
17ab1f9dacSPaul Mackerras #include <linux/nodemask.h>
18ab1f9dacSPaul Mackerras #include <linux/cpu.h>
19ab1f9dacSPaul Mackerras #include <linux/notifier.h>
2095f72d1eSYinghai Lu #include <linux/memblock.h>
216df1646eSMichael Ellerman #include <linux/of.h>
2206eccea6SDave Hansen #include <linux/pfn.h>
2345fb6ceaSAnton Blanchard #include <asm/sparsemem.h>
24d9b2b2a2SDavid S. Miller #include <asm/prom.h>
25cf00a8d1SPaul Mackerras #include <asm/system.h>
262249ca9dSPaul Mackerras #include <asm/smp.h>
27ab1f9dacSPaul Mackerras 
28ab1f9dacSPaul Mackerras static int numa_enabled = 1;
29ab1f9dacSPaul Mackerras 
301daa6d08SBalbir Singh static char *cmdline __initdata;
311daa6d08SBalbir Singh 
static int numa_debug;
/*
 * Debug print helper, active only when numa_debug is set.
 * Wrapped in do { } while (0) so it expands to a single statement:
 * the previous bare "if (numa_debug) { ... }" form broke when used as
 * "if (cond) dbg(...); else ..." (stray ';' / dangling-else hazard).
 */
#define dbg(args...) do { if (numa_debug) printk(KERN_INFO args); } while (0)
34ab1f9dacSPaul Mackerras 
3545fb6ceaSAnton Blanchard int numa_cpu_lookup_table[NR_CPUS];
3625863de0SAnton Blanchard cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
37ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES];
3845fb6ceaSAnton Blanchard 
3945fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpu_lookup_table);
4025863de0SAnton Blanchard EXPORT_SYMBOL(node_to_cpumask_map);
4145fb6ceaSAnton Blanchard EXPORT_SYMBOL(node_data);
4245fb6ceaSAnton Blanchard 
43ab1f9dacSPaul Mackerras static int min_common_depth;
44237a0989SMike Kravetz static int n_mem_addr_cells, n_mem_size_cells;
45ab1f9dacSPaul Mackerras 
/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * Note: node_to_cpumask() is not valid until after this is done.
 */
static void __init setup_node_to_cpumask_map(void)
{
	unsigned int node, num = 0;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES) {
		/* still at its compile-time default: derive it as
		 * (highest possible node id) + 1 */
		for_each_node_mask(node, node_possible_map)
			num = node;
		nr_node_ids = num + 1;
	}

	/* allocate the map */
	for (node = 0; node < nr_node_ids; node++)
		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);

	/* cpumask_of_node() will now work */
	dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
}
7025863de0SAnton Blanchard 
/*
 * Carve memory into fake NUMA nodes at the boundaries listed in the
 * "fake=" command line string held in 'cmdline'.  Called once per real
 * memory range, in ascending address order; keeps its progress in the
 * static fake_nid/curr_boundary across calls.
 *
 * @end_pfn: end (exclusive) of the memory range being placed
 * @nid: in/out node id; overwritten with the fake node id in use
 * Returns 1 if a new fake node was started at this range, else 0.
 */
static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
						unsigned int *nid)
{
	unsigned long long mem;
	char *p = cmdline;		/* remaining, unparsed boundaries */
	static unsigned int fake_nid;
	static unsigned long long curr_boundary;

	/*
	 * Modify node id, iff we started creating NUMA nodes
	 * We want to continue from where we left of the last time
	 */
	if (fake_nid)
		*nid = fake_nid;
	/*
	 * In case there are no more arguments to parse, the
	 * node_id should be the same as the last fake node id
	 * (we've handled this above).
	 */
	if (!p)
		return 0;

	mem = memparse(p, &p);
	if (!mem)
		return 0;

	/* boundaries must be ascending; ignore one that goes backwards */
	if (mem < curr_boundary)
		return 0;

	curr_boundary = mem;

	if ((end_pfn << PAGE_SHIFT) > mem) {
		/*
		 * Skip commas and spaces
		 */
		while (*p == ',' || *p == ' ' || *p == '\t')
			p++;

		cmdline = p;	/* remember where parsing stopped */
		fake_nid++;
		*nid = fake_nid;
		dbg("created new fake_node with id %d\n", fake_nid);
		return 1;
	}
	return 0;
}
1171daa6d08SBalbir Singh 
1188f64e1f2SJon Tollefson /*
1198f64e1f2SJon Tollefson  * get_active_region_work_fn - A helper function for get_node_active_region
1208f64e1f2SJon Tollefson  *	Returns datax set to the start_pfn and end_pfn if they contain
1218f64e1f2SJon Tollefson  *	the initial value of datax->start_pfn between them
1228f64e1f2SJon Tollefson  * @start_pfn: start page(inclusive) of region to check
1238f64e1f2SJon Tollefson  * @end_pfn: end page(exclusive) of region to check
1248f64e1f2SJon Tollefson  * @datax: comes in with ->start_pfn set to value to search for and
1258f64e1f2SJon Tollefson  *	goes out with active range if it contains it
1268f64e1f2SJon Tollefson  * Returns 1 if search value is in range else 0
1278f64e1f2SJon Tollefson  */
1288f64e1f2SJon Tollefson static int __init get_active_region_work_fn(unsigned long start_pfn,
1298f64e1f2SJon Tollefson 					unsigned long end_pfn, void *datax)
1308f64e1f2SJon Tollefson {
1318f64e1f2SJon Tollefson 	struct node_active_region *data;
1328f64e1f2SJon Tollefson 	data = (struct node_active_region *)datax;
1338f64e1f2SJon Tollefson 
1348f64e1f2SJon Tollefson 	if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) {
1358f64e1f2SJon Tollefson 		data->start_pfn = start_pfn;
1368f64e1f2SJon Tollefson 		data->end_pfn = end_pfn;
1378f64e1f2SJon Tollefson 		return 1;
1388f64e1f2SJon Tollefson 	}
1398f64e1f2SJon Tollefson 	return 0;
1408f64e1f2SJon Tollefson 
1418f64e1f2SJon Tollefson }
1428f64e1f2SJon Tollefson 
/*
 * get_node_active_region - Return active region containing start_pfn
 * Active range returned is empty if none found.
 * @start_pfn: The page to return the region for.
 * @node_ar: Returned set to the active region containing start_pfn
 */
static void __init get_node_active_region(unsigned long start_pfn,
		       struct node_active_region *node_ar)
{
	int nid = early_pfn_to_nid(start_pfn);

	node_ar->nid = nid;
	node_ar->start_pfn = start_pfn;
	/* start == end marks "empty" unless the callback finds a match */
	node_ar->end_pfn = start_pfn;
	work_with_active_regions(nid, get_active_region_work_fn, node_ar);
}
1598f64e1f2SJon Tollefson 
1602e5ce39dSNathan Lynch static void __cpuinit map_cpu_to_node(int cpu, int node)
161ab1f9dacSPaul Mackerras {
162ab1f9dacSPaul Mackerras 	numa_cpu_lookup_table[cpu] = node;
16345fb6ceaSAnton Blanchard 
164bf4b85b0SNathan Lynch 	dbg("adding cpu %d to node %d\n", cpu, node);
165bf4b85b0SNathan Lynch 
16625863de0SAnton Blanchard 	if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node])))
16725863de0SAnton Blanchard 		cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
168ab1f9dacSPaul Mackerras }
169ab1f9dacSPaul Mackerras 
170ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU
171ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu)
172ab1f9dacSPaul Mackerras {
173ab1f9dacSPaul Mackerras 	int node = numa_cpu_lookup_table[cpu];
174ab1f9dacSPaul Mackerras 
175ab1f9dacSPaul Mackerras 	dbg("removing cpu %lu from node %d\n", cpu, node);
176ab1f9dacSPaul Mackerras 
17725863de0SAnton Blanchard 	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
17825863de0SAnton Blanchard 		cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
179ab1f9dacSPaul Mackerras 	} else {
180ab1f9dacSPaul Mackerras 		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
181ab1f9dacSPaul Mackerras 		       cpu, node);
182ab1f9dacSPaul Mackerras 	}
183ab1f9dacSPaul Mackerras }
184ab1f9dacSPaul Mackerras #endif /* CONFIG_HOTPLUG_CPU */
185ab1f9dacSPaul Mackerras 
/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
{
	/* raw "ibm,associativity" cells, or NULL if the property is absent */
	return of_get_property(dev, "ibm,associativity", NULL);
}
191ab1f9dacSPaul Mackerras 
192cf00085dSChandru /*
193cf00085dSChandru  * Returns the property linux,drconf-usable-memory if
194cf00085dSChandru  * it exists (the property exists only in kexec/kdump kernels,
195cf00085dSChandru  * added by kexec-tools)
196cf00085dSChandru  */
197cf00085dSChandru static const u32 *of_get_usable_memory(struct device_node *memory)
198cf00085dSChandru {
199cf00085dSChandru 	const u32 *prop;
200cf00085dSChandru 	u32 len;
201cf00085dSChandru 	prop = of_get_property(memory, "linux,drconf-usable-memory", &len);
202cf00085dSChandru 	if (!prop || len < sizeof(unsigned int))
203cf00085dSChandru 		return 0;
204cf00085dSChandru 	return prop;
205cf00085dSChandru }
206cf00085dSChandru 
207482ec7c4SNathan Lynch /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
208482ec7c4SNathan Lynch  * info is found.
209482ec7c4SNathan Lynch  */
210953039c8SJeremy Kerr static int of_node_to_nid_single(struct device_node *device)
211ab1f9dacSPaul Mackerras {
212482ec7c4SNathan Lynch 	int nid = -1;
213a7f67bdfSJeremy Kerr 	const unsigned int *tmp;
214ab1f9dacSPaul Mackerras 
215ab1f9dacSPaul Mackerras 	if (min_common_depth == -1)
216482ec7c4SNathan Lynch 		goto out;
217ab1f9dacSPaul Mackerras 
218ab1f9dacSPaul Mackerras 	tmp = of_get_associativity(device);
219482ec7c4SNathan Lynch 	if (!tmp)
220482ec7c4SNathan Lynch 		goto out;
221482ec7c4SNathan Lynch 
222482ec7c4SNathan Lynch 	if (tmp[0] >= min_common_depth)
223cf950b7aSNathan Lynch 		nid = tmp[min_common_depth];
224bc16a759SNathan Lynch 
225bc16a759SNathan Lynch 	/* POWER4 LPAR uses 0xffff as invalid node */
226482ec7c4SNathan Lynch 	if (nid == 0xffff || nid >= MAX_NUMNODES)
227482ec7c4SNathan Lynch 		nid = -1;
228482ec7c4SNathan Lynch out:
229cf950b7aSNathan Lynch 	return nid;
230ab1f9dacSPaul Mackerras }
231ab1f9dacSPaul Mackerras 
/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
	struct device_node *parent;
	int nid = -1;

	of_node_get(device);
	while (device) {
		nid = of_node_to_nid_single(device);
		if (nid != -1)
			break;

		/* no id here: try the parent node, dropping our ref */
		parent = of_get_parent(device);
		of_node_put(device);
		device = parent;
	}
	of_node_put(device);

	return nid;
}
252953039c8SJeremy Kerr EXPORT_SYMBOL_GPL(of_node_to_nid);
253953039c8SJeremy Kerr 
254ab1f9dacSPaul Mackerras /*
255ab1f9dacSPaul Mackerras  * In theory, the "ibm,associativity" property may contain multiple
256ab1f9dacSPaul Mackerras  * associativity lists because a resource may be multiply connected
257ab1f9dacSPaul Mackerras  * into the machine.  This resource then has different associativity
258ab1f9dacSPaul Mackerras  * characteristics relative to its multiple connections.  We ignore
259ab1f9dacSPaul Mackerras  * this for now.  We also assume that all cpu and memory sets have
260ab1f9dacSPaul Mackerras  * their distances represented at a common level.  This won't be
2611b3c3714SUwe Kleine-König  * true for hierarchical NUMA.
262ab1f9dacSPaul Mackerras  *
263ab1f9dacSPaul Mackerras  * In any case the ibm,associativity-reference-points should give
264ab1f9dacSPaul Mackerras  * the correct depth for a normal NUMA system.
265ab1f9dacSPaul Mackerras  *
266ab1f9dacSPaul Mackerras  * - Dave Hansen <haveblue@us.ibm.com>
267ab1f9dacSPaul Mackerras  */
268ab1f9dacSPaul Mackerras static int __init find_min_common_depth(void)
269ab1f9dacSPaul Mackerras {
2704b83c330SAnton Blanchard 	int depth, index;
271a7f67bdfSJeremy Kerr 	const unsigned int *ref_points;
272ab1f9dacSPaul Mackerras 	struct device_node *rtas_root;
273ab1f9dacSPaul Mackerras 	unsigned int len;
274bc8449ccSAnton Blanchard 	struct device_node *chosen;
275bc8449ccSAnton Blanchard 	const char *vec5;
276ab1f9dacSPaul Mackerras 
277ab1f9dacSPaul Mackerras 	rtas_root = of_find_node_by_path("/rtas");
278ab1f9dacSPaul Mackerras 
279ab1f9dacSPaul Mackerras 	if (!rtas_root)
280ab1f9dacSPaul Mackerras 		return -1;
281ab1f9dacSPaul Mackerras 
282ab1f9dacSPaul Mackerras 	/*
283ab1f9dacSPaul Mackerras 	 * this property is 2 32-bit integers, each representing a level of
284ab1f9dacSPaul Mackerras 	 * depth in the associativity nodes.  The first is for an SMP
285ab1f9dacSPaul Mackerras 	 * configuration (should be all 0's) and the second is for a normal
286ab1f9dacSPaul Mackerras 	 * NUMA configuration.
287ab1f9dacSPaul Mackerras 	 */
2884b83c330SAnton Blanchard 	index = 1;
289e2eb6392SStephen Rothwell 	ref_points = of_get_property(rtas_root,
290ab1f9dacSPaul Mackerras 			"ibm,associativity-reference-points", &len);
291ab1f9dacSPaul Mackerras 
2924b83c330SAnton Blanchard 	/*
293bc8449ccSAnton Blanchard 	 * For form 1 affinity information we want the first field
2944b83c330SAnton Blanchard 	 */
295bc8449ccSAnton Blanchard #define VEC5_AFFINITY_BYTE	5
296bc8449ccSAnton Blanchard #define VEC5_AFFINITY		0x80
297bc8449ccSAnton Blanchard 	chosen = of_find_node_by_path("/chosen");
298bc8449ccSAnton Blanchard 	if (chosen) {
299bc8449ccSAnton Blanchard 		vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL);
300bc8449ccSAnton Blanchard 		if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) {
301bc8449ccSAnton Blanchard 			dbg("Using form 1 affinity\n");
3024b83c330SAnton Blanchard 			index = 0;
3034b83c330SAnton Blanchard 		}
304bc8449ccSAnton Blanchard 	}
3054b83c330SAnton Blanchard 
30620fcefe5SMilton Miller 	if ((len >= 2 * sizeof(unsigned int)) && ref_points) {
3074b83c330SAnton Blanchard 		depth = ref_points[index];
308ab1f9dacSPaul Mackerras 	} else {
309bf4b85b0SNathan Lynch 		dbg("NUMA: ibm,associativity-reference-points not found.\n");
310ab1f9dacSPaul Mackerras 		depth = -1;
311ab1f9dacSPaul Mackerras 	}
312ab1f9dacSPaul Mackerras 	of_node_put(rtas_root);
313ab1f9dacSPaul Mackerras 
314ab1f9dacSPaul Mackerras 	return depth;
315ab1f9dacSPaul Mackerras }
316ab1f9dacSPaul Mackerras 
/*
 * Read the #address-cells / #size-cells values in force for "memory"
 * nodes in the device tree.  Panics if no memory node exists.
 */
static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
	struct device_node *memory = NULL;

	memory = of_find_node_by_type(memory, "memory");
	if (!memory)
		panic("numa.c: No memory nodes found!");

	*n_addr_cells = of_n_addr_cells(memory);
	*n_size_cells = of_n_size_cells(memory);
	of_node_put(memory);
}
329ab1f9dacSPaul Mackerras 
330a7f67bdfSJeremy Kerr static unsigned long __devinit read_n_cells(int n, const unsigned int **buf)
331ab1f9dacSPaul Mackerras {
332ab1f9dacSPaul Mackerras 	unsigned long result = 0;
333ab1f9dacSPaul Mackerras 
334ab1f9dacSPaul Mackerras 	while (n--) {
335ab1f9dacSPaul Mackerras 		result = (result << 32) | **buf;
336ab1f9dacSPaul Mackerras 		(*buf)++;
337ab1f9dacSPaul Mackerras 	}
338ab1f9dacSPaul Mackerras 	return result;
339ab1f9dacSPaul Mackerras }
340ab1f9dacSPaul Mackerras 
/* One entry of the ibm,dynamic-memory property (describes one LMB). */
struct of_drconf_cell {
	u64	base_addr;	/* physical start address of the LMB */
	u32	drc_index;	/* DR connector index */
	u32	reserved;
	u32	aa_index;	/* index into ibm,associativity-lookup-arrays */
	u32	flags;		/* DRCONF_MEM_* bits below */
};

#define DRCONF_MEM_ASSIGNED	0x00000008	/* LMB owned by this partition */
#define DRCONF_MEM_AI_INVALID	0x00000040	/* aa_index is not valid */
#define DRCONF_MEM_RESERVED	0x00000080	/* LMB reserved; skip it */
3528342681dSNathan Fontenot 
3538342681dSNathan Fontenot /*
35495f72d1eSYinghai Lu  * Read the next memblock list entry from the ibm,dynamic-memory property
3558342681dSNathan Fontenot  * and return the information in the provided of_drconf_cell structure.
3568342681dSNathan Fontenot  */
3578342681dSNathan Fontenot static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
3588342681dSNathan Fontenot {
3598342681dSNathan Fontenot 	const u32 *cp;
3608342681dSNathan Fontenot 
3618342681dSNathan Fontenot 	drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);
3628342681dSNathan Fontenot 
3638342681dSNathan Fontenot 	cp = *cellp;
3648342681dSNathan Fontenot 	drmem->drc_index = cp[0];
3658342681dSNathan Fontenot 	drmem->reserved = cp[1];
3668342681dSNathan Fontenot 	drmem->aa_index = cp[2];
3678342681dSNathan Fontenot 	drmem->flags = cp[3];
3688342681dSNathan Fontenot 
3698342681dSNathan Fontenot 	*cellp = cp + 4;
3708342681dSNathan Fontenot }
3718342681dSNathan Fontenot 
3728342681dSNathan Fontenot /*
3738342681dSNathan Fontenot  * Retreive and validate the ibm,dynamic-memory property of the device tree.
3748342681dSNathan Fontenot  *
37595f72d1eSYinghai Lu  * The layout of the ibm,dynamic-memory property is a number N of memblock
37695f72d1eSYinghai Lu  * list entries followed by N memblock list entries.  Each memblock list entry
3778342681dSNathan Fontenot  * contains information as layed out in the of_drconf_cell struct above.
3788342681dSNathan Fontenot  */
3798342681dSNathan Fontenot static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
3808342681dSNathan Fontenot {
3818342681dSNathan Fontenot 	const u32 *prop;
3828342681dSNathan Fontenot 	u32 len, entries;
3838342681dSNathan Fontenot 
3848342681dSNathan Fontenot 	prop = of_get_property(memory, "ibm,dynamic-memory", &len);
3858342681dSNathan Fontenot 	if (!prop || len < sizeof(unsigned int))
3868342681dSNathan Fontenot 		return 0;
3878342681dSNathan Fontenot 
3888342681dSNathan Fontenot 	entries = *prop++;
3898342681dSNathan Fontenot 
3908342681dSNathan Fontenot 	/* Now that we know the number of entries, revalidate the size
3918342681dSNathan Fontenot 	 * of the property read in to ensure we have everything
3928342681dSNathan Fontenot 	 */
3938342681dSNathan Fontenot 	if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
3948342681dSNathan Fontenot 		return 0;
3958342681dSNathan Fontenot 
3968342681dSNathan Fontenot 	*dm = prop;
3978342681dSNathan Fontenot 	return entries;
3988342681dSNathan Fontenot }
3998342681dSNathan Fontenot 
4008342681dSNathan Fontenot /*
401*3fdfd990SBenjamin Herrenschmidt  * Retreive and validate the ibm,lmb-size property for drconf memory
4028342681dSNathan Fontenot  * from the device tree.
4038342681dSNathan Fontenot  */
404*3fdfd990SBenjamin Herrenschmidt static u64 of_get_lmb_size(struct device_node *memory)
4058342681dSNathan Fontenot {
4068342681dSNathan Fontenot 	const u32 *prop;
4078342681dSNathan Fontenot 	u32 len;
4088342681dSNathan Fontenot 
409*3fdfd990SBenjamin Herrenschmidt 	prop = of_get_property(memory, "ibm,lmb-size", &len);
4108342681dSNathan Fontenot 	if (!prop || len < sizeof(unsigned int))
4118342681dSNathan Fontenot 		return 0;
4128342681dSNathan Fontenot 
4138342681dSNathan Fontenot 	return read_n_cells(n_mem_size_cells, &prop);
4148342681dSNathan Fontenot }
4158342681dSNathan Fontenot 
/* Parsed view of the ibm,associativity-lookup-arrays property. */
struct assoc_arrays {
	u32	n_arrays;	/* number of associativity arrays */
	u32	array_sz;	/* cells per array */
	const u32 *arrays;	/* the arrays themselves, back to back */
};
4218342681dSNathan Fontenot 
4228342681dSNathan Fontenot /*
4238342681dSNathan Fontenot  * Retreive and validate the list of associativity arrays for drconf
4248342681dSNathan Fontenot  * memory from the ibm,associativity-lookup-arrays property of the
4258342681dSNathan Fontenot  * device tree..
4268342681dSNathan Fontenot  *
4278342681dSNathan Fontenot  * The layout of the ibm,associativity-lookup-arrays property is a number N
4288342681dSNathan Fontenot  * indicating the number of associativity arrays, followed by a number M
4298342681dSNathan Fontenot  * indicating the size of each associativity array, followed by a list
4308342681dSNathan Fontenot  * of N associativity arrays.
4318342681dSNathan Fontenot  */
4328342681dSNathan Fontenot static int of_get_assoc_arrays(struct device_node *memory,
4338342681dSNathan Fontenot 			       struct assoc_arrays *aa)
4348342681dSNathan Fontenot {
4358342681dSNathan Fontenot 	const u32 *prop;
4368342681dSNathan Fontenot 	u32 len;
4378342681dSNathan Fontenot 
4388342681dSNathan Fontenot 	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
4398342681dSNathan Fontenot 	if (!prop || len < 2 * sizeof(unsigned int))
4408342681dSNathan Fontenot 		return -1;
4418342681dSNathan Fontenot 
4428342681dSNathan Fontenot 	aa->n_arrays = *prop++;
4438342681dSNathan Fontenot 	aa->array_sz = *prop++;
4448342681dSNathan Fontenot 
4458342681dSNathan Fontenot 	/* Now that we know the number of arrrays and size of each array,
4468342681dSNathan Fontenot 	 * revalidate the size of the property read in.
4478342681dSNathan Fontenot 	 */
4488342681dSNathan Fontenot 	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
4498342681dSNathan Fontenot 		return -1;
4508342681dSNathan Fontenot 
4518342681dSNathan Fontenot 	aa->arrays = prop;
4528342681dSNathan Fontenot 	return 0;
4538342681dSNathan Fontenot }
4548342681dSNathan Fontenot 
4558342681dSNathan Fontenot /*
4568342681dSNathan Fontenot  * This is like of_node_to_nid_single() for memory represented in the
4578342681dSNathan Fontenot  * ibm,dynamic-reconfiguration-memory node.
4588342681dSNathan Fontenot  */
4598342681dSNathan Fontenot static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
4608342681dSNathan Fontenot 				   struct assoc_arrays *aa)
4618342681dSNathan Fontenot {
4628342681dSNathan Fontenot 	int default_nid = 0;
4638342681dSNathan Fontenot 	int nid = default_nid;
4648342681dSNathan Fontenot 	int index;
4658342681dSNathan Fontenot 
4668342681dSNathan Fontenot 	if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
4678342681dSNathan Fontenot 	    !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
4688342681dSNathan Fontenot 	    drmem->aa_index < aa->n_arrays) {
4698342681dSNathan Fontenot 		index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
4708342681dSNathan Fontenot 		nid = aa->arrays[index];
4718342681dSNathan Fontenot 
4728342681dSNathan Fontenot 		if (nid == 0xffff || nid >= MAX_NUMNODES)
4738342681dSNathan Fontenot 			nid = default_nid;
4748342681dSNathan Fontenot 	}
4758342681dSNathan Fontenot 
4768342681dSNathan Fontenot 	return nid;
4778342681dSNathan Fontenot }
4788342681dSNathan Fontenot 
/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int __cpuinit numa_setup_cpu(unsigned long lcpu)
{
	int nid = 0;
	struct device_node *cpu = of_get_cpu_node(lcpu, NULL);

	if (!cpu) {
		WARN_ON(1);
		/* still map the cpu somewhere (node 0) */
		goto out;
	}

	nid = of_node_to_nid_single(cpu);

	/* fall back when the device tree gave no usable id or the
	 * node is not online */
	if (nid < 0 || !node_online(nid))
		nid = first_online_node;
out:
	map_cpu_to_node(lcpu, nid);

	of_node_put(cpu);

	return nid;
}
504ab1f9dacSPaul Mackerras 
50574b85f37SChandra Seetharaman static int __cpuinit cpu_numa_callback(struct notifier_block *nfb,
506ab1f9dacSPaul Mackerras 			     unsigned long action,
507ab1f9dacSPaul Mackerras 			     void *hcpu)
508ab1f9dacSPaul Mackerras {
509ab1f9dacSPaul Mackerras 	unsigned long lcpu = (unsigned long)hcpu;
510ab1f9dacSPaul Mackerras 	int ret = NOTIFY_DONE;
511ab1f9dacSPaul Mackerras 
512ab1f9dacSPaul Mackerras 	switch (action) {
513ab1f9dacSPaul Mackerras 	case CPU_UP_PREPARE:
5148bb78442SRafael J. Wysocki 	case CPU_UP_PREPARE_FROZEN:
515ab1f9dacSPaul Mackerras 		numa_setup_cpu(lcpu);
516ab1f9dacSPaul Mackerras 		ret = NOTIFY_OK;
517ab1f9dacSPaul Mackerras 		break;
518ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU
519ab1f9dacSPaul Mackerras 	case CPU_DEAD:
5208bb78442SRafael J. Wysocki 	case CPU_DEAD_FROZEN:
521ab1f9dacSPaul Mackerras 	case CPU_UP_CANCELED:
5228bb78442SRafael J. Wysocki 	case CPU_UP_CANCELED_FROZEN:
523ab1f9dacSPaul Mackerras 		unmap_cpu_from_node(lcpu);
524ab1f9dacSPaul Mackerras 		break;
525ab1f9dacSPaul Mackerras 		ret = NOTIFY_OK;
526ab1f9dacSPaul Mackerras #endif
527ab1f9dacSPaul Mackerras 	}
528ab1f9dacSPaul Mackerras 	return ret;
529ab1f9dacSPaul Mackerras }
530ab1f9dacSPaul Mackerras 
531ab1f9dacSPaul Mackerras /*
532ab1f9dacSPaul Mackerras  * Check and possibly modify a memory region to enforce the memory limit.
533ab1f9dacSPaul Mackerras  *
534ab1f9dacSPaul Mackerras  * Returns the size the region should have to enforce the memory limit.
535ab1f9dacSPaul Mackerras  * This will either be the original value of size, a truncated value,
536ab1f9dacSPaul Mackerras  * or zero. If the returned value of size is 0 the region should be
537ab1f9dacSPaul Mackerras  * discarded as it lies wholy above the memory limit.
538ab1f9dacSPaul Mackerras  */
53945fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start,
54045fb6ceaSAnton Blanchard 						      unsigned long size)
541ab1f9dacSPaul Mackerras {
542ab1f9dacSPaul Mackerras 	/*
54395f72d1eSYinghai Lu 	 * We use memblock_end_of_DRAM() in here instead of memory_limit because
544ab1f9dacSPaul Mackerras 	 * we've already adjusted it for the limit and it takes care of
545fe55249dSMilton Miller 	 * having memory holes below the limit.  Also, in the case of
546fe55249dSMilton Miller 	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
547ab1f9dacSPaul Mackerras 	 */
548ab1f9dacSPaul Mackerras 
54995f72d1eSYinghai Lu 	if (start + size <= memblock_end_of_DRAM())
550ab1f9dacSPaul Mackerras 		return size;
551ab1f9dacSPaul Mackerras 
55295f72d1eSYinghai Lu 	if (start >= memblock_end_of_DRAM())
553ab1f9dacSPaul Mackerras 		return 0;
554ab1f9dacSPaul Mackerras 
55595f72d1eSYinghai Lu 	return memblock_end_of_DRAM() - start;
556ab1f9dacSPaul Mackerras }
557ab1f9dacSPaul Mackerras 
/*
 * Reads the counter for a given entry in
 * linux,drconf-usable-memory property
 *
 * Advances *usm past the counter cells as a side effect.
 */
static inline int __init read_usm_ranges(const u32 **usm)
{
	/*
	 * For each lmb in ibm,dynamic-memory a corresponding
	 * entry in linux,drconf-usable-memory property contains
	 * a counter followed by that many (base, size) duple.
	 * read the counter from linux,drconf-usable-memory
	 */
	return read_n_cells(n_mem_size_cells, usm);
}
572cf00085dSChandru 
/*
 * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
 * node.  This assumes n_mem_{addr,size}_cells have been set.
 */
static void __init parse_drconf_memory(struct device_node *memory)
{
	const u32 *dm, *usm;
	unsigned int n, rc, ranges, is_kexec_kdump = 0;
	unsigned long lmb_size, base, size, sz;
	int nid;
	struct assoc_arrays aa;

	/* bail out silently if any of the required properties is missing
	 * or malformed */
	n = of_get_drconf_memory(memory, &dm);
	if (!n)
		return;

	lmb_size = of_get_lmb_size(memory);
	if (!lmb_size)
		return;

	rc = of_get_assoc_arrays(memory, &aa);
	if (rc)
		return;

	/* check if this is a kexec/kdump kernel */
	usm = of_get_usable_memory(memory);
	if (usm != NULL)
		is_kexec_kdump = 1;

	for (; n != 0; --n) {
		struct of_drconf_cell drmem;

		read_drconf_cell(&drmem, &dm);

		/* skip this block if the reserved bit is set in flags (0x80)
		   or if the block is not assigned to this partition (0x8) */
		if ((drmem.flags & DRCONF_MEM_RESERVED)
		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
			continue;

		base = drmem.base_addr;
		size = lmb_size;
		ranges = 1;

		if (is_kexec_kdump) {
			ranges = read_usm_ranges(&usm);
			if (!ranges) /* there are no (base, size) duple */
				continue;
		}
		do {
			if (is_kexec_kdump) {
				/* kexec/kdump: only sub-ranges of the LMB
				 * are usable; read each (base, size) pair */
				base = read_n_cells(n_mem_addr_cells, &usm);
				size = read_n_cells(n_mem_size_cells, &usm);
			}
			nid = of_drconf_to_nid_single(&drmem, &aa);
			/* may replace nid with a fake-NUMA node id */
			fake_numa_create_new_node(
				((base + size) >> PAGE_SHIFT),
					   &nid);
			node_set_online(nid);
			/* clip against memory_limit; skip if fully above */
			sz = numa_enforce_memory_limit(base, size);
			if (sz)
				add_active_range(nid, base >> PAGE_SHIFT,
						 (base >> PAGE_SHIFT)
						 + (sz >> PAGE_SHIFT));
		} while (--ranges);
	}
}
6400204568aSPaul Mackerras 
/*
 * Build the NUMA topology from the device tree: online a node for every
 * present CPU, then register the address ranges of each "memory" node
 * (and finally of the ibm,dynamic-reconfiguration-memory node) with the
 * active-range map.  Returns 0 on success, negative when the tree
 * carries no usable NUMA information or NUMA was disabled by the user.
 */
641ab1f9dacSPaul Mackerras static int __init parse_numa_properties(void)
642ab1f9dacSPaul Mackerras {
643ab1f9dacSPaul Mackerras 	struct device_node *cpu = NULL;
644ab1f9dacSPaul Mackerras 	struct device_node *memory = NULL;
645482ec7c4SNathan Lynch 	int default_nid = 0;
646ab1f9dacSPaul Mackerras 	unsigned long i;
647ab1f9dacSPaul Mackerras 
648ab1f9dacSPaul Mackerras 	if (numa_enabled == 0) {
649ab1f9dacSPaul Mackerras 		printk(KERN_WARNING "NUMA disabled by user\n");
650ab1f9dacSPaul Mackerras 		return -1;
651ab1f9dacSPaul Mackerras 	}
652ab1f9dacSPaul Mackerras 
653ab1f9dacSPaul Mackerras 	min_common_depth = find_min_common_depth();
654ab1f9dacSPaul Mackerras 
	/* A negative depth means no associativity info -- no NUMA. */
655ab1f9dacSPaul Mackerras 	if (min_common_depth < 0)
656ab1f9dacSPaul Mackerras 		return min_common_depth;
657ab1f9dacSPaul Mackerras 
658bf4b85b0SNathan Lynch 	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
659bf4b85b0SNathan Lynch 
660ab1f9dacSPaul Mackerras 	/*
661482ec7c4SNathan Lynch 	 * Even though we connect cpus to numa domains later in SMP
662482ec7c4SNathan Lynch 	 * init, we need to know the node ids now. This is because
663482ec7c4SNathan Lynch 	 * each node to be onlined must have NODE_DATA etc backing it.
664ab1f9dacSPaul Mackerras 	 */
665482ec7c4SNathan Lynch 	for_each_present_cpu(i) {
666cf950b7aSNathan Lynch 		int nid;
667ab1f9dacSPaul Mackerras 
6688b16cd23SMilton Miller 		cpu = of_get_cpu_node(i, NULL);
669482ec7c4SNathan Lynch 		BUG_ON(!cpu);
670953039c8SJeremy Kerr 		nid = of_node_to_nid_single(cpu);
671ab1f9dacSPaul Mackerras 		of_node_put(cpu);
672ab1f9dacSPaul Mackerras 
673482ec7c4SNathan Lynch 		/*
674482ec7c4SNathan Lynch 		 * Don't fall back to default_nid yet -- we will plug
675482ec7c4SNathan Lynch 		 * cpus into nodes once the memory scan has discovered
676482ec7c4SNathan Lynch 		 * the topology.
677482ec7c4SNathan Lynch 		 */
678482ec7c4SNathan Lynch 		if (nid < 0)
679482ec7c4SNathan Lynch 			continue;
680482ec7c4SNathan Lynch 		node_set_online(nid);
681ab1f9dacSPaul Mackerras 	}
682ab1f9dacSPaul Mackerras 
683237a0989SMike Kravetz 	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
684ab1f9dacSPaul Mackerras 	memory = NULL;
685ab1f9dacSPaul Mackerras 	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
686ab1f9dacSPaul Mackerras 		unsigned long start;
687ab1f9dacSPaul Mackerras 		unsigned long size;
688cf950b7aSNathan Lynch 		int nid;
689ab1f9dacSPaul Mackerras 		int ranges;
690a7f67bdfSJeremy Kerr 		const unsigned int *memcell_buf;
691ab1f9dacSPaul Mackerras 		unsigned int len;
692ab1f9dacSPaul Mackerras 
		/* Prefer "linux,usable-memory" (kexec/kdump) over "reg". */
693e2eb6392SStephen Rothwell 		memcell_buf = of_get_property(memory,
694ba759485SMichael Ellerman 			"linux,usable-memory", &len);
695ba759485SMichael Ellerman 		if (!memcell_buf || len <= 0)
696e2eb6392SStephen Rothwell 			memcell_buf = of_get_property(memory, "reg", &len);
697ab1f9dacSPaul Mackerras 		if (!memcell_buf || len <= 0)
698ab1f9dacSPaul Mackerras 			continue;
699ab1f9dacSPaul Mackerras 
700cc5d0189SBenjamin Herrenschmidt 		/* ranges in cell */
701cc5d0189SBenjamin Herrenschmidt 		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
702ab1f9dacSPaul Mackerras new_range:
703ab1f9dacSPaul Mackerras 		/* these are order-sensitive, and modify the buffer pointer */
704237a0989SMike Kravetz 		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
705237a0989SMike Kravetz 		size = read_n_cells(n_mem_size_cells, &memcell_buf);
706ab1f9dacSPaul Mackerras 
707482ec7c4SNathan Lynch 		/*
708482ec7c4SNathan Lynch 		 * Assumption: either all memory nodes or none will
709482ec7c4SNathan Lynch 		 * have associativity properties.  If none, then
710482ec7c4SNathan Lynch 		 * everything goes to default_nid.
711482ec7c4SNathan Lynch 		 */
712953039c8SJeremy Kerr 		nid = of_node_to_nid_single(memory);
713482ec7c4SNathan Lynch 		if (nid < 0)
714482ec7c4SNathan Lynch 			nid = default_nid;
7151daa6d08SBalbir Singh 
		/* "numa=fake=" may split this range into artificial nodes. */
7161daa6d08SBalbir Singh 		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
717482ec7c4SNathan Lynch 		node_set_online(nid);
718ab1f9dacSPaul Mackerras 
719ab1f9dacSPaul Mackerras 		if (!(size = numa_enforce_memory_limit(start, size))) {
720ab1f9dacSPaul Mackerras 			if (--ranges)
721ab1f9dacSPaul Mackerras 				goto new_range;
722ab1f9dacSPaul Mackerras 			else
723ab1f9dacSPaul Mackerras 				continue;
724ab1f9dacSPaul Mackerras 		}
725ab1f9dacSPaul Mackerras 
726c67c3cb4SMel Gorman 		add_active_range(nid, start >> PAGE_SHIFT,
727c67c3cb4SMel Gorman 				(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
728ab1f9dacSPaul Mackerras 
729ab1f9dacSPaul Mackerras 		if (--ranges)
730ab1f9dacSPaul Mackerras 			goto new_range;
731ab1f9dacSPaul Mackerras 	}
732ab1f9dacSPaul Mackerras 
7330204568aSPaul Mackerras 	/*
73495f72d1eSYinghai Lu 	 * Now do the same thing for each MEMBLOCK listed in the ibm,dynamic-memory
7350204568aSPaul Mackerras 	 * property in the ibm,dynamic-reconfiguration-memory node.
7360204568aSPaul Mackerras 	 */
7370204568aSPaul Mackerras 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
7380204568aSPaul Mackerras 	if (memory)
7390204568aSPaul Mackerras 		parse_drconf_memory(memory);
7400204568aSPaul Mackerras 
741ab1f9dacSPaul Mackerras 	return 0;
742ab1f9dacSPaul Mackerras }
743ab1f9dacSPaul Mackerras 
/*
 * Fallback used when the device tree has no NUMA information: place
 * every memblock memory region into node 0, unless "numa=fake=" asks
 * fake_numa_create_new_node() to split the address space into
 * artificial nodes.
 */
744ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void)
745ab1f9dacSPaul Mackerras {
74695f72d1eSYinghai Lu 	unsigned long top_of_ram = memblock_end_of_DRAM();
74795f72d1eSYinghai Lu 	unsigned long total_ram = memblock_phys_mem_size();
748c67c3cb4SMel Gorman 	unsigned long start_pfn, end_pfn;
7491daa6d08SBalbir Singh 	unsigned int i, nid = 0;
750ab1f9dacSPaul Mackerras 
751e110b281SOlof Johansson 	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
752ab1f9dacSPaul Mackerras 	       top_of_ram, total_ram);
753e110b281SOlof Johansson 	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
754ab1f9dacSPaul Mackerras 	       (top_of_ram - total_ram) >> 20);
755ab1f9dacSPaul Mackerras 
75695f72d1eSYinghai Lu 	for (i = 0; i < memblock.memory.cnt; ++i) {
75795f72d1eSYinghai Lu 		start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT;
75895f72d1eSYinghai Lu 		end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i);
7591daa6d08SBalbir Singh 
7601daa6d08SBalbir Singh 		fake_numa_create_new_node(end_pfn, &nid);
7611daa6d08SBalbir Singh 		add_active_range(nid, start_pfn, end_pfn);
7621daa6d08SBalbir Singh 		node_set_online(nid);
763c67c3cb4SMel Gorman 	}
764ab1f9dacSPaul Mackerras }
765ab1f9dacSPaul Mackerras 
/*
 * Print the CPUs belonging to each online node, run-length encoding
 * consecutive CPU ids as "first-last" ranges (e.g. "Node 0 CPUs: 0-3").
 */
7664b703a23SAnton Blanchard void __init dump_numa_cpu_topology(void)
7674b703a23SAnton Blanchard {
7684b703a23SAnton Blanchard 	unsigned int node;
7694b703a23SAnton Blanchard 	unsigned int cpu, count;
7704b703a23SAnton Blanchard 
7714b703a23SAnton Blanchard 	if (min_common_depth == -1 || !numa_enabled)
7724b703a23SAnton Blanchard 		return;
7734b703a23SAnton Blanchard 
7744b703a23SAnton Blanchard 	for_each_online_node(node) {
775e110b281SOlof Johansson 		printk(KERN_DEBUG "Node %d CPUs:", node);
7764b703a23SAnton Blanchard 
		/* count = length of the current run of in-node CPUs. */
7774b703a23SAnton Blanchard 		count = 0;
7784b703a23SAnton Blanchard 		/*
7794b703a23SAnton Blanchard 		 * If we used a CPU iterator here we would miss printing
7804b703a23SAnton Blanchard 		 * the holes in the cpumap.
7814b703a23SAnton Blanchard 		 */
78225863de0SAnton Blanchard 		for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
78325863de0SAnton Blanchard 			if (cpumask_test_cpu(cpu,
78425863de0SAnton Blanchard 					node_to_cpumask_map[node])) {
7854b703a23SAnton Blanchard 				if (count == 0)
7864b703a23SAnton Blanchard 					printk(" %u", cpu);
7874b703a23SAnton Blanchard 				++count;
7884b703a23SAnton Blanchard 			} else {
				/* Run ended: close the "-last" half (single CPUs need none). */
7894b703a23SAnton Blanchard 				if (count > 1)
7904b703a23SAnton Blanchard 					printk("-%u", cpu - 1);
7914b703a23SAnton Blanchard 				count = 0;
7924b703a23SAnton Blanchard 			}
7934b703a23SAnton Blanchard 		}
7944b703a23SAnton Blanchard 
		/* A run may extend up to the last possible CPU id. */
7954b703a23SAnton Blanchard 		if (count > 1)
79625863de0SAnton Blanchard 			printk("-%u", nr_cpu_ids - 1);
7974b703a23SAnton Blanchard 		printk("\n");
7984b703a23SAnton Blanchard 	}
7994b703a23SAnton Blanchard }
8004b703a23SAnton Blanchard 
/*
 * Print the physical address ranges owned by each online node, scanned
 * in SECTION_SIZE-sized steps and run-length encoded as "start-end".
 */
8014b703a23SAnton Blanchard static void __init dump_numa_memory_topology(void)
802ab1f9dacSPaul Mackerras {
803ab1f9dacSPaul Mackerras 	unsigned int node;
804ab1f9dacSPaul Mackerras 	unsigned int count;
805ab1f9dacSPaul Mackerras 
806ab1f9dacSPaul Mackerras 	if (min_common_depth == -1 || !numa_enabled)
807ab1f9dacSPaul Mackerras 		return;
808ab1f9dacSPaul Mackerras 
809ab1f9dacSPaul Mackerras 	for_each_online_node(node) {
810ab1f9dacSPaul Mackerras 		unsigned long i;
811ab1f9dacSPaul Mackerras 
812e110b281SOlof Johansson 		printk(KERN_DEBUG "Node %d Memory:", node);
813ab1f9dacSPaul Mackerras 
		/* count = length of the current run of sections in this node. */
814ab1f9dacSPaul Mackerras 		count = 0;
815ab1f9dacSPaul Mackerras 
81695f72d1eSYinghai Lu 		for (i = 0; i < memblock_end_of_DRAM();
81745fb6ceaSAnton Blanchard 		     i += (1 << SECTION_SIZE_BITS)) {
81845fb6ceaSAnton Blanchard 			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
819ab1f9dacSPaul Mackerras 				if (count == 0)
820ab1f9dacSPaul Mackerras 					printk(" 0x%lx", i);
821ab1f9dacSPaul Mackerras 				++count;
822ab1f9dacSPaul Mackerras 			} else {
823ab1f9dacSPaul Mackerras 				if (count > 0)
824ab1f9dacSPaul Mackerras 					printk("-0x%lx", i);
825ab1f9dacSPaul Mackerras 				count = 0;
826ab1f9dacSPaul Mackerras 			}
827ab1f9dacSPaul Mackerras 		}
828ab1f9dacSPaul Mackerras 
		/* Close a run that extends to the end of DRAM. */
829ab1f9dacSPaul Mackerras 		if (count > 0)
830ab1f9dacSPaul Mackerras 			printk("-0x%lx", i);
831ab1f9dacSPaul Mackerras 		printk("\n");
832ab1f9dacSPaul Mackerras 	}
833ab1f9dacSPaul Mackerras }
834ab1f9dacSPaul Mackerras 
835ab1f9dacSPaul Mackerras /*
83695f72d1eSYinghai Lu  * Allocate some memory, satisfying the memblock or bootmem allocator where
837ab1f9dacSPaul Mackerras  * required. nid is the preferred node and end is the physical address of
838ab1f9dacSPaul Mackerras  * the highest address in the node.
839ab1f9dacSPaul Mackerras  *
8400be210fdSDave Hansen  * Returns the virtual address of the memory.
 *
 * The returned memory is always zeroed; allocation failure panics,
 * so the return value is never NULL.
841ab1f9dacSPaul Mackerras  */
842893473dfSDave Hansen static void __init *careful_zallocation(int nid, unsigned long size,
84345fb6ceaSAnton Blanchard 				       unsigned long align,
84445fb6ceaSAnton Blanchard 				       unsigned long end_pfn)
845ab1f9dacSPaul Mackerras {
8460be210fdSDave Hansen 	void *ret;
84745fb6ceaSAnton Blanchard 	int new_nid;
8480be210fdSDave Hansen 	unsigned long ret_paddr;
8490be210fdSDave Hansen 
	/* First try to allocate below end_pfn, i.e. node-local. */
85095f72d1eSYinghai Lu 	ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
851ab1f9dacSPaul Mackerras 
852ab1f9dacSPaul Mackerras 	/* retry over all memory */
8530be210fdSDave Hansen 	if (!ret_paddr)
85495f72d1eSYinghai Lu 		ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
855ab1f9dacSPaul Mackerras 
8560be210fdSDave Hansen 	if (!ret_paddr)
8575d21ea2bSDave Hansen 		panic("numa.c: cannot allocate %lu bytes for node %d",
858ab1f9dacSPaul Mackerras 		      size, nid);
859ab1f9dacSPaul Mackerras 
8600be210fdSDave Hansen 	ret = __va(ret_paddr);
8610be210fdSDave Hansen 
862ab1f9dacSPaul Mackerras 	/*
863c555e520SDave Hansen 	 * We initialize the nodes in numeric order: 0, 1, 2...
86495f72d1eSYinghai Lu 	 * and hand over control from the MEMBLOCK allocator to the
865c555e520SDave Hansen 	 * bootmem allocator.  If this function is called for
866c555e520SDave Hansen 	 * node 5, then we know that all nodes <5 are using the
86795f72d1eSYinghai Lu 	 * bootmem allocator instead of the MEMBLOCK allocator.
868c555e520SDave Hansen 	 *
869c555e520SDave Hansen 	 * So, check the nid from which this allocation came
870c555e520SDave Hansen 	 * and double check to see if we need to use bootmem
87195f72d1eSYinghai Lu 	 * instead of the MEMBLOCK.  We don't free the MEMBLOCK memory
872c555e520SDave Hansen 	 * since it would be useless.
873ab1f9dacSPaul Mackerras 	 */
8740be210fdSDave Hansen 	new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
87545fb6ceaSAnton Blanchard 	if (new_nid < nid) {
8760be210fdSDave Hansen 		ret = __alloc_bootmem_node(NODE_DATA(new_nid),
877ab1f9dacSPaul Mackerras 				size, align, 0);
878ab1f9dacSPaul Mackerras 
8790be210fdSDave Hansen 		dbg("alloc_bootmem %p %lx\n", ret, size);
880ab1f9dacSPaul Mackerras 	}
881ab1f9dacSPaul Mackerras 
	/* Zero in all cases -- callers rely on the "zallocation" contract. */
882893473dfSDave Hansen 	memset(ret, 0, size);
8830be210fdSDave Hansen 	return ret;
884ab1f9dacSPaul Mackerras }
885ab1f9dacSPaul Mackerras 
/* CPU hotplug notifier block; registered in do_init_bootmem() below. */
88674b85f37SChandra Seetharaman static struct notifier_block __cpuinitdata ppc64_numa_nb = {
88774b85f37SChandra Seetharaman 	.notifier_call = cpu_numa_callback,
88874b85f37SChandra Seetharaman 	.priority = 1 /* Must run before sched domains notifier. */
88974b85f37SChandra Seetharaman };
89074b85f37SChandra Seetharaman 
/*
 * Propagate every memblock.reserved range that overlaps node @nid into
 * that node's bootmem allocator via reserve_bootmem_node().  A reserved
 * range may straddle several active regions (and even other nodes), so
 * it is walked region by region and trimmed to each active region.
 */
8914a618669SDave Hansen static void mark_reserved_regions_for_nid(int nid)
892ab1f9dacSPaul Mackerras {
8934a618669SDave Hansen 	struct pglist_data *node = NODE_DATA(nid);
8944a618669SDave Hansen 	int i;
895ab1f9dacSPaul Mackerras 
89695f72d1eSYinghai Lu 	for (i = 0; i < memblock.reserved.cnt; i++) {
89795f72d1eSYinghai Lu 		unsigned long physbase = memblock.reserved.region[i].base;
89895f72d1eSYinghai Lu 		unsigned long size = memblock.reserved.region[i].size;
8998f64e1f2SJon Tollefson 		unsigned long start_pfn = physbase >> PAGE_SHIFT;
90006eccea6SDave Hansen 		unsigned long end_pfn = PFN_UP(physbase + size);
9018f64e1f2SJon Tollefson 		struct node_active_region node_ar;
9024a618669SDave Hansen 		unsigned long node_end_pfn = node->node_start_pfn +
9034a618669SDave Hansen 					     node->node_spanned_pages;
9044a618669SDave Hansen 
9054a618669SDave Hansen 		/*
90695f72d1eSYinghai Lu 		 * Check to make sure that this memblock.reserved area is
9074a618669SDave Hansen 		 * within the bounds of the node that we care about.
9084a618669SDave Hansen 		 * Checking the nid of the start and end points is not
9094a618669SDave Hansen 		 * sufficient because the reserved area could span the
9104a618669SDave Hansen 		 * entire node.
9114a618669SDave Hansen 		 */
9124a618669SDave Hansen 		if (end_pfn <= node->node_start_pfn ||
9134a618669SDave Hansen 		    start_pfn >= node_end_pfn)
9144a618669SDave Hansen 			continue;
915ab1f9dacSPaul Mackerras 
9168f64e1f2SJon Tollefson 		get_node_active_region(start_pfn, &node_ar);
917e8170372SJon Tollefson 		while (start_pfn < end_pfn &&
918e8170372SJon Tollefson 			node_ar.start_pfn < node_ar.end_pfn) {
919e8170372SJon Tollefson 			unsigned long reserve_size = size;
9208f64e1f2SJon Tollefson 			/*
9218f64e1f2SJon Tollefson 			 * if reserved region extends past active region
9228f64e1f2SJon Tollefson 			 * then trim size to active region
9238f64e1f2SJon Tollefson 			 */
9248f64e1f2SJon Tollefson 			if (end_pfn > node_ar.end_pfn)
925e8170372SJon Tollefson 				reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
92606eccea6SDave Hansen 					- physbase;
927a4c74dddSDave Hansen 			/*
928a4c74dddSDave Hansen 			 * Only worry about *this* node, others may not
929a4c74dddSDave Hansen 			 * yet have valid NODE_DATA().
930a4c74dddSDave Hansen 			 */
931a4c74dddSDave Hansen 			if (node_ar.nid == nid) {
932a4c74dddSDave Hansen 				dbg("reserve_bootmem %lx %lx nid=%d\n",
933a4c74dddSDave Hansen 					physbase, reserve_size, node_ar.nid);
934a4c74dddSDave Hansen 				reserve_bootmem_node(NODE_DATA(node_ar.nid),
935a4c74dddSDave Hansen 						physbase, reserve_size,
936a4c74dddSDave Hansen 						BOOTMEM_DEFAULT);
937a4c74dddSDave Hansen 			}
9388f64e1f2SJon Tollefson 			/*
9398f64e1f2SJon Tollefson 			 * if reserved region is contained in the active region
9408f64e1f2SJon Tollefson 			 * then done.
9418f64e1f2SJon Tollefson 			 */
9428f64e1f2SJon Tollefson 			if (end_pfn <= node_ar.end_pfn)
9438f64e1f2SJon Tollefson 				break;
9448f64e1f2SJon Tollefson 
9458f64e1f2SJon Tollefson 			/*
9468f64e1f2SJon Tollefson 			 * reserved region extends past the active region
9478f64e1f2SJon Tollefson 			 *   get next active region that contains this
9488f64e1f2SJon Tollefson 			 *   reserved region
9498f64e1f2SJon Tollefson 			 */
9508f64e1f2SJon Tollefson 			start_pfn = node_ar.end_pfn;
9518f64e1f2SJon Tollefson 			physbase = start_pfn << PAGE_SHIFT;
952e8170372SJon Tollefson 			size = size - reserve_size;
9538f64e1f2SJon Tollefson 			get_node_active_region(start_pfn, &node_ar);
954ab1f9dacSPaul Mackerras 		}
9554a618669SDave Hansen 	}
956ab1f9dacSPaul Mackerras }
9578f64e1f2SJon Tollefson 
9584a618669SDave Hansen 
/*
 * Early boot-memory setup: discover the NUMA topology (or fall back to
 * a single node), then for each online node allocate its pglist_data
 * and bootmem bitmap with careful_zallocation(), hand its active
 * regions to bootmem, and re-mark the memblock reserved regions.
 * Nodes are processed in numeric order -- careful_zallocation() and
 * mark_reserved_regions_for_nid() rely on that ordering.
 */
9594a618669SDave Hansen void __init do_init_bootmem(void)
9604a618669SDave Hansen {
9614a618669SDave Hansen 	int nid;
9624a618669SDave Hansen 
9634a618669SDave Hansen 	min_low_pfn = 0;
96495f72d1eSYinghai Lu 	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
9654a618669SDave Hansen 	max_pfn = max_low_pfn;
9664a618669SDave Hansen 
	/* parse_numa_properties() returns non-zero when there is no NUMA info. */
9674a618669SDave Hansen 	if (parse_numa_properties())
9684a618669SDave Hansen 		setup_nonnuma();
9694a618669SDave Hansen 	else
9704a618669SDave Hansen 		dump_numa_memory_topology();
9714a618669SDave Hansen 
9724a618669SDave Hansen 	for_each_online_node(nid) {
9734a618669SDave Hansen 		unsigned long start_pfn, end_pfn;
9740be210fdSDave Hansen 		void *bootmem_vaddr;
9754a618669SDave Hansen 		unsigned long bootmap_pages;
9764a618669SDave Hansen 
9774a618669SDave Hansen 		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
9784a618669SDave Hansen 
9794a618669SDave Hansen 		/*
9804a618669SDave Hansen 		 * Allocate the node structure node local if possible
9814a618669SDave Hansen 		 *
9824a618669SDave Hansen 		 * Be careful moving this around, as it relies on all
9834a618669SDave Hansen 		 * previous nodes' bootmem to be initialized and have
9844a618669SDave Hansen 		 * all reserved areas marked.
9854a618669SDave Hansen 		 */
986893473dfSDave Hansen 		NODE_DATA(nid) = careful_zallocation(nid,
9874a618669SDave Hansen 					sizeof(struct pglist_data),
9884a618669SDave Hansen 					SMP_CACHE_BYTES, end_pfn);
9894a618669SDave Hansen 
9904a618669SDave Hansen   		dbg("node %d\n", nid);
9914a618669SDave Hansen 		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
9924a618669SDave Hansen 
9934a618669SDave Hansen 		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
9944a618669SDave Hansen 		NODE_DATA(nid)->node_start_pfn = start_pfn;
9954a618669SDave Hansen 		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
9964a618669SDave Hansen 
		/* Memoryless node: no bootmap needed. */
9974a618669SDave Hansen 		if (NODE_DATA(nid)->node_spanned_pages == 0)
9984a618669SDave Hansen   			continue;
9994a618669SDave Hansen 
10004a618669SDave Hansen   		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
10014a618669SDave Hansen   		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
10024a618669SDave Hansen 
10034a618669SDave Hansen 		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
1004893473dfSDave Hansen 		bootmem_vaddr = careful_zallocation(nid,
10054a618669SDave Hansen 					bootmap_pages << PAGE_SHIFT,
10064a618669SDave Hansen 					PAGE_SIZE, end_pfn);
10074a618669SDave Hansen 
10080be210fdSDave Hansen 		dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
10094a618669SDave Hansen 
10100be210fdSDave Hansen 		init_bootmem_node(NODE_DATA(nid),
10110be210fdSDave Hansen 				  __pa(bootmem_vaddr) >> PAGE_SHIFT,
10124a618669SDave Hansen 				  start_pfn, end_pfn);
10134a618669SDave Hansen 
10144a618669SDave Hansen 		free_bootmem_with_active_regions(nid, end_pfn);
10154a618669SDave Hansen 		/*
10164a618669SDave Hansen 		 * Be very careful about moving this around.  Future
1017893473dfSDave Hansen 		 * calls to careful_zallocation() depend on this getting
10184a618669SDave Hansen 		 * done correctly.
10194a618669SDave Hansen 		 */
10204a618669SDave Hansen 		mark_reserved_regions_for_nid(nid);
10218f64e1f2SJon Tollefson 		sparse_memory_present_with_active_regions(nid);
1022ab1f9dacSPaul Mackerras 	}
1023d3f6204aSBenjamin Herrenschmidt 
1024d3f6204aSBenjamin Herrenschmidt 	init_bootmem_done = 1;
102525863de0SAnton Blanchard 
102625863de0SAnton Blanchard 	/*
102725863de0SAnton Blanchard 	 * Now bootmem is initialised we can create the node to cpumask
102825863de0SAnton Blanchard 	 * lookup tables and setup the cpu callback to populate them.
102925863de0SAnton Blanchard 	 */
103025863de0SAnton Blanchard 	setup_node_to_cpumask_map();
103125863de0SAnton Blanchard 
103225863de0SAnton Blanchard 	register_cpu_notifier(&ppc64_numa_nb);
	/* Seed the map with the boot CPU, which never goes through hotplug. */
103325863de0SAnton Blanchard 	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
103425863de0SAnton Blanchard 			  (void *)(unsigned long)boot_cpuid);
10354a618669SDave Hansen }
1036ab1f9dacSPaul Mackerras 
/*
 * Initialize the zone/free-area maps.  Only ZONE_DMA is populated here,
 * covering all of DRAM (memblock_end_of_DRAM()).
 */
1037ab1f9dacSPaul Mackerras void __init paging_init(void)
1038ab1f9dacSPaul Mackerras {
10396391af17SMel Gorman 	unsigned long max_zone_pfns[MAX_NR_ZONES];
10406391af17SMel Gorman 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
104195f72d1eSYinghai Lu 	max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT;
1042c67c3cb4SMel Gorman 	free_area_init_nodes(max_zone_pfns);
1043ab1f9dacSPaul Mackerras }
1044ab1f9dacSPaul Mackerras 
/*
 * Parse the "numa=" early boot parameter.  Recognized substrings:
 * "off" disables NUMA, "debug" enables dbg() output, and "fake=<spec>"
 * is saved in 'cmdline' for later fake-NUMA node setup.
 */
1045ab1f9dacSPaul Mackerras static int __init early_numa(char *p)
1046ab1f9dacSPaul Mackerras {
1047ab1f9dacSPaul Mackerras 	if (!p)
1048ab1f9dacSPaul Mackerras 		return 0;
1049ab1f9dacSPaul Mackerras 
1050ab1f9dacSPaul Mackerras 	if (strstr(p, "off"))
1051ab1f9dacSPaul Mackerras 		numa_enabled = 0;
1052ab1f9dacSPaul Mackerras 
1053ab1f9dacSPaul Mackerras 	if (strstr(p, "debug"))
1054ab1f9dacSPaul Mackerras 		numa_debug = 1;
1055ab1f9dacSPaul Mackerras 
10561daa6d08SBalbir Singh 	p = strstr(p, "fake=");
10571daa6d08SBalbir Singh 	if (p)
10581daa6d08SBalbir Singh 		cmdline = p + strlen("fake=");
10591daa6d08SBalbir Singh 
1060ab1f9dacSPaul Mackerras 	return 0;
1061ab1f9dacSPaul Mackerras }
1062ab1f9dacSPaul Mackerras early_param("numa", early_numa);
1063237a0989SMike Kravetz 
1064237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG
1065237a0989SMike Kravetz /*
10660f16ef7fSNathan Fontenot  * Find the node associated with a hot added memory section for
10670f16ef7fSNathan Fontenot  * memory represented in the device tree by the property
10680f16ef7fSNathan Fontenot  * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
 *
 * Returns the node id, or -1 if no assigned, non-reserved LMB
 * covers scn_addr (or the drconf properties cannot be read).
10690db9360aSNathan Fontenot  */
10700db9360aSNathan Fontenot static int hot_add_drconf_scn_to_nid(struct device_node *memory,
10710db9360aSNathan Fontenot 				     unsigned long scn_addr)
10720db9360aSNathan Fontenot {
10730db9360aSNathan Fontenot 	const u32 *dm;
10740f16ef7fSNathan Fontenot 	unsigned int drconf_cell_cnt, rc;
1075*3fdfd990SBenjamin Herrenschmidt 	unsigned long lmb_size;
10760db9360aSNathan Fontenot 	struct assoc_arrays aa;
10770f16ef7fSNathan Fontenot 	int nid = -1;
10780db9360aSNathan Fontenot 
10790f16ef7fSNathan Fontenot 	drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
10800f16ef7fSNathan Fontenot 	if (!drconf_cell_cnt)
10810f16ef7fSNathan Fontenot 		return -1;
10820db9360aSNathan Fontenot 
1083*3fdfd990SBenjamin Herrenschmidt 	lmb_size = of_get_lmb_size(memory);
1084*3fdfd990SBenjamin Herrenschmidt 	if (!lmb_size)
10850f16ef7fSNathan Fontenot 		return -1;
10860db9360aSNathan Fontenot 
10870db9360aSNathan Fontenot 	rc = of_get_assoc_arrays(memory, &aa);
10880db9360aSNathan Fontenot 	if (rc)
10890f16ef7fSNathan Fontenot 		return -1;
10900db9360aSNathan Fontenot 
10910f16ef7fSNathan Fontenot 	for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
10920db9360aSNathan Fontenot 		struct of_drconf_cell drmem;
10930db9360aSNathan Fontenot 
10940db9360aSNathan Fontenot 		read_drconf_cell(&drmem, &dm);
10950db9360aSNathan Fontenot 
10960db9360aSNathan Fontenot 		/* skip this block if it is reserved or not assigned to
10970db9360aSNathan Fontenot 		 * this partition */
10980db9360aSNathan Fontenot 		if ((drmem.flags & DRCONF_MEM_RESERVED)
10990db9360aSNathan Fontenot 		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
11000db9360aSNathan Fontenot 			continue;
11010db9360aSNathan Fontenot 
		/* Is scn_addr inside this LMB's [base, base + lmb_size) range? */
11020f16ef7fSNathan Fontenot 		if ((scn_addr < drmem.base_addr)
1103*3fdfd990SBenjamin Herrenschmidt 		    || (scn_addr >= (drmem.base_addr + lmb_size)))
11040f16ef7fSNathan Fontenot 			continue;
11050db9360aSNathan Fontenot 
11060f16ef7fSNathan Fontenot 		nid = of_drconf_to_nid_single(&drmem, &aa);
11070f16ef7fSNathan Fontenot 		break;
11080db9360aSNathan Fontenot 	}
11090db9360aSNathan Fontenot 
11100f16ef7fSNathan Fontenot 	return nid;
11110db9360aSNathan Fontenot }
11120db9360aSNathan Fontenot 
11130db9360aSNathan Fontenot /*
11140f16ef7fSNathan Fontenot  * Find the node associated with a hot added memory section for memory
11150f16ef7fSNathan Fontenot  * represented in the device tree as a node (i.e. memory@XXXX) for
111695f72d1eSYinghai Lu  * each memblock.
 *
 * Returns the node id, or -1 if no memory node's "reg" ranges
 * contain scn_addr.
1117237a0989SMike Kravetz  */
11180f16ef7fSNathan Fontenot int hot_add_node_scn_to_nid(unsigned long scn_addr)
1119237a0989SMike Kravetz {
1120237a0989SMike Kravetz 	struct device_node *memory = NULL;
11210f16ef7fSNathan Fontenot 	int nid = -1;
1122237a0989SMike Kravetz 
1123237a0989SMike Kravetz 	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
1124237a0989SMike Kravetz 		unsigned long start, size;
1125b226e462SMike Kravetz 		int ranges;
1126a7f67bdfSJeremy Kerr 		const unsigned int *memcell_buf;
1127237a0989SMike Kravetz 		unsigned int len;
1128237a0989SMike Kravetz 
1129e2eb6392SStephen Rothwell 		memcell_buf = of_get_property(memory, "reg", &len);
1130237a0989SMike Kravetz 		if (!memcell_buf || len <= 0)
1131237a0989SMike Kravetz 			continue;
1132237a0989SMike Kravetz 
1133cc5d0189SBenjamin Herrenschmidt 		/* ranges in cell */
1134cc5d0189SBenjamin Herrenschmidt 		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
11350f16ef7fSNathan Fontenot 
11360f16ef7fSNathan Fontenot 		while (ranges--) {
			/* read_n_cells() advances memcell_buf -- order matters. */
1137237a0989SMike Kravetz 			start = read_n_cells(n_mem_addr_cells, &memcell_buf);
1138237a0989SMike Kravetz 			size = read_n_cells(n_mem_size_cells, &memcell_buf);
1139237a0989SMike Kravetz 
11400f16ef7fSNathan Fontenot 			if ((scn_addr < start) || (scn_addr >= (start + size)))
11410f16ef7fSNathan Fontenot 				continue;
11420f16ef7fSNathan Fontenot 
11430f16ef7fSNathan Fontenot 			nid = of_node_to_nid_single(memory);
11440f16ef7fSNathan Fontenot 			break;
11450f16ef7fSNathan Fontenot 		}
11460f16ef7fSNathan Fontenot 
1147237a0989SMike Kravetz 		of_node_put(memory);
11480f16ef7fSNathan Fontenot 		if (nid >= 0)
11490f16ef7fSNathan Fontenot 			break;
11500f16ef7fSNathan Fontenot 	}
11510f16ef7fSNathan Fontenot 
11520db9360aSNathan Fontenot 	return nid;
1153237a0989SMike Kravetz }
1154237a0989SMike Kravetz 
11550f16ef7fSNathan Fontenot /*
11560f16ef7fSNathan Fontenot  * Find the node associated with a hot added memory section.  Section
115795f72d1eSYinghai Lu  * corresponds to a SPARSEMEM section, not an MEMBLOCK.  It is assumed that
115895f72d1eSYinghai Lu  * sections are fully contained within a single MEMBLOCK.
 *
 * Always returns a usable, online node id: falls back to
 * first_online_node and finally to any node that has memory.
11590f16ef7fSNathan Fontenot  */
11600f16ef7fSNathan Fontenot int hot_add_scn_to_nid(unsigned long scn_addr)
11610f16ef7fSNathan Fontenot {
11620f16ef7fSNathan Fontenot 	struct device_node *memory = NULL;
11630f16ef7fSNathan Fontenot 	int nid, found = 0;
11640f16ef7fSNathan Fontenot 
11650f16ef7fSNathan Fontenot 	if (!numa_enabled || (min_common_depth < 0))
116672c33688SH Hartley Sweeten 		return first_online_node;
11670f16ef7fSNathan Fontenot 
	/* drconf (LPAR) layout takes precedence over plain memory@ nodes. */
11680f16ef7fSNathan Fontenot 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
11690f16ef7fSNathan Fontenot 	if (memory) {
11700f16ef7fSNathan Fontenot 		nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
11710f16ef7fSNathan Fontenot 		of_node_put(memory);
11720f16ef7fSNathan Fontenot 	} else {
11730f16ef7fSNathan Fontenot 		nid = hot_add_node_scn_to_nid(scn_addr);
1174237a0989SMike Kravetz 	}
11750f16ef7fSNathan Fontenot 
11760f16ef7fSNathan Fontenot 	if (nid < 0 || !node_online(nid))
117772c33688SH Hartley Sweeten 		nid = first_online_node;
11780f16ef7fSNathan Fontenot 
11790f16ef7fSNathan Fontenot 	if (NODE_DATA(nid)->node_spanned_pages)
11800f16ef7fSNathan Fontenot 		return nid;
11810f16ef7fSNathan Fontenot 
	/* Memoryless node: pick the first online node that has memory. */
11820f16ef7fSNathan Fontenot 	for_each_online_node(nid) {
11830f16ef7fSNathan Fontenot 		if (NODE_DATA(nid)->node_spanned_pages) {
11840f16ef7fSNathan Fontenot 			found = 1;
11850f16ef7fSNathan Fontenot 			break;
1186237a0989SMike Kravetz 		}
11870f16ef7fSNathan Fontenot 	}
11880f16ef7fSNathan Fontenot 
11890f16ef7fSNathan Fontenot 	BUG_ON(!found);
11900f16ef7fSNathan Fontenot 	return nid;
11910f16ef7fSNathan Fontenot }
11920f16ef7fSNathan Fontenot 
1193237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */
1194