xref: /linux/arch/powerpc/mm/numa.c (revision 28e86bdbc9863d3d81711db02abedbc54528099e)
1ab1f9dacSPaul Mackerras /*
2ab1f9dacSPaul Mackerras  * pSeries NUMA support
3ab1f9dacSPaul Mackerras  *
4ab1f9dacSPaul Mackerras  * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
5ab1f9dacSPaul Mackerras  *
6ab1f9dacSPaul Mackerras  * This program is free software; you can redistribute it and/or
7ab1f9dacSPaul Mackerras  * modify it under the terms of the GNU General Public License
8ab1f9dacSPaul Mackerras  * as published by the Free Software Foundation; either version
9ab1f9dacSPaul Mackerras  * 2 of the License, or (at your option) any later version.
10ab1f9dacSPaul Mackerras  */
11ab1f9dacSPaul Mackerras #include <linux/threads.h>
12ab1f9dacSPaul Mackerras #include <linux/bootmem.h>
13ab1f9dacSPaul Mackerras #include <linux/init.h>
14ab1f9dacSPaul Mackerras #include <linux/mm.h>
15ab1f9dacSPaul Mackerras #include <linux/mmzone.h>
164b16f8e2SPaul Gortmaker #include <linux/export.h>
17ab1f9dacSPaul Mackerras #include <linux/nodemask.h>
18ab1f9dacSPaul Mackerras #include <linux/cpu.h>
19ab1f9dacSPaul Mackerras #include <linux/notifier.h>
2095f72d1eSYinghai Lu #include <linux/memblock.h>
216df1646eSMichael Ellerman #include <linux/of.h>
2206eccea6SDave Hansen #include <linux/pfn.h>
239eff1a38SJesse Larrew #include <linux/cpuset.h>
249eff1a38SJesse Larrew #include <linux/node.h>
2545fb6ceaSAnton Blanchard #include <asm/sparsemem.h>
26d9b2b2a2SDavid S. Miller #include <asm/prom.h>
27cf00a8d1SPaul Mackerras #include <asm/system.h>
282249ca9dSPaul Mackerras #include <asm/smp.h>
299eff1a38SJesse Larrew #include <asm/firmware.h>
309eff1a38SJesse Larrew #include <asm/paca.h>
3139bf990eSJesse Larrew #include <asm/hvcall.h>
32ab1f9dacSPaul Mackerras 
33ab1f9dacSPaul Mackerras static int numa_enabled = 1;
34ab1f9dacSPaul Mackerras 
351daa6d08SBalbir Singh static char *cmdline __initdata;
361daa6d08SBalbir Singh 
37ab1f9dacSPaul Mackerras static int numa_debug;
38ab1f9dacSPaul Mackerras #define dbg(args...) do { if (numa_debug) printk(KERN_INFO args); } while (0)
39ab1f9dacSPaul Mackerras 
4045fb6ceaSAnton Blanchard int numa_cpu_lookup_table[NR_CPUS];
4125863de0SAnton Blanchard cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
42ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES];
4345fb6ceaSAnton Blanchard 
4445fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpu_lookup_table);
4525863de0SAnton Blanchard EXPORT_SYMBOL(node_to_cpumask_map);
4645fb6ceaSAnton Blanchard EXPORT_SYMBOL(node_data);
4745fb6ceaSAnton Blanchard 
48ab1f9dacSPaul Mackerras static int min_common_depth;
49237a0989SMike Kravetz static int n_mem_addr_cells, n_mem_size_cells;
5041eab6f8SAnton Blanchard static int form1_affinity;
5141eab6f8SAnton Blanchard 
5241eab6f8SAnton Blanchard #define MAX_DISTANCE_REF_POINTS 4
5341eab6f8SAnton Blanchard static int distance_ref_points_depth;
5441eab6f8SAnton Blanchard static const unsigned int *distance_ref_points;
5541eab6f8SAnton Blanchard static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
56ab1f9dacSPaul Mackerras 
5725863de0SAnton Blanchard /*
5825863de0SAnton Blanchard  * Allocate node_to_cpumask_map based on the number of available nodes.
5925863de0SAnton Blanchard  * Requires node_possible_map to be valid.
6025863de0SAnton Blanchard  *
6125863de0SAnton Blanchard  * Note: node_to_cpumask() is not valid until after this is done.
6225863de0SAnton Blanchard  */
6325863de0SAnton Blanchard static void __init setup_node_to_cpumask_map(void)
6425863de0SAnton Blanchard {
6525863de0SAnton Blanchard 	unsigned int node, num = 0;
6625863de0SAnton Blanchard 
6725863de0SAnton Blanchard 	/* setup nr_node_ids if not done yet */
6825863de0SAnton Blanchard 	if (nr_node_ids == MAX_NUMNODES) {
6925863de0SAnton Blanchard 		for_each_node_mask(node, node_possible_map)
7025863de0SAnton Blanchard 			num = node;
7125863de0SAnton Blanchard 		nr_node_ids = num + 1;
7225863de0SAnton Blanchard 	}
7325863de0SAnton Blanchard 
7425863de0SAnton Blanchard 	/* allocate the map */
7525863de0SAnton Blanchard 	for (node = 0; node < nr_node_ids; node++)
7625863de0SAnton Blanchard 		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
7725863de0SAnton Blanchard 
7825863de0SAnton Blanchard 	/* cpumask_of_node() will now work */
7925863de0SAnton Blanchard 	dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
8025863de0SAnton Blanchard }
8125863de0SAnton Blanchard 
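/*
 * Worked example (hypothetical values): nr_node_ids is derived from the
 * highest possible node, not the count of possible nodes, so a
 * node_possible_map of {0, 2} yields nr_node_ids = 3 and a cpumask is
 * allocated for the absent node 1 as well.
 */
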
821daa6d08SBalbir Singh static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
831daa6d08SBalbir Singh 						unsigned int *nid)
841daa6d08SBalbir Singh {
851daa6d08SBalbir Singh 	unsigned long long mem;
861daa6d08SBalbir Singh 	char *p = cmdline;
871daa6d08SBalbir Singh 	static unsigned int fake_nid;
881daa6d08SBalbir Singh 	static unsigned long long curr_boundary;
891daa6d08SBalbir Singh 
901daa6d08SBalbir Singh 	/*
911daa6d08SBalbir Singh 	 * Modify the node id iff we have started creating NUMA nodes.
921daa6d08SBalbir Singh 	 * We want to continue from where we left off the last time.
931daa6d08SBalbir Singh 	 */
941daa6d08SBalbir Singh 	if (fake_nid)
951daa6d08SBalbir Singh 		*nid = fake_nid;
961daa6d08SBalbir Singh 	/*
971daa6d08SBalbir Singh 	 * In case there are no more arguments to parse, the
981daa6d08SBalbir Singh 	 * node_id should be the same as the last fake node id
991daa6d08SBalbir Singh 	 * (we've handled this above).
1001daa6d08SBalbir Singh 	 */
1011daa6d08SBalbir Singh 	if (!p)
1021daa6d08SBalbir Singh 		return 0;
1031daa6d08SBalbir Singh 
1041daa6d08SBalbir Singh 	mem = memparse(p, &p);
1051daa6d08SBalbir Singh 	if (!mem)
1061daa6d08SBalbir Singh 		return 0;
1071daa6d08SBalbir Singh 
1081daa6d08SBalbir Singh 	if (mem < curr_boundary)
1091daa6d08SBalbir Singh 		return 0;
1101daa6d08SBalbir Singh 
1111daa6d08SBalbir Singh 	curr_boundary = mem;
1121daa6d08SBalbir Singh 
1131daa6d08SBalbir Singh 	if ((end_pfn << PAGE_SHIFT) > mem) {
1141daa6d08SBalbir Singh 		/*
1151daa6d08SBalbir Singh 		 * Skip commas and whitespace
1161daa6d08SBalbir Singh 		 */
1171daa6d08SBalbir Singh 		while (*p == ',' || *p == ' ' || *p == '\t')
1181daa6d08SBalbir Singh 			p++;
1191daa6d08SBalbir Singh 
1201daa6d08SBalbir Singh 		cmdline = p;
1211daa6d08SBalbir Singh 		fake_nid++;
1221daa6d08SBalbir Singh 		*nid = fake_nid;
1231daa6d08SBalbir Singh 		dbg("created new fake_node with id %d\n", fake_nid);
1241daa6d08SBalbir Singh 		return 1;
1251daa6d08SBalbir Singh 	}
1261daa6d08SBalbir Singh 	return 0;
1271daa6d08SBalbir Singh }
1281daa6d08SBalbir Singh 
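/*
 * Worked example (hypothetical command line): booting with
 * "numa=fake=1G,2G" leaves cmdline pointing at "1G,2G".  The first
 * region ending above 1G bumps fake_nid to 1 and advances cmdline past
 * the comma, so memory below 1G keeps its original node id, memory
 * between 1G and 2G is placed in fake node 1, and memory above 2G in
 * fake node 2.
 */
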
1298f64e1f2SJon Tollefson /*
1308f64e1f2SJon Tollefson  * get_active_region_work_fn - A helper function for get_node_active_region
1318f64e1f2SJon Tollefson  *	Sets datax->start_pfn and datax->end_pfn to the bounds of the
1328f64e1f2SJon Tollefson  *	active region if it contains the initial value of datax->start_pfn
1338f64e1f2SJon Tollefson  * @start_pfn: start page (inclusive) of region to check
1348f64e1f2SJon Tollefson  * @end_pfn: end page (exclusive) of region to check
1358f64e1f2SJon Tollefson  * @datax: comes in with ->start_pfn set to the value to search for and
1368f64e1f2SJon Tollefson  *	goes out with the active range if it contains it
1378f64e1f2SJon Tollefson  * Returns 1 if the search value is in the range, else 0
1388f64e1f2SJon Tollefson  */
1398f64e1f2SJon Tollefson static int __init get_active_region_work_fn(unsigned long start_pfn,
1408f64e1f2SJon Tollefson 					unsigned long end_pfn, void *datax)
1418f64e1f2SJon Tollefson {
1428f64e1f2SJon Tollefson 	struct node_active_region *data;
1438f64e1f2SJon Tollefson 	data = (struct node_active_region *)datax;
1448f64e1f2SJon Tollefson 
1458f64e1f2SJon Tollefson 	if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) {
1468f64e1f2SJon Tollefson 		data->start_pfn = start_pfn;
1478f64e1f2SJon Tollefson 		data->end_pfn = end_pfn;
1488f64e1f2SJon Tollefson 		return 1;
1498f64e1f2SJon Tollefson 	}
1508f64e1f2SJon Tollefson 	return 0;
1528f64e1f2SJon Tollefson }
1538f64e1f2SJon Tollefson 
1548f64e1f2SJon Tollefson /*
1558f64e1f2SJon Tollefson  * get_node_active_region - Return active region containing start_pfn
156e8170372SJon Tollefson  * Active range returned is empty if none found.
1578f64e1f2SJon Tollefson  * @start_pfn: The page to return the region for.
1588f64e1f2SJon Tollefson  * @node_ar: Returned set to the active region containing start_pfn
1598f64e1f2SJon Tollefson  */
1608f64e1f2SJon Tollefson static void __init get_node_active_region(unsigned long start_pfn,
1618f64e1f2SJon Tollefson 		       struct node_active_region *node_ar)
1628f64e1f2SJon Tollefson {
1638f64e1f2SJon Tollefson 	int nid = early_pfn_to_nid(start_pfn);
1648f64e1f2SJon Tollefson 
1658f64e1f2SJon Tollefson 	node_ar->nid = nid;
1668f64e1f2SJon Tollefson 	node_ar->start_pfn = start_pfn;
167e8170372SJon Tollefson 	node_ar->end_pfn = start_pfn;
1688f64e1f2SJon Tollefson 	work_with_active_regions(nid, get_active_region_work_fn, node_ar);
1698f64e1f2SJon Tollefson }
1708f64e1f2SJon Tollefson 
17139bf990eSJesse Larrew static void map_cpu_to_node(int cpu, int node)
172ab1f9dacSPaul Mackerras {
173ab1f9dacSPaul Mackerras 	numa_cpu_lookup_table[cpu] = node;
17445fb6ceaSAnton Blanchard 
175bf4b85b0SNathan Lynch 	dbg("adding cpu %d to node %d\n", cpu, node);
176bf4b85b0SNathan Lynch 
17725863de0SAnton Blanchard 	if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node])))
17825863de0SAnton Blanchard 		cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
179ab1f9dacSPaul Mackerras }
180ab1f9dacSPaul Mackerras 
18139bf990eSJesse Larrew #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
182ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu)
183ab1f9dacSPaul Mackerras {
184ab1f9dacSPaul Mackerras 	int node = numa_cpu_lookup_table[cpu];
185ab1f9dacSPaul Mackerras 
186ab1f9dacSPaul Mackerras 	dbg("removing cpu %lu from node %d\n", cpu, node);
187ab1f9dacSPaul Mackerras 
18825863de0SAnton Blanchard 	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
189429f4d8dSAnton Blanchard 		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
190ab1f9dacSPaul Mackerras 	} else {
191ab1f9dacSPaul Mackerras 		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
192ab1f9dacSPaul Mackerras 		       cpu, node);
193ab1f9dacSPaul Mackerras 	}
194ab1f9dacSPaul Mackerras }
19539bf990eSJesse Larrew #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
196ab1f9dacSPaul Mackerras 
197ab1f9dacSPaul Mackerras /* must hold reference to node during call */
198a7f67bdfSJeremy Kerr static const int *of_get_associativity(struct device_node *dev)
199ab1f9dacSPaul Mackerras {
200e2eb6392SStephen Rothwell 	return of_get_property(dev, "ibm,associativity", NULL);
201ab1f9dacSPaul Mackerras }
202ab1f9dacSPaul Mackerras 
203cf00085dSChandru /*
204cf00085dSChandru  * Returns the property linux,drconf-usable-memory if
205cf00085dSChandru  * it exists (the property exists only in kexec/kdump kernels,
206cf00085dSChandru  * added by kexec-tools)
207cf00085dSChandru  */
208cf00085dSChandru static const u32 *of_get_usable_memory(struct device_node *memory)
209cf00085dSChandru {
210cf00085dSChandru 	const u32 *prop;
211cf00085dSChandru 	u32 len;
212cf00085dSChandru 	prop = of_get_property(memory, "linux,drconf-usable-memory", &len);
213cf00085dSChandru 	if (!prop || len < sizeof(unsigned int))
214cf00085dSChandru 		return NULL;
215cf00085dSChandru 	return prop;
216cf00085dSChandru }
217cf00085dSChandru 
21841eab6f8SAnton Blanchard int __node_distance(int a, int b)
21941eab6f8SAnton Blanchard {
22041eab6f8SAnton Blanchard 	int i;
22141eab6f8SAnton Blanchard 	int distance = LOCAL_DISTANCE;
22241eab6f8SAnton Blanchard 
22341eab6f8SAnton Blanchard 	if (!form1_affinity)
22441eab6f8SAnton Blanchard 		return distance;
22541eab6f8SAnton Blanchard 
22641eab6f8SAnton Blanchard 	for (i = 0; i < distance_ref_points_depth; i++) {
22741eab6f8SAnton Blanchard 		if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
22841eab6f8SAnton Blanchard 			break;
22941eab6f8SAnton Blanchard 
23041eab6f8SAnton Blanchard 		/* Double the distance for each NUMA level */
23141eab6f8SAnton Blanchard 		distance *= 2;
23241eab6f8SAnton Blanchard 	}
23341eab6f8SAnton Blanchard 
23441eab6f8SAnton Blanchard 	return distance;
23541eab6f8SAnton Blanchard }
23641eab6f8SAnton Blanchard 
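/*
 * Worked example (hypothetical topology): with the conventional
 * LOCAL_DISTANCE of 10 and distance_ref_points_depth == 3, two nodes
 * whose reference-point domains first match at level 0, 1 or 2 are at
 * distance 10, 20 or 40 respectively, and at 80 if no level matches.
 */
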
23741eab6f8SAnton Blanchard static void initialize_distance_lookup_table(int nid,
23841eab6f8SAnton Blanchard 		const unsigned int *associativity)
23941eab6f8SAnton Blanchard {
24041eab6f8SAnton Blanchard 	int i;
24141eab6f8SAnton Blanchard 
24241eab6f8SAnton Blanchard 	if (!form1_affinity)
24341eab6f8SAnton Blanchard 		return;
24441eab6f8SAnton Blanchard 
24541eab6f8SAnton Blanchard 	for (i = 0; i < distance_ref_points_depth; i++) {
24641eab6f8SAnton Blanchard 		distance_lookup_table[nid][i] =
24741eab6f8SAnton Blanchard 			associativity[distance_ref_points[i]];
24841eab6f8SAnton Blanchard 	}
24941eab6f8SAnton Blanchard }
25041eab6f8SAnton Blanchard 
251482ec7c4SNathan Lynch /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
252482ec7c4SNathan Lynch  * info is found.
253482ec7c4SNathan Lynch  */
2549eff1a38SJesse Larrew static int associativity_to_nid(const unsigned int *associativity)
255ab1f9dacSPaul Mackerras {
256482ec7c4SNathan Lynch 	int nid = -1;
257ab1f9dacSPaul Mackerras 
258ab1f9dacSPaul Mackerras 	if (min_common_depth == -1)
259482ec7c4SNathan Lynch 		goto out;
260ab1f9dacSPaul Mackerras 
2619eff1a38SJesse Larrew 	if (associativity[0] >= min_common_depth)
2629eff1a38SJesse Larrew 		nid = associativity[min_common_depth];
263bc16a759SNathan Lynch 
264bc16a759SNathan Lynch 	/* POWER4 LPAR uses 0xffff as invalid node */
265482ec7c4SNathan Lynch 	if (nid == 0xffff || nid >= MAX_NUMNODES)
266482ec7c4SNathan Lynch 		nid = -1;
26741eab6f8SAnton Blanchard 
2689eff1a38SJesse Larrew 	if (nid > 0 && associativity[0] >= distance_ref_points_depth)
2699eff1a38SJesse Larrew 		initialize_distance_lookup_table(nid, associativity);
27041eab6f8SAnton Blanchard 
271482ec7c4SNathan Lynch out:
272cf950b7aSNathan Lynch 	return nid;
273ab1f9dacSPaul Mackerras }
274ab1f9dacSPaul Mackerras 
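/*
 * Worked example (hypothetical property): an ibm,associativity value of
 * <4 0 0 1 2> encodes four levels; with min_common_depth == 4 the check
 * associativity[0] >= min_common_depth passes, so nid becomes
 * associativity[4], i.e. node 2.
 */
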
2759eff1a38SJesse Larrew /* Returns the nid associated with the given device tree node,
2769eff1a38SJesse Larrew  * or -1 if not found.
2779eff1a38SJesse Larrew  */
2789eff1a38SJesse Larrew static int of_node_to_nid_single(struct device_node *device)
2799eff1a38SJesse Larrew {
2809eff1a38SJesse Larrew 	int nid = -1;
2819eff1a38SJesse Larrew 	const unsigned int *tmp;
2829eff1a38SJesse Larrew 
2839eff1a38SJesse Larrew 	tmp = of_get_associativity(device);
2849eff1a38SJesse Larrew 	if (tmp)
2859eff1a38SJesse Larrew 		nid = associativity_to_nid(tmp);
2869eff1a38SJesse Larrew 	return nid;
2879eff1a38SJesse Larrew }
2889eff1a38SJesse Larrew 
289953039c8SJeremy Kerr /* Walk the device tree upwards, looking for an associativity id */
290953039c8SJeremy Kerr int of_node_to_nid(struct device_node *device)
291953039c8SJeremy Kerr {
292953039c8SJeremy Kerr 	struct device_node *tmp;
293953039c8SJeremy Kerr 	int nid = -1;
294953039c8SJeremy Kerr 
295953039c8SJeremy Kerr 	of_node_get(device);
296953039c8SJeremy Kerr 	while (device) {
297953039c8SJeremy Kerr 		nid = of_node_to_nid_single(device);
298953039c8SJeremy Kerr 		if (nid != -1)
299953039c8SJeremy Kerr 			break;
300953039c8SJeremy Kerr 
301953039c8SJeremy Kerr 		tmp = device;
302953039c8SJeremy Kerr 		device = of_get_parent(tmp);
303953039c8SJeremy Kerr 		of_node_put(tmp);
304953039c8SJeremy Kerr 	}
305953039c8SJeremy Kerr 	of_node_put(device);
306953039c8SJeremy Kerr 
307953039c8SJeremy Kerr 	return nid;
308953039c8SJeremy Kerr }
309953039c8SJeremy Kerr EXPORT_SYMBOL_GPL(of_node_to_nid);
310953039c8SJeremy Kerr 
311ab1f9dacSPaul Mackerras static int __init find_min_common_depth(void)
312ab1f9dacSPaul Mackerras {
31341eab6f8SAnton Blanchard 	int depth;
314bc8449ccSAnton Blanchard 	struct device_node *chosen;
315e70606ebSMichael Ellerman 	struct device_node *root;
316bc8449ccSAnton Blanchard 	const char *vec5;
317ab1f9dacSPaul Mackerras 
3181c8ee733SDipankar Sarma 	if (firmware_has_feature(FW_FEATURE_OPAL))
3191c8ee733SDipankar Sarma 		root = of_find_node_by_path("/ibm,opal");
3201c8ee733SDipankar Sarma 	else
321e70606ebSMichael Ellerman 		root = of_find_node_by_path("/rtas");
322e70606ebSMichael Ellerman 	if (!root)
323e70606ebSMichael Ellerman 		root = of_find_node_by_path("/");
324ab1f9dacSPaul Mackerras 
325ab1f9dacSPaul Mackerras 	/*
32641eab6f8SAnton Blanchard 	 * This property is a set of 32-bit integers, each representing
32741eab6f8SAnton Blanchard 	 * an index into the ibm,associativity nodes.
32841eab6f8SAnton Blanchard 	 *
32941eab6f8SAnton Blanchard 	 * With form 0 affinity the first integer is for an SMP configuration
33041eab6f8SAnton Blanchard 	 * (should be all 0's) and the second is for a normal NUMA
33141eab6f8SAnton Blanchard 	 * configuration. We have only one level of NUMA.
33241eab6f8SAnton Blanchard 	 *
33341eab6f8SAnton Blanchard 	 * With form 1 affinity the first integer is the most significant
33441eab6f8SAnton Blanchard 	 * NUMA boundary and the following are progressively less significant
33541eab6f8SAnton Blanchard 	 * boundaries. There can be more than one level of NUMA.
336ab1f9dacSPaul Mackerras 	 */
337e70606ebSMichael Ellerman 	distance_ref_points = of_get_property(root,
33841eab6f8SAnton Blanchard 					"ibm,associativity-reference-points",
33941eab6f8SAnton Blanchard 					&distance_ref_points_depth);
340ab1f9dacSPaul Mackerras 
34141eab6f8SAnton Blanchard 	if (!distance_ref_points) {
34241eab6f8SAnton Blanchard 		dbg("NUMA: ibm,associativity-reference-points not found.\n");
34341eab6f8SAnton Blanchard 		goto err;
34441eab6f8SAnton Blanchard 	}
34541eab6f8SAnton Blanchard 
34641eab6f8SAnton Blanchard 	distance_ref_points_depth /= sizeof(int);
34741eab6f8SAnton Blanchard 
348bc8449ccSAnton Blanchard #define VEC5_AFFINITY_BYTE	5
349bc8449ccSAnton Blanchard #define VEC5_AFFINITY		0x80
3501c8ee733SDipankar Sarma 
3511c8ee733SDipankar Sarma 	if (firmware_has_feature(FW_FEATURE_OPAL))
3521c8ee733SDipankar Sarma 		form1_affinity = 1;
3531c8ee733SDipankar Sarma 	else {
354bc8449ccSAnton Blanchard 		chosen = of_find_node_by_path("/chosen");
355bc8449ccSAnton Blanchard 		if (chosen) {
3561c8ee733SDipankar Sarma 			vec5 = of_get_property(chosen,
3571c8ee733SDipankar Sarma 					       "ibm,architecture-vec-5", NULL);
3581c8ee733SDipankar Sarma 			if (vec5 && (vec5[VEC5_AFFINITY_BYTE] &
3591c8ee733SDipankar Sarma 							VEC5_AFFINITY)) {
360bc8449ccSAnton Blanchard 				dbg("Using form 1 affinity\n");
36141eab6f8SAnton Blanchard 				form1_affinity = 1;
3624b83c330SAnton Blanchard 			}
363bc8449ccSAnton Blanchard 		}
3641c8ee733SDipankar Sarma 	}
3654b83c330SAnton Blanchard 
36641eab6f8SAnton Blanchard 	if (form1_affinity) {
36741eab6f8SAnton Blanchard 		depth = distance_ref_points[0];
368ab1f9dacSPaul Mackerras 	} else {
36941eab6f8SAnton Blanchard 		if (distance_ref_points_depth < 2) {
37041eab6f8SAnton Blanchard 			printk(KERN_WARNING "NUMA: "
37141eab6f8SAnton Blanchard 				"short ibm,associativity-reference-points\n");
37241eab6f8SAnton Blanchard 			goto err;
373ab1f9dacSPaul Mackerras 		}
374ab1f9dacSPaul Mackerras 
37541eab6f8SAnton Blanchard 		depth = distance_ref_points[1];
37641eab6f8SAnton Blanchard 	}
37741eab6f8SAnton Blanchard 
37841eab6f8SAnton Blanchard 	/*
37941eab6f8SAnton Blanchard 	 * Warn and cap if the hardware supports more than
38041eab6f8SAnton Blanchard 	 * MAX_DISTANCE_REF_POINTS domains.
38141eab6f8SAnton Blanchard 	 */
38241eab6f8SAnton Blanchard 	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
38341eab6f8SAnton Blanchard 		printk(KERN_WARNING "NUMA: distance array capped at "
38441eab6f8SAnton Blanchard 			"%d entries\n", MAX_DISTANCE_REF_POINTS);
38541eab6f8SAnton Blanchard 		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
38641eab6f8SAnton Blanchard 	}
38741eab6f8SAnton Blanchard 
388e70606ebSMichael Ellerman 	of_node_put(root);
389ab1f9dacSPaul Mackerras 	return depth;
39041eab6f8SAnton Blanchard 
39141eab6f8SAnton Blanchard err:
392e70606ebSMichael Ellerman 	of_node_put(root);
39341eab6f8SAnton Blanchard 	return -1;
394ab1f9dacSPaul Mackerras }
395ab1f9dacSPaul Mackerras 
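/*
 * Worked example (hypothetical property): given
 * ibm,associativity-reference-points = <4 2>, form 1 affinity selects
 * the first entry and find_min_common_depth() returns 4, while form 0
 * affinity selects the second entry and returns 2.
 */
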
39684c9fdd1SMike Kravetz static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
397ab1f9dacSPaul Mackerras {
398ab1f9dacSPaul Mackerras 	struct device_node *memory = NULL;
399ab1f9dacSPaul Mackerras 
400ab1f9dacSPaul Mackerras 	memory = of_find_node_by_type(memory, "memory");
40154c23310SPaul Mackerras 	if (!memory)
40284c9fdd1SMike Kravetz 		panic("numa.c: No memory nodes found!");
40354c23310SPaul Mackerras 
404a8bda5ddSStephen Rothwell 	*n_addr_cells = of_n_addr_cells(memory);
4059213feeaSStephen Rothwell 	*n_size_cells = of_n_size_cells(memory);
40684c9fdd1SMike Kravetz 	of_node_put(memory);
407ab1f9dacSPaul Mackerras }
408ab1f9dacSPaul Mackerras 
409a7f67bdfSJeremy Kerr static unsigned long __devinit read_n_cells(int n, const unsigned int **buf)
410ab1f9dacSPaul Mackerras {
411ab1f9dacSPaul Mackerras 	unsigned long result = 0;
412ab1f9dacSPaul Mackerras 
413ab1f9dacSPaul Mackerras 	while (n--) {
414ab1f9dacSPaul Mackerras 		result = (result << 32) | **buf;
415ab1f9dacSPaul Mackerras 		(*buf)++;
416ab1f9dacSPaul Mackerras 	}
417ab1f9dacSPaul Mackerras 	return result;
418ab1f9dacSPaul Mackerras }
419ab1f9dacSPaul Mackerras 
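/*
 * Worked example (hypothetical buffer): with n == 2 and *buf pointing at
 * the cells { 0x1, 0x2 }, read_n_cells() returns 0x100000002 and leaves
 * *buf advanced past both cells.
 */
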
4208342681dSNathan Fontenot struct of_drconf_cell {
4218342681dSNathan Fontenot 	u64	base_addr;
4228342681dSNathan Fontenot 	u32	drc_index;
4238342681dSNathan Fontenot 	u32	reserved;
4248342681dSNathan Fontenot 	u32	aa_index;
4258342681dSNathan Fontenot 	u32	flags;
4268342681dSNathan Fontenot };
4278342681dSNathan Fontenot 
4288342681dSNathan Fontenot #define DRCONF_MEM_ASSIGNED	0x00000008
4298342681dSNathan Fontenot #define DRCONF_MEM_AI_INVALID	0x00000040
4308342681dSNathan Fontenot #define DRCONF_MEM_RESERVED	0x00000080
4318342681dSNathan Fontenot 
4328342681dSNathan Fontenot /*
43395f72d1eSYinghai Lu  * Read the next memblock list entry from the ibm,dynamic-memory property
4348342681dSNathan Fontenot  * and return the information in the provided of_drconf_cell structure.
4358342681dSNathan Fontenot  */
4368342681dSNathan Fontenot static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
4378342681dSNathan Fontenot {
4388342681dSNathan Fontenot 	const u32 *cp;
4398342681dSNathan Fontenot 
4408342681dSNathan Fontenot 	drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);
4418342681dSNathan Fontenot 
4428342681dSNathan Fontenot 	cp = *cellp;
4438342681dSNathan Fontenot 	drmem->drc_index = cp[0];
4448342681dSNathan Fontenot 	drmem->reserved = cp[1];
4458342681dSNathan Fontenot 	drmem->aa_index = cp[2];
4468342681dSNathan Fontenot 	drmem->flags = cp[3];
4478342681dSNathan Fontenot 
4488342681dSNathan Fontenot 	*cellp = cp + 4;
4498342681dSNathan Fontenot }
4508342681dSNathan Fontenot 
4518342681dSNathan Fontenot /*
45225985edcSLucas De Marchi  * Retrieve and validate the ibm,dynamic-memory property of the device tree.
4538342681dSNathan Fontenot  *
45495f72d1eSYinghai Lu  * The layout of the ibm,dynamic-memory property is a count N followed
45595f72d1eSYinghai Lu  * by N memblock list entries.  Each memblock list entry
45625985edcSLucas De Marchi  * contains information as laid out in the of_drconf_cell struct above.
4578342681dSNathan Fontenot  */
4588342681dSNathan Fontenot static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
4598342681dSNathan Fontenot {
4608342681dSNathan Fontenot 	const u32 *prop;
4618342681dSNathan Fontenot 	u32 len, entries;
4628342681dSNathan Fontenot 
4638342681dSNathan Fontenot 	prop = of_get_property(memory, "ibm,dynamic-memory", &len);
4648342681dSNathan Fontenot 	if (!prop || len < sizeof(unsigned int))
4658342681dSNathan Fontenot 		return 0;
4668342681dSNathan Fontenot 
4678342681dSNathan Fontenot 	entries = *prop++;
4688342681dSNathan Fontenot 
4698342681dSNathan Fontenot 	/* Now that we know the number of entries, revalidate the size
4708342681dSNathan Fontenot 	 * of the property read in to ensure we have everything
4718342681dSNathan Fontenot 	 */
4728342681dSNathan Fontenot 	if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
4738342681dSNathan Fontenot 		return 0;
4748342681dSNathan Fontenot 
4758342681dSNathan Fontenot 	*dm = prop;
4768342681dSNathan Fontenot 	return entries;
4778342681dSNathan Fontenot }
4788342681dSNathan Fontenot 
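/*
 * Worked example (hypothetical property, assuming n_mem_addr_cells == 2):
 * an ibm,dynamic-memory value of
 *	<1  0x0 0x10000000  0x8000000a 0x0 0x0 0x8>
 * passes the length check above and describes a single entry with
 * base_addr 0x10000000, drc_index 0x8000000a, aa_index 0 and the
 * DRCONF_MEM_ASSIGNED flag set.
 */
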
4798342681dSNathan Fontenot /*
48025985edcSLucas De Marchi  * Retrieve and validate the ibm,lmb-size property for drconf memory
4818342681dSNathan Fontenot  * from the device tree.
4828342681dSNathan Fontenot  */
4833fdfd990SBenjamin Herrenschmidt static u64 of_get_lmb_size(struct device_node *memory)
4848342681dSNathan Fontenot {
4858342681dSNathan Fontenot 	const u32 *prop;
4868342681dSNathan Fontenot 	u32 len;
4878342681dSNathan Fontenot 
4883fdfd990SBenjamin Herrenschmidt 	prop = of_get_property(memory, "ibm,lmb-size", &len);
4898342681dSNathan Fontenot 	if (!prop || len < sizeof(unsigned int))
4908342681dSNathan Fontenot 		return 0;
4918342681dSNathan Fontenot 
4928342681dSNathan Fontenot 	return read_n_cells(n_mem_size_cells, &prop);
4938342681dSNathan Fontenot }
4948342681dSNathan Fontenot 
4958342681dSNathan Fontenot struct assoc_arrays {
4968342681dSNathan Fontenot 	u32	n_arrays;
4978342681dSNathan Fontenot 	u32	array_sz;
4988342681dSNathan Fontenot 	const u32 *arrays;
4998342681dSNathan Fontenot };
5008342681dSNathan Fontenot 
5018342681dSNathan Fontenot /*
50225985edcSLucas De Marchi  * Retrieve and validate the list of associativity arrays for drconf
5038342681dSNathan Fontenot  * memory from the ibm,associativity-lookup-arrays property of the
5048342681dSNathan Fontenot  * device tree.
5058342681dSNathan Fontenot  *
5068342681dSNathan Fontenot  * The layout of the ibm,associativity-lookup-arrays property is a number N
5078342681dSNathan Fontenot  * indicating the number of associativity arrays, followed by a number M
5088342681dSNathan Fontenot  * indicating the size of each associativity array, followed by a list
5098342681dSNathan Fontenot  * of N associativity arrays.
5108342681dSNathan Fontenot  */
5118342681dSNathan Fontenot static int of_get_assoc_arrays(struct device_node *memory,
5128342681dSNathan Fontenot 			       struct assoc_arrays *aa)
5138342681dSNathan Fontenot {
5148342681dSNathan Fontenot 	const u32 *prop;
5158342681dSNathan Fontenot 	u32 len;
5168342681dSNathan Fontenot 
5178342681dSNathan Fontenot 	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
5188342681dSNathan Fontenot 	if (!prop || len < 2 * sizeof(unsigned int))
5198342681dSNathan Fontenot 		return -1;
5208342681dSNathan Fontenot 
5218342681dSNathan Fontenot 	aa->n_arrays = *prop++;
5228342681dSNathan Fontenot 	aa->array_sz = *prop++;
5238342681dSNathan Fontenot 
5248342681dSNathan Fontenot 	/* Now that we know the number of arrays and the size of each array,
5258342681dSNathan Fontenot 	 * revalidate the size of the property read in.
5268342681dSNathan Fontenot 	 */
5278342681dSNathan Fontenot 	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
5288342681dSNathan Fontenot 		return -1;
5298342681dSNathan Fontenot 
5308342681dSNathan Fontenot 	aa->arrays = prop;
5318342681dSNathan Fontenot 	return 0;
5328342681dSNathan Fontenot }
5338342681dSNathan Fontenot 
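/*
 * Worked example (hypothetical property): an
 * ibm,associativity-lookup-arrays value of <2 3  0 0 0  0 0 1> encodes
 * N = 2 arrays of M = 3 entries each; aa->arrays is left pointing at
 * the first of the six array entries.
 */
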
5348342681dSNathan Fontenot /*
5358342681dSNathan Fontenot  * This is like of_node_to_nid_single() for memory represented in the
5368342681dSNathan Fontenot  * ibm,dynamic-reconfiguration-memory node.
5378342681dSNathan Fontenot  */
5388342681dSNathan Fontenot static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
5398342681dSNathan Fontenot 				   struct assoc_arrays *aa)
5408342681dSNathan Fontenot {
5418342681dSNathan Fontenot 	int default_nid = 0;
5428342681dSNathan Fontenot 	int nid = default_nid;
5438342681dSNathan Fontenot 	int index;
5448342681dSNathan Fontenot 
5458342681dSNathan Fontenot 	if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
5468342681dSNathan Fontenot 	    !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
5478342681dSNathan Fontenot 	    drmem->aa_index < aa->n_arrays) {
5488342681dSNathan Fontenot 		index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
5498342681dSNathan Fontenot 		nid = aa->arrays[index];
5508342681dSNathan Fontenot 
5518342681dSNathan Fontenot 		if (nid == 0xffff || nid >= MAX_NUMNODES)
5528342681dSNathan Fontenot 			nid = default_nid;
5538342681dSNathan Fontenot 	}
5548342681dSNathan Fontenot 
5558342681dSNathan Fontenot 	return nid;
5568342681dSNathan Fontenot }
5578342681dSNathan Fontenot 
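/*
 * Worked example (continuing the hypothetical lookup-arrays property
 * above): with min_common_depth == 3, an LMB whose aa_index is 1 selects
 * index = 1 * 3 + 3 - 1 = 5, so nid = aa->arrays[5] = 1.
 */
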
558ab1f9dacSPaul Mackerras /*
559ab1f9dacSPaul Mackerras  * Figure out to which domain a cpu belongs and stick it there.
560ab1f9dacSPaul Mackerras  * Return the id of the domain used.
561ab1f9dacSPaul Mackerras  */
5622e5ce39dSNathan Lynch static int __cpuinit numa_setup_cpu(unsigned long lcpu)
563ab1f9dacSPaul Mackerras {
564cf950b7aSNathan Lynch 	int nid = 0;
5658b16cd23SMilton Miller 	struct device_node *cpu = of_get_cpu_node(lcpu, NULL);
566ab1f9dacSPaul Mackerras 
567ab1f9dacSPaul Mackerras 	if (!cpu) {
568ab1f9dacSPaul Mackerras 		WARN_ON(1);
569ab1f9dacSPaul Mackerras 		goto out;
570ab1f9dacSPaul Mackerras 	}
571ab1f9dacSPaul Mackerras 
572953039c8SJeremy Kerr 	nid = of_node_to_nid_single(cpu);
573ab1f9dacSPaul Mackerras 
574482ec7c4SNathan Lynch 	if (nid < 0 || !node_online(nid))
57572c33688SH Hartley Sweeten 		nid = first_online_node;
576ab1f9dacSPaul Mackerras out:
577cf950b7aSNathan Lynch 	map_cpu_to_node(lcpu, nid);
578ab1f9dacSPaul Mackerras 
579ab1f9dacSPaul Mackerras 	of_node_put(cpu);
580ab1f9dacSPaul Mackerras 
581cf950b7aSNathan Lynch 	return nid;
582ab1f9dacSPaul Mackerras }
583ab1f9dacSPaul Mackerras 
58474b85f37SChandra Seetharaman static int __cpuinit cpu_numa_callback(struct notifier_block *nfb,
585ab1f9dacSPaul Mackerras 			     unsigned long action,
586ab1f9dacSPaul Mackerras 			     void *hcpu)
587ab1f9dacSPaul Mackerras {
588ab1f9dacSPaul Mackerras 	unsigned long lcpu = (unsigned long)hcpu;
589ab1f9dacSPaul Mackerras 	int ret = NOTIFY_DONE;
590ab1f9dacSPaul Mackerras 
591ab1f9dacSPaul Mackerras 	switch (action) {
592ab1f9dacSPaul Mackerras 	case CPU_UP_PREPARE:
5938bb78442SRafael J. Wysocki 	case CPU_UP_PREPARE_FROZEN:
594ab1f9dacSPaul Mackerras 		numa_setup_cpu(lcpu);
595ab1f9dacSPaul Mackerras 		ret = NOTIFY_OK;
596ab1f9dacSPaul Mackerras 		break;
597ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU
598ab1f9dacSPaul Mackerras 	case CPU_DEAD:
5998bb78442SRafael J. Wysocki 	case CPU_DEAD_FROZEN:
600ab1f9dacSPaul Mackerras 	case CPU_UP_CANCELED:
6018bb78442SRafael J. Wysocki 	case CPU_UP_CANCELED_FROZEN:
602ab1f9dacSPaul Mackerras 		unmap_cpu_from_node(lcpu);
604ab1f9dacSPaul Mackerras 		ret = NOTIFY_OK;
603ab1f9dacSPaul Mackerras 		break;
605ab1f9dacSPaul Mackerras #endif
606ab1f9dacSPaul Mackerras 	}
607ab1f9dacSPaul Mackerras 	return ret;
608ab1f9dacSPaul Mackerras }
609ab1f9dacSPaul Mackerras 
610ab1f9dacSPaul Mackerras /*
611ab1f9dacSPaul Mackerras  * Check and possibly modify a memory region to enforce the memory limit.
612ab1f9dacSPaul Mackerras  *
613ab1f9dacSPaul Mackerras  * Returns the size the region should have to enforce the memory limit.
614ab1f9dacSPaul Mackerras  * This will either be the original value of size, a truncated value,
615ab1f9dacSPaul Mackerras  * or zero. If the returned value of size is 0 the region should be
61625985edcSLucas De Marchi  * discarded as it lies wholly above the memory limit.
617ab1f9dacSPaul Mackerras  */
61845fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start,
61945fb6ceaSAnton Blanchard 						      unsigned long size)
620ab1f9dacSPaul Mackerras {
621ab1f9dacSPaul Mackerras 	/*
62295f72d1eSYinghai Lu 	 * We use memblock_end_of_DRAM() in here instead of memory_limit because
623ab1f9dacSPaul Mackerras 	 * we've already adjusted it for the limit and it takes care of
624fe55249dSMilton Miller 	 * having memory holes below the limit.  Also, in the case of
625fe55249dSMilton Miller 	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
626ab1f9dacSPaul Mackerras 	 */
627ab1f9dacSPaul Mackerras 
62895f72d1eSYinghai Lu 	if (start + size <= memblock_end_of_DRAM())
629ab1f9dacSPaul Mackerras 		return size;
630ab1f9dacSPaul Mackerras 
63195f72d1eSYinghai Lu 	if (start >= memblock_end_of_DRAM())
632ab1f9dacSPaul Mackerras 		return 0;
633ab1f9dacSPaul Mackerras 
63495f72d1eSYinghai Lu 	return memblock_end_of_DRAM() - start;
635ab1f9dacSPaul Mackerras }
636ab1f9dacSPaul Mackerras 
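/*
 * Worked example (hypothetical layout): if memblock_end_of_DRAM() is
 * 4GB, a region starting at 3.5GB with a size of 1GB is truncated to
 * 512MB, and a region starting at or above 4GB is discarded (size 0).
 */
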
6370204568aSPaul Mackerras /*
638cf00085dSChandru  * Reads the counter for a given entry in
639cf00085dSChandru  * linux,drconf-usable-memory property
640cf00085dSChandru  */
641cf00085dSChandru static inline int __init read_usm_ranges(const u32 **usm)
642cf00085dSChandru {
643cf00085dSChandru 	/*
6443fdfd990SBenjamin Herrenschmidt 	 * For each lmb in ibm,dynamic-memory a corresponding
645cf00085dSChandru 	 * entry in the linux,drconf-usable-memory property contains
646cf00085dSChandru 	 * a counter followed by that many (base, size) pairs.
647cf00085dSChandru 	 * Read the counter from linux,drconf-usable-memory.
648cf00085dSChandru 	 */
649cf00085dSChandru 	return read_n_cells(n_mem_size_cells, usm);
650cf00085dSChandru }
651cf00085dSChandru 
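/*
 * Worked example (hypothetical property): for each assigned LMB,
 * linux,drconf-usable-memory holds a count followed by that many
 * (base, size) pairs; a count of zero means the LMB is unusable and
 * parse_drconf_memory() below skips it.
 */
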
652cf00085dSChandru /*
6530204568aSPaul Mackerras  * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
6540204568aSPaul Mackerras  * node.  This assumes n_mem_{addr,size}_cells have been set.
6550204568aSPaul Mackerras  */
6560204568aSPaul Mackerras static void __init parse_drconf_memory(struct device_node *memory)
6570204568aSPaul Mackerras {
658cf00085dSChandru 	const u32 *dm, *usm;
659cf00085dSChandru 	unsigned int n, rc, ranges, is_kexec_kdump = 0;
6603fdfd990SBenjamin Herrenschmidt 	unsigned long lmb_size, base, size, sz;
6618342681dSNathan Fontenot 	int nid;
6628342681dSNathan Fontenot 	struct assoc_arrays aa;
6630204568aSPaul Mackerras 
6648342681dSNathan Fontenot 	n = of_get_drconf_memory(memory, &dm);
6658342681dSNathan Fontenot 	if (!n)
6660204568aSPaul Mackerras 		return;
6670204568aSPaul Mackerras 
6683fdfd990SBenjamin Herrenschmidt 	lmb_size = of_get_lmb_size(memory);
6693fdfd990SBenjamin Herrenschmidt 	if (!lmb_size)
6708342681dSNathan Fontenot 		return;
6718342681dSNathan Fontenot 
6728342681dSNathan Fontenot 	rc = of_get_assoc_arrays(memory, &aa);
6738342681dSNathan Fontenot 	if (rc)
6740204568aSPaul Mackerras 		return;
6750204568aSPaul Mackerras 
676cf00085dSChandru 	/* check if this is a kexec/kdump kernel */
677cf00085dSChandru 	usm = of_get_usable_memory(memory);
678cf00085dSChandru 	if (usm != NULL)
679cf00085dSChandru 		is_kexec_kdump = 1;
680cf00085dSChandru 
6810204568aSPaul Mackerras 	for (; n != 0; --n) {
6828342681dSNathan Fontenot 		struct of_drconf_cell drmem;
6831daa6d08SBalbir Singh 
6848342681dSNathan Fontenot 		read_drconf_cell(&drmem, &dm);
6858342681dSNathan Fontenot 
6868342681dSNathan Fontenot 		/* skip this block if the reserved bit is set in flags (0x80)
6878342681dSNathan Fontenot 		   or if the block is not assigned to this partition (0x8) */
6888342681dSNathan Fontenot 		if ((drmem.flags & DRCONF_MEM_RESERVED)
6898342681dSNathan Fontenot 		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
6908342681dSNathan Fontenot 			continue;
6918342681dSNathan Fontenot 
692cf00085dSChandru 		base = drmem.base_addr;
6933fdfd990SBenjamin Herrenschmidt 		size = lmb_size;
694cf00085dSChandru 		ranges = 1;
6958342681dSNathan Fontenot 
696cf00085dSChandru 		if (is_kexec_kdump) {
697cf00085dSChandru 			ranges = read_usm_ranges(&usm);
698cf00085dSChandru 			if (!ranges) /* there are no (base, size) pairs */
6990204568aSPaul Mackerras 				continue;
700cf00085dSChandru 		}
701cf00085dSChandru 		do {
702cf00085dSChandru 			if (is_kexec_kdump) {
703cf00085dSChandru 				base = read_n_cells(n_mem_addr_cells, &usm);
704cf00085dSChandru 				size = read_n_cells(n_mem_size_cells, &usm);
705cf00085dSChandru 			}
706cf00085dSChandru 			nid = of_drconf_to_nid_single(&drmem, &aa);
707cf00085dSChandru 			fake_numa_create_new_node(
708cf00085dSChandru 				((base + size) >> PAGE_SHIFT),
709cf00085dSChandru 					   &nid);
710cf00085dSChandru 			node_set_online(nid);
711cf00085dSChandru 			sz = numa_enforce_memory_limit(base, size);
712cf00085dSChandru 			if (sz)
713cf00085dSChandru 				add_active_range(nid, base >> PAGE_SHIFT,
714cf00085dSChandru 						 (base >> PAGE_SHIFT)
715cf00085dSChandru 						 + (sz >> PAGE_SHIFT));
716cf00085dSChandru 		} while (--ranges);
7170204568aSPaul Mackerras 	}
7180204568aSPaul Mackerras }
7190204568aSPaul Mackerras 
720ab1f9dacSPaul Mackerras static int __init parse_numa_properties(void)
721ab1f9dacSPaul Mackerras {
72294db7c5eSAnton Blanchard 	struct device_node *memory;
723482ec7c4SNathan Lynch 	int default_nid = 0;
724ab1f9dacSPaul Mackerras 	unsigned long i;
725ab1f9dacSPaul Mackerras 
726ab1f9dacSPaul Mackerras 	if (numa_enabled == 0) {
727ab1f9dacSPaul Mackerras 		printk(KERN_WARNING "NUMA disabled by user\n");
728ab1f9dacSPaul Mackerras 		return -1;
729ab1f9dacSPaul Mackerras 	}
730ab1f9dacSPaul Mackerras 
731ab1f9dacSPaul Mackerras 	min_common_depth = find_min_common_depth();
732ab1f9dacSPaul Mackerras 
733ab1f9dacSPaul Mackerras 	if (min_common_depth < 0)
734ab1f9dacSPaul Mackerras 		return min_common_depth;
735ab1f9dacSPaul Mackerras 
736bf4b85b0SNathan Lynch 	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
737bf4b85b0SNathan Lynch 
738ab1f9dacSPaul Mackerras 	/*
739482ec7c4SNathan Lynch 	 * Even though we connect cpus to numa domains later in SMP
740482ec7c4SNathan Lynch 	 * init, we need to know the node ids now. This is because
741482ec7c4SNathan Lynch 	 * each node to be onlined must have NODE_DATA etc backing it.
742ab1f9dacSPaul Mackerras 	 */
743482ec7c4SNathan Lynch 	for_each_present_cpu(i) {
744dfbe93a2SAnton Blanchard 		struct device_node *cpu;
745cf950b7aSNathan Lynch 		int nid;
746ab1f9dacSPaul Mackerras 
7478b16cd23SMilton Miller 		cpu = of_get_cpu_node(i, NULL);
748482ec7c4SNathan Lynch 		BUG_ON(!cpu);
749953039c8SJeremy Kerr 		nid = of_node_to_nid_single(cpu);
750ab1f9dacSPaul Mackerras 		of_node_put(cpu);
751ab1f9dacSPaul Mackerras 
752482ec7c4SNathan Lynch 		/*
753482ec7c4SNathan Lynch 		 * Don't fall back to default_nid yet -- we will plug
754482ec7c4SNathan Lynch 		 * cpus into nodes once the memory scan has discovered
755482ec7c4SNathan Lynch 		 * the topology.
756482ec7c4SNathan Lynch 		 */
757482ec7c4SNathan Lynch 		if (nid < 0)
758482ec7c4SNathan Lynch 			continue;
759482ec7c4SNathan Lynch 		node_set_online(nid);
760ab1f9dacSPaul Mackerras 	}
761ab1f9dacSPaul Mackerras 
762237a0989SMike Kravetz 	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
76394db7c5eSAnton Blanchard 
76494db7c5eSAnton Blanchard 	for_each_node_by_type(memory, "memory") {
765ab1f9dacSPaul Mackerras 		unsigned long start;
766ab1f9dacSPaul Mackerras 		unsigned long size;
767cf950b7aSNathan Lynch 		int nid;
768ab1f9dacSPaul Mackerras 		int ranges;
769a7f67bdfSJeremy Kerr 		const unsigned int *memcell_buf;
770ab1f9dacSPaul Mackerras 		unsigned int len;
771ab1f9dacSPaul Mackerras 
772e2eb6392SStephen Rothwell 		memcell_buf = of_get_property(memory,
773ba759485SMichael Ellerman 			"linux,usable-memory", &len);
774ba759485SMichael Ellerman 		if (!memcell_buf || len <= 0)
775e2eb6392SStephen Rothwell 			memcell_buf = of_get_property(memory, "reg", &len);
776ab1f9dacSPaul Mackerras 		if (!memcell_buf || len <= 0)
777ab1f9dacSPaul Mackerras 			continue;
778ab1f9dacSPaul Mackerras 
779cc5d0189SBenjamin Herrenschmidt 		/* number of (base, size) ranges encoded in the cells */
780cc5d0189SBenjamin Herrenschmidt 		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
781ab1f9dacSPaul Mackerras new_range:
782ab1f9dacSPaul Mackerras 		/* these are order-sensitive, and modify the buffer pointer */
783237a0989SMike Kravetz 		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
784237a0989SMike Kravetz 		size = read_n_cells(n_mem_size_cells, &memcell_buf);
785ab1f9dacSPaul Mackerras 
786482ec7c4SNathan Lynch 		/*
787482ec7c4SNathan Lynch 		 * Assumption: either all memory nodes or none will
788482ec7c4SNathan Lynch 		 * have associativity properties.  If none, then
789482ec7c4SNathan Lynch 		 * everything goes to default_nid.
790482ec7c4SNathan Lynch 		 */
791953039c8SJeremy Kerr 		nid = of_node_to_nid_single(memory);
792482ec7c4SNathan Lynch 		if (nid < 0)
793482ec7c4SNathan Lynch 			nid = default_nid;
7941daa6d08SBalbir Singh 
7951daa6d08SBalbir Singh 		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
796482ec7c4SNathan Lynch 		node_set_online(nid);
797ab1f9dacSPaul Mackerras 
798ab1f9dacSPaul Mackerras 		if (!(size = numa_enforce_memory_limit(start, size))) {
799ab1f9dacSPaul Mackerras 			if (--ranges)
800ab1f9dacSPaul Mackerras 				goto new_range;
801ab1f9dacSPaul Mackerras 			else
802ab1f9dacSPaul Mackerras 				continue;
803ab1f9dacSPaul Mackerras 		}
804ab1f9dacSPaul Mackerras 
805c67c3cb4SMel Gorman 		add_active_range(nid, start >> PAGE_SHIFT,
806c67c3cb4SMel Gorman 				(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
807ab1f9dacSPaul Mackerras 
808ab1f9dacSPaul Mackerras 		if (--ranges)
809ab1f9dacSPaul Mackerras 			goto new_range;
810ab1f9dacSPaul Mackerras 	}
811ab1f9dacSPaul Mackerras 
8120204568aSPaul Mackerras 	/*
813dfbe93a2SAnton Blanchard 	 * Now do the same thing for each MEMBLOCK listed in the
814dfbe93a2SAnton Blanchard 	 * ibm,dynamic-memory property in the
815dfbe93a2SAnton Blanchard 	 * ibm,dynamic-reconfiguration-memory node.
8160204568aSPaul Mackerras 	 */
8170204568aSPaul Mackerras 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
8180204568aSPaul Mackerras 	if (memory)
8190204568aSPaul Mackerras 		parse_drconf_memory(memory);
8200204568aSPaul Mackerras 
821ab1f9dacSPaul Mackerras 	return 0;
822ab1f9dacSPaul Mackerras }
823ab1f9dacSPaul Mackerras 
824ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void)
825ab1f9dacSPaul Mackerras {
82695f72d1eSYinghai Lu 	unsigned long top_of_ram = memblock_end_of_DRAM();
82795f72d1eSYinghai Lu 	unsigned long total_ram = memblock_phys_mem_size();
828c67c3cb4SMel Gorman 	unsigned long start_pfn, end_pfn;
82928be7072SBenjamin Herrenschmidt 	unsigned int nid = 0;
83028be7072SBenjamin Herrenschmidt 	struct memblock_region *reg;
831ab1f9dacSPaul Mackerras 
832e110b281SOlof Johansson 	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
833ab1f9dacSPaul Mackerras 	       top_of_ram, total_ram);
834e110b281SOlof Johansson 	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
835ab1f9dacSPaul Mackerras 	       (top_of_ram - total_ram) >> 20);
836ab1f9dacSPaul Mackerras 
83728be7072SBenjamin Herrenschmidt 	for_each_memblock(memory, reg) {
838c7fc2de0SYinghai Lu 		start_pfn = memblock_region_memory_base_pfn(reg);
839c7fc2de0SYinghai Lu 		end_pfn = memblock_region_memory_end_pfn(reg);
8401daa6d08SBalbir Singh 
8411daa6d08SBalbir Singh 		fake_numa_create_new_node(end_pfn, &nid);
8421daa6d08SBalbir Singh 		add_active_range(nid, start_pfn, end_pfn);
8431daa6d08SBalbir Singh 		node_set_online(nid);
844c67c3cb4SMel Gorman 	}
845ab1f9dacSPaul Mackerras }
846ab1f9dacSPaul Mackerras 
8474b703a23SAnton Blanchard void __init dump_numa_cpu_topology(void)
8484b703a23SAnton Blanchard {
8494b703a23SAnton Blanchard 	unsigned int node;
8504b703a23SAnton Blanchard 	unsigned int cpu, count;
8514b703a23SAnton Blanchard 
8524b703a23SAnton Blanchard 	if (min_common_depth == -1 || !numa_enabled)
8534b703a23SAnton Blanchard 		return;
8544b703a23SAnton Blanchard 
8554b703a23SAnton Blanchard 	for_each_online_node(node) {
856e110b281SOlof Johansson 		printk(KERN_DEBUG "Node %d CPUs:", node);
8574b703a23SAnton Blanchard 
8584b703a23SAnton Blanchard 		count = 0;
8594b703a23SAnton Blanchard 		/*
8604b703a23SAnton Blanchard 		 * If we used a CPU iterator here we would miss printing
8614b703a23SAnton Blanchard 		 * the holes in the cpumap.
8624b703a23SAnton Blanchard 		 */
86325863de0SAnton Blanchard 		for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
86425863de0SAnton Blanchard 			if (cpumask_test_cpu(cpu,
86525863de0SAnton Blanchard 					node_to_cpumask_map[node])) {
8664b703a23SAnton Blanchard 				if (count == 0)
8674b703a23SAnton Blanchard 					printk(" %u", cpu);
8684b703a23SAnton Blanchard 				++count;
8694b703a23SAnton Blanchard 			} else {
8704b703a23SAnton Blanchard 				if (count > 1)
8714b703a23SAnton Blanchard 					printk("-%u", cpu - 1);
8724b703a23SAnton Blanchard 				count = 0;
8734b703a23SAnton Blanchard 			}
8744b703a23SAnton Blanchard 		}
8754b703a23SAnton Blanchard 
8764b703a23SAnton Blanchard 		if (count > 1)
87725863de0SAnton Blanchard 			printk("-%u", nr_cpu_ids - 1);
8784b703a23SAnton Blanchard 		printk("\n");
8794b703a23SAnton Blanchard 	}
8804b703a23SAnton Blanchard }
8814b703a23SAnton Blanchard 
8824b703a23SAnton Blanchard static void __init dump_numa_memory_topology(void)
883ab1f9dacSPaul Mackerras {
884ab1f9dacSPaul Mackerras 	unsigned int node;
885ab1f9dacSPaul Mackerras 	unsigned int count;
886ab1f9dacSPaul Mackerras 
887ab1f9dacSPaul Mackerras 	if (min_common_depth == -1 || !numa_enabled)
888ab1f9dacSPaul Mackerras 		return;
889ab1f9dacSPaul Mackerras 
890ab1f9dacSPaul Mackerras 	for_each_online_node(node) {
891ab1f9dacSPaul Mackerras 		unsigned long i;
892ab1f9dacSPaul Mackerras 
893e110b281SOlof Johansson 		printk(KERN_DEBUG "Node %d Memory:", node);
894ab1f9dacSPaul Mackerras 
895ab1f9dacSPaul Mackerras 		count = 0;
896ab1f9dacSPaul Mackerras 
89795f72d1eSYinghai Lu 		for (i = 0; i < memblock_end_of_DRAM();
89845fb6ceaSAnton Blanchard 		     i += (1 << SECTION_SIZE_BITS)) {
89945fb6ceaSAnton Blanchard 			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
900ab1f9dacSPaul Mackerras 				if (count == 0)
901ab1f9dacSPaul Mackerras 					printk(" 0x%lx", i);
902ab1f9dacSPaul Mackerras 				++count;
903ab1f9dacSPaul Mackerras 			} else {
904ab1f9dacSPaul Mackerras 				if (count > 0)
905ab1f9dacSPaul Mackerras 					printk("-0x%lx", i);
906ab1f9dacSPaul Mackerras 				count = 0;
907ab1f9dacSPaul Mackerras 			}
908ab1f9dacSPaul Mackerras 		}
909ab1f9dacSPaul Mackerras 
910ab1f9dacSPaul Mackerras 		if (count > 0)
911ab1f9dacSPaul Mackerras 			printk("-0x%lx", i);
912ab1f9dacSPaul Mackerras 		printk("\n");
913ab1f9dacSPaul Mackerras 	}
914ab1f9dacSPaul Mackerras }
915ab1f9dacSPaul Mackerras 
916ab1f9dacSPaul Mackerras /*
91795f72d1eSYinghai Lu  * Allocate some memory, using the memblock or bootmem allocator as
918ab1f9dacSPaul Mackerras  * required. nid is the preferred node and end_pfn is the highest page
919ab1f9dacSPaul Mackerras  * frame number in the node.
920ab1f9dacSPaul Mackerras  *
9210be210fdSDave Hansen  * Returns the virtual address of the memory.
922ab1f9dacSPaul Mackerras  */
923893473dfSDave Hansen static void __init *careful_zallocation(int nid, unsigned long size,
92445fb6ceaSAnton Blanchard 				       unsigned long align,
92545fb6ceaSAnton Blanchard 				       unsigned long end_pfn)
926ab1f9dacSPaul Mackerras {
9270be210fdSDave Hansen 	void *ret;
92845fb6ceaSAnton Blanchard 	int new_nid;
9290be210fdSDave Hansen 	unsigned long ret_paddr;
9300be210fdSDave Hansen 
93195f72d1eSYinghai Lu 	ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
932ab1f9dacSPaul Mackerras 
933ab1f9dacSPaul Mackerras 	/* retry over all memory */
9340be210fdSDave Hansen 	if (!ret_paddr)
93595f72d1eSYinghai Lu 		ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
936ab1f9dacSPaul Mackerras 
9370be210fdSDave Hansen 	if (!ret_paddr)
9385d21ea2bSDave Hansen 		panic("numa.c: cannot allocate %lu bytes for node %d",
939ab1f9dacSPaul Mackerras 		      size, nid);
940ab1f9dacSPaul Mackerras 
9410be210fdSDave Hansen 	ret = __va(ret_paddr);
9420be210fdSDave Hansen 
943ab1f9dacSPaul Mackerras 	/*
944c555e520SDave Hansen 	 * We initialize the nodes in numeric order: 0, 1, 2...
94595f72d1eSYinghai Lu 	 * and hand over control from the MEMBLOCK allocator to the
946c555e520SDave Hansen 	 * bootmem allocator.  If this function is called for
947c555e520SDave Hansen 	 * node 5, then we know that all nodes <5 are using the
94895f72d1eSYinghai Lu 	 * bootmem allocator instead of the MEMBLOCK allocator.
949c555e520SDave Hansen 	 *
950c555e520SDave Hansen 	 * So, check the nid from which this allocation came
951c555e520SDave Hansen 	 * and double check to see if we need to use bootmem
95295f72d1eSYinghai Lu 	 * instead of the MEMBLOCK.  We don't free the MEMBLOCK memory
953c555e520SDave Hansen 	 * since it would be useless.
954ab1f9dacSPaul Mackerras 	 */
9550be210fdSDave Hansen 	new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
95645fb6ceaSAnton Blanchard 	if (new_nid < nid) {
9570be210fdSDave Hansen 		ret = __alloc_bootmem_node(NODE_DATA(new_nid),
958ab1f9dacSPaul Mackerras 				size, align, 0);
959ab1f9dacSPaul Mackerras 
9600be210fdSDave Hansen 		dbg("alloc_bootmem %p %lx\n", ret, size);
961ab1f9dacSPaul Mackerras 	}
962ab1f9dacSPaul Mackerras 
963893473dfSDave Hansen 	memset(ret, 0, size);
9640be210fdSDave Hansen 	return ret;
965ab1f9dacSPaul Mackerras }
966ab1f9dacSPaul Mackerras 
96774b85f37SChandra Seetharaman static struct notifier_block __cpuinitdata ppc64_numa_nb = {
96874b85f37SChandra Seetharaman 	.notifier_call = cpu_numa_callback,
96974b85f37SChandra Seetharaman 	.priority = 1 /* Must run before sched domains notifier. */
97074b85f37SChandra Seetharaman };
97174b85f37SChandra Seetharaman 
972*28e86bdbSDavid Rientjes static void __init mark_reserved_regions_for_nid(int nid)
973ab1f9dacSPaul Mackerras {
9744a618669SDave Hansen 	struct pglist_data *node = NODE_DATA(nid);
97528be7072SBenjamin Herrenschmidt 	struct memblock_region *reg;
976ab1f9dacSPaul Mackerras 
97728be7072SBenjamin Herrenschmidt 	for_each_memblock(reserved, reg) {
97828be7072SBenjamin Herrenschmidt 		unsigned long physbase = reg->base;
97928be7072SBenjamin Herrenschmidt 		unsigned long size = reg->size;
9808f64e1f2SJon Tollefson 		unsigned long start_pfn = physbase >> PAGE_SHIFT;
98106eccea6SDave Hansen 		unsigned long end_pfn = PFN_UP(physbase + size);
9828f64e1f2SJon Tollefson 		struct node_active_region node_ar;
9834a618669SDave Hansen 		unsigned long node_end_pfn = node->node_start_pfn +
9844a618669SDave Hansen 					     node->node_spanned_pages;
9854a618669SDave Hansen 
9864a618669SDave Hansen 		/*
98795f72d1eSYinghai Lu 		 * Check to make sure that this memblock.reserved area is
9884a618669SDave Hansen 		 * within the bounds of the node that we care about.
9894a618669SDave Hansen 		 * Checking the nid of the start and end points is not
9904a618669SDave Hansen 		 * sufficient because the reserved area could span the
9914a618669SDave Hansen 		 * entire node.
9924a618669SDave Hansen 		 */
9934a618669SDave Hansen 		if (end_pfn <= node->node_start_pfn ||
9944a618669SDave Hansen 		    start_pfn >= node_end_pfn)
9954a618669SDave Hansen 			continue;
996ab1f9dacSPaul Mackerras 
9978f64e1f2SJon Tollefson 		get_node_active_region(start_pfn, &node_ar);
998e8170372SJon Tollefson 		while (start_pfn < end_pfn &&
999e8170372SJon Tollefson 			node_ar.start_pfn < node_ar.end_pfn) {
1000e8170372SJon Tollefson 			unsigned long reserve_size = size;
10018f64e1f2SJon Tollefson 			/*
10028f64e1f2SJon Tollefson 			 * if reserved region extends past active region
10038f64e1f2SJon Tollefson 			 * then trim size to active region
10048f64e1f2SJon Tollefson 			 */
10058f64e1f2SJon Tollefson 			if (end_pfn > node_ar.end_pfn)
1006e8170372SJon Tollefson 				reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
100706eccea6SDave Hansen 					- physbase;
1008a4c74dddSDave Hansen 			/*
1009a4c74dddSDave Hansen 			 * Only worry about *this* node, others may not
1010a4c74dddSDave Hansen 			 * yet have valid NODE_DATA().
1011a4c74dddSDave Hansen 			 */
1012a4c74dddSDave Hansen 			if (node_ar.nid == nid) {
1013a4c74dddSDave Hansen 				dbg("reserve_bootmem %lx %lx nid=%d\n",
1014a4c74dddSDave Hansen 					physbase, reserve_size, node_ar.nid);
1015a4c74dddSDave Hansen 				reserve_bootmem_node(NODE_DATA(node_ar.nid),
1016a4c74dddSDave Hansen 						physbase, reserve_size,
1017a4c74dddSDave Hansen 						BOOTMEM_DEFAULT);
1018a4c74dddSDave Hansen 			}
10198f64e1f2SJon Tollefson 			/*
10208f64e1f2SJon Tollefson 			 * if reserved region is contained in the active region
10218f64e1f2SJon Tollefson 			 * then done.
10228f64e1f2SJon Tollefson 			 */
10238f64e1f2SJon Tollefson 			if (end_pfn <= node_ar.end_pfn)
10248f64e1f2SJon Tollefson 				break;
10258f64e1f2SJon Tollefson 
10268f64e1f2SJon Tollefson 			/*
10278f64e1f2SJon Tollefson 			 * reserved region extends past the active region
10288f64e1f2SJon Tollefson 			 *   get next active region that contains this
10298f64e1f2SJon Tollefson 			 *   reserved region
10308f64e1f2SJon Tollefson 			 */
10318f64e1f2SJon Tollefson 			start_pfn = node_ar.end_pfn;
10328f64e1f2SJon Tollefson 			physbase = start_pfn << PAGE_SHIFT;
1033e8170372SJon Tollefson 			size = size - reserve_size;
10348f64e1f2SJon Tollefson 			get_node_active_region(start_pfn, &node_ar);
1035ab1f9dacSPaul Mackerras 		}
10364a618669SDave Hansen 	}
1037ab1f9dacSPaul Mackerras }
10388f64e1f2SJon Tollefson 
10394a618669SDave Hansen 
10404a618669SDave Hansen void __init do_init_bootmem(void)
10414a618669SDave Hansen {
10424a618669SDave Hansen 	int nid;
10434a618669SDave Hansen 
10444a618669SDave Hansen 	min_low_pfn = 0;
104595f72d1eSYinghai Lu 	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
10464a618669SDave Hansen 	max_pfn = max_low_pfn;
10474a618669SDave Hansen 
10484a618669SDave Hansen 	if (parse_numa_properties())
10494a618669SDave Hansen 		setup_nonnuma();
10504a618669SDave Hansen 	else
10514a618669SDave Hansen 		dump_numa_memory_topology();
10524a618669SDave Hansen 
10534a618669SDave Hansen 	for_each_online_node(nid) {
10544a618669SDave Hansen 		unsigned long start_pfn, end_pfn;
10550be210fdSDave Hansen 		void *bootmem_vaddr;
10564a618669SDave Hansen 		unsigned long bootmap_pages;
10574a618669SDave Hansen 
10584a618669SDave Hansen 		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
10594a618669SDave Hansen 
10604a618669SDave Hansen 		/*
10614a618669SDave Hansen 		 * Allocate the node structure node local if possible
10624a618669SDave Hansen 		 *
10634a618669SDave Hansen 		 * Be careful moving this around, as it relies on all
10644a618669SDave Hansen 		 * previous nodes' bootmem to be initialized and have
10654a618669SDave Hansen 		 * all reserved areas marked.
10664a618669SDave Hansen 		 */
1067893473dfSDave Hansen 		NODE_DATA(nid) = careful_zallocation(nid,
10684a618669SDave Hansen 					sizeof(struct pglist_data),
10694a618669SDave Hansen 					SMP_CACHE_BYTES, end_pfn);
10704a618669SDave Hansen 
10714a618669SDave Hansen 		dbg("node %d\n", nid);
10724a618669SDave Hansen 		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
10734a618669SDave Hansen 
10744a618669SDave Hansen 		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
10754a618669SDave Hansen 		NODE_DATA(nid)->node_start_pfn = start_pfn;
10764a618669SDave Hansen 		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
10774a618669SDave Hansen 
10784a618669SDave Hansen 		if (NODE_DATA(nid)->node_spanned_pages == 0)
10794a618669SDave Hansen 			continue;
10804a618669SDave Hansen 
10814a618669SDave Hansen 		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
10824a618669SDave Hansen 		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
10834a618669SDave Hansen 
10844a618669SDave Hansen 		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
1085893473dfSDave Hansen 		bootmem_vaddr = careful_zallocation(nid,
10864a618669SDave Hansen 					bootmap_pages << PAGE_SHIFT,
10874a618669SDave Hansen 					PAGE_SIZE, end_pfn);
10884a618669SDave Hansen 
10890be210fdSDave Hansen 		dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
10904a618669SDave Hansen 
10910be210fdSDave Hansen 		init_bootmem_node(NODE_DATA(nid),
10920be210fdSDave Hansen 				  __pa(bootmem_vaddr) >> PAGE_SHIFT,
10934a618669SDave Hansen 				  start_pfn, end_pfn);
10944a618669SDave Hansen 
10954a618669SDave Hansen 		free_bootmem_with_active_regions(nid, end_pfn);
10964a618669SDave Hansen 		/*
10974a618669SDave Hansen 		 * Be very careful about moving this around.  Future
1098893473dfSDave Hansen 		 * calls to careful_zallocation() depend on this getting
10994a618669SDave Hansen 		 * done correctly.
11004a618669SDave Hansen 		 */
11014a618669SDave Hansen 		mark_reserved_regions_for_nid(nid);
11028f64e1f2SJon Tollefson 		sparse_memory_present_with_active_regions(nid);
1103ab1f9dacSPaul Mackerras 	}
1104d3f6204aSBenjamin Herrenschmidt 
1105d3f6204aSBenjamin Herrenschmidt 	init_bootmem_done = 1;
110625863de0SAnton Blanchard 
110725863de0SAnton Blanchard 	/*
110825863de0SAnton Blanchard 	 * Now that bootmem is initialised we can create the node-to-cpumask
110925863de0SAnton Blanchard 	 * lookup tables and set up the cpu callback to populate them.
111025863de0SAnton Blanchard 	 */
111125863de0SAnton Blanchard 	setup_node_to_cpumask_map();
111225863de0SAnton Blanchard 
111325863de0SAnton Blanchard 	register_cpu_notifier(&ppc64_numa_nb);
111425863de0SAnton Blanchard 	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
111525863de0SAnton Blanchard 			  (void *)(unsigned long)boot_cpuid);
11164a618669SDave Hansen }
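/*
 * Summary of the ordering constraints in the node loop above
 * (descriptive only, no new behaviour):
 *
 *   1. careful_zallocation() may only steal memory from nodes whose
 *      bootmem is already initialized and whose reserved regions are
 *      already marked.
 *   2. init_bootmem_node() plus free_bootmem_with_active_regions()
 *      then make the current node's pages available.
 *   3. mark_reserved_regions_for_nid() must come last so that a later
 *      node's careful_zallocation() never hands out reserved memory.
 */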
1117ab1f9dacSPaul Mackerras 
1118ab1f9dacSPaul Mackerras void __init paging_init(void)
1119ab1f9dacSPaul Mackerras {
11206391af17SMel Gorman 	unsigned long max_zone_pfns[MAX_NR_ZONES];
11216391af17SMel Gorman 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
112295f72d1eSYinghai Lu 	max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT;
1123c67c3cb4SMel Gorman 	free_area_init_nodes(max_zone_pfns);
1124ab1f9dacSPaul Mackerras }
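/*
 * Note: only max_zone_pfns[ZONE_DMA] is populated above, so every page
 * up to memblock_end_of_DRAM() lands in ZONE_DMA and the higher zones
 * remain empty with this configuration.
 */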
1125ab1f9dacSPaul Mackerras 
1126ab1f9dacSPaul Mackerras static int __init early_numa(char *p)
1127ab1f9dacSPaul Mackerras {
1128ab1f9dacSPaul Mackerras 	if (!p)
1129ab1f9dacSPaul Mackerras 		return 0;
1130ab1f9dacSPaul Mackerras 
1131ab1f9dacSPaul Mackerras 	if (strstr(p, "off"))
1132ab1f9dacSPaul Mackerras 		numa_enabled = 0;
1133ab1f9dacSPaul Mackerras 
1134ab1f9dacSPaul Mackerras 	if (strstr(p, "debug"))
1135ab1f9dacSPaul Mackerras 		numa_debug = 1;
1136ab1f9dacSPaul Mackerras 
11371daa6d08SBalbir Singh 	p = strstr(p, "fake=");
11381daa6d08SBalbir Singh 	if (p)
11391daa6d08SBalbir Singh 		cmdline = p + strlen("fake=");
11401daa6d08SBalbir Singh 
1141ab1f9dacSPaul Mackerras 	return 0;
1142ab1f9dacSPaul Mackerras }
1143ab1f9dacSPaul Mackerras early_param("numa", early_numa);
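/*
 * Example command lines handled by early_numa() (illustrative values):
 *
 *   numa=off        clear numa_enabled
 *   numa=debug      enable the dbg() messages in this file
 *   numa=fake=...   pass everything after "fake=" to the fake-NUMA
 *                   code via cmdline
 *
 * The options are matched with strstr(), so several of them may be
 * combined in one numa= string.
 */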
1144237a0989SMike Kravetz 
1145237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG
1146237a0989SMike Kravetz /*
11470f16ef7fSNathan Fontenot  * Find the node associated with a hot-added memory section for
11480f16ef7fSNathan Fontenot  * memory represented in the device tree by the property
11490f16ef7fSNathan Fontenot  * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
11500db9360aSNathan Fontenot  */
11510db9360aSNathan Fontenot static int hot_add_drconf_scn_to_nid(struct device_node *memory,
11520db9360aSNathan Fontenot 				     unsigned long scn_addr)
11530db9360aSNathan Fontenot {
11540db9360aSNathan Fontenot 	const u32 *dm;
11550f16ef7fSNathan Fontenot 	unsigned int drconf_cell_cnt, rc;
11563fdfd990SBenjamin Herrenschmidt 	unsigned long lmb_size;
11570db9360aSNathan Fontenot 	struct assoc_arrays aa;
11580f16ef7fSNathan Fontenot 	int nid = -1;
11590db9360aSNathan Fontenot 
11600f16ef7fSNathan Fontenot 	drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
11610f16ef7fSNathan Fontenot 	if (!drconf_cell_cnt)
11620f16ef7fSNathan Fontenot 		return -1;
11630db9360aSNathan Fontenot 
11643fdfd990SBenjamin Herrenschmidt 	lmb_size = of_get_lmb_size(memory);
11653fdfd990SBenjamin Herrenschmidt 	if (!lmb_size)
11660f16ef7fSNathan Fontenot 		return -1;
11670db9360aSNathan Fontenot 
11680db9360aSNathan Fontenot 	rc = of_get_assoc_arrays(memory, &aa);
11690db9360aSNathan Fontenot 	if (rc)
11700f16ef7fSNathan Fontenot 		return -1;
11710db9360aSNathan Fontenot 
11720f16ef7fSNathan Fontenot 	for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
11730db9360aSNathan Fontenot 		struct of_drconf_cell drmem;
11740db9360aSNathan Fontenot 
11750db9360aSNathan Fontenot 		read_drconf_cell(&drmem, &dm);
11760db9360aSNathan Fontenot 
11770db9360aSNathan Fontenot 		/* Skip this block if it is reserved or not assigned to
11780db9360aSNathan Fontenot 		 * this partition. */
11790db9360aSNathan Fontenot 		if ((drmem.flags & DRCONF_MEM_RESERVED)
11800db9360aSNathan Fontenot 		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
11810db9360aSNathan Fontenot 			continue;
11820db9360aSNathan Fontenot 
11830f16ef7fSNathan Fontenot 		if ((scn_addr < drmem.base_addr)
11843fdfd990SBenjamin Herrenschmidt 		    || (scn_addr >= (drmem.base_addr + lmb_size)))
11850f16ef7fSNathan Fontenot 			continue;
11860db9360aSNathan Fontenot 
11870f16ef7fSNathan Fontenot 		nid = of_drconf_to_nid_single(&drmem, &aa);
11880f16ef7fSNathan Fontenot 		break;
11890db9360aSNathan Fontenot 	}
11900db9360aSNathan Fontenot 
11910f16ef7fSNathan Fontenot 	return nid;
11920db9360aSNathan Fontenot }
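/*
 * Worked example for the loop above (illustrative numbers): with
 * lmb_size = 256MB and a dynamic memory block at base_addr =
 * 0x10000000, a hot-added section at scn_addr = 0x14000000 falls
 * inside [0x10000000, 0x20000000) and its node is resolved with
 * of_drconf_to_nid_single().
 */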
11930db9360aSNathan Fontenot 
11940db9360aSNathan Fontenot /*
11950f16ef7fSNathan Fontenot  * Find the node associated with a hot-added memory section for memory
11960f16ef7fSNathan Fontenot  * represented in the device tree as a node (i.e. memory@XXXX) for
119795f72d1eSYinghai Lu  * each memblock.
1198237a0989SMike Kravetz  */
11990f16ef7fSNathan Fontenot int hot_add_node_scn_to_nid(unsigned long scn_addr)
1200237a0989SMike Kravetz {
120194db7c5eSAnton Blanchard 	struct device_node *memory;
12020f16ef7fSNathan Fontenot 	int nid = -1;
1203237a0989SMike Kravetz 
120494db7c5eSAnton Blanchard 	for_each_node_by_type(memory, "memory") {
1205237a0989SMike Kravetz 		unsigned long start, size;
1206b226e462SMike Kravetz 		int ranges;
1207a7f67bdfSJeremy Kerr 		const unsigned int *memcell_buf;
1208237a0989SMike Kravetz 		unsigned int len;
1209237a0989SMike Kravetz 
1210e2eb6392SStephen Rothwell 		memcell_buf = of_get_property(memory, "reg", &len);
1211237a0989SMike Kravetz 		if (!memcell_buf || len <= 0)
1212237a0989SMike Kravetz 			continue;
1213237a0989SMike Kravetz 
1214cc5d0189SBenjamin Herrenschmidt 		/* number of (base, size) ranges; len is in bytes */
1215cc5d0189SBenjamin Herrenschmidt 		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
12160f16ef7fSNathan Fontenot 
12170f16ef7fSNathan Fontenot 		while (ranges--) {
1218237a0989SMike Kravetz 			start = read_n_cells(n_mem_addr_cells, &memcell_buf);
1219237a0989SMike Kravetz 			size = read_n_cells(n_mem_size_cells, &memcell_buf);
1220237a0989SMike Kravetz 
12210f16ef7fSNathan Fontenot 			if ((scn_addr < start) || (scn_addr >= (start + size)))
12220f16ef7fSNathan Fontenot 				continue;
12230f16ef7fSNathan Fontenot 
12240f16ef7fSNathan Fontenot 			nid = of_node_to_nid_single(memory);
12250f16ef7fSNathan Fontenot 			break;
12260f16ef7fSNathan Fontenot 		}
12270f16ef7fSNathan Fontenot 
12280f16ef7fSNathan Fontenot 		if (nid >= 0)
12290f16ef7fSNathan Fontenot 			break;
12300f16ef7fSNathan Fontenot 	}
12310f16ef7fSNathan Fontenot 
123260831842SAnton Blanchard 	of_node_put(memory);
123360831842SAnton Blanchard 
12340db9360aSNathan Fontenot 	return nid;
1235237a0989SMike Kravetz }
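/*
 * Worked example for the "reg" parsing above (illustrative numbers):
 * with n_mem_addr_cells = 2 and n_mem_size_cells = 2, a 32-byte "reg"
 * property holds (32 >> 2) / (2 + 2) = 2 (start, size) ranges.
 */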
1236237a0989SMike Kravetz 
12370f16ef7fSNathan Fontenot /*
12380f16ef7fSNathan Fontenot  * Find the node associated with a hot-added memory section.  The section
123995f72d1eSYinghai Lu  * corresponds to a SPARSEMEM section, not a MEMBLOCK.  It is assumed that
124095f72d1eSYinghai Lu  * sections are fully contained within a single MEMBLOCK.
12410f16ef7fSNathan Fontenot  */
12420f16ef7fSNathan Fontenot int hot_add_scn_to_nid(unsigned long scn_addr)
12430f16ef7fSNathan Fontenot {
12440f16ef7fSNathan Fontenot 	struct device_node *memory = NULL;
12450f16ef7fSNathan Fontenot 	int nid, found = 0;
12460f16ef7fSNathan Fontenot 
12470f16ef7fSNathan Fontenot 	if (!numa_enabled || (min_common_depth < 0))
124872c33688SH Hartley Sweeten 		return first_online_node;
12490f16ef7fSNathan Fontenot 
12500f16ef7fSNathan Fontenot 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
12510f16ef7fSNathan Fontenot 	if (memory) {
12520f16ef7fSNathan Fontenot 		nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
12530f16ef7fSNathan Fontenot 		of_node_put(memory);
12540f16ef7fSNathan Fontenot 	} else {
12550f16ef7fSNathan Fontenot 		nid = hot_add_node_scn_to_nid(scn_addr);
1256237a0989SMike Kravetz 	}
12570f16ef7fSNathan Fontenot 
12580f16ef7fSNathan Fontenot 	if (nid < 0 || !node_online(nid))
125972c33688SH Hartley Sweeten 		nid = first_online_node;
12600f16ef7fSNathan Fontenot 
12610f16ef7fSNathan Fontenot 	if (NODE_DATA(nid)->node_spanned_pages)
12620f16ef7fSNathan Fontenot 		return nid;
12630f16ef7fSNathan Fontenot 
12640f16ef7fSNathan Fontenot 	for_each_online_node(nid) {
12650f16ef7fSNathan Fontenot 		if (NODE_DATA(nid)->node_spanned_pages) {
12660f16ef7fSNathan Fontenot 			found = 1;
12670f16ef7fSNathan Fontenot 			break;
1268237a0989SMike Kravetz 		}
12690f16ef7fSNathan Fontenot 	}
12700f16ef7fSNathan Fontenot 
12710f16ef7fSNathan Fontenot 	BUG_ON(!found);
12720f16ef7fSNathan Fontenot 	return nid;
12730f16ef7fSNathan Fontenot }
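#ifdef HOT_ADD_NID_EXAMPLE	/* illustration only, never built */
/*
 * Minimal sketch of a caller of hot_add_scn_to_nid(); the guard macro,
 * function and parameter names here are hypothetical.
 */
static void example_hot_add(unsigned long section_paddr)
{
	int nid = hot_add_scn_to_nid(section_paddr);

	/* hot_add_scn_to_nid() always returns an online node with memory */
	dbg("section at 0x%lx assigned to node %d\n", section_paddr, nid);
}
#endif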
12740f16ef7fSNathan Fontenot 
1275cd34206eSNishanth Aravamudan static u64 hot_add_drconf_memory_max(void)
1276cd34206eSNishanth Aravamudan {
1277cd34206eSNishanth Aravamudan 	struct device_node *memory = NULL;
1278cd34206eSNishanth Aravamudan 	unsigned int drconf_cell_cnt = 0;
1279cd34206eSNishanth Aravamudan 	u64 lmb_size = 0;
1280cd34206eSNishanth Aravamudan 	const u32 *dm = NULL;
1281cd34206eSNishanth Aravamudan 
1282cd34206eSNishanth Aravamudan 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
1283cd34206eSNishanth Aravamudan 	if (memory) {
1284cd34206eSNishanth Aravamudan 		drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
1285cd34206eSNishanth Aravamudan 		lmb_size = of_get_lmb_size(memory);
1286cd34206eSNishanth Aravamudan 		of_node_put(memory);
1287cd34206eSNishanth Aravamudan 	}
1288cd34206eSNishanth Aravamudan 	return lmb_size * drconf_cell_cnt;
1289cd34206eSNishanth Aravamudan }
1290cd34206eSNishanth Aravamudan 
1291cd34206eSNishanth Aravamudan /*
1292cd34206eSNishanth Aravamudan  * memory_hotplug_max - return max address of memory that may be added
1293cd34206eSNishanth Aravamudan  *
1294cd34206eSNishanth Aravamudan  * This is currently only used on systems that support drconfig memory
1295cd34206eSNishanth Aravamudan  * hotplug.
1296cd34206eSNishanth Aravamudan  */
1297cd34206eSNishanth Aravamudan u64 memory_hotplug_max(void)
1298cd34206eSNishanth Aravamudan {
1299cd34206eSNishanth Aravamudan         return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
1300cd34206eSNishanth Aravamudan }
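/*
 * Worked example (illustrative numbers): 16 dynamic-reconfiguration
 * cells of lmb_size = 256MB give hot_add_drconf_memory_max() =
 * 16 * 256MB = 4GB.  If only 2GB is populated at boot,
 * memblock_end_of_DRAM() is 2GB and the max() above still reports a
 * 4GB hotplug ceiling.
 */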
1301237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */
13029eff1a38SJesse Larrew 
1303bd03403aSJesse Larrew /* Virtual Processor Home Node (VPHN) support */
130439bf990eSJesse Larrew #ifdef CONFIG_PPC_SPLPAR
13055de16699SAnton Blanchard static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
13069eff1a38SJesse Larrew static cpumask_t cpu_associativity_changes_mask;
13079eff1a38SJesse Larrew static int vphn_enabled;
13089eff1a38SJesse Larrew static void set_topology_timer(void);
13099eff1a38SJesse Larrew 
13109eff1a38SJesse Larrew /*
13119eff1a38SJesse Larrew  * Snapshot the current values of the associativity change counters that
13129eff1a38SJesse Larrew  * the hypervisor maintains in each cpu's VPA.
13139eff1a38SJesse Larrew  */
13149eff1a38SJesse Larrew static void setup_cpu_associativity_change_counters(void)
13159eff1a38SJesse Larrew {
1316cd9d6cc7SJesse Larrew 	int cpu;
13179eff1a38SJesse Larrew 
13185de16699SAnton Blanchard 	/* The VPHN feature supports a maximum of 8 reference points */
13195de16699SAnton Blanchard 	BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
13205de16699SAnton Blanchard 
13219eff1a38SJesse Larrew 	for_each_possible_cpu(cpu) {
1322cd9d6cc7SJesse Larrew 		int i;
13239eff1a38SJesse Larrew 		u8 *counts = vphn_cpu_change_counts[cpu];
13249eff1a38SJesse Larrew 		volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
13259eff1a38SJesse Larrew 
13265de16699SAnton Blanchard 		for (i = 0; i < distance_ref_points_depth; i++)
13279eff1a38SJesse Larrew 			counts[i] = hypervisor_counts[i];
13289eff1a38SJesse Larrew 	}
13299eff1a38SJesse Larrew }
13309eff1a38SJesse Larrew 
13319eff1a38SJesse Larrew /*
13329eff1a38SJesse Larrew  * The hypervisor maintains a set of 8 associativity change counters in
13339eff1a38SJesse Larrew  * the VPA of each cpu that correspond to the associativity levels in the
13349eff1a38SJesse Larrew  * ibm,associativity-reference-points property. When an associativity
13359eff1a38SJesse Larrew  * level changes, the corresponding counter is incremented.
13369eff1a38SJesse Larrew  *
13379eff1a38SJesse Larrew  * Set a bit in cpu_associativity_changes_mask for each cpu whose home
13389eff1a38SJesse Larrew  * node associativity levels have changed.
13399eff1a38SJesse Larrew  *
13409eff1a38SJesse Larrew  * Returns the number of cpus with unhandled associativity changes.
13419eff1a38SJesse Larrew  */
13429eff1a38SJesse Larrew static int update_cpu_associativity_changes_mask(void)
13439eff1a38SJesse Larrew {
1344cd9d6cc7SJesse Larrew 	int cpu, nr_cpus = 0;
13459eff1a38SJesse Larrew 	cpumask_t *changes = &cpu_associativity_changes_mask;
13469eff1a38SJesse Larrew 
13479eff1a38SJesse Larrew 	cpumask_clear(changes);
13489eff1a38SJesse Larrew 
13499eff1a38SJesse Larrew 	for_each_possible_cpu(cpu) {
13509eff1a38SJesse Larrew 		int i, changed = 0;
13519eff1a38SJesse Larrew 		u8 *counts = vphn_cpu_change_counts[cpu];
13529eff1a38SJesse Larrew 		volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
13539eff1a38SJesse Larrew 
13545de16699SAnton Blanchard 		for (i = 0; i < distance_ref_points_depth; i++) {
1355d69043e8SAnton Blanchard 			if (hypervisor_counts[i] != counts[i]) {
13569eff1a38SJesse Larrew 				counts[i] = hypervisor_counts[i];
13579eff1a38SJesse Larrew 				changed = 1;
13589eff1a38SJesse Larrew 			}
13599eff1a38SJesse Larrew 		}
13609eff1a38SJesse Larrew 		if (changed) {
13619eff1a38SJesse Larrew 			cpumask_set_cpu(cpu, changes);
13629eff1a38SJesse Larrew 			nr_cpus++;
13639eff1a38SJesse Larrew 		}
13649eff1a38SJesse Larrew 	}
13659eff1a38SJesse Larrew 
13669eff1a38SJesse Larrew 	return nr_cpus;
13679eff1a38SJesse Larrew }
13689eff1a38SJesse Larrew 
1369c0e5e46fSAnton Blanchard /*
1370c0e5e46fSAnton Blanchard  * 6 64-bit registers unpacked into 12 32-bit associativity values. To form
1371c0e5e46fSAnton Blanchard  * the complete property we have to add the length in the first cell.
1372c0e5e46fSAnton Blanchard  */
1373c0e5e46fSAnton Blanchard #define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32) + 1)
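/* That is 6 * 8 / 4 + 1 = 13 32-bit cells. */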
13749eff1a38SJesse Larrew 
13759eff1a38SJesse Larrew /*
13769eff1a38SJesse Larrew  * Convert the associativity domain numbers returned from the hypervisor
13779eff1a38SJesse Larrew  * to the sequence they would appear in the ibm,associativity property.
13789eff1a38SJesse Larrew  */
13799eff1a38SJesse Larrew static int vphn_unpack_associativity(const long *packed, unsigned int *unpacked)
13809eff1a38SJesse Larrew {
1381cd9d6cc7SJesse Larrew 	int i, nr_assoc_doms = 0;
13829eff1a38SJesse Larrew 	const u16 *field = (const u16*) packed;
13839eff1a38SJesse Larrew 
13849eff1a38SJesse Larrew #define VPHN_FIELD_UNUSED	(0xffff)
13859eff1a38SJesse Larrew #define VPHN_FIELD_MSB		(0x8000)
13869eff1a38SJesse Larrew #define VPHN_FIELD_MASK		(~VPHN_FIELD_MSB)
13879eff1a38SJesse Larrew 
1388c0e5e46fSAnton Blanchard 	for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
13899eff1a38SJesse Larrew 		if (*field == VPHN_FIELD_UNUSED) {
13909eff1a38SJesse Larrew 			/* All significant fields processed, and remaining
13919eff1a38SJesse Larrew 			 * fields contain the reserved value of all 1's.
13929eff1a38SJesse Larrew 			 * Just store them.
13939eff1a38SJesse Larrew 			 */
13949eff1a38SJesse Larrew 			unpacked[i] = *((u32*)field);
13959eff1a38SJesse Larrew 			field += 2;
13967639adaaSJesse Larrew 		} else if (*field & VPHN_FIELD_MSB) {
13979eff1a38SJesse Larrew 			/* Data is in the lower 15 bits of this field */
13989eff1a38SJesse Larrew 			unpacked[i] = *field & VPHN_FIELD_MASK;
13999eff1a38SJesse Larrew 			field++;
14009eff1a38SJesse Larrew 			nr_assoc_doms++;
14017639adaaSJesse Larrew 		} else {
14029eff1a38SJesse Larrew 			/* Data is in the lower 15 bits of this field
14039eff1a38SJesse Larrew 			 * concatenated with the next 16 bit field
14049eff1a38SJesse Larrew 			 */
14059eff1a38SJesse Larrew 			unpacked[i] = *((u32*)field);
14069eff1a38SJesse Larrew 			field += 2;
14079eff1a38SJesse Larrew 			nr_assoc_doms++;
14089eff1a38SJesse Larrew 		}
14099eff1a38SJesse Larrew 	}
14109eff1a38SJesse Larrew 
1411c0e5e46fSAnton Blanchard 	/* The first cell contains the length of the property */
1412c0e5e46fSAnton Blanchard 	unpacked[0] = nr_assoc_doms;
1413c0e5e46fSAnton Blanchard 
14149eff1a38SJesse Larrew 	return nr_assoc_doms;
14159eff1a38SJesse Larrew }
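/*
 * Unpacking example (illustrative values, assuming big-endian layout):
 * a 16-bit field of 0x8007 has the MSB set, so it stands alone and
 * unpacks to domain number 0x0007.  A field of 0x0001 followed by
 * 0x2345 has the MSB clear, so both fields are consumed as the single
 * 32-bit value 0x00012345.  A field of 0xffff marks the reserved
 * trailing fields, which are stored as-is.
 */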
14169eff1a38SJesse Larrew 
14179eff1a38SJesse Larrew /*
14189eff1a38SJesse Larrew  * Retrieve the new associativity information for a virtual processor's
14199eff1a38SJesse Larrew  * home node.
14209eff1a38SJesse Larrew  */
14219eff1a38SJesse Larrew static long hcall_vphn(unsigned long cpu, unsigned int *associativity)
14229eff1a38SJesse Larrew {
1423cd9d6cc7SJesse Larrew 	long rc;
14249eff1a38SJesse Larrew 	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
14259eff1a38SJesse Larrew 	u64 flags = 1;
14269eff1a38SJesse Larrew 	int hwcpu = get_hard_smp_processor_id(cpu);
14279eff1a38SJesse Larrew 
14289eff1a38SJesse Larrew 	rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
14299eff1a38SJesse Larrew 	vphn_unpack_associativity(retbuf, associativity);
14309eff1a38SJesse Larrew 
14319eff1a38SJesse Larrew 	return rc;
14329eff1a38SJesse Larrew }
14339eff1a38SJesse Larrew 
14349eff1a38SJesse Larrew static long vphn_get_associativity(unsigned long cpu,
14359eff1a38SJesse Larrew 					unsigned int *associativity)
14369eff1a38SJesse Larrew {
1437cd9d6cc7SJesse Larrew 	long rc;
14389eff1a38SJesse Larrew 
14399eff1a38SJesse Larrew 	rc = hcall_vphn(cpu, associativity);
14409eff1a38SJesse Larrew 
14419eff1a38SJesse Larrew 	switch (rc) {
14429eff1a38SJesse Larrew 	case H_FUNCTION:
14439eff1a38SJesse Larrew 		printk(KERN_INFO
14449eff1a38SJesse Larrew 			"VPHN is not supported. Disabling polling...\n");
14459eff1a38SJesse Larrew 		stop_topology_update();
14469eff1a38SJesse Larrew 		break;
14479eff1a38SJesse Larrew 	case H_HARDWARE:
14489eff1a38SJesse Larrew 		printk(KERN_ERR
14499eff1a38SJesse Larrew 			"hcall_vphn() experienced a hardware fault "
14509eff1a38SJesse Larrew 			"preventing VPHN. Disabling polling...\n");
14519eff1a38SJesse Larrew 		stop_topology_update();
14529eff1a38SJesse Larrew 	}
14539eff1a38SJesse Larrew 
14549eff1a38SJesse Larrew 	return rc;
14559eff1a38SJesse Larrew }
14569eff1a38SJesse Larrew 
14579eff1a38SJesse Larrew /*
14589eff1a38SJesse Larrew  * Update the node maps and sysfs entries for each cpu whose home node
14599eff1a38SJesse Larrew  * has changed.
14609eff1a38SJesse Larrew  */
14619eff1a38SJesse Larrew int arch_update_cpu_topology(void)
14629eff1a38SJesse Larrew {
1463cd9d6cc7SJesse Larrew 	int cpu, nid, old_nid;
14649eff1a38SJesse Larrew 	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
1465cd9d6cc7SJesse Larrew 	struct sys_device *sysdev;
14669eff1a38SJesse Larrew 
1467104699c0SKOSAKI Motohiro 	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
14689eff1a38SJesse Larrew 		vphn_get_associativity(cpu, associativity);
14699eff1a38SJesse Larrew 		nid = associativity_to_nid(associativity);
14709eff1a38SJesse Larrew 
14719eff1a38SJesse Larrew 		if (nid < 0 || !node_online(nid))
14729eff1a38SJesse Larrew 			nid = first_online_node;
14739eff1a38SJesse Larrew 
14749eff1a38SJesse Larrew 		old_nid = numa_cpu_lookup_table[cpu];
14759eff1a38SJesse Larrew 
14769eff1a38SJesse Larrew 		/*
14779eff1a38SJesse Larrew 		 * Disable hotplug while we update the cpu masks and sysfs.
14789eff1a38SJesse Larrew 		 */
14799eff1a38SJesse Larrew 		get_online_cpus();
14809eff1a38SJesse Larrew 		unregister_cpu_under_node(cpu, old_nid);
14819eff1a38SJesse Larrew 		unmap_cpu_from_node(cpu);
14829eff1a38SJesse Larrew 		map_cpu_to_node(cpu, nid);
14839eff1a38SJesse Larrew 		register_cpu_under_node(cpu, nid);
14849eff1a38SJesse Larrew 		put_online_cpus();
14859eff1a38SJesse Larrew 
14869eff1a38SJesse Larrew 		sysdev = get_cpu_sysdev(cpu);
14879eff1a38SJesse Larrew 		if (sysdev)
14889eff1a38SJesse Larrew 			kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
14899eff1a38SJesse Larrew 	}
14909eff1a38SJesse Larrew 
14919eff1a38SJesse Larrew 	return 1;
14929eff1a38SJesse Larrew }
14939eff1a38SJesse Larrew 
14949eff1a38SJesse Larrew static void topology_work_fn(struct work_struct *work)
14959eff1a38SJesse Larrew {
14969eff1a38SJesse Larrew 	rebuild_sched_domains();
14979eff1a38SJesse Larrew }
14989eff1a38SJesse Larrew static DECLARE_WORK(topology_work, topology_work_fn);
14999eff1a38SJesse Larrew 
15009eff1a38SJesse Larrew void topology_schedule_update(void)
15019eff1a38SJesse Larrew {
15029eff1a38SJesse Larrew 	schedule_work(&topology_work);
15039eff1a38SJesse Larrew }
15049eff1a38SJesse Larrew 
15059eff1a38SJesse Larrew static void topology_timer_fn(unsigned long ignored)
15069eff1a38SJesse Larrew {
15079eff1a38SJesse Larrew 	if (!vphn_enabled)
15089eff1a38SJesse Larrew 		return;
15099eff1a38SJesse Larrew 	if (update_cpu_associativity_changes_mask() > 0)
15109eff1a38SJesse Larrew 		topology_schedule_update();
15119eff1a38SJesse Larrew 	set_topology_timer();
15129eff1a38SJesse Larrew }
15139eff1a38SJesse Larrew static struct timer_list topology_timer =
15149eff1a38SJesse Larrew 	TIMER_INITIALIZER(topology_timer_fn, 0, 0);
15159eff1a38SJesse Larrew 
15169eff1a38SJesse Larrew static void set_topology_timer(void)
15179eff1a38SJesse Larrew {
15189eff1a38SJesse Larrew 	topology_timer.data = 0;
15199eff1a38SJesse Larrew 	topology_timer.expires = jiffies + 60 * HZ;
15209eff1a38SJesse Larrew 	add_timer(&topology_timer);
15219eff1a38SJesse Larrew }
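/*
 * topology_timer_fn() re-arms the timer through set_topology_timer(),
 * so associativity changes are polled roughly once every 60 seconds
 * (jiffies + 60 * HZ); as a deferrable timer it may fire later on an
 * idle system.
 */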
15229eff1a38SJesse Larrew 
15239eff1a38SJesse Larrew /*
15249eff1a38SJesse Larrew  * Start polling for VPHN associativity changes.
15259eff1a38SJesse Larrew  */
15269eff1a38SJesse Larrew int start_topology_update(void)
15279eff1a38SJesse Larrew {
15289eff1a38SJesse Larrew 	int rc = 0;
15299eff1a38SJesse Larrew 
153036e8695cSBenjamin Herrenschmidt 	/* Disabled until races with load balancing are fixed */
153136e8695cSBenjamin Herrenschmidt 	if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
1532fe5cfd63SAnton Blanchard 	    get_lppaca()->shared_proc) {
15339eff1a38SJesse Larrew 		vphn_enabled = 1;
15349eff1a38SJesse Larrew 		setup_cpu_associativity_change_counters();
15359eff1a38SJesse Larrew 		init_timer_deferrable(&topology_timer);
15369eff1a38SJesse Larrew 		set_topology_timer();
15379eff1a38SJesse Larrew 		rc = 1;
15389eff1a38SJesse Larrew 	}
15399eff1a38SJesse Larrew 
15409eff1a38SJesse Larrew 	return rc;
15419eff1a38SJesse Larrew }
15429eff1a38SJesse Larrew __initcall(start_topology_update);
15439eff1a38SJesse Larrew 
15449eff1a38SJesse Larrew /*
15459eff1a38SJesse Larrew  * Disable polling for VPHN associativity changes.
15469eff1a38SJesse Larrew  */
15479eff1a38SJesse Larrew int stop_topology_update(void)
15489eff1a38SJesse Larrew {
15499eff1a38SJesse Larrew 	vphn_enabled = 0;
15509eff1a38SJesse Larrew 	return del_timer_sync(&topology_timer);
15519eff1a38SJesse Larrew }
155239bf990eSJesse Larrew #endif /* CONFIG_PPC_SPLPAR */
1553