xref: /linux/arch/powerpc/mm/numa.c (revision 3e401f7a2e5199151f735aee6a5c6b4776e6a35e)
1ab1f9dacSPaul Mackerras /*
2ab1f9dacSPaul Mackerras  * pSeries NUMA support
3ab1f9dacSPaul Mackerras  *
4ab1f9dacSPaul Mackerras  * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
5ab1f9dacSPaul Mackerras  *
6ab1f9dacSPaul Mackerras  * This program is free software; you can redistribute it and/or
7ab1f9dacSPaul Mackerras  * modify it under the terms of the GNU General Public License
8ab1f9dacSPaul Mackerras  * as published by the Free Software Foundation; either version
9ab1f9dacSPaul Mackerras  * 2 of the License, or (at your option) any later version.
10ab1f9dacSPaul Mackerras  */
112d73bae1SNishanth Aravamudan #define pr_fmt(fmt) "numa: " fmt
122d73bae1SNishanth Aravamudan 
13ab1f9dacSPaul Mackerras #include <linux/threads.h>
14ab1f9dacSPaul Mackerras #include <linux/bootmem.h>
15ab1f9dacSPaul Mackerras #include <linux/init.h>
16ab1f9dacSPaul Mackerras #include <linux/mm.h>
17ab1f9dacSPaul Mackerras #include <linux/mmzone.h>
184b16f8e2SPaul Gortmaker #include <linux/export.h>
19ab1f9dacSPaul Mackerras #include <linux/nodemask.h>
20ab1f9dacSPaul Mackerras #include <linux/cpu.h>
21ab1f9dacSPaul Mackerras #include <linux/notifier.h>
2295f72d1eSYinghai Lu #include <linux/memblock.h>
236df1646eSMichael Ellerman #include <linux/of.h>
2406eccea6SDave Hansen #include <linux/pfn.h>
259eff1a38SJesse Larrew #include <linux/cpuset.h>
269eff1a38SJesse Larrew #include <linux/node.h>
2730c05350SNathan Fontenot #include <linux/stop_machine.h>
28e04fa612SNathan Fontenot #include <linux/proc_fs.h>
29e04fa612SNathan Fontenot #include <linux/seq_file.h>
30e04fa612SNathan Fontenot #include <linux/uaccess.h>
31191a7120SLinus Torvalds #include <linux/slab.h>
323be7db6aSRobert Jennings #include <asm/cputhreads.h>
3345fb6ceaSAnton Blanchard #include <asm/sparsemem.h>
34d9b2b2a2SDavid S. Miller #include <asm/prom.h>
352249ca9dSPaul Mackerras #include <asm/smp.h>
36d4edc5b6SSrivatsa S. Bhat #include <asm/cputhreads.h>
37d4edc5b6SSrivatsa S. Bhat #include <asm/topology.h>
389eff1a38SJesse Larrew #include <asm/firmware.h>
399eff1a38SJesse Larrew #include <asm/paca.h>
4039bf990eSJesse Larrew #include <asm/hvcall.h>
41ae3a197eSDavid Howells #include <asm/setup.h>
42176bbf14SJesse Larrew #include <asm/vdso.h>
43ab1f9dacSPaul Mackerras 
44ab1f9dacSPaul Mackerras static int numa_enabled = 1;
45ab1f9dacSPaul Mackerras 
461daa6d08SBalbir Singh static char *cmdline __initdata;
471daa6d08SBalbir Singh 
48ab1f9dacSPaul Mackerras static int numa_debug;
49ab1f9dacSPaul Mackerras #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
50ab1f9dacSPaul Mackerras 
5145fb6ceaSAnton Blanchard int numa_cpu_lookup_table[NR_CPUS];
5225863de0SAnton Blanchard cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
53ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES];
5445fb6ceaSAnton Blanchard 
5545fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpu_lookup_table);
5625863de0SAnton Blanchard EXPORT_SYMBOL(node_to_cpumask_map);
5745fb6ceaSAnton Blanchard EXPORT_SYMBOL(node_data);
5845fb6ceaSAnton Blanchard 
59ab1f9dacSPaul Mackerras static int min_common_depth;
60237a0989SMike Kravetz static int n_mem_addr_cells, n_mem_size_cells;
6141eab6f8SAnton Blanchard static int form1_affinity;
6241eab6f8SAnton Blanchard 
6341eab6f8SAnton Blanchard #define MAX_DISTANCE_REF_POINTS 4
6441eab6f8SAnton Blanchard static int distance_ref_points_depth;
65b08a2a12SAlistair Popple static const __be32 *distance_ref_points;
6641eab6f8SAnton Blanchard static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
67ab1f9dacSPaul Mackerras 
6825863de0SAnton Blanchard /*
6925863de0SAnton Blanchard  * Allocate node_to_cpumask_map based on number of available nodes
7025863de0SAnton Blanchard  * Requires node_possible_map to be valid.
7125863de0SAnton Blanchard  *
729512938bSWanlong Gao  * Note: cpumask_of_node() is not valid until after this is done.
7325863de0SAnton Blanchard  */
static void __init setup_node_to_cpumask_map(void)
{
	unsigned int node;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES)
		setup_nr_node_ids();

	/* allocate a boot-time cpumask for every possible node */
	for_each_node(node)
		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);

	/* cpumask_of_node() will now work */
	dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
}
8925863de0SAnton Blanchard 
/*
 * Create a new fake NUMA node when @end_pfn crosses the next boundary
 * parsed from the fake-NUMA command line (@cmdline).  On success *@nid
 * is set to the new fake node id and 1 is returned; otherwise *@nid is
 * left at the last fake node id (if any) and 0 is returned.
 */
static int __init fake_numa_create_new_node(unsigned long end_pfn,
						unsigned int *nid)
{
	unsigned long long mem;
	char *p = cmdline;
	static unsigned int fake_nid;
	static unsigned long long curr_boundary;

	/*
	 * Modify node id, iff we started creating NUMA nodes
	 * We want to continue from where we left of the last time
	 */
	if (fake_nid)
		*nid = fake_nid;
	/*
	 * In case there are no more arguments to parse, the
	 * node_id should be the same as the last fake node id
	 * (we've handled this above).
	 */
	if (!p)
		return 0;

	mem = memparse(p, &p);
	if (!mem)
		return 0;

	/* Boundaries must be supplied in increasing order */
	if (mem < curr_boundary)
		return 0;

	curr_boundary = mem;

	if ((end_pfn << PAGE_SHIFT) > mem) {
		/*
		 * Skip commas and spaces
		 */
		while (*p == ',' || *p == ' ' || *p == '\t')
			p++;

		cmdline = p;
		fake_nid++;
		*nid = fake_nid;
		dbg("created new fake_node with id %d\n", fake_nid);
		return 1;
	}
	return 0;
}
1361daa6d08SBalbir Singh 
/* Invalidate the cpu-to-node mapping for every possible CPU. */
static void reset_numa_cpu_lookup_table(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu)
		numa_cpu_lookup_table[cpu] = -1;
}
144d4edc5b6SSrivatsa S. Bhat 
/* Record @node as the home node of @cpu in the lookup table. */
static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
{
	numa_cpu_lookup_table[cpu] = node;
}
149d4edc5b6SSrivatsa S. Bhat 
/* Bind @cpu to @node: update the lookup table and the node's cpumask. */
static void map_cpu_to_node(int cpu, int node)
{
	update_numa_cpu_lookup_table(cpu, node);

	dbg("adding cpu %d to node %d\n", cpu, node);

	/* Only set the bit if it is not already present */
	if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node])))
		cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
}
159ab1f9dacSPaul Mackerras 
16039bf990eSJesse Larrew #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
161ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu)
162ab1f9dacSPaul Mackerras {
163ab1f9dacSPaul Mackerras 	int node = numa_cpu_lookup_table[cpu];
164ab1f9dacSPaul Mackerras 
165ab1f9dacSPaul Mackerras 	dbg("removing cpu %lu from node %d\n", cpu, node);
166ab1f9dacSPaul Mackerras 
16725863de0SAnton Blanchard 	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
168429f4d8dSAnton Blanchard 		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
169ab1f9dacSPaul Mackerras 	} else {
170ab1f9dacSPaul Mackerras 		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
171ab1f9dacSPaul Mackerras 		       cpu, node);
172ab1f9dacSPaul Mackerras 	}
173ab1f9dacSPaul Mackerras }
17439bf990eSJesse Larrew #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
175ab1f9dacSPaul Mackerras 
/* must hold reference to node during call */
/* Fetch the raw ibm,associativity property of @dev, or NULL if absent. */
static const __be32 *of_get_associativity(struct device_node *dev)
{
	return of_get_property(dev, "ibm,associativity", NULL);
}
181ab1f9dacSPaul Mackerras 
182cf00085dSChandru /*
183cf00085dSChandru  * Returns the property linux,drconf-usable-memory if
184cf00085dSChandru  * it exists (the property exists only in kexec/kdump kernels,
185cf00085dSChandru  * added by kexec-tools)
186cf00085dSChandru  */
187b08a2a12SAlistair Popple static const __be32 *of_get_usable_memory(struct device_node *memory)
188cf00085dSChandru {
189b08a2a12SAlistair Popple 	const __be32 *prop;
190cf00085dSChandru 	u32 len;
191cf00085dSChandru 	prop = of_get_property(memory, "linux,drconf-usable-memory", &len);
192cf00085dSChandru 	if (!prop || len < sizeof(unsigned int))
193ec32dd66SRobert Jennings 		return NULL;
194cf00085dSChandru 	return prop;
195cf00085dSChandru }
196cf00085dSChandru 
19741eab6f8SAnton Blanchard int __node_distance(int a, int b)
19841eab6f8SAnton Blanchard {
19941eab6f8SAnton Blanchard 	int i;
20041eab6f8SAnton Blanchard 	int distance = LOCAL_DISTANCE;
20141eab6f8SAnton Blanchard 
20241eab6f8SAnton Blanchard 	if (!form1_affinity)
2037122beeeSVaidyanathan Srinivasan 		return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
20441eab6f8SAnton Blanchard 
20541eab6f8SAnton Blanchard 	for (i = 0; i < distance_ref_points_depth; i++) {
20641eab6f8SAnton Blanchard 		if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
20741eab6f8SAnton Blanchard 			break;
20841eab6f8SAnton Blanchard 
20941eab6f8SAnton Blanchard 		/* Double the distance for each NUMA level */
21041eab6f8SAnton Blanchard 		distance *= 2;
21141eab6f8SAnton Blanchard 	}
21241eab6f8SAnton Blanchard 
21341eab6f8SAnton Blanchard 	return distance;
21441eab6f8SAnton Blanchard }
21512c743ebSMike Qiu EXPORT_SYMBOL(__node_distance);
21641eab6f8SAnton Blanchard 
/*
 * Cache the associativity domain ids of @nid at each reference-point
 * depth so __node_distance() can later compare two nodes level by
 * level.  Only meaningful with form 1 affinity.
 */
static void initialize_distance_lookup_table(int nid,
		const __be32 *associativity)
{
	int i;

	if (!form1_affinity)
		return;

	for (i = 0; i < distance_ref_points_depth; i++) {
		const __be32 *entry;

		/* distance_ref_points[] holds 1-based indices */
		entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
		distance_lookup_table[nid][i] = of_read_number(entry, 1);
	}
}
23241eab6f8SAnton Blanchard 
/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
 * info is found.
 */
static int associativity_to_nid(const __be32 *associativity)
{
	int nid = -1;

	/* min_common_depth == -1 means no usable NUMA information */
	if (min_common_depth == -1)
		goto out;

	/* First cell of the property is the associativity array length */
	if (of_read_number(associativity, 1) >= min_common_depth)
		nid = of_read_number(&associativity[min_common_depth], 1);

	/* POWER4 LPAR uses 0xffff as invalid node */
	if (nid == 0xffff || nid >= MAX_NUMNODES)
		nid = -1;

	/* Side effect: also populate the node distance lookup table */
	if (nid > 0 &&
		of_read_number(associativity, 1) >= distance_ref_points_depth) {
		/*
		 * Skip the length field and send start of associativity array
		 */
		initialize_distance_lookup_table(nid, associativity + 1);
	}

out:
	return nid;
}
261ab1f9dacSPaul Mackerras 
2629eff1a38SJesse Larrew /* Returns the nid associated with the given device tree node,
2639eff1a38SJesse Larrew  * or -1 if not found.
2649eff1a38SJesse Larrew  */
2659eff1a38SJesse Larrew static int of_node_to_nid_single(struct device_node *device)
2669eff1a38SJesse Larrew {
2679eff1a38SJesse Larrew 	int nid = -1;
268b08a2a12SAlistair Popple 	const __be32 *tmp;
2699eff1a38SJesse Larrew 
2709eff1a38SJesse Larrew 	tmp = of_get_associativity(device);
2719eff1a38SJesse Larrew 	if (tmp)
2729eff1a38SJesse Larrew 		nid = associativity_to_nid(tmp);
2739eff1a38SJesse Larrew 	return nid;
2749eff1a38SJesse Larrew }
2759eff1a38SJesse Larrew 
/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
	int nid = -1;

	of_node_get(device);
	while (device) {
		nid = of_node_to_nid_single(device);
		if (nid != -1)
			break;

		/* of_get_next_parent() drops the reference on @device */
		device = of_get_next_parent(device);
	}
	of_node_put(device);

	return nid;
}
EXPORT_SYMBOL(of_node_to_nid);
294953039c8SJeremy Kerr 
/*
 * Parse ibm,associativity-reference-points to determine the depth at
 * which associativity arrays identify a NUMA node, and whether the
 * platform uses form 1 affinity.  Returns that depth, or -1 on failure.
 */
static int __init find_min_common_depth(void)
{
	int depth;
	struct device_node *root;

	/* The property lives under a platform-dependent node */
	if (firmware_has_feature(FW_FEATURE_OPAL))
		root = of_find_node_by_path("/ibm,opal");
	else
		root = of_find_node_by_path("/rtas");
	if (!root)
		root = of_find_node_by_path("/");

	/*
	 * This property is a set of 32-bit integers, each representing
	 * an index into the ibm,associativity nodes.
	 *
	 * With form 0 affinity the first integer is for an SMP configuration
	 * (should be all 0's) and the second is for a normal NUMA
	 * configuration. We have only one level of NUMA.
	 *
	 * With form 1 affinity the first integer is the most significant
	 * NUMA boundary and the following are progressively less significant
	 * boundaries. There can be more than one level of NUMA.
	 */
	distance_ref_points = of_get_property(root,
					"ibm,associativity-reference-points",
					&distance_ref_points_depth);

	if (!distance_ref_points) {
		dbg("NUMA: ibm,associativity-reference-points not found.\n");
		goto err;
	}

	/* Convert the property length from bytes to cells */
	distance_ref_points_depth /= sizeof(int);

	if (firmware_has_feature(FW_FEATURE_OPAL) ||
	    firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) {
		dbg("Using form 1 affinity\n");
		form1_affinity = 1;
	}

	if (form1_affinity) {
		depth = of_read_number(distance_ref_points, 1);
	} else {
		/* Form 0 needs at least the SMP and NUMA entries */
		if (distance_ref_points_depth < 2) {
			printk(KERN_WARNING "NUMA: "
				"short ibm,associativity-reference-points\n");
			goto err;
		}

		depth = of_read_number(&distance_ref_points[1], 1);
	}

	/*
	 * Warn and cap if the hardware supports more than
	 * MAX_DISTANCE_REF_POINTS domains.
	 */
	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
		printk(KERN_WARNING "NUMA: distance array capped at "
			"%d entries\n", MAX_DISTANCE_REF_POINTS);
		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
	}

	of_node_put(root);
	return depth;

err:
	of_node_put(root);
	return -1;
}
365ab1f9dacSPaul Mackerras 
/* Read #address-cells / #size-cells from the first memory node. */
static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
	struct device_node *memory = NULL;

	memory = of_find_node_by_type(memory, "memory");
	if (!memory)
		panic("numa.c: No memory nodes found!");

	*n_addr_cells = of_n_addr_cells(memory);
	*n_size_cells = of_n_size_cells(memory);
	of_node_put(memory);
}
378ab1f9dacSPaul Mackerras 
379b08a2a12SAlistair Popple static unsigned long read_n_cells(int n, const __be32 **buf)
380ab1f9dacSPaul Mackerras {
381ab1f9dacSPaul Mackerras 	unsigned long result = 0;
382ab1f9dacSPaul Mackerras 
383ab1f9dacSPaul Mackerras 	while (n--) {
384b08a2a12SAlistair Popple 		result = (result << 32) | of_read_number(*buf, 1);
385ab1f9dacSPaul Mackerras 		(*buf)++;
386ab1f9dacSPaul Mackerras 	}
387ab1f9dacSPaul Mackerras 	return result;
388ab1f9dacSPaul Mackerras }
389ab1f9dacSPaul Mackerras 
3908342681dSNathan Fontenot /*
39195f72d1eSYinghai Lu  * Read the next memblock list entry from the ibm,dynamic-memory property
3928342681dSNathan Fontenot  * and return the information in the provided of_drconf_cell structure.
3938342681dSNathan Fontenot  */
static void read_drconf_cell(struct of_drconf_cell *drmem, const __be32 **cellp)
{
	const __be32 *cp;

	/* Base address occupies n_mem_addr_cells cells */
	drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);

	/* Followed by four single-cell fields */
	cp = *cellp;
	drmem->drc_index = of_read_number(cp, 1);
	drmem->reserved = of_read_number(&cp[1], 1);
	drmem->aa_index = of_read_number(&cp[2], 1);
	drmem->flags = of_read_number(&cp[3], 1);

	/* Advance the cursor past this entry */
	*cellp = cp + 4;
}
4088342681dSNathan Fontenot 
4098342681dSNathan Fontenot /*
41025985edcSLucas De Marchi  * Retrieve and validate the ibm,dynamic-memory property of the device tree.
4118342681dSNathan Fontenot  *
41295f72d1eSYinghai Lu  * The layout of the ibm,dynamic-memory property is a number N of memblock
41395f72d1eSYinghai Lu  * list entries followed by N memblock list entries.  Each memblock list entry
41425985edcSLucas De Marchi  * contains information as laid out in the of_drconf_cell struct above.
4158342681dSNathan Fontenot  */
416b08a2a12SAlistair Popple static int of_get_drconf_memory(struct device_node *memory, const __be32 **dm)
4178342681dSNathan Fontenot {
418b08a2a12SAlistair Popple 	const __be32 *prop;
4198342681dSNathan Fontenot 	u32 len, entries;
4208342681dSNathan Fontenot 
4218342681dSNathan Fontenot 	prop = of_get_property(memory, "ibm,dynamic-memory", &len);
4228342681dSNathan Fontenot 	if (!prop || len < sizeof(unsigned int))
4238342681dSNathan Fontenot 		return 0;
4248342681dSNathan Fontenot 
425b08a2a12SAlistair Popple 	entries = of_read_number(prop++, 1);
4268342681dSNathan Fontenot 
4278342681dSNathan Fontenot 	/* Now that we know the number of entries, revalidate the size
4288342681dSNathan Fontenot 	 * of the property read in to ensure we have everything
4298342681dSNathan Fontenot 	 */
4308342681dSNathan Fontenot 	if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
4318342681dSNathan Fontenot 		return 0;
4328342681dSNathan Fontenot 
4338342681dSNathan Fontenot 	*dm = prop;
4348342681dSNathan Fontenot 	return entries;
4358342681dSNathan Fontenot }
4368342681dSNathan Fontenot 
4378342681dSNathan Fontenot /*
43825985edcSLucas De Marchi  * Retrieve and validate the ibm,lmb-size property for drconf memory
4398342681dSNathan Fontenot  * from the device tree.
4408342681dSNathan Fontenot  */
4413fdfd990SBenjamin Herrenschmidt static u64 of_get_lmb_size(struct device_node *memory)
4428342681dSNathan Fontenot {
443b08a2a12SAlistair Popple 	const __be32 *prop;
4448342681dSNathan Fontenot 	u32 len;
4458342681dSNathan Fontenot 
4463fdfd990SBenjamin Herrenschmidt 	prop = of_get_property(memory, "ibm,lmb-size", &len);
4478342681dSNathan Fontenot 	if (!prop || len < sizeof(unsigned int))
4488342681dSNathan Fontenot 		return 0;
4498342681dSNathan Fontenot 
4508342681dSNathan Fontenot 	return read_n_cells(n_mem_size_cells, &prop);
4518342681dSNathan Fontenot }
4528342681dSNathan Fontenot 
/* Parsed view of the ibm,associativity-lookup-arrays property. */
struct assoc_arrays {
	u32	n_arrays;	/* number of associativity arrays */
	u32	array_sz;	/* cells per array */
	const __be32 *arrays;	/* start of the first array */
};
4588342681dSNathan Fontenot 
4598342681dSNathan Fontenot /*
46025985edcSLucas De Marchi  * Retrieve and validate the list of associativity arrays for drconf
4618342681dSNathan Fontenot  * memory from the ibm,associativity-lookup-arrays property of the
4628342681dSNathan Fontenot  * device tree..
4638342681dSNathan Fontenot  *
4648342681dSNathan Fontenot  * The layout of the ibm,associativity-lookup-arrays property is a number N
4658342681dSNathan Fontenot  * indicating the number of associativity arrays, followed by a number M
4668342681dSNathan Fontenot  * indicating the size of each associativity array, followed by a list
4678342681dSNathan Fontenot  * of N associativity arrays.
4688342681dSNathan Fontenot  */
static int of_get_assoc_arrays(struct device_node *memory,
			       struct assoc_arrays *aa)
{
	const __be32 *prop;
	u32 len;

	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
	/* Need at least the two header cells (N and M) */
	if (!prop || len < 2 * sizeof(unsigned int))
		return -1;

	aa->n_arrays = of_read_number(prop++, 1);
	aa->array_sz = of_read_number(prop++, 1);

	/* Now that we know the number of arrays and size of each array,
	 * revalidate the size of the property read in.
	 */
	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
		return -1;

	aa->arrays = prop;
	return 0;
}
4918342681dSNathan Fontenot 
4928342681dSNathan Fontenot /*
4938342681dSNathan Fontenot  * This is like of_node_to_nid_single() for memory represented in the
4948342681dSNathan Fontenot  * ibm,dynamic-reconfiguration-memory node.
4958342681dSNathan Fontenot  */
static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
				   struct assoc_arrays *aa)
{
	int default_nid = 0;
	int nid = default_nid;
	int index;

	/*
	 * Only trust the LMB's associativity index when NUMA info is
	 * usable, the index is flagged valid and it is within range.
	 */
	if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
	    !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
	    drmem->aa_index < aa->n_arrays) {
		index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
		nid = of_read_number(&aa->arrays[index], 1);

		/* 0xffff marks an invalid node (POWER4 LPAR convention) */
		if (nid == 0xffff || nid >= MAX_NUMNODES)
			nid = default_nid;

		/* Also feed the node distance lookup table */
		if (nid > 0) {
			index = drmem->aa_index * aa->array_sz;
			initialize_distance_lookup_table(nid,
							&aa->arrays[index]);
		}
	}

	return nid;
}
5218342681dSNathan Fontenot 
522ab1f9dacSPaul Mackerras /*
523ab1f9dacSPaul Mackerras  * Figure out to which domain a cpu belongs and stick it there.
524ab1f9dacSPaul Mackerras  * Return the id of the domain used.
525ab1f9dacSPaul Mackerras  */
static int numa_setup_cpu(unsigned long lcpu)
{
	int nid = -1;
	struct device_node *cpu;

	/*
	 * If a valid cpu-to-node mapping is already available, use it
	 * directly instead of querying the firmware, since it represents
	 * the most recent mapping notified to us by the platform (eg: VPHN).
	 */
	if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) {
		map_cpu_to_node(lcpu, nid);
		return nid;
	}

	cpu = of_get_cpu_node(lcpu, NULL);

	if (!cpu) {
		WARN_ON(1);
		/* A present CPU still needs a (fallback) node mapping */
		if (cpu_present(lcpu))
			goto out_present;
		else
			goto out;
	}

	nid = of_node_to_nid_single(cpu);

out_present:
	/* Fall back to the first online node on a bad or offline nid */
	if (nid < 0 || !node_online(nid))
		nid = first_online_node;

	map_cpu_to_node(lcpu, nid);
	of_node_put(cpu);
out:
	return nid;
}
562ab1f9dacSPaul Mackerras 
/*
 * Sanity check that @cpu and all of its online thread siblings were
 * assigned to the same @node; WARN once on the first mismatch.
 */
static void verify_cpu_node_mapping(int cpu, int node)
{
	int base, sibling, i;

	/* Verify that all the threads in the core belong to the same node */
	base = cpu_first_thread_sibling(cpu);

	for (i = 0; i < threads_per_core; i++) {
		sibling = base + i;

		if (sibling == cpu || cpu_is_offline(sibling))
			continue;

		if (cpu_to_node(sibling) != node) {
			WARN(1, "CPU thread siblings %d and %d don't belong"
				" to the same node!\n", cpu, sibling);
			break;
		}
	}
}
58368fb18aaSSrivatsa S. Bhat 
/* Must run before sched domains notifier. */
static int ppc_numa_cpu_prepare(unsigned int cpu)
{
	int nid;

	/* Map the incoming CPU to a node and check its thread siblings */
	nid = numa_setup_cpu(cpu);
	verify_cpu_node_mapping(cpu, nid);
	return 0;
}
593bdab88e0SSebastian Andrzej Siewior 
/* Hotplug callback: drop the dead CPU from its node's cpumask. */
static int ppc_numa_cpu_dead(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
	unmap_cpu_from_node(cpu);
#endif
	return 0;
}
601ab1f9dacSPaul Mackerras 
602ab1f9dacSPaul Mackerras /*
603ab1f9dacSPaul Mackerras  * Check and possibly modify a memory region to enforce the memory limit.
604ab1f9dacSPaul Mackerras  *
605ab1f9dacSPaul Mackerras  * Returns the size the region should have to enforce the memory limit.
606ab1f9dacSPaul Mackerras  * This will either be the original value of size, a truncated value,
607ab1f9dacSPaul Mackerras  * or zero. If the returned value of size is 0 the region should be
60825985edcSLucas De Marchi  * discarded as it lies wholly above the memory limit.
609ab1f9dacSPaul Mackerras  */
61045fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start,
61145fb6ceaSAnton Blanchard 						      unsigned long size)
612ab1f9dacSPaul Mackerras {
613ab1f9dacSPaul Mackerras 	/*
61495f72d1eSYinghai Lu 	 * We use memblock_end_of_DRAM() in here instead of memory_limit because
615ab1f9dacSPaul Mackerras 	 * we've already adjusted it for the limit and it takes care of
616fe55249dSMilton Miller 	 * having memory holes below the limit.  Also, in the case of
617fe55249dSMilton Miller 	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
618ab1f9dacSPaul Mackerras 	 */
619ab1f9dacSPaul Mackerras 
62095f72d1eSYinghai Lu 	if (start + size <= memblock_end_of_DRAM())
621ab1f9dacSPaul Mackerras 		return size;
622ab1f9dacSPaul Mackerras 
62395f72d1eSYinghai Lu 	if (start >= memblock_end_of_DRAM())
624ab1f9dacSPaul Mackerras 		return 0;
625ab1f9dacSPaul Mackerras 
62695f72d1eSYinghai Lu 	return memblock_end_of_DRAM() - start;
627ab1f9dacSPaul Mackerras }
628ab1f9dacSPaul Mackerras 
6290204568aSPaul Mackerras /*
630cf00085dSChandru  * Reads the counter for a given entry in
631cf00085dSChandru  * linux,drconf-usable-memory property
632cf00085dSChandru  */
633b08a2a12SAlistair Popple static inline int __init read_usm_ranges(const __be32 **usm)
634cf00085dSChandru {
635cf00085dSChandru 	/*
6363fdfd990SBenjamin Herrenschmidt 	 * For each lmb in ibm,dynamic-memory a corresponding
637cf00085dSChandru 	 * entry in linux,drconf-usable-memory property contains
638cf00085dSChandru 	 * a counter followed by that many (base, size) duple.
639cf00085dSChandru 	 * read the counter from linux,drconf-usable-memory
640cf00085dSChandru 	 */
641cf00085dSChandru 	return read_n_cells(n_mem_size_cells, usm);
642cf00085dSChandru }
643cf00085dSChandru 
644cf00085dSChandru /*
6450204568aSPaul Mackerras  * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
6460204568aSPaul Mackerras  * node.  This assumes n_mem_{addr,size}_cells have been set.
6470204568aSPaul Mackerras  */
6480204568aSPaul Mackerras static void __init parse_drconf_memory(struct device_node *memory)
6490204568aSPaul Mackerras {
650b08a2a12SAlistair Popple 	const __be32 *uninitialized_var(dm), *usm;
651cf00085dSChandru 	unsigned int n, rc, ranges, is_kexec_kdump = 0;
6523fdfd990SBenjamin Herrenschmidt 	unsigned long lmb_size, base, size, sz;
6538342681dSNathan Fontenot 	int nid;
654aa709f3bSBenjamin Herrenschmidt 	struct assoc_arrays aa = { .arrays = NULL };
6550204568aSPaul Mackerras 
6568342681dSNathan Fontenot 	n = of_get_drconf_memory(memory, &dm);
6578342681dSNathan Fontenot 	if (!n)
6580204568aSPaul Mackerras 		return;
6590204568aSPaul Mackerras 
6603fdfd990SBenjamin Herrenschmidt 	lmb_size = of_get_lmb_size(memory);
6613fdfd990SBenjamin Herrenschmidt 	if (!lmb_size)
6628342681dSNathan Fontenot 		return;
6638342681dSNathan Fontenot 
6648342681dSNathan Fontenot 	rc = of_get_assoc_arrays(memory, &aa);
6658342681dSNathan Fontenot 	if (rc)
6660204568aSPaul Mackerras 		return;
6670204568aSPaul Mackerras 
668cf00085dSChandru 	/* check if this is a kexec/kdump kernel */
669cf00085dSChandru 	usm = of_get_usable_memory(memory);
670cf00085dSChandru 	if (usm != NULL)
671cf00085dSChandru 		is_kexec_kdump = 1;
672cf00085dSChandru 
6730204568aSPaul Mackerras 	for (; n != 0; --n) {
6748342681dSNathan Fontenot 		struct of_drconf_cell drmem;
6751daa6d08SBalbir Singh 
6768342681dSNathan Fontenot 		read_drconf_cell(&drmem, &dm);
6778342681dSNathan Fontenot 
6788342681dSNathan Fontenot 		/* skip this block if the reserved bit is set in flags (0x80)
6798342681dSNathan Fontenot 		   or if the block is not assigned to this partition (0x8) */
6808342681dSNathan Fontenot 		if ((drmem.flags & DRCONF_MEM_RESERVED)
6818342681dSNathan Fontenot 		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
6828342681dSNathan Fontenot 			continue;
6838342681dSNathan Fontenot 
684cf00085dSChandru 		base = drmem.base_addr;
6853fdfd990SBenjamin Herrenschmidt 		size = lmb_size;
686cf00085dSChandru 		ranges = 1;
6878342681dSNathan Fontenot 
688cf00085dSChandru 		if (is_kexec_kdump) {
689cf00085dSChandru 			ranges = read_usm_ranges(&usm);
690cf00085dSChandru 			if (!ranges) /* there are no (base, size) duple */
6910204568aSPaul Mackerras 				continue;
692cf00085dSChandru 		}
693cf00085dSChandru 		do {
694cf00085dSChandru 			if (is_kexec_kdump) {
695cf00085dSChandru 				base = read_n_cells(n_mem_addr_cells, &usm);
696cf00085dSChandru 				size = read_n_cells(n_mem_size_cells, &usm);
697cf00085dSChandru 			}
698cf00085dSChandru 			nid = of_drconf_to_nid_single(&drmem, &aa);
699cf00085dSChandru 			fake_numa_create_new_node(
700cf00085dSChandru 				((base + size) >> PAGE_SHIFT),
701cf00085dSChandru 					   &nid);
702cf00085dSChandru 			node_set_online(nid);
703cf00085dSChandru 			sz = numa_enforce_memory_limit(base, size);
704cf00085dSChandru 			if (sz)
705e7e8de59STang Chen 				memblock_set_node(base, sz,
706e7e8de59STang Chen 						  &memblock.memory, nid);
707cf00085dSChandru 		} while (--ranges);
7080204568aSPaul Mackerras 	}
7090204568aSPaul Mackerras }
7100204568aSPaul Mackerras 
711ab1f9dacSPaul Mackerras static int __init parse_numa_properties(void)
712ab1f9dacSPaul Mackerras {
71394db7c5eSAnton Blanchard 	struct device_node *memory;
714482ec7c4SNathan Lynch 	int default_nid = 0;
715ab1f9dacSPaul Mackerras 	unsigned long i;
716ab1f9dacSPaul Mackerras 
717ab1f9dacSPaul Mackerras 	if (numa_enabled == 0) {
718ab1f9dacSPaul Mackerras 		printk(KERN_WARNING "NUMA disabled by user\n");
719ab1f9dacSPaul Mackerras 		return -1;
720ab1f9dacSPaul Mackerras 	}
721ab1f9dacSPaul Mackerras 
722ab1f9dacSPaul Mackerras 	min_common_depth = find_min_common_depth();
723ab1f9dacSPaul Mackerras 
724ab1f9dacSPaul Mackerras 	if (min_common_depth < 0)
725ab1f9dacSPaul Mackerras 		return min_common_depth;
726ab1f9dacSPaul Mackerras 
727bf4b85b0SNathan Lynch 	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
728bf4b85b0SNathan Lynch 
729ab1f9dacSPaul Mackerras 	/*
730482ec7c4SNathan Lynch 	 * Even though we connect cpus to numa domains later in SMP
731482ec7c4SNathan Lynch 	 * init, we need to know the node ids now. This is because
732482ec7c4SNathan Lynch 	 * each node to be onlined must have NODE_DATA etc backing it.
733ab1f9dacSPaul Mackerras 	 */
734482ec7c4SNathan Lynch 	for_each_present_cpu(i) {
735dfbe93a2SAnton Blanchard 		struct device_node *cpu;
736cf950b7aSNathan Lynch 		int nid;
737ab1f9dacSPaul Mackerras 
7388b16cd23SMilton Miller 		cpu = of_get_cpu_node(i, NULL);
739482ec7c4SNathan Lynch 		BUG_ON(!cpu);
740953039c8SJeremy Kerr 		nid = of_node_to_nid_single(cpu);
741ab1f9dacSPaul Mackerras 		of_node_put(cpu);
742ab1f9dacSPaul Mackerras 
743482ec7c4SNathan Lynch 		/*
744482ec7c4SNathan Lynch 		 * Don't fall back to default_nid yet -- we will plug
745482ec7c4SNathan Lynch 		 * cpus into nodes once the memory scan has discovered
746482ec7c4SNathan Lynch 		 * the topology.
747482ec7c4SNathan Lynch 		 */
748482ec7c4SNathan Lynch 		if (nid < 0)
749482ec7c4SNathan Lynch 			continue;
750482ec7c4SNathan Lynch 		node_set_online(nid);
751ab1f9dacSPaul Mackerras 	}
752ab1f9dacSPaul Mackerras 
753237a0989SMike Kravetz 	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
75494db7c5eSAnton Blanchard 
75594db7c5eSAnton Blanchard 	for_each_node_by_type(memory, "memory") {
756ab1f9dacSPaul Mackerras 		unsigned long start;
757ab1f9dacSPaul Mackerras 		unsigned long size;
758cf950b7aSNathan Lynch 		int nid;
759ab1f9dacSPaul Mackerras 		int ranges;
760b08a2a12SAlistair Popple 		const __be32 *memcell_buf;
761ab1f9dacSPaul Mackerras 		unsigned int len;
762ab1f9dacSPaul Mackerras 
763e2eb6392SStephen Rothwell 		memcell_buf = of_get_property(memory,
764ba759485SMichael Ellerman 			"linux,usable-memory", &len);
765ba759485SMichael Ellerman 		if (!memcell_buf || len <= 0)
766e2eb6392SStephen Rothwell 			memcell_buf = of_get_property(memory, "reg", &len);
767ab1f9dacSPaul Mackerras 		if (!memcell_buf || len <= 0)
768ab1f9dacSPaul Mackerras 			continue;
769ab1f9dacSPaul Mackerras 
770cc5d0189SBenjamin Herrenschmidt 		/* ranges in cell */
771cc5d0189SBenjamin Herrenschmidt 		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
772ab1f9dacSPaul Mackerras new_range:
773ab1f9dacSPaul Mackerras 		/* these are order-sensitive, and modify the buffer pointer */
774237a0989SMike Kravetz 		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
775237a0989SMike Kravetz 		size = read_n_cells(n_mem_size_cells, &memcell_buf);
776ab1f9dacSPaul Mackerras 
777482ec7c4SNathan Lynch 		/*
778482ec7c4SNathan Lynch 		 * Assumption: either all memory nodes or none will
779482ec7c4SNathan Lynch 		 * have associativity properties.  If none, then
780482ec7c4SNathan Lynch 		 * everything goes to default_nid.
781482ec7c4SNathan Lynch 		 */
782953039c8SJeremy Kerr 		nid = of_node_to_nid_single(memory);
783482ec7c4SNathan Lynch 		if (nid < 0)
784482ec7c4SNathan Lynch 			nid = default_nid;
7851daa6d08SBalbir Singh 
7861daa6d08SBalbir Singh 		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
787482ec7c4SNathan Lynch 		node_set_online(nid);
788ab1f9dacSPaul Mackerras 
7897656cd8eSReza Arbab 		size = numa_enforce_memory_limit(start, size);
7907656cd8eSReza Arbab 		if (size)
791e7e8de59STang Chen 			memblock_set_node(start, size, &memblock.memory, nid);
792ab1f9dacSPaul Mackerras 
793ab1f9dacSPaul Mackerras 		if (--ranges)
794ab1f9dacSPaul Mackerras 			goto new_range;
795ab1f9dacSPaul Mackerras 	}
796ab1f9dacSPaul Mackerras 
7970204568aSPaul Mackerras 	/*
798dfbe93a2SAnton Blanchard 	 * Now do the same thing for each MEMBLOCK listed in the
799dfbe93a2SAnton Blanchard 	 * ibm,dynamic-memory property in the
800dfbe93a2SAnton Blanchard 	 * ibm,dynamic-reconfiguration-memory node.
8010204568aSPaul Mackerras 	 */
8020204568aSPaul Mackerras 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
8030204568aSPaul Mackerras 	if (memory)
8040204568aSPaul Mackerras 		parse_drconf_memory(memory);
8050204568aSPaul Mackerras 
806ab1f9dacSPaul Mackerras 	return 0;
807ab1f9dacSPaul Mackerras }
808ab1f9dacSPaul Mackerras 
809ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void)
810ab1f9dacSPaul Mackerras {
81195f72d1eSYinghai Lu 	unsigned long top_of_ram = memblock_end_of_DRAM();
81295f72d1eSYinghai Lu 	unsigned long total_ram = memblock_phys_mem_size();
813c67c3cb4SMel Gorman 	unsigned long start_pfn, end_pfn;
81428be7072SBenjamin Herrenschmidt 	unsigned int nid = 0;
81528be7072SBenjamin Herrenschmidt 	struct memblock_region *reg;
816ab1f9dacSPaul Mackerras 
817e110b281SOlof Johansson 	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
818ab1f9dacSPaul Mackerras 	       top_of_ram, total_ram);
819e110b281SOlof Johansson 	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
820ab1f9dacSPaul Mackerras 	       (top_of_ram - total_ram) >> 20);
821ab1f9dacSPaul Mackerras 
82228be7072SBenjamin Herrenschmidt 	for_each_memblock(memory, reg) {
823c7fc2de0SYinghai Lu 		start_pfn = memblock_region_memory_base_pfn(reg);
824c7fc2de0SYinghai Lu 		end_pfn = memblock_region_memory_end_pfn(reg);
8251daa6d08SBalbir Singh 
8261daa6d08SBalbir Singh 		fake_numa_create_new_node(end_pfn, &nid);
8271d7cfe18STejun Heo 		memblock_set_node(PFN_PHYS(start_pfn),
828e7e8de59STang Chen 				  PFN_PHYS(end_pfn - start_pfn),
829e7e8de59STang Chen 				  &memblock.memory, nid);
8301daa6d08SBalbir Singh 		node_set_online(nid);
831c67c3cb4SMel Gorman 	}
832ab1f9dacSPaul Mackerras }
833ab1f9dacSPaul Mackerras 
8344b703a23SAnton Blanchard void __init dump_numa_cpu_topology(void)
8354b703a23SAnton Blanchard {
8364b703a23SAnton Blanchard 	unsigned int node;
8374b703a23SAnton Blanchard 	unsigned int cpu, count;
8384b703a23SAnton Blanchard 
8394b703a23SAnton Blanchard 	if (min_common_depth == -1 || !numa_enabled)
8404b703a23SAnton Blanchard 		return;
8414b703a23SAnton Blanchard 
8424b703a23SAnton Blanchard 	for_each_online_node(node) {
8438467801cSAneesh Kumar K.V 		pr_info("Node %d CPUs:", node);
8444b703a23SAnton Blanchard 
8454b703a23SAnton Blanchard 		count = 0;
8464b703a23SAnton Blanchard 		/*
8474b703a23SAnton Blanchard 		 * If we used a CPU iterator here we would miss printing
8484b703a23SAnton Blanchard 		 * the holes in the cpumap.
8494b703a23SAnton Blanchard 		 */
85025863de0SAnton Blanchard 		for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
85125863de0SAnton Blanchard 			if (cpumask_test_cpu(cpu,
85225863de0SAnton Blanchard 					node_to_cpumask_map[node])) {
8534b703a23SAnton Blanchard 				if (count == 0)
8548467801cSAneesh Kumar K.V 					pr_cont(" %u", cpu);
8554b703a23SAnton Blanchard 				++count;
8564b703a23SAnton Blanchard 			} else {
8574b703a23SAnton Blanchard 				if (count > 1)
8588467801cSAneesh Kumar K.V 					pr_cont("-%u", cpu - 1);
8594b703a23SAnton Blanchard 				count = 0;
8604b703a23SAnton Blanchard 			}
8614b703a23SAnton Blanchard 		}
8624b703a23SAnton Blanchard 
8634b703a23SAnton Blanchard 		if (count > 1)
8648467801cSAneesh Kumar K.V 			pr_cont("-%u", nr_cpu_ids - 1);
8658467801cSAneesh Kumar K.V 		pr_cont("\n");
8664b703a23SAnton Blanchard 	}
8674b703a23SAnton Blanchard }
8684b703a23SAnton Blanchard 
86910239733SAnton Blanchard /* Initialize NODE_DATA for a node on the local memory */
87010239733SAnton Blanchard static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
871ab1f9dacSPaul Mackerras {
87210239733SAnton Blanchard 	u64 spanned_pages = end_pfn - start_pfn;
87310239733SAnton Blanchard 	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
87410239733SAnton Blanchard 	u64 nd_pa;
87510239733SAnton Blanchard 	void *nd;
87610239733SAnton Blanchard 	int tnid;
877ab1f9dacSPaul Mackerras 
87810239733SAnton Blanchard 	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
87910239733SAnton Blanchard 	nd = __va(nd_pa);
880ab1f9dacSPaul Mackerras 
88110239733SAnton Blanchard 	/* report and initialize */
88210239733SAnton Blanchard 	pr_info("  NODE_DATA [mem %#010Lx-%#010Lx]\n",
88310239733SAnton Blanchard 		nd_pa, nd_pa + nd_size - 1);
88410239733SAnton Blanchard 	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
88510239733SAnton Blanchard 	if (tnid != nid)
88610239733SAnton Blanchard 		pr_info("    NODE_DATA(%d) on node %d\n", nid, tnid);
8878f64e1f2SJon Tollefson 
88810239733SAnton Blanchard 	node_data[nid] = nd;
88910239733SAnton Blanchard 	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
89010239733SAnton Blanchard 	NODE_DATA(nid)->node_id = nid;
89110239733SAnton Blanchard 	NODE_DATA(nid)->node_start_pfn = start_pfn;
89210239733SAnton Blanchard 	NODE_DATA(nid)->node_spanned_pages = spanned_pages;
893ab1f9dacSPaul Mackerras }
8948f64e1f2SJon Tollefson 
89510239733SAnton Blanchard void __init initmem_init(void)
8964a618669SDave Hansen {
8972fabf084SNishanth Aravamudan 	int nid, cpu;
8984a618669SDave Hansen 
89995f72d1eSYinghai Lu 	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
9004a618669SDave Hansen 	max_pfn = max_low_pfn;
9014a618669SDave Hansen 
9024a618669SDave Hansen 	if (parse_numa_properties())
9034a618669SDave Hansen 		setup_nonnuma();
9044a618669SDave Hansen 
90510239733SAnton Blanchard 	memblock_dump_all();
90610239733SAnton Blanchard 
9073af229f2SNishanth Aravamudan 	/*
9083af229f2SNishanth Aravamudan 	 * Reduce the possible NUMA nodes to the online NUMA nodes,
9093af229f2SNishanth Aravamudan 	 * since we do not support node hotplug. This ensures that  we
9103af229f2SNishanth Aravamudan 	 * lower the maximum NUMA node ID to what is actually present.
9113af229f2SNishanth Aravamudan 	 */
9123af229f2SNishanth Aravamudan 	nodes_and(node_possible_map, node_possible_map, node_online_map);
9133af229f2SNishanth Aravamudan 
9144a618669SDave Hansen 	for_each_online_node(nid) {
9154a618669SDave Hansen 		unsigned long start_pfn, end_pfn;
9164a618669SDave Hansen 
9174a618669SDave Hansen 		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
91810239733SAnton Blanchard 		setup_node_data(nid, start_pfn, end_pfn);
9198f64e1f2SJon Tollefson 		sparse_memory_present_with_active_regions(nid);
920ab1f9dacSPaul Mackerras 	}
921d3f6204aSBenjamin Herrenschmidt 
92221098b9eSAnton Blanchard 	sparse_init();
92325863de0SAnton Blanchard 
92425863de0SAnton Blanchard 	setup_node_to_cpumask_map();
92525863de0SAnton Blanchard 
926d4edc5b6SSrivatsa S. Bhat 	reset_numa_cpu_lookup_table();
927bdab88e0SSebastian Andrzej Siewior 
9282fabf084SNishanth Aravamudan 	/*
9292fabf084SNishanth Aravamudan 	 * We need the numa_cpu_lookup_table to be accurate for all CPUs,
9302fabf084SNishanth Aravamudan 	 * even before we online them, so that we can use cpu_to_{node,mem}
9312fabf084SNishanth Aravamudan 	 * early in boot, cf. smp_prepare_cpus().
932bdab88e0SSebastian Andrzej Siewior 	 * _nocalls() + manual invocation is used because cpuhp is not yet
933bdab88e0SSebastian Andrzej Siewior 	 * initialized for the boot CPU.
9342fabf084SNishanth Aravamudan 	 */
93573c1b41eSThomas Gleixner 	cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
936bdab88e0SSebastian Andrzej Siewior 				  ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
937bdab88e0SSebastian Andrzej Siewior 	for_each_present_cpu(cpu)
938bdab88e0SSebastian Andrzej Siewior 		numa_setup_cpu(cpu);
9394a618669SDave Hansen }
940ab1f9dacSPaul Mackerras 
941ab1f9dacSPaul Mackerras static int __init early_numa(char *p)
942ab1f9dacSPaul Mackerras {
943ab1f9dacSPaul Mackerras 	if (!p)
944ab1f9dacSPaul Mackerras 		return 0;
945ab1f9dacSPaul Mackerras 
946ab1f9dacSPaul Mackerras 	if (strstr(p, "off"))
947ab1f9dacSPaul Mackerras 		numa_enabled = 0;
948ab1f9dacSPaul Mackerras 
949ab1f9dacSPaul Mackerras 	if (strstr(p, "debug"))
950ab1f9dacSPaul Mackerras 		numa_debug = 1;
951ab1f9dacSPaul Mackerras 
9521daa6d08SBalbir Singh 	p = strstr(p, "fake=");
9531daa6d08SBalbir Singh 	if (p)
9541daa6d08SBalbir Singh 		cmdline = p + strlen("fake=");
9551daa6d08SBalbir Singh 
956ab1f9dacSPaul Mackerras 	return 0;
957ab1f9dacSPaul Mackerras }
958ab1f9dacSPaul Mackerras early_param("numa", early_numa);
959237a0989SMike Kravetz 
9602d73bae1SNishanth Aravamudan static bool topology_updates_enabled = true;
9612d73bae1SNishanth Aravamudan 
9622d73bae1SNishanth Aravamudan static int __init early_topology_updates(char *p)
9632d73bae1SNishanth Aravamudan {
9642d73bae1SNishanth Aravamudan 	if (!p)
9652d73bae1SNishanth Aravamudan 		return 0;
9662d73bae1SNishanth Aravamudan 
9672d73bae1SNishanth Aravamudan 	if (!strcmp(p, "off")) {
9682d73bae1SNishanth Aravamudan 		pr_info("Disabling topology updates\n");
9692d73bae1SNishanth Aravamudan 		topology_updates_enabled = false;
9702d73bae1SNishanth Aravamudan 	}
9712d73bae1SNishanth Aravamudan 
9722d73bae1SNishanth Aravamudan 	return 0;
9732d73bae1SNishanth Aravamudan }
9742d73bae1SNishanth Aravamudan early_param("topology_updates", early_topology_updates);
9752d73bae1SNishanth Aravamudan 
976237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG
977237a0989SMike Kravetz /*
9780f16ef7fSNathan Fontenot  * Find the node associated with a hot added memory section for
9790f16ef7fSNathan Fontenot  * memory represented in the device tree by the property
9800f16ef7fSNathan Fontenot  * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
9810db9360aSNathan Fontenot  */
9820db9360aSNathan Fontenot static int hot_add_drconf_scn_to_nid(struct device_node *memory,
9830db9360aSNathan Fontenot 				     unsigned long scn_addr)
9840db9360aSNathan Fontenot {
985b08a2a12SAlistair Popple 	const __be32 *dm;
9860f16ef7fSNathan Fontenot 	unsigned int drconf_cell_cnt, rc;
9873fdfd990SBenjamin Herrenschmidt 	unsigned long lmb_size;
9880db9360aSNathan Fontenot 	struct assoc_arrays aa;
9890f16ef7fSNathan Fontenot 	int nid = -1;
9900db9360aSNathan Fontenot 
9910f16ef7fSNathan Fontenot 	drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
9920f16ef7fSNathan Fontenot 	if (!drconf_cell_cnt)
9930f16ef7fSNathan Fontenot 		return -1;
9940db9360aSNathan Fontenot 
9953fdfd990SBenjamin Herrenschmidt 	lmb_size = of_get_lmb_size(memory);
9963fdfd990SBenjamin Herrenschmidt 	if (!lmb_size)
9970f16ef7fSNathan Fontenot 		return -1;
9980db9360aSNathan Fontenot 
9990db9360aSNathan Fontenot 	rc = of_get_assoc_arrays(memory, &aa);
10000db9360aSNathan Fontenot 	if (rc)
10010f16ef7fSNathan Fontenot 		return -1;
10020db9360aSNathan Fontenot 
10030f16ef7fSNathan Fontenot 	for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
10040db9360aSNathan Fontenot 		struct of_drconf_cell drmem;
10050db9360aSNathan Fontenot 
10060db9360aSNathan Fontenot 		read_drconf_cell(&drmem, &dm);
10070db9360aSNathan Fontenot 
10080db9360aSNathan Fontenot 		/* skip this block if it is reserved or not assigned to
10090db9360aSNathan Fontenot 		 * this partition */
10100db9360aSNathan Fontenot 		if ((drmem.flags & DRCONF_MEM_RESERVED)
10110db9360aSNathan Fontenot 		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
10120db9360aSNathan Fontenot 			continue;
10130db9360aSNathan Fontenot 
10140f16ef7fSNathan Fontenot 		if ((scn_addr < drmem.base_addr)
10153fdfd990SBenjamin Herrenschmidt 		    || (scn_addr >= (drmem.base_addr + lmb_size)))
10160f16ef7fSNathan Fontenot 			continue;
10170db9360aSNathan Fontenot 
10180f16ef7fSNathan Fontenot 		nid = of_drconf_to_nid_single(&drmem, &aa);
10190f16ef7fSNathan Fontenot 		break;
10200db9360aSNathan Fontenot 	}
10210db9360aSNathan Fontenot 
10220f16ef7fSNathan Fontenot 	return nid;
10230db9360aSNathan Fontenot }
10240db9360aSNathan Fontenot 
10250db9360aSNathan Fontenot /*
10260f16ef7fSNathan Fontenot  * Find the node associated with a hot added memory section for memory
10270f16ef7fSNathan Fontenot  * represented in the device tree as a node (i.e. memory@XXXX) for
102895f72d1eSYinghai Lu  * each memblock.
1029237a0989SMike Kravetz  */
1030ec32dd66SRobert Jennings static int hot_add_node_scn_to_nid(unsigned long scn_addr)
1031237a0989SMike Kravetz {
103294db7c5eSAnton Blanchard 	struct device_node *memory;
10330f16ef7fSNathan Fontenot 	int nid = -1;
1034237a0989SMike Kravetz 
103594db7c5eSAnton Blanchard 	for_each_node_by_type(memory, "memory") {
1036237a0989SMike Kravetz 		unsigned long start, size;
1037b226e462SMike Kravetz 		int ranges;
1038b08a2a12SAlistair Popple 		const __be32 *memcell_buf;
1039237a0989SMike Kravetz 		unsigned int len;
1040237a0989SMike Kravetz 
1041e2eb6392SStephen Rothwell 		memcell_buf = of_get_property(memory, "reg", &len);
1042237a0989SMike Kravetz 		if (!memcell_buf || len <= 0)
1043237a0989SMike Kravetz 			continue;
1044237a0989SMike Kravetz 
1045cc5d0189SBenjamin Herrenschmidt 		/* ranges in cell */
1046cc5d0189SBenjamin Herrenschmidt 		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
10470f16ef7fSNathan Fontenot 
10480f16ef7fSNathan Fontenot 		while (ranges--) {
1049237a0989SMike Kravetz 			start = read_n_cells(n_mem_addr_cells, &memcell_buf);
1050237a0989SMike Kravetz 			size = read_n_cells(n_mem_size_cells, &memcell_buf);
1051237a0989SMike Kravetz 
10520f16ef7fSNathan Fontenot 			if ((scn_addr < start) || (scn_addr >= (start + size)))
10530f16ef7fSNathan Fontenot 				continue;
10540f16ef7fSNathan Fontenot 
10550f16ef7fSNathan Fontenot 			nid = of_node_to_nid_single(memory);
10560f16ef7fSNathan Fontenot 			break;
10570f16ef7fSNathan Fontenot 		}
10580f16ef7fSNathan Fontenot 
10590f16ef7fSNathan Fontenot 		if (nid >= 0)
10600f16ef7fSNathan Fontenot 			break;
10610f16ef7fSNathan Fontenot 	}
10620f16ef7fSNathan Fontenot 
106360831842SAnton Blanchard 	of_node_put(memory);
106460831842SAnton Blanchard 
10650db9360aSNathan Fontenot 	return nid;
1066237a0989SMike Kravetz }
1067237a0989SMike Kravetz 
10680f16ef7fSNathan Fontenot /*
10690f16ef7fSNathan Fontenot  * Find the node associated with a hot added memory section.  Section
107095f72d1eSYinghai Lu  * corresponds to a SPARSEMEM section, not an MEMBLOCK.  It is assumed that
107195f72d1eSYinghai Lu  * sections are fully contained within a single MEMBLOCK.
10720f16ef7fSNathan Fontenot  */
10730f16ef7fSNathan Fontenot int hot_add_scn_to_nid(unsigned long scn_addr)
10740f16ef7fSNathan Fontenot {
10750f16ef7fSNathan Fontenot 	struct device_node *memory = NULL;
10764a3bac4eSReza Arbab 	int nid;
10770f16ef7fSNathan Fontenot 
10780f16ef7fSNathan Fontenot 	if (!numa_enabled || (min_common_depth < 0))
107972c33688SH Hartley Sweeten 		return first_online_node;
10800f16ef7fSNathan Fontenot 
10810f16ef7fSNathan Fontenot 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
10820f16ef7fSNathan Fontenot 	if (memory) {
10830f16ef7fSNathan Fontenot 		nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
10840f16ef7fSNathan Fontenot 		of_node_put(memory);
10850f16ef7fSNathan Fontenot 	} else {
10860f16ef7fSNathan Fontenot 		nid = hot_add_node_scn_to_nid(scn_addr);
1087237a0989SMike Kravetz 	}
10880f16ef7fSNathan Fontenot 
10892a8628d4SReza Arbab 	if (nid < 0 || !node_possible(nid))
109072c33688SH Hartley Sweeten 		nid = first_online_node;
10910f16ef7fSNathan Fontenot 
10920f16ef7fSNathan Fontenot 	return nid;
10930f16ef7fSNathan Fontenot }
10940f16ef7fSNathan Fontenot 
1095cd34206eSNishanth Aravamudan static u64 hot_add_drconf_memory_max(void)
1096cd34206eSNishanth Aravamudan {
1097cd34206eSNishanth Aravamudan 	struct device_node *memory = NULL;
109845b64ee6SBharata B Rao 	struct device_node *dn = NULL;
1099cd34206eSNishanth Aravamudan 	unsigned int drconf_cell_cnt = 0;
1100cd34206eSNishanth Aravamudan 	u64 lmb_size = 0;
1101ec32dd66SRobert Jennings 	const __be32 *dm = NULL;
110245b64ee6SBharata B Rao 	const __be64 *lrdr = NULL;
110345b64ee6SBharata B Rao 	struct of_drconf_cell drmem;
110445b64ee6SBharata B Rao 
110545b64ee6SBharata B Rao 	dn = of_find_node_by_path("/rtas");
110645b64ee6SBharata B Rao 	if (dn) {
110745b64ee6SBharata B Rao 		lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL);
110845b64ee6SBharata B Rao 		of_node_put(dn);
110945b64ee6SBharata B Rao 		if (lrdr)
111045b64ee6SBharata B Rao 			return be64_to_cpup(lrdr);
111145b64ee6SBharata B Rao 	}
1112cd34206eSNishanth Aravamudan 
1113cd34206eSNishanth Aravamudan 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
1114cd34206eSNishanth Aravamudan 	if (memory) {
1115cd34206eSNishanth Aravamudan 		drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
1116cd34206eSNishanth Aravamudan 		lmb_size = of_get_lmb_size(memory);
111745b64ee6SBharata B Rao 
111845b64ee6SBharata B Rao 		/* Advance to the last cell, each cell has 6 32 bit integers */
111945b64ee6SBharata B Rao 		dm += (drconf_cell_cnt - 1) * 6;
112045b64ee6SBharata B Rao 		read_drconf_cell(&drmem, &dm);
1121cd34206eSNishanth Aravamudan 		of_node_put(memory);
112245b64ee6SBharata B Rao 		return drmem.base_addr + lmb_size;
1123cd34206eSNishanth Aravamudan 	}
112445b64ee6SBharata B Rao 	return 0;
1125cd34206eSNishanth Aravamudan }
1126cd34206eSNishanth Aravamudan 
1127cd34206eSNishanth Aravamudan /*
1128cd34206eSNishanth Aravamudan  * memory_hotplug_max - return max address of memory that may be added
1129cd34206eSNishanth Aravamudan  *
1130cd34206eSNishanth Aravamudan  * This is currently only used on systems that support drconfig memory
1131cd34206eSNishanth Aravamudan  * hotplug.
1132cd34206eSNishanth Aravamudan  */
1133cd34206eSNishanth Aravamudan u64 memory_hotplug_max(void)
1134cd34206eSNishanth Aravamudan {
1135cd34206eSNishanth Aravamudan         return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
1136cd34206eSNishanth Aravamudan }
1137237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */
11389eff1a38SJesse Larrew 
1139bd03403aSJesse Larrew /* Virtual Processor Home Node (VPHN) support */
114039bf990eSJesse Larrew #ifdef CONFIG_PPC_SPLPAR
11414b6cfb2aSGreg Kurz 
11424b6cfb2aSGreg Kurz #include "vphn.h"
11434b6cfb2aSGreg Kurz 
114430c05350SNathan Fontenot struct topology_update_data {
114530c05350SNathan Fontenot 	struct topology_update_data *next;
114630c05350SNathan Fontenot 	unsigned int cpu;
114730c05350SNathan Fontenot 	int old_nid;
114830c05350SNathan Fontenot 	int new_nid;
114930c05350SNathan Fontenot };
115030c05350SNathan Fontenot 
11515de16699SAnton Blanchard static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
11529eff1a38SJesse Larrew static cpumask_t cpu_associativity_changes_mask;
11539eff1a38SJesse Larrew static int vphn_enabled;
11545d88aa85SJesse Larrew static int prrn_enabled;
11555d88aa85SJesse Larrew static void reset_topology_timer(void);
11569eff1a38SJesse Larrew 
11579eff1a38SJesse Larrew /*
11589eff1a38SJesse Larrew  * Store the current values of the associativity change counters in the
11599eff1a38SJesse Larrew  * hypervisor.
11609eff1a38SJesse Larrew  */
11619eff1a38SJesse Larrew static void setup_cpu_associativity_change_counters(void)
11629eff1a38SJesse Larrew {
1163cd9d6cc7SJesse Larrew 	int cpu;
11649eff1a38SJesse Larrew 
11655de16699SAnton Blanchard 	/* The VPHN feature supports a maximum of 8 reference points */
11665de16699SAnton Blanchard 	BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
11675de16699SAnton Blanchard 
11689eff1a38SJesse Larrew 	for_each_possible_cpu(cpu) {
1169cd9d6cc7SJesse Larrew 		int i;
11709eff1a38SJesse Larrew 		u8 *counts = vphn_cpu_change_counts[cpu];
11719eff1a38SJesse Larrew 		volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
11729eff1a38SJesse Larrew 
11735de16699SAnton Blanchard 		for (i = 0; i < distance_ref_points_depth; i++)
11749eff1a38SJesse Larrew 			counts[i] = hypervisor_counts[i];
11759eff1a38SJesse Larrew 	}
11769eff1a38SJesse Larrew }
11779eff1a38SJesse Larrew 
11789eff1a38SJesse Larrew /*
11799eff1a38SJesse Larrew  * The hypervisor maintains a set of 8 associativity change counters in
11809eff1a38SJesse Larrew  * the VPA of each cpu that correspond to the associativity levels in the
11819eff1a38SJesse Larrew  * ibm,associativity-reference-points property. When an associativity
11829eff1a38SJesse Larrew  * level changes, the corresponding counter is incremented.
11839eff1a38SJesse Larrew  *
11849eff1a38SJesse Larrew  * Set a bit in cpu_associativity_changes_mask for each cpu whose home
11859eff1a38SJesse Larrew  * node associativity levels have changed.
11869eff1a38SJesse Larrew  *
11879eff1a38SJesse Larrew  * Returns the number of cpus with unhandled associativity changes.
11889eff1a38SJesse Larrew  */
11899eff1a38SJesse Larrew static int update_cpu_associativity_changes_mask(void)
11909eff1a38SJesse Larrew {
11915d88aa85SJesse Larrew 	int cpu;
11929eff1a38SJesse Larrew 	cpumask_t *changes = &cpu_associativity_changes_mask;
11939eff1a38SJesse Larrew 
11949eff1a38SJesse Larrew 	for_each_possible_cpu(cpu) {
11959eff1a38SJesse Larrew 		int i, changed = 0;
11969eff1a38SJesse Larrew 		u8 *counts = vphn_cpu_change_counts[cpu];
11979eff1a38SJesse Larrew 		volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
11989eff1a38SJesse Larrew 
11995de16699SAnton Blanchard 		for (i = 0; i < distance_ref_points_depth; i++) {
1200d69043e8SAnton Blanchard 			if (hypervisor_counts[i] != counts[i]) {
12019eff1a38SJesse Larrew 				counts[i] = hypervisor_counts[i];
12029eff1a38SJesse Larrew 				changed = 1;
12039eff1a38SJesse Larrew 			}
12049eff1a38SJesse Larrew 		}
12059eff1a38SJesse Larrew 		if (changed) {
12063be7db6aSRobert Jennings 			cpumask_or(changes, changes, cpu_sibling_mask(cpu));
12073be7db6aSRobert Jennings 			cpu = cpu_last_thread_sibling(cpu);
12089eff1a38SJesse Larrew 		}
12099eff1a38SJesse Larrew 	}
12109eff1a38SJesse Larrew 
12115d88aa85SJesse Larrew 	return cpumask_weight(changes);
12129eff1a38SJesse Larrew }
12139eff1a38SJesse Larrew 
12149eff1a38SJesse Larrew /*
12159eff1a38SJesse Larrew  * Retrieve the new associativity information for a virtual processor's
12169eff1a38SJesse Larrew  * home node.
12179eff1a38SJesse Larrew  */
1218b08a2a12SAlistair Popple static long hcall_vphn(unsigned long cpu, __be32 *associativity)
12199eff1a38SJesse Larrew {
1220cd9d6cc7SJesse Larrew 	long rc;
12219eff1a38SJesse Larrew 	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
12229eff1a38SJesse Larrew 	u64 flags = 1;
12239eff1a38SJesse Larrew 	int hwcpu = get_hard_smp_processor_id(cpu);
12249eff1a38SJesse Larrew 
12259eff1a38SJesse Larrew 	rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
12269eff1a38SJesse Larrew 	vphn_unpack_associativity(retbuf, associativity);
12279eff1a38SJesse Larrew 
12289eff1a38SJesse Larrew 	return rc;
12299eff1a38SJesse Larrew }
12309eff1a38SJesse Larrew 
12319eff1a38SJesse Larrew static long vphn_get_associativity(unsigned long cpu,
1232b08a2a12SAlistair Popple 					__be32 *associativity)
12339eff1a38SJesse Larrew {
1234cd9d6cc7SJesse Larrew 	long rc;
12359eff1a38SJesse Larrew 
12369eff1a38SJesse Larrew 	rc = hcall_vphn(cpu, associativity);
12379eff1a38SJesse Larrew 
12389eff1a38SJesse Larrew 	switch (rc) {
12399eff1a38SJesse Larrew 	case H_FUNCTION:
12409eff1a38SJesse Larrew 		printk(KERN_INFO
12419eff1a38SJesse Larrew 			"VPHN is not supported. Disabling polling...\n");
12429eff1a38SJesse Larrew 		stop_topology_update();
12439eff1a38SJesse Larrew 		break;
12449eff1a38SJesse Larrew 	case H_HARDWARE:
12459eff1a38SJesse Larrew 		printk(KERN_ERR
12469eff1a38SJesse Larrew 			"hcall_vphn() experienced a hardware fault "
12479eff1a38SJesse Larrew 			"preventing VPHN. Disabling polling...\n");
12489eff1a38SJesse Larrew 		stop_topology_update();
12499eff1a38SJesse Larrew 	}
12509eff1a38SJesse Larrew 
12519eff1a38SJesse Larrew 	return rc;
12529eff1a38SJesse Larrew }
12539eff1a38SJesse Larrew 
12549eff1a38SJesse Larrew /*
125530c05350SNathan Fontenot  * Update the CPU maps and sysfs entries for a single CPU when its NUMA
125630c05350SNathan Fontenot  * characteristics change. This function doesn't perform any locking and is
125730c05350SNathan Fontenot  * only safe to call from stop_machine().
125830c05350SNathan Fontenot  */
125930c05350SNathan Fontenot static int update_cpu_topology(void *data)
126030c05350SNathan Fontenot {
126130c05350SNathan Fontenot 	struct topology_update_data *update;
126230c05350SNathan Fontenot 	unsigned long cpu;
126330c05350SNathan Fontenot 
126430c05350SNathan Fontenot 	if (!data)
126530c05350SNathan Fontenot 		return -EINVAL;
126630c05350SNathan Fontenot 
12673be7db6aSRobert Jennings 	cpu = smp_processor_id();
126830c05350SNathan Fontenot 
126930c05350SNathan Fontenot 	for (update = data; update; update = update->next) {
12702c0a33f9SNishanth Aravamudan 		int new_nid = update->new_nid;
127130c05350SNathan Fontenot 		if (cpu != update->cpu)
127230c05350SNathan Fontenot 			continue;
127330c05350SNathan Fontenot 
127449f8d8c0SNishanth Aravamudan 		unmap_cpu_from_node(cpu);
12752c0a33f9SNishanth Aravamudan 		map_cpu_to_node(cpu, new_nid);
12762c0a33f9SNishanth Aravamudan 		set_cpu_numa_node(cpu, new_nid);
12772c0a33f9SNishanth Aravamudan 		set_cpu_numa_mem(cpu, local_memory_node(new_nid));
1278176bbf14SJesse Larrew 		vdso_getcpu_init();
127930c05350SNathan Fontenot 	}
128030c05350SNathan Fontenot 
128130c05350SNathan Fontenot 	return 0;
128230c05350SNathan Fontenot }
128330c05350SNathan Fontenot 
1284d4edc5b6SSrivatsa S. Bhat static int update_lookup_table(void *data)
1285d4edc5b6SSrivatsa S. Bhat {
1286d4edc5b6SSrivatsa S. Bhat 	struct topology_update_data *update;
1287d4edc5b6SSrivatsa S. Bhat 
1288d4edc5b6SSrivatsa S. Bhat 	if (!data)
1289d4edc5b6SSrivatsa S. Bhat 		return -EINVAL;
1290d4edc5b6SSrivatsa S. Bhat 
1291d4edc5b6SSrivatsa S. Bhat 	/*
1292d4edc5b6SSrivatsa S. Bhat 	 * Upon topology update, the numa-cpu lookup table needs to be updated
1293d4edc5b6SSrivatsa S. Bhat 	 * for all threads in the core, including offline CPUs, to ensure that
1294d4edc5b6SSrivatsa S. Bhat 	 * future hotplug operations respect the cpu-to-node associativity
1295d4edc5b6SSrivatsa S. Bhat 	 * properly.
1296d4edc5b6SSrivatsa S. Bhat 	 */
1297d4edc5b6SSrivatsa S. Bhat 	for (update = data; update; update = update->next) {
1298d4edc5b6SSrivatsa S. Bhat 		int nid, base, j;
1299d4edc5b6SSrivatsa S. Bhat 
1300d4edc5b6SSrivatsa S. Bhat 		nid = update->new_nid;
1301d4edc5b6SSrivatsa S. Bhat 		base = cpu_first_thread_sibling(update->cpu);
1302d4edc5b6SSrivatsa S. Bhat 
1303d4edc5b6SSrivatsa S. Bhat 		for (j = 0; j < threads_per_core; j++) {
1304d4edc5b6SSrivatsa S. Bhat 			update_numa_cpu_lookup_table(base + j, nid);
1305d4edc5b6SSrivatsa S. Bhat 		}
1306d4edc5b6SSrivatsa S. Bhat 	}
1307d4edc5b6SSrivatsa S. Bhat 
1308d4edc5b6SSrivatsa S. Bhat 	return 0;
1309d4edc5b6SSrivatsa S. Bhat }
1310d4edc5b6SSrivatsa S. Bhat 
131130c05350SNathan Fontenot /*
13129eff1a38SJesse Larrew  * Update the node maps and sysfs entries for each cpu whose home node
131379c5fcebSJesse Larrew  * has changed. Returns 1 when the topology has changed, and 0 otherwise.
1314*3e401f7aSThiago Jung Bauermann  *
1315*3e401f7aSThiago Jung Bauermann  * cpus_locked says whether we already hold cpu_hotplug_lock.
13169eff1a38SJesse Larrew  */
1317*3e401f7aSThiago Jung Bauermann int numa_update_cpu_topology(bool cpus_locked)
13189eff1a38SJesse Larrew {
13193be7db6aSRobert Jennings 	unsigned int cpu, sibling, changed = 0;
132030c05350SNathan Fontenot 	struct topology_update_data *updates, *ud;
1321b08a2a12SAlistair Popple 	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
1322176bbf14SJesse Larrew 	cpumask_t updated_cpus;
13238a25a2fdSKay Sievers 	struct device *dev;
13243be7db6aSRobert Jennings 	int weight, new_nid, i = 0;
132530c05350SNathan Fontenot 
13262d73bae1SNishanth Aravamudan 	if (!prrn_enabled && !vphn_enabled)
13272d73bae1SNishanth Aravamudan 		return 0;
13282d73bae1SNishanth Aravamudan 
	/* One 'updates' slot per flagged CPU; 'i' below indexes into it. */
132930c05350SNathan Fontenot 	weight = cpumask_weight(&cpu_associativity_changes_mask);
133030c05350SNathan Fontenot 	if (!weight)
133130c05350SNathan Fontenot 		return 0;
133230c05350SNathan Fontenot 
133330c05350SNathan Fontenot 	updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL);
133430c05350SNathan Fontenot 	if (!updates)
133530c05350SNathan Fontenot 		return 0;
13369eff1a38SJesse Larrew 
1337176bbf14SJesse Larrew 	cpumask_clear(&updated_cpus);
13389eff1a38SJesse Larrew 
1339104699c0SKOSAKI Motohiro 	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
13403be7db6aSRobert Jennings 		/*
13413be7db6aSRobert Jennings 		 * If siblings aren't flagged for changes, updates list
13423be7db6aSRobert Jennings 		 * will be too short. Skip on this update and set for next
13433be7db6aSRobert Jennings 		 * update.
13443be7db6aSRobert Jennings 		 */
13453be7db6aSRobert Jennings 		if (!cpumask_subset(cpu_sibling_mask(cpu),
13463be7db6aSRobert Jennings 					&cpu_associativity_changes_mask)) {
13473be7db6aSRobert Jennings 			pr_info("Sibling bits not set for associativity "
13483be7db6aSRobert Jennings 					"change, cpu%d\n", cpu);
13493be7db6aSRobert Jennings 			cpumask_or(&cpu_associativity_changes_mask,
13503be7db6aSRobert Jennings 					&cpu_associativity_changes_mask,
13513be7db6aSRobert Jennings 					cpu_sibling_mask(cpu));
13523be7db6aSRobert Jennings 			cpu = cpu_last_thread_sibling(cpu);
13533be7db6aSRobert Jennings 			continue;
13543be7db6aSRobert Jennings 		}
13553be7db6aSRobert Jennings 
13563be7db6aSRobert Jennings 		/* Use associativity from first thread for all siblings */
13579eff1a38SJesse Larrew 		vphn_get_associativity(cpu, associativity);
13583be7db6aSRobert Jennings 		new_nid = associativity_to_nid(associativity);
13593be7db6aSRobert Jennings 		if (new_nid < 0 || !node_online(new_nid))
13603be7db6aSRobert Jennings 			new_nid = first_online_node;
13619eff1a38SJesse Larrew 
13623be7db6aSRobert Jennings 		if (new_nid == numa_cpu_lookup_table[cpu]) {
			/* Node unchanged: clear the whole core and move on. */
13633be7db6aSRobert Jennings 			cpumask_andnot(&cpu_associativity_changes_mask,
13643be7db6aSRobert Jennings 					&cpu_associativity_changes_mask,
13653be7db6aSRobert Jennings 					cpu_sibling_mask(cpu));
13663be7db6aSRobert Jennings 			cpu = cpu_last_thread_sibling(cpu);
13673be7db6aSRobert Jennings 			continue;
13683be7db6aSRobert Jennings 		}
13699eff1a38SJesse Larrew 
13703be7db6aSRobert Jennings 		for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
13713be7db6aSRobert Jennings 			ud = &updates[i++];
13723be7db6aSRobert Jennings 			ud->cpu = sibling;
13733be7db6aSRobert Jennings 			ud->new_nid = new_nid;
13743be7db6aSRobert Jennings 			ud->old_nid = numa_cpu_lookup_table[sibling];
13753be7db6aSRobert Jennings 			cpumask_set_cpu(sibling, &updated_cpus);
			/* Link entries into a singly-linked list ending in NULL
			 * (updates was kzalloc()'ed, so the last ->next is 0). */
137630c05350SNathan Fontenot 			if (i < weight)
137730c05350SNathan Fontenot 				ud->next = &updates[i];
137830c05350SNathan Fontenot 		}
13793be7db6aSRobert Jennings 		cpu = cpu_last_thread_sibling(cpu);
13803be7db6aSRobert Jennings 	}
13819eff1a38SJesse Larrew 
13822d73bae1SNishanth Aravamudan 	pr_debug("Topology update for the following CPUs:\n");
13832d73bae1SNishanth Aravamudan 	if (cpumask_weight(&updated_cpus)) {
13842d73bae1SNishanth Aravamudan 		for (ud = &updates[0]; ud; ud = ud->next) {
13852d73bae1SNishanth Aravamudan 			pr_debug("cpu %d moving from node %d "
13862d73bae1SNishanth Aravamudan 					  "to %d\n", ud->cpu,
13872d73bae1SNishanth Aravamudan 					  ud->old_nid, ud->new_nid);
13882d73bae1SNishanth Aravamudan 		}
13892d73bae1SNishanth Aravamudan 	}
13902d73bae1SNishanth Aravamudan 
13919a013361SMichael Wang 	/*
13929a013361SMichael Wang 	 * In cases where we have nothing to update (because the updates list
13939a013361SMichael Wang 	 * is too short or because the new topology is same as the old one),
13949a013361SMichael Wang 	 * skip invoking update_cpu_topology() via stop-machine(). This is
13959a013361SMichael Wang 	 * necessary (and not just a fast-path optimization) since stop-machine
13969a013361SMichael Wang 	 * can end up electing a random CPU to run update_cpu_topology(), and
13979a013361SMichael Wang 	 * thus trick us into setting up incorrect cpu-node mappings (since
13989a013361SMichael Wang 	 * 'updates' is kzalloc()'ed).
13999a013361SMichael Wang 	 *
14009a013361SMichael Wang 	 * And for the similar reason, we will skip all the following updating.
14019a013361SMichael Wang 	 */
14029a013361SMichael Wang 	if (!cpumask_weight(&updated_cpus))
14039a013361SMichael Wang 		goto out;
14049a013361SMichael Wang 
	/* Remap each affected CPU on the CPU itself via stop_machine(). */
1405*3e401f7aSThiago Jung Bauermann 	if (cpus_locked)
1406*3e401f7aSThiago Jung Bauermann 		stop_machine_cpuslocked(update_cpu_topology, &updates[0],
1407*3e401f7aSThiago Jung Bauermann 					&updated_cpus);
1408*3e401f7aSThiago Jung Bauermann 	else
1409176bbf14SJesse Larrew 		stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
141030c05350SNathan Fontenot 
1411d4edc5b6SSrivatsa S. Bhat 	/*
1412d4edc5b6SSrivatsa S. Bhat 	 * Update the numa-cpu lookup table with the new mappings, even for
1413d4edc5b6SSrivatsa S. Bhat 	 * offline CPUs. It is best to perform this update from the stop-
1414d4edc5b6SSrivatsa S. Bhat 	 * machine context.
1415d4edc5b6SSrivatsa S. Bhat 	 */
1416*3e401f7aSThiago Jung Bauermann 	if (cpus_locked)
1417*3e401f7aSThiago Jung Bauermann 		stop_machine_cpuslocked(update_lookup_table, &updates[0],
1418*3e401f7aSThiago Jung Bauermann 					cpumask_of(raw_smp_processor_id()));
1419*3e401f7aSThiago Jung Bauermann 	else
1420d4edc5b6SSrivatsa S. Bhat 		stop_machine(update_lookup_table, &updates[0],
1421d4edc5b6SSrivatsa S. Bhat 			     cpumask_of(raw_smp_processor_id()));
1422d4edc5b6SSrivatsa S. Bhat 
	/* Move each CPU's sysfs entry to its new node and notify userspace. */
142330c05350SNathan Fontenot 	for (ud = &updates[0]; ud; ud = ud->next) {
1424dd023217SNathan Fontenot 		unregister_cpu_under_node(ud->cpu, ud->old_nid);
1425dd023217SNathan Fontenot 		register_cpu_under_node(ud->cpu, ud->new_nid);
1426dd023217SNathan Fontenot 
142730c05350SNathan Fontenot 		dev = get_cpu_device(ud->cpu);
14288a25a2fdSKay Sievers 		if (dev)
14298a25a2fdSKay Sievers 			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
143030c05350SNathan Fontenot 		cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
143179c5fcebSJesse Larrew 		changed = 1;
14329eff1a38SJesse Larrew 	}
14339eff1a38SJesse Larrew 
14349a013361SMichael Wang out:
143530c05350SNathan Fontenot 	kfree(updates);
143679c5fcebSJesse Larrew 	return changed;
14379eff1a38SJesse Larrew }
14389eff1a38SJesse Larrew 
/*
 * Scheduler hook: re-evaluate CPU topology.  The caller must already
 * hold cpu_hotplug_lock (asserted below), hence cpus_locked = true.
 */
1439*3e401f7aSThiago Jung Bauermann int arch_update_cpu_topology(void)
1440*3e401f7aSThiago Jung Bauermann {
1441*3e401f7aSThiago Jung Bauermann 	lockdep_assert_cpus_held();
1442*3e401f7aSThiago Jung Bauermann 	return numa_update_cpu_topology(true);
1443*3e401f7aSThiago Jung Bauermann }
1444*3e401f7aSThiago Jung Bauermann 
/* Workqueue callback: rebuild the scheduler domains to pick up a new topology. */
14459eff1a38SJesse Larrew static void topology_work_fn(struct work_struct *work)
14469eff1a38SJesse Larrew {
14479eff1a38SJesse Larrew 	rebuild_sched_domains();
14489eff1a38SJesse Larrew }
14499eff1a38SJesse Larrew static DECLARE_WORK(topology_work, topology_work_fn);
14509eff1a38SJesse Larrew 
/* Defer the sched-domain rebuild to process context via the shared workqueue. */
1451ec32dd66SRobert Jennings static void topology_schedule_update(void)
14529eff1a38SJesse Larrew {
14539eff1a38SJesse Larrew 	schedule_work(&topology_work);
14549eff1a38SJesse Larrew }
14559eff1a38SJesse Larrew 
/*
 * Periodic poll (armed by reset_topology_timer()).  In PRRN mode a
 * pending associativity change triggers an update directly; in VPHN
 * mode the hypervisor change counters are sampled first and the timer
 * is re-armed for the next poll.
 */
14569eff1a38SJesse Larrew static void topology_timer_fn(unsigned long ignored)
14579eff1a38SJesse Larrew {
14585d88aa85SJesse Larrew 	if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
14595d88aa85SJesse Larrew 		topology_schedule_update();
14605d88aa85SJesse Larrew 	else if (vphn_enabled) {
14619eff1a38SJesse Larrew 		if (update_cpu_associativity_changes_mask() > 0)
14629eff1a38SJesse Larrew 			topology_schedule_update();
14635d88aa85SJesse Larrew 		reset_topology_timer();
14645d88aa85SJesse Larrew 	}
14659eff1a38SJesse Larrew }
14669eff1a38SJesse Larrew static struct timer_list topology_timer =
14679eff1a38SJesse Larrew 	TIMER_INITIALIZER(topology_timer_fn, 0, 0);
14689eff1a38SJesse Larrew 
/* (Re)arm the polling timer to fire 60 seconds from now. */
14695d88aa85SJesse Larrew static void reset_topology_timer(void)
14709eff1a38SJesse Larrew {
14719eff1a38SJesse Larrew 	topology_timer.data = 0;
14729eff1a38SJesse Larrew 	topology_timer.expires = jiffies + 60 * HZ;
14735d88aa85SJesse Larrew 	mod_timer(&topology_timer, topology_timer.expires);
14749eff1a38SJesse Larrew }
14759eff1a38SJesse Larrew 
1476601abdc3SNathan Fontenot #ifdef CONFIG_SMP
1477601abdc3SNathan Fontenot 
/*
 * Flag all sibling threads of @core_id for an associativity update and
 * re-arm the polling timer.  NOTE(review): core_id comes from the cpu
 * node's "reg" property and is used directly as a logical CPU number
 * here — confirm that mapping holds on all platforms.
 */
14785d88aa85SJesse Larrew static void stage_topology_update(int core_id)
14795d88aa85SJesse Larrew {
14805d88aa85SJesse Larrew 	cpumask_or(&cpu_associativity_changes_mask,
14815d88aa85SJesse Larrew 		&cpu_associativity_changes_mask, cpu_sibling_mask(core_id));
14825d88aa85SJesse Larrew 	reset_topology_timer();
14835d88aa85SJesse Larrew }
14845d88aa85SJesse Larrew 
14855d88aa85SJesse Larrew static int dt_update_callback(struct notifier_block *nb,
14865d88aa85SJesse Larrew 				unsigned long action, void *data)
14875d88aa85SJesse Larrew {
1488f5242e5aSGrant Likely 	struct of_reconfig_data *update = data;
14895d88aa85SJesse Larrew 	int rc = NOTIFY_DONE;
14905d88aa85SJesse Larrew 
14915d88aa85SJesse Larrew 	switch (action) {
14925d88aa85SJesse Larrew 	case OF_RECONFIG_UPDATE_PROPERTY:
149330c05350SNathan Fontenot 		if (!of_prop_cmp(update->dn->type, "cpu") &&
149430c05350SNathan Fontenot 		    !of_prop_cmp(update->prop->name, "ibm,associativity")) {
14955d88aa85SJesse Larrew 			u32 core_id;
14965d88aa85SJesse Larrew 			of_property_read_u32(update->dn, "reg", &core_id);
14975d88aa85SJesse Larrew 			stage_topology_update(core_id);
14985d88aa85SJesse Larrew 			rc = NOTIFY_OK;
14995d88aa85SJesse Larrew 		}
15005d88aa85SJesse Larrew 		break;
15015d88aa85SJesse Larrew 	}
15025d88aa85SJesse Larrew 
15035d88aa85SJesse Larrew 	return rc;
15045d88aa85SJesse Larrew }
15055d88aa85SJesse Larrew 
/* Registered with of_reconfig when PRRN polling is enabled (SMP only). */
15065d88aa85SJesse Larrew static struct notifier_block dt_update_nb = {
15075d88aa85SJesse Larrew 	.notifier_call = dt_update_callback,
15085d88aa85SJesse Larrew };
15095d88aa85SJesse Larrew 
1510601abdc3SNathan Fontenot #endif
1511601abdc3SNathan Fontenot 
15129eff1a38SJesse Larrew /*
15135d88aa85SJesse Larrew  * Start polling for associativity changes.
15149eff1a38SJesse Larrew  */
15159eff1a38SJesse Larrew int start_topology_update(void)
15169eff1a38SJesse Larrew {
15179eff1a38SJesse Larrew 	int rc = 0;
15189eff1a38SJesse Larrew 
	/* PRRN takes precedence; VPHN is only used on shared-processor LPARs. */
15195d88aa85SJesse Larrew 	if (firmware_has_feature(FW_FEATURE_PRRN)) {
15205d88aa85SJesse Larrew 		if (!prrn_enabled) {
15215d88aa85SJesse Larrew 			prrn_enabled = 1;
15225d88aa85SJesse Larrew 			vphn_enabled = 0;
1523601abdc3SNathan Fontenot #ifdef CONFIG_SMP
			/* rc reflects notifier registration; stays 0 on !SMP */
15245d88aa85SJesse Larrew 			rc = of_reconfig_notifier_register(&dt_update_nb);
1525601abdc3SNathan Fontenot #endif
15265d88aa85SJesse Larrew 		}
1527b7abef04SJesse Larrew 	} else if (firmware_has_feature(FW_FEATURE_VPHN) &&
1528f13c13a0SAnton Blanchard 		   lppaca_shared_proc(get_lppaca())) {
15295d88aa85SJesse Larrew 		if (!vphn_enabled) {
15305d88aa85SJesse Larrew 			prrn_enabled = 0;
15319eff1a38SJesse Larrew 			vphn_enabled = 1;
15329eff1a38SJesse Larrew 			setup_cpu_associativity_change_counters();
15339eff1a38SJesse Larrew 			init_timer_deferrable(&topology_timer);
15345d88aa85SJesse Larrew 			reset_topology_timer();
15355d88aa85SJesse Larrew 		}
15369eff1a38SJesse Larrew 	}
15379eff1a38SJesse Larrew 
15389eff1a38SJesse Larrew 	return rc;
15399eff1a38SJesse Larrew }
15409eff1a38SJesse Larrew 
15419eff1a38SJesse Larrew /*
15429eff1a38SJesse Larrew  * Disable polling for VPHN associativity changes.
15439eff1a38SJesse Larrew  */
15449eff1a38SJesse Larrew int stop_topology_update(void)
15459eff1a38SJesse Larrew {
15465d88aa85SJesse Larrew 	int rc = 0;
15475d88aa85SJesse Larrew 
15485d88aa85SJesse Larrew 	if (prrn_enabled) {
15495d88aa85SJesse Larrew 		prrn_enabled = 0;
1550601abdc3SNathan Fontenot #ifdef CONFIG_SMP
15515d88aa85SJesse Larrew 		rc = of_reconfig_notifier_unregister(&dt_update_nb);
1552601abdc3SNathan Fontenot #endif
15535d88aa85SJesse Larrew 	} else if (vphn_enabled) {
15549eff1a38SJesse Larrew 		vphn_enabled = 0;
		/* rc here is del_timer_sync()'s "timer was pending" result */
15555d88aa85SJesse Larrew 		rc = del_timer_sync(&topology_timer);
15569eff1a38SJesse Larrew 	}
15575d88aa85SJesse Larrew 
15585d88aa85SJesse Larrew 	return rc;
1559ab1f9dacSPaul Mackerras }
1560e04fa612SNathan Fontenot 
/* Report whether PRRN-driven topology updates are currently active. */
1561e04fa612SNathan Fontenot int prrn_is_enabled(void)
1562e04fa612SNathan Fontenot {
1563e04fa612SNathan Fontenot 	return prrn_enabled;
1564e04fa612SNathan Fontenot }
1565e04fa612SNathan Fontenot 
1566e04fa612SNathan Fontenot static int topology_read(struct seq_file *file, void *v)
1567e04fa612SNathan Fontenot {
1568e04fa612SNathan Fontenot 	if (vphn_enabled || prrn_enabled)
1569e04fa612SNathan Fontenot 		seq_puts(file, "on\n");
1570e04fa612SNathan Fontenot 	else
1571e04fa612SNathan Fontenot 		seq_puts(file, "off\n");
1572e04fa612SNathan Fontenot 
1573e04fa612SNathan Fontenot 	return 0;
1574e04fa612SNathan Fontenot }
1575e04fa612SNathan Fontenot 
/* seq_file open hook for /proc/powerpc/topology_updates. */
1576e04fa612SNathan Fontenot static int topology_open(struct inode *inode, struct file *file)
1577e04fa612SNathan Fontenot {
1578e04fa612SNathan Fontenot 	return single_open(file, topology_read, NULL);
1579e04fa612SNathan Fontenot }
1580e04fa612SNathan Fontenot 
1581e04fa612SNathan Fontenot static ssize_t topology_write(struct file *file, const char __user *buf,
1582e04fa612SNathan Fontenot 			      size_t count, loff_t *off)
1583e04fa612SNathan Fontenot {
1584e04fa612SNathan Fontenot 	char kbuf[4]; /* "on" or "off" plus null. */
1585e04fa612SNathan Fontenot 	int read_len;
1586e04fa612SNathan Fontenot 
1587e04fa612SNathan Fontenot 	read_len = count < 3 ? count : 3;
1588e04fa612SNathan Fontenot 	if (copy_from_user(kbuf, buf, read_len))
1589e04fa612SNathan Fontenot 		return -EINVAL;
1590e04fa612SNathan Fontenot 
1591e04fa612SNathan Fontenot 	kbuf[read_len] = '\0';
1592e04fa612SNathan Fontenot 
1593e04fa612SNathan Fontenot 	if (!strncmp(kbuf, "on", 2))
1594e04fa612SNathan Fontenot 		start_topology_update();
1595e04fa612SNathan Fontenot 	else if (!strncmp(kbuf, "off", 3))
1596e04fa612SNathan Fontenot 		stop_topology_update();
1597e04fa612SNathan Fontenot 	else
1598e04fa612SNathan Fontenot 		return -EINVAL;
1599e04fa612SNathan Fontenot 
1600e04fa612SNathan Fontenot 	return count;
1601e04fa612SNathan Fontenot }
1602e04fa612SNathan Fontenot 
/* File operations for /proc/powerpc/topology_updates. */
1603e04fa612SNathan Fontenot static const struct file_operations topology_ops = {
1604e04fa612SNathan Fontenot 	.read = seq_read,
1605e04fa612SNathan Fontenot 	.write = topology_write,
1606e04fa612SNathan Fontenot 	.open = topology_open,
1607e04fa612SNathan Fontenot 	.release = single_release
1608e04fa612SNathan Fontenot };
1609e04fa612SNathan Fontenot 
/*
 * Boot-time init: optionally start topology polling and expose the
 * /proc/powerpc/topology_updates control file.
 */
1610e04fa612SNathan Fontenot static int topology_update_init(void)
1611e04fa612SNathan Fontenot {
16122d73bae1SNishanth Aravamudan 	/* Do not poll for changes if disabled at boot */
16132d73bae1SNishanth Aravamudan 	if (topology_updates_enabled)
1614e04fa612SNathan Fontenot 		start_topology_update();
16152d73bae1SNishanth Aravamudan 
16162d15b9b4SNishanth Aravamudan 	if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
16172d15b9b4SNishanth Aravamudan 		return -ENOMEM;
1618e04fa612SNathan Fontenot 
1619e04fa612SNathan Fontenot 	return 0;
1620e04fa612SNathan Fontenot }
1621e04fa612SNathan Fontenot device_initcall(topology_update_init);
162239bf990eSJesse Larrew #endif /* CONFIG_PPC_SPLPAR */
1623