xref: /linux/arch/powerpc/mm/numa.c (revision 2483ef056f6e42f61cd266452e2841165dfe1b5c)
1ab1f9dacSPaul Mackerras /*
2ab1f9dacSPaul Mackerras  * pSeries NUMA support
3ab1f9dacSPaul Mackerras  *
4ab1f9dacSPaul Mackerras  * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
5ab1f9dacSPaul Mackerras  *
6ab1f9dacSPaul Mackerras  * This program is free software; you can redistribute it and/or
7ab1f9dacSPaul Mackerras  * modify it under the terms of the GNU General Public License
8ab1f9dacSPaul Mackerras  * as published by the Free Software Foundation; either version
9ab1f9dacSPaul Mackerras  * 2 of the License, or (at your option) any later version.
10ab1f9dacSPaul Mackerras  */
112d73bae1SNishanth Aravamudan #define pr_fmt(fmt) "numa: " fmt
122d73bae1SNishanth Aravamudan 
13ab1f9dacSPaul Mackerras #include <linux/threads.h>
14ab1f9dacSPaul Mackerras #include <linux/bootmem.h>
15ab1f9dacSPaul Mackerras #include <linux/init.h>
16ab1f9dacSPaul Mackerras #include <linux/mm.h>
17ab1f9dacSPaul Mackerras #include <linux/mmzone.h>
184b16f8e2SPaul Gortmaker #include <linux/export.h>
19ab1f9dacSPaul Mackerras #include <linux/nodemask.h>
20ab1f9dacSPaul Mackerras #include <linux/cpu.h>
21ab1f9dacSPaul Mackerras #include <linux/notifier.h>
2295f72d1eSYinghai Lu #include <linux/memblock.h>
236df1646eSMichael Ellerman #include <linux/of.h>
2406eccea6SDave Hansen #include <linux/pfn.h>
259eff1a38SJesse Larrew #include <linux/cpuset.h>
269eff1a38SJesse Larrew #include <linux/node.h>
2730c05350SNathan Fontenot #include <linux/stop_machine.h>
28e04fa612SNathan Fontenot #include <linux/proc_fs.h>
29e04fa612SNathan Fontenot #include <linux/seq_file.h>
30e04fa612SNathan Fontenot #include <linux/uaccess.h>
31191a7120SLinus Torvalds #include <linux/slab.h>
323be7db6aSRobert Jennings #include <asm/cputhreads.h>
3345fb6ceaSAnton Blanchard #include <asm/sparsemem.h>
34d9b2b2a2SDavid S. Miller #include <asm/prom.h>
352249ca9dSPaul Mackerras #include <asm/smp.h>
36d4edc5b6SSrivatsa S. Bhat #include <asm/cputhreads.h>
37d4edc5b6SSrivatsa S. Bhat #include <asm/topology.h>
389eff1a38SJesse Larrew #include <asm/firmware.h>
399eff1a38SJesse Larrew #include <asm/paca.h>
4039bf990eSJesse Larrew #include <asm/hvcall.h>
41ae3a197eSDavid Howells #include <asm/setup.h>
42176bbf14SJesse Larrew #include <asm/vdso.h>
43514a9cb3SNathan Fontenot #include <asm/drmem.h>
44ab1f9dacSPaul Mackerras 
45ab1f9dacSPaul Mackerras static int numa_enabled = 1;
46ab1f9dacSPaul Mackerras 
471daa6d08SBalbir Singh static char *cmdline __initdata;
481daa6d08SBalbir Singh 
49ab1f9dacSPaul Mackerras static int numa_debug;
50ab1f9dacSPaul Mackerras #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
51ab1f9dacSPaul Mackerras 
5245fb6ceaSAnton Blanchard int numa_cpu_lookup_table[NR_CPUS];
5325863de0SAnton Blanchard cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
54ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES];
5545fb6ceaSAnton Blanchard 
5645fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpu_lookup_table);
5725863de0SAnton Blanchard EXPORT_SYMBOL(node_to_cpumask_map);
5845fb6ceaSAnton Blanchard EXPORT_SYMBOL(node_data);
5945fb6ceaSAnton Blanchard 
60ab1f9dacSPaul Mackerras static int min_common_depth;
61237a0989SMike Kravetz static int n_mem_addr_cells, n_mem_size_cells;
6241eab6f8SAnton Blanchard static int form1_affinity;
6341eab6f8SAnton Blanchard 
6441eab6f8SAnton Blanchard #define MAX_DISTANCE_REF_POINTS 4
6541eab6f8SAnton Blanchard static int distance_ref_points_depth;
66b08a2a12SAlistair Popple static const __be32 *distance_ref_points;
6741eab6f8SAnton Blanchard static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
68ab1f9dacSPaul Mackerras 
6925863de0SAnton Blanchard /*
7025863de0SAnton Blanchard  * Allocate node_to_cpumask_map based on number of available nodes
7125863de0SAnton Blanchard  * Requires node_possible_map to be valid.
7225863de0SAnton Blanchard  *
739512938bSWanlong Gao  * Note: cpumask_of_node() is not valid until after this is done.
7425863de0SAnton Blanchard  */
7525863de0SAnton Blanchard static void __init setup_node_to_cpumask_map(void)
7625863de0SAnton Blanchard {
77f9d531b8SCody P Schafer 	unsigned int node;
7825863de0SAnton Blanchard 
7925863de0SAnton Blanchard 	/* setup nr_node_ids if not done yet */
80f9d531b8SCody P Schafer 	if (nr_node_ids == MAX_NUMNODES)
81f9d531b8SCody P Schafer 		setup_nr_node_ids();
8225863de0SAnton Blanchard 
8325863de0SAnton Blanchard 	/* allocate the map */
	/* boot-time allocation of one cpumask per node in node_possible_map */
84c118baf8SRaghavendra K T 	for_each_node(node)
8525863de0SAnton Blanchard 		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
8625863de0SAnton Blanchard 
8725863de0SAnton Blanchard 	/* cpumask_of_node() will now work */
8825863de0SAnton Blanchard 	dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
8925863de0SAnton Blanchard }
9025863de0SAnton Blanchard 
/*
 * Fake NUMA support: decide whether memory ending at @end_pfn crosses the
 * next boundary parsed from the fake-NUMA command line (see @cmdline).
 * On success returns 1 and stores the newly created fake node id in *nid;
 * otherwise returns 0 and, if fake nodes were created earlier, rewrites
 * *nid to the last fake node id.  fake_nid/curr_boundary are static so
 * parsing state persists across successive calls during the memory scan.
 */
9155671f3cSStephen Rothwell static int __init fake_numa_create_new_node(unsigned long end_pfn,
921daa6d08SBalbir Singh 						unsigned int *nid)
931daa6d08SBalbir Singh {
941daa6d08SBalbir Singh 	unsigned long long mem;
951daa6d08SBalbir Singh 	char *p = cmdline;
961daa6d08SBalbir Singh 	static unsigned int fake_nid;
971daa6d08SBalbir Singh 	static unsigned long long curr_boundary;
981daa6d08SBalbir Singh 
991daa6d08SBalbir Singh 	/*
1001daa6d08SBalbir Singh 	 * Modify node id, iff we started creating NUMA nodes
1011daa6d08SBalbir Singh 	 * We want to continue from where we left of the last time
1021daa6d08SBalbir Singh 	 */
1031daa6d08SBalbir Singh 	if (fake_nid)
1041daa6d08SBalbir Singh 		*nid = fake_nid;
1051daa6d08SBalbir Singh 	/*
1061daa6d08SBalbir Singh 	 * In case there are no more arguments to parse, the
1071daa6d08SBalbir Singh 	 * node_id should be the same as the last fake node id
1081daa6d08SBalbir Singh 	 * (we've handled this above).
1091daa6d08SBalbir Singh 	 */
1101daa6d08SBalbir Singh 	if (!p)
1111daa6d08SBalbir Singh 		return 0;
1121daa6d08SBalbir Singh 
1131daa6d08SBalbir Singh 	mem = memparse(p, &p);
1141daa6d08SBalbir Singh 	if (!mem)
1151daa6d08SBalbir Singh 		return 0;
1161daa6d08SBalbir Singh 
	/* boundaries must be given in ascending order; ignore stale ones */
1171daa6d08SBalbir Singh 	if (mem < curr_boundary)
1181daa6d08SBalbir Singh 		return 0;
1191daa6d08SBalbir Singh 
1201daa6d08SBalbir Singh 	curr_boundary = mem;
1211daa6d08SBalbir Singh 
1221daa6d08SBalbir Singh 	if ((end_pfn << PAGE_SHIFT) > mem) {
1231daa6d08SBalbir Singh 		/*
1241daa6d08SBalbir Singh 		 * Skip commas and spaces
1251daa6d08SBalbir Singh 		 */
1261daa6d08SBalbir Singh 		while (*p == ',' || *p == ' ' || *p == '\t')
1271daa6d08SBalbir Singh 			p++;
1281daa6d08SBalbir Singh 
1291daa6d08SBalbir Singh 		cmdline = p;
1301daa6d08SBalbir Singh 		fake_nid++;
1311daa6d08SBalbir Singh 		*nid = fake_nid;
1321daa6d08SBalbir Singh 		dbg("created new fake_node with id %d\n", fake_nid);
1331daa6d08SBalbir Singh 		return 1;
1341daa6d08SBalbir Singh 	}
1351daa6d08SBalbir Singh 	return 0;
1361daa6d08SBalbir Singh }
1371daa6d08SBalbir Singh 
/* Invalidate every cpu->node mapping (-1 == "not mapped yet"). */
138d4edc5b6SSrivatsa S. Bhat static void reset_numa_cpu_lookup_table(void)
139d4edc5b6SSrivatsa S. Bhat {
140d4edc5b6SSrivatsa S. Bhat 	unsigned int cpu;
141d4edc5b6SSrivatsa S. Bhat 
142d4edc5b6SSrivatsa S. Bhat 	for_each_possible_cpu(cpu)
143d4edc5b6SSrivatsa S. Bhat 		numa_cpu_lookup_table[cpu] = -1;
144d4edc5b6SSrivatsa S. Bhat }
145d4edc5b6SSrivatsa S. Bhat 
/*
 * Record cpu->node in the lookup table and add the cpu to the node's
 * cpumask (idempotent: the bit is only set if not already present).
 */
146d4edc5b6SSrivatsa S. Bhat static void map_cpu_to_node(int cpu, int node)
147d4edc5b6SSrivatsa S. Bhat {
148d4edc5b6SSrivatsa S. Bhat 	update_numa_cpu_lookup_table(cpu, node);
14945fb6ceaSAnton Blanchard 
150bf4b85b0SNathan Lynch 	dbg("adding cpu %d to node %d\n", cpu, node);
151bf4b85b0SNathan Lynch 
15225863de0SAnton Blanchard 	if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node])))
15325863de0SAnton Blanchard 		cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
154ab1f9dacSPaul Mackerras }
155ab1f9dacSPaul Mackerras 
15639bf990eSJesse Larrew #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
/*
 * Drop the cpu from its node's cpumask.  Note the lookup-table entry
 * itself is not cleared here; only the cpumask bit is removed.
 */
157ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu)
158ab1f9dacSPaul Mackerras {
159ab1f9dacSPaul Mackerras 	int node = numa_cpu_lookup_table[cpu];
160ab1f9dacSPaul Mackerras 
161ab1f9dacSPaul Mackerras 	dbg("removing cpu %lu from node %d\n", cpu, node);
162ab1f9dacSPaul Mackerras 
16325863de0SAnton Blanchard 	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
164429f4d8dSAnton Blanchard 		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
165ab1f9dacSPaul Mackerras 	} else {
166ab1f9dacSPaul Mackerras 		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
167ab1f9dacSPaul Mackerras 		       cpu, node);
168ab1f9dacSPaul Mackerras 	}
169ab1f9dacSPaul Mackerras }
17039bf990eSJesse Larrew #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
171ab1f9dacSPaul Mackerras 
172ab1f9dacSPaul Mackerras /* must hold reference to node during call */
/*
 * Return the raw "ibm,associativity" property of @dev, or NULL if absent.
 * The returned pointer aliases the device tree node's data, hence the
 * "must hold reference to node during call" note above.
 */
173b08a2a12SAlistair Popple static const __be32 *of_get_associativity(struct device_node *dev)
174ab1f9dacSPaul Mackerras {
175e2eb6392SStephen Rothwell 	return of_get_property(dev, "ibm,associativity", NULL);
176ab1f9dacSPaul Mackerras }
177ab1f9dacSPaul Mackerras 
/*
 * NUMA distance between nodes @a and @b.  Without form 1 affinity this
 * degenerates to LOCAL_DISTANCE/REMOTE_DISTANCE; with it, the distance
 * doubles for every reference-point level at which the two nodes' cached
 * associativity entries differ.
 */
17841eab6f8SAnton Blanchard int __node_distance(int a, int b)
17941eab6f8SAnton Blanchard {
18041eab6f8SAnton Blanchard 	int i;
18141eab6f8SAnton Blanchard 	int distance = LOCAL_DISTANCE;
18241eab6f8SAnton Blanchard 
18341eab6f8SAnton Blanchard 	if (!form1_affinity)
1847122beeeSVaidyanathan Srinivasan 		return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
18541eab6f8SAnton Blanchard 
18641eab6f8SAnton Blanchard 	for (i = 0; i < distance_ref_points_depth; i++) {
18741eab6f8SAnton Blanchard 		if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
18841eab6f8SAnton Blanchard 			break;
18941eab6f8SAnton Blanchard 
19041eab6f8SAnton Blanchard 		/* Double the distance for each NUMA level */
19141eab6f8SAnton Blanchard 		distance *= 2;
19241eab6f8SAnton Blanchard 	}
19341eab6f8SAnton Blanchard 
19441eab6f8SAnton Blanchard 	return distance;
19541eab6f8SAnton Blanchard }
19612c743ebSMike Qiu EXPORT_SYMBOL(__node_distance);
19741eab6f8SAnton Blanchard 
/*
 * Cache, per reference-point level, the associativity entry for @nid so
 * __node_distance() can compare nodes without re-reading the device tree.
 * @associativity points at the first array element (length field skipped
 * by the callers); distance_ref_points entries are 1-based, hence the -1.
 * No-op unless form 1 affinity is in use.
 */
19841eab6f8SAnton Blanchard static void initialize_distance_lookup_table(int nid,
199b08a2a12SAlistair Popple 		const __be32 *associativity)
20041eab6f8SAnton Blanchard {
20141eab6f8SAnton Blanchard 	int i;
20241eab6f8SAnton Blanchard 
20341eab6f8SAnton Blanchard 	if (!form1_affinity)
20441eab6f8SAnton Blanchard 		return;
20541eab6f8SAnton Blanchard 
20641eab6f8SAnton Blanchard 	for (i = 0; i < distance_ref_points_depth; i++) {
207b08a2a12SAlistair Popple 		const __be32 *entry;
208b08a2a12SAlistair Popple 
2091d805440SNikunj A Dadhania 		entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
210b08a2a12SAlistair Popple 		distance_lookup_table[nid][i] = of_read_number(entry, 1);
21141eab6f8SAnton Blanchard 	}
21241eab6f8SAnton Blanchard }
21341eab6f8SAnton Blanchard 
214482ec7c4SNathan Lynch /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
215482ec7c4SNathan Lynch  * info is found.
216482ec7c4SNathan Lynch  */
217b08a2a12SAlistair Popple static int associativity_to_nid(const __be32 *associativity)
218ab1f9dacSPaul Mackerras {
219482ec7c4SNathan Lynch 	int nid = -1;
220ab1f9dacSPaul Mackerras 
	/* min_common_depth == -1 means NUMA setup failed/was disabled */
221ab1f9dacSPaul Mackerras 	if (min_common_depth == -1)
222482ec7c4SNathan Lynch 		goto out;
223ab1f9dacSPaul Mackerras 
	/* first cell is the array length; index min_common_depth selects the nid */
224b08a2a12SAlistair Popple 	if (of_read_number(associativity, 1) >= min_common_depth)
225b08a2a12SAlistair Popple 		nid = of_read_number(&associativity[min_common_depth], 1);
226bc16a759SNathan Lynch 
227bc16a759SNathan Lynch 	/* POWER4 LPAR uses 0xffff as invalid node */
228482ec7c4SNathan Lynch 	if (nid == 0xffff || nid >= MAX_NUMNODES)
229482ec7c4SNathan Lynch 		nid = -1;
23041eab6f8SAnton Blanchard 
231b08a2a12SAlistair Popple 	if (nid > 0 &&
2321d805440SNikunj A Dadhania 		of_read_number(associativity, 1) >= distance_ref_points_depth) {
2331d805440SNikunj A Dadhania 		/*
2341d805440SNikunj A Dadhania 		 * Skip the length field and send start of associativity array
2351d805440SNikunj A Dadhania 		 */
2361d805440SNikunj A Dadhania 		initialize_distance_lookup_table(nid, associativity + 1);
2371d805440SNikunj A Dadhania 	}
23841eab6f8SAnton Blanchard 
239482ec7c4SNathan Lynch out:
240cf950b7aSNathan Lynch 	return nid;
241ab1f9dacSPaul Mackerras }
242ab1f9dacSPaul Mackerras 
2439eff1a38SJesse Larrew /* Returns the nid associated with the given device tree node,
2449eff1a38SJesse Larrew  * or -1 if not found.
2459eff1a38SJesse Larrew  */
2469eff1a38SJesse Larrew static int of_node_to_nid_single(struct device_node *device)
2479eff1a38SJesse Larrew {
2489eff1a38SJesse Larrew 	int nid = -1;
249b08a2a12SAlistair Popple 	const __be32 *tmp;
2509eff1a38SJesse Larrew 
	/* only this node's own "ibm,associativity" is consulted (no parent walk) */
2519eff1a38SJesse Larrew 	tmp = of_get_associativity(device);
2529eff1a38SJesse Larrew 	if (tmp)
2539eff1a38SJesse Larrew 		nid = associativity_to_nid(tmp);
2549eff1a38SJesse Larrew 	return nid;
2559eff1a38SJesse Larrew }
2569eff1a38SJesse Larrew 
257953039c8SJeremy Kerr /* Walk the device tree upwards, looking for an associativity id */
258953039c8SJeremy Kerr int of_node_to_nid(struct device_node *device)
259953039c8SJeremy Kerr {
260953039c8SJeremy Kerr 	int nid = -1;
261953039c8SJeremy Kerr 
262953039c8SJeremy Kerr 	of_node_get(device);
263953039c8SJeremy Kerr 	while (device) {
264953039c8SJeremy Kerr 		nid = of_node_to_nid_single(device);
265953039c8SJeremy Kerr 		if (nid != -1)
266953039c8SJeremy Kerr 			break;
267953039c8SJeremy Kerr 
2681def3758SChristophe Jaillet 		device = of_get_next_parent(device);
269953039c8SJeremy Kerr 	}
270953039c8SJeremy Kerr 	of_node_put(device);
271953039c8SJeremy Kerr 
272953039c8SJeremy Kerr 	return nid;
273953039c8SJeremy Kerr }
274be9ba9ffSShailendra Singh EXPORT_SYMBOL(of_node_to_nid);
275953039c8SJeremy Kerr 
/*
 * Determine min_common_depth from the firmware's
 * "ibm,associativity-reference-points" property (read from /rtas,
 * /ibm,opal, or / as fallback).  Also detects and latches form 1
 * affinity.  Returns the depth, or -1 on failure.
 */
276ab1f9dacSPaul Mackerras static int __init find_min_common_depth(void)
277ab1f9dacSPaul Mackerras {
27841eab6f8SAnton Blanchard 	int depth;
279e70606ebSMichael Ellerman 	struct device_node *root;
280ab1f9dacSPaul Mackerras 
2811c8ee733SDipankar Sarma 	if (firmware_has_feature(FW_FEATURE_OPAL))
2821c8ee733SDipankar Sarma 		root = of_find_node_by_path("/ibm,opal");
2831c8ee733SDipankar Sarma 	else
284e70606ebSMichael Ellerman 		root = of_find_node_by_path("/rtas");
285e70606ebSMichael Ellerman 	if (!root)
286e70606ebSMichael Ellerman 		root = of_find_node_by_path("/");
287ab1f9dacSPaul Mackerras 
288ab1f9dacSPaul Mackerras 	/*
28941eab6f8SAnton Blanchard 	 * This property is a set of 32-bit integers, each representing
29041eab6f8SAnton Blanchard 	 * an index into the ibm,associativity nodes.
29141eab6f8SAnton Blanchard 	 *
29241eab6f8SAnton Blanchard 	 * With form 0 affinity the first integer is for an SMP configuration
29341eab6f8SAnton Blanchard 	 * (should be all 0's) and the second is for a normal NUMA
29441eab6f8SAnton Blanchard 	 * configuration. We have only one level of NUMA.
29541eab6f8SAnton Blanchard 	 *
29641eab6f8SAnton Blanchard 	 * With form 1 affinity the first integer is the most significant
29741eab6f8SAnton Blanchard 	 * NUMA boundary and the following are progressively less significant
29841eab6f8SAnton Blanchard 	 * boundaries. There can be more than one level of NUMA.
299ab1f9dacSPaul Mackerras 	 */
300e70606ebSMichael Ellerman 	distance_ref_points = of_get_property(root,
30141eab6f8SAnton Blanchard 					"ibm,associativity-reference-points",
30241eab6f8SAnton Blanchard 					&distance_ref_points_depth);
303ab1f9dacSPaul Mackerras 
30441eab6f8SAnton Blanchard 	if (!distance_ref_points) {
30541eab6f8SAnton Blanchard 		dbg("NUMA: ibm,associativity-reference-points not found.\n");
30641eab6f8SAnton Blanchard 		goto err;
30741eab6f8SAnton Blanchard 	}
30841eab6f8SAnton Blanchard 
	/* of_get_property() returned a byte length; convert to cell count */
30941eab6f8SAnton Blanchard 	distance_ref_points_depth /= sizeof(int);
31041eab6f8SAnton Blanchard 
3118002b0c5SNathan Fontenot 	if (firmware_has_feature(FW_FEATURE_OPAL) ||
3128002b0c5SNathan Fontenot 	    firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) {
313bc8449ccSAnton Blanchard 		dbg("Using form 1 affinity\n");
31441eab6f8SAnton Blanchard 		form1_affinity = 1;
3154b83c330SAnton Blanchard 	}
3165b958a7eSGavin Shan 
31741eab6f8SAnton Blanchard 	if (form1_affinity) {
318b08a2a12SAlistair Popple 		depth = of_read_number(distance_ref_points, 1);
319ab1f9dacSPaul Mackerras 	} else {
		/* form 0: depth comes from the second entry */
32041eab6f8SAnton Blanchard 		if (distance_ref_points_depth < 2) {
32141eab6f8SAnton Blanchard 			printk(KERN_WARNING "NUMA: "
32241eab6f8SAnton Blanchard 				"short ibm,associativity-reference-points\n");
32341eab6f8SAnton Blanchard 			goto err;
324ab1f9dacSPaul Mackerras 		}
325ab1f9dacSPaul Mackerras 
326b08a2a12SAlistair Popple 		depth = of_read_number(&distance_ref_points[1], 1);
32741eab6f8SAnton Blanchard 	}
32841eab6f8SAnton Blanchard 
32941eab6f8SAnton Blanchard 	/*
33041eab6f8SAnton Blanchard 	 * Warn and cap if the hardware supports more than
33141eab6f8SAnton Blanchard 	 * MAX_DISTANCE_REF_POINTS domains.
33241eab6f8SAnton Blanchard 	 */
33341eab6f8SAnton Blanchard 	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
33441eab6f8SAnton Blanchard 		printk(KERN_WARNING "NUMA: distance array capped at "
33541eab6f8SAnton Blanchard 			"%d entries\n", MAX_DISTANCE_REF_POINTS);
33641eab6f8SAnton Blanchard 		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
33741eab6f8SAnton Blanchard 	}
33841eab6f8SAnton Blanchard 
339e70606ebSMichael Ellerman 	of_node_put(root);
340ab1f9dacSPaul Mackerras 	return depth;
34141eab6f8SAnton Blanchard 
34241eab6f8SAnton Blanchard err:
343e70606ebSMichael Ellerman 	of_node_put(root);
34441eab6f8SAnton Blanchard 	return -1;
345ab1f9dacSPaul Mackerras }
346ab1f9dacSPaul Mackerras 
/*
 * Read #address-cells/#size-cells from the first "memory" node so
 * (base, size) tuples in memory properties can be decoded.  Panics if
 * no memory node exists, since NUMA setup cannot proceed without one.
 */
34784c9fdd1SMike Kravetz static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
348ab1f9dacSPaul Mackerras {
349ab1f9dacSPaul Mackerras 	struct device_node *memory = NULL;
350ab1f9dacSPaul Mackerras 
351ab1f9dacSPaul Mackerras 	memory = of_find_node_by_type(memory, "memory");
35254c23310SPaul Mackerras 	if (!memory)
35384c9fdd1SMike Kravetz 		panic("numa.c: No memory nodes found!");
35454c23310SPaul Mackerras 
355a8bda5ddSStephen Rothwell 	*n_addr_cells = of_n_addr_cells(memory);
3569213feeaSStephen Rothwell 	*n_size_cells = of_n_size_cells(memory);
35784c9fdd1SMike Kravetz 	of_node_put(memory);
358ab1f9dacSPaul Mackerras }
359ab1f9dacSPaul Mackerras 
/*
 * Accumulate @n big-endian 32-bit cells from *buf into one unsigned long,
 * advancing *buf past the cells consumed.
 */
360b08a2a12SAlistair Popple static unsigned long read_n_cells(int n, const __be32 **buf)
361ab1f9dacSPaul Mackerras {
362ab1f9dacSPaul Mackerras 	unsigned long result = 0;
363ab1f9dacSPaul Mackerras 
364ab1f9dacSPaul Mackerras 	while (n--) {
365b08a2a12SAlistair Popple 		result = (result << 32) | of_read_number(*buf, 1);
366ab1f9dacSPaul Mackerras 		(*buf)++;
367ab1f9dacSPaul Mackerras 	}
368ab1f9dacSPaul Mackerras 	return result;
369ab1f9dacSPaul Mackerras }
370ab1f9dacSPaul Mackerras 
/*
 * Parsed view of "ibm,associativity-lookup-arrays": n_arrays arrays of
 * array_sz cells each, with @arrays pointing into the property data.
 */
3718342681dSNathan Fontenot struct assoc_arrays {
3728342681dSNathan Fontenot 	u32	n_arrays;
3738342681dSNathan Fontenot 	u32	array_sz;
374b08a2a12SAlistair Popple 	const __be32 *arrays;
3758342681dSNathan Fontenot };
3768342681dSNathan Fontenot 
3778342681dSNathan Fontenot /*
37825985edcSLucas De Marchi  * Retrieve and validate the list of associativity arrays for drconf
3798342681dSNathan Fontenot  * memory from the ibm,associativity-lookup-arrays property of the
3808342681dSNathan Fontenot  * device tree..
3818342681dSNathan Fontenot  *
3828342681dSNathan Fontenot  * The layout of the ibm,associativity-lookup-arrays property is a number N
3838342681dSNathan Fontenot  * indicating the number of associativity arrays, followed by a number M
3848342681dSNathan Fontenot  * indicating the size of each associativity array, followed by a list
3858342681dSNathan Fontenot  * of N associativity arrays.
3868342681dSNathan Fontenot  */
38735f80debSNathan Fontenot static int of_get_assoc_arrays(struct assoc_arrays *aa)
3888342681dSNathan Fontenot {
38935f80debSNathan Fontenot 	struct device_node *memory;
390b08a2a12SAlistair Popple 	const __be32 *prop;
3918342681dSNathan Fontenot 	u32 len;
3928342681dSNathan Fontenot 
39335f80debSNathan Fontenot 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
39435f80debSNathan Fontenot 	if (!memory)
3958342681dSNathan Fontenot 		return -1;
3968342681dSNathan Fontenot 
39735f80debSNathan Fontenot 	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
	/* need at least the two header cells (N and M) */
39835f80debSNathan Fontenot 	if (!prop || len < 2 * sizeof(unsigned int)) {
39935f80debSNathan Fontenot 		of_node_put(memory);
40035f80debSNathan Fontenot 		return -1;
40135f80debSNathan Fontenot 	}
40235f80debSNathan Fontenot 
403b08a2a12SAlistair Popple 	aa->n_arrays = of_read_number(prop++, 1);
404b08a2a12SAlistair Popple 	aa->array_sz = of_read_number(prop++, 1);
4058342681dSNathan Fontenot 
40635f80debSNathan Fontenot 	of_node_put(memory);
40735f80debSNathan Fontenot 
40842b2aa86SJustin P. Mattock 	/* Now that we know the number of arrays and size of each array,
4098342681dSNathan Fontenot 	 * revalidate the size of the property read in.
4108342681dSNathan Fontenot 	 */
4118342681dSNathan Fontenot 	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
4128342681dSNathan Fontenot 		return -1;
4138342681dSNathan Fontenot 
	/* prop already advanced past the header; points at the first array */
4148342681dSNathan Fontenot 	aa->arrays = prop;
4158342681dSNathan Fontenot 	return 0;
4168342681dSNathan Fontenot }
4178342681dSNathan Fontenot 
4188342681dSNathan Fontenot /*
4198342681dSNathan Fontenot  * This is like of_node_to_nid_single() for memory represented in the
4208342681dSNathan Fontenot  * ibm,dynamic-reconfiguration-memory node.
4218342681dSNathan Fontenot  */
422514a9cb3SNathan Fontenot static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
4238342681dSNathan Fontenot {
424b88fc309SNathan Fontenot 	struct assoc_arrays aa = { .arrays = NULL };
	/* node 0 is the fallback whenever no valid associativity is found */
4258342681dSNathan Fontenot 	int default_nid = 0;
4268342681dSNathan Fontenot 	int nid = default_nid;
427b88fc309SNathan Fontenot 	int rc, index;
4288342681dSNathan Fontenot 
429b88fc309SNathan Fontenot 	rc = of_get_assoc_arrays(&aa);
430b88fc309SNathan Fontenot 	if (rc)
431b88fc309SNathan Fontenot 		return default_nid;
432b88fc309SNathan Fontenot 
	/* lmb->aa_index selects which lookup array describes this LMB */
433b88fc309SNathan Fontenot 	if (min_common_depth > 0 && min_common_depth <= aa.array_sz &&
434514a9cb3SNathan Fontenot 	    !(lmb->flags & DRCONF_MEM_AI_INVALID) &&
435514a9cb3SNathan Fontenot 	    lmb->aa_index < aa.n_arrays) {
436514a9cb3SNathan Fontenot 		index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
437b88fc309SNathan Fontenot 		nid = of_read_number(&aa.arrays[index], 1);
4388342681dSNathan Fontenot 
4398342681dSNathan Fontenot 		if (nid == 0xffff || nid >= MAX_NUMNODES)
4408342681dSNathan Fontenot 			nid = default_nid;
4411d805440SNikunj A Dadhania 
4421d805440SNikunj A Dadhania 		if (nid > 0) {
443514a9cb3SNathan Fontenot 			index = lmb->aa_index * aa.array_sz;
4441d805440SNikunj A Dadhania 			initialize_distance_lookup_table(nid,
445b88fc309SNathan Fontenot 							&aa.arrays[index]);
4461d805440SNikunj A Dadhania 		}
4478342681dSNathan Fontenot 	}
4488342681dSNathan Fontenot 
4498342681dSNathan Fontenot 	return nid;
4508342681dSNathan Fontenot }
4518342681dSNathan Fontenot 
452ab1f9dacSPaul Mackerras /*
453ab1f9dacSPaul Mackerras  * Figure out to which domain a cpu belongs and stick it there.
454ab1f9dacSPaul Mackerras  * Return the id of the domain used.
455ab1f9dacSPaul Mackerras  */
456061d19f2SPaul Gortmaker static int numa_setup_cpu(unsigned long lcpu)
457ab1f9dacSPaul Mackerras {
458297cf502SLi Zhong 	int nid = -1;
459d4edc5b6SSrivatsa S. Bhat 	struct device_node *cpu;
460d4edc5b6SSrivatsa S. Bhat 
461d4edc5b6SSrivatsa S. Bhat 	/*
462d4edc5b6SSrivatsa S. Bhat 	 * If a valid cpu-to-node mapping is already available, use it
463d4edc5b6SSrivatsa S. Bhat 	 * directly instead of querying the firmware, since it represents
464d4edc5b6SSrivatsa S. Bhat 	 * the most recent mapping notified to us by the platform (eg: VPHN).
465d4edc5b6SSrivatsa S. Bhat 	 */
466d4edc5b6SSrivatsa S. Bhat 	if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) {
467d4edc5b6SSrivatsa S. Bhat 		map_cpu_to_node(lcpu, nid);
468d4edc5b6SSrivatsa S. Bhat 		return nid;
469d4edc5b6SSrivatsa S. Bhat 	}
470d4edc5b6SSrivatsa S. Bhat 
471d4edc5b6SSrivatsa S. Bhat 	cpu = of_get_cpu_node(lcpu, NULL);
472ab1f9dacSPaul Mackerras 
473ab1f9dacSPaul Mackerras 	if (!cpu) {
474ab1f9dacSPaul Mackerras 		WARN_ON(1);
		/* a present cpu with no DT node still gets a fallback mapping */
475297cf502SLi Zhong 		if (cpu_present(lcpu))
476297cf502SLi Zhong 			goto out_present;
477297cf502SLi Zhong 		else
478ab1f9dacSPaul Mackerras 			goto out;
479ab1f9dacSPaul Mackerras 	}
480ab1f9dacSPaul Mackerras 
481953039c8SJeremy Kerr 	nid = of_node_to_nid_single(cpu);
482ab1f9dacSPaul Mackerras 
483297cf502SLi Zhong out_present:
	/* fall back to the first online node for invalid/impossible nids */
484ea05ba7cSMichael Bringmann 	if (nid < 0 || !node_possible(nid))
48572c33688SH Hartley Sweeten 		nid = first_online_node;
486297cf502SLi Zhong 
487cf950b7aSNathan Lynch 	map_cpu_to_node(lcpu, nid);
488ab1f9dacSPaul Mackerras 	of_node_put(cpu);
489297cf502SLi Zhong out:
490cf950b7aSNathan Lynch 	return nid;
491ab1f9dacSPaul Mackerras }
492ab1f9dacSPaul Mackerras 
/*
 * Sanity check: warn (once per call) if any online sibling thread of
 * @cpu's core has been mapped to a different node than @node.
 */
49368fb18aaSSrivatsa S. Bhat static void verify_cpu_node_mapping(int cpu, int node)
49468fb18aaSSrivatsa S. Bhat {
49568fb18aaSSrivatsa S. Bhat 	int base, sibling, i;
49668fb18aaSSrivatsa S. Bhat 
49768fb18aaSSrivatsa S. Bhat 	/* Verify that all the threads in the core belong to the same node */
49868fb18aaSSrivatsa S. Bhat 	base = cpu_first_thread_sibling(cpu);
49968fb18aaSSrivatsa S. Bhat 
50068fb18aaSSrivatsa S. Bhat 	for (i = 0; i < threads_per_core; i++) {
50168fb18aaSSrivatsa S. Bhat 		sibling = base + i;
50268fb18aaSSrivatsa S. Bhat 
50368fb18aaSSrivatsa S. Bhat 		if (sibling == cpu || cpu_is_offline(sibling))
50468fb18aaSSrivatsa S. Bhat 			continue;
50568fb18aaSSrivatsa S. Bhat 
50668fb18aaSSrivatsa S. Bhat 		if (cpu_to_node(sibling) != node) {
50768fb18aaSSrivatsa S. Bhat 			WARN(1, "CPU thread siblings %d and %d don't belong"
50868fb18aaSSrivatsa S. Bhat 				" to the same node!\n", cpu, sibling);
50968fb18aaSSrivatsa S. Bhat 			break;
51068fb18aaSSrivatsa S. Bhat 		}
51168fb18aaSSrivatsa S. Bhat 	}
51268fb18aaSSrivatsa S. Bhat }
51368fb18aaSSrivatsa S. Bhat 
514bdab88e0SSebastian Andrzej Siewior /* Must run before sched domains notifier. */
515bdab88e0SSebastian Andrzej Siewior static int ppc_numa_cpu_prepare(unsigned int cpu)
516ab1f9dacSPaul Mackerras {
517bdab88e0SSebastian Andrzej Siewior 	int nid;
518ab1f9dacSPaul Mackerras 
519bdab88e0SSebastian Andrzej Siewior 	nid = numa_setup_cpu(cpu);
520bdab88e0SSebastian Andrzej Siewior 	verify_cpu_node_mapping(cpu, nid);
521bdab88e0SSebastian Andrzej Siewior 	return 0;
522ab1f9dacSPaul Mackerras }
523bdab88e0SSebastian Andrzej Siewior 
/* CPU hotplug callback: drop a dead cpu from its node's cpumask. */
524bdab88e0SSebastian Andrzej Siewior static int ppc_numa_cpu_dead(unsigned int cpu)
525bdab88e0SSebastian Andrzej Siewior {
526bdab88e0SSebastian Andrzej Siewior #ifdef CONFIG_HOTPLUG_CPU
527bdab88e0SSebastian Andrzej Siewior 	unmap_cpu_from_node(cpu);
528bdab88e0SSebastian Andrzej Siewior #endif
529bdab88e0SSebastian Andrzej Siewior 	return 0;
530ab1f9dacSPaul Mackerras }
531ab1f9dacSPaul Mackerras 
532ab1f9dacSPaul Mackerras /*
533ab1f9dacSPaul Mackerras  * Check and possibly modify a memory region to enforce the memory limit.
534ab1f9dacSPaul Mackerras  *
535ab1f9dacSPaul Mackerras  * Returns the size the region should have to enforce the memory limit.
536ab1f9dacSPaul Mackerras  * This will either be the original value of size, a truncated value,
537ab1f9dacSPaul Mackerras  * or zero. If the returned value of size is 0 the region should be
53825985edcSLucas De Marchi  * discarded as it lies wholly above the memory limit.
539ab1f9dacSPaul Mackerras  */
54045fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start,
54145fb6ceaSAnton Blanchard 						      unsigned long size)
542ab1f9dacSPaul Mackerras {
543ab1f9dacSPaul Mackerras 	/*
54495f72d1eSYinghai Lu 	 * We use memblock_end_of_DRAM() in here instead of memory_limit because
545ab1f9dacSPaul Mackerras 	 * we've already adjusted it for the limit and it takes care of
546fe55249dSMilton Miller 	 * having memory holes below the limit.  Also, in the case of
547fe55249dSMilton Miller 	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
548ab1f9dacSPaul Mackerras 	 */
549ab1f9dacSPaul Mackerras 
	/* region entirely below the limit: keep its full size */
55095f72d1eSYinghai Lu 	if (start + size <= memblock_end_of_DRAM())
551ab1f9dacSPaul Mackerras 		return size;
552ab1f9dacSPaul Mackerras 
	/* region entirely above the limit: discard */
55395f72d1eSYinghai Lu 	if (start >= memblock_end_of_DRAM())
554ab1f9dacSPaul Mackerras 		return 0;
555ab1f9dacSPaul Mackerras 
	/* region straddles the limit: truncate */
55695f72d1eSYinghai Lu 	return memblock_end_of_DRAM() - start;
557ab1f9dacSPaul Mackerras }
558ab1f9dacSPaul Mackerras 
5590204568aSPaul Mackerras /*
560cf00085dSChandru  * Reads the counter for a given entry in
561cf00085dSChandru  * linux,drconf-usable-memory property
562cf00085dSChandru  */
563b08a2a12SAlistair Popple static inline int __init read_usm_ranges(const __be32 **usm)
564cf00085dSChandru {
565cf00085dSChandru 	/*
5663fdfd990SBenjamin Herrenschmidt 	 * For each lmb in ibm,dynamic-memory a corresponding
567cf00085dSChandru 	 * entry in linux,drconf-usable-memory property contains
568cf00085dSChandru 	 * a counter followed by that many (base, size) duple.
569cf00085dSChandru 	 * read the counter from linux,drconf-usable-memory
570cf00085dSChandru 	 */
	/* advances *usm past the counter cell(s) as a side effect */
571cf00085dSChandru 	return read_n_cells(n_mem_size_cells, usm);
572cf00085dSChandru }
573cf00085dSChandru 
574cf00085dSChandru /*
5750204568aSPaul Mackerras  * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
5760204568aSPaul Mackerras  * node.  This assumes n_mem_{addr,size}_cells have been set.
5770204568aSPaul Mackerras  */
578514a9cb3SNathan Fontenot static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
579514a9cb3SNathan Fontenot 					const __be32 **usm)
5800204568aSPaul Mackerras {
581514a9cb3SNathan Fontenot 	unsigned int ranges, is_kexec_kdump = 0;
582514a9cb3SNathan Fontenot 	unsigned long base, size, sz;
5838342681dSNathan Fontenot 	int nid;
5840204568aSPaul Mackerras 
585514a9cb3SNathan Fontenot 	/*
586514a9cb3SNathan Fontenot 	 * Skip this block if the reserved bit is set in flags (0x80)
587514a9cb3SNathan Fontenot 	 * or if the block is not assigned to this partition (0x8)
588514a9cb3SNathan Fontenot 	 */
589514a9cb3SNathan Fontenot 	if ((lmb->flags & DRCONF_MEM_RESERVED)
590514a9cb3SNathan Fontenot 	    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
5910204568aSPaul Mackerras 		return;
5920204568aSPaul Mackerras 
	/* a non-NULL usable-memory cursor means we're in a kexec/kdump kernel */
593514a9cb3SNathan Fontenot 	if (*usm)
594cf00085dSChandru 		is_kexec_kdump = 1;
595cf00085dSChandru 
596514a9cb3SNathan Fontenot 	base = lmb->base_addr;
597514a9cb3SNathan Fontenot 	size = drmem_lmb_size();
598cf00085dSChandru 	ranges = 1;
5998342681dSNathan Fontenot 
600cf00085dSChandru 	if (is_kexec_kdump) {
601514a9cb3SNathan Fontenot 		ranges = read_usm_ranges(usm);
602cf00085dSChandru 		if (!ranges) /* there are no (base, size) duple */
603514a9cb3SNathan Fontenot 			return;
604cf00085dSChandru 	}
605514a9cb3SNathan Fontenot 
606cf00085dSChandru 	do {
		/* kexec/kdump: each usable range overrides the LMB's base/size */
607cf00085dSChandru 		if (is_kexec_kdump) {
608514a9cb3SNathan Fontenot 			base = read_n_cells(n_mem_addr_cells, usm);
609514a9cb3SNathan Fontenot 			size = read_n_cells(n_mem_size_cells, usm);
610cf00085dSChandru 		}
611514a9cb3SNathan Fontenot 
612514a9cb3SNathan Fontenot 		nid = of_drconf_to_nid_single(lmb);
613514a9cb3SNathan Fontenot 		fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
614cf00085dSChandru 					  &nid);
615cf00085dSChandru 		node_set_online(nid);
616cf00085dSChandru 		sz = numa_enforce_memory_limit(base, size);
617cf00085dSChandru 		if (sz)
618514a9cb3SNathan Fontenot 			memblock_set_node(base, sz, &memblock.memory, nid);
619cf00085dSChandru 	} while (--ranges);
6200204568aSPaul Mackerras }
6210204568aSPaul Mackerras 
/*
 * Walk the device tree and construct the NUMA topology: online a node
 * for every present CPU whose associativity is known, then assign each
 * memory range (both static "memory" nodes and, via walk_drmem_lmbs(),
 * dynamic-reconfiguration LMBs) to its node in memblock.
 *
 * Returns 0 on success, -1 if NUMA was disabled on the command line, or
 * the negative result of find_min_common_depth() when no usable
 * associativity depth exists.  On failure the caller falls back to
 * setup_nonnuma().
 */
static int __init parse_numa_properties(void)
{
	struct device_node *memory;
	int default_nid = 0;
	unsigned long i;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

	min_common_depth = find_min_common_depth();

	if (min_common_depth < 0)
		return min_common_depth;

	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);

	/*
	 * Even though we connect cpus to numa domains later in SMP
	 * init, we need to know the node ids now. This is because
	 * each node to be onlined must have NODE_DATA etc backing it.
	 */
	for_each_present_cpu(i) {
		struct device_node *cpu;
		int nid;

		cpu = of_get_cpu_node(i, NULL);
		BUG_ON(!cpu);
		nid = of_node_to_nid_single(cpu);
		of_node_put(cpu);

		/*
		 * Don't fall back to default_nid yet -- we will plug
		 * cpus into nodes once the memory scan has discovered
		 * the topology.
		 */
		if (nid < 0)
			continue;
		node_set_online(nid);
	}

	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);

	for_each_node_by_type(memory, "memory") {
		unsigned long start;
		unsigned long size;
		int nid;
		int ranges;
		const __be32 *memcell_buf;
		unsigned int len;

		/* Prefer "linux,usable-memory" (kexec/kdump) over "reg". */
		memcell_buf = of_get_property(memory,
			"linux,usable-memory", &len);
		if (!memcell_buf || len <= 0)
			memcell_buf = of_get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
new_range:
		/* these are order-sensitive, and modify the buffer pointer */
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);

		/*
		 * Assumption: either all memory nodes or none will
		 * have associativity properties.  If none, then
		 * everything goes to default_nid.
		 */
		nid = of_node_to_nid_single(memory);
		if (nid < 0)
			nid = default_nid;

		/* May override nid when "numa=fake=" splitting is active. */
		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
		node_set_online(nid);

		size = numa_enforce_memory_limit(start, size);
		if (size)
			memblock_set_node(start, size, &memblock.memory, nid);

		if (--ranges)
			goto new_range;
	}

	/*
	 * Now do the same thing for each MEMBLOCK listed in the
	 * ibm,dynamic-memory property in the
	 * ibm,dynamic-reconfiguration-memory node.
	 */
	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory) {
		walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
		of_node_put(memory);
	}

	return 0;
}
721ab1f9dacSPaul Mackerras 
/*
 * No usable NUMA information was found: place all memory on node 0,
 * unless "numa=fake=" splitting advances nid at region boundaries.
 */
static void __init setup_nonnuma(void)
{
	unsigned long top_of_ram = memblock_end_of_DRAM();
	unsigned long total_ram = memblock_phys_mem_size();
	unsigned long start_pfn, end_pfn;
	unsigned int nid = 0;
	struct memblock_region *reg;

	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
	       top_of_ram, total_ram);
	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
	       (top_of_ram - total_ram) >> 20);

	for_each_memblock(memory, reg) {
		start_pfn = memblock_region_memory_base_pfn(reg);
		end_pfn = memblock_region_memory_end_pfn(reg);

		/* nid may advance when a fake-node boundary is crossed. */
		fake_numa_create_new_node(end_pfn, &nid);
		memblock_set_node(PFN_PHYS(start_pfn),
				  PFN_PHYS(end_pfn - start_pfn),
				  &memblock.memory, nid);
		node_set_online(nid);
	}
}
746ab1f9dacSPaul Mackerras 
/*
 * Print the CPU ids belonging to each online node, compressing runs of
 * consecutive ids into "first-last" ranges.
 */
void __init dump_numa_cpu_topology(void)
{
	unsigned int node;
	unsigned int cpu, count;	/* count = length of the current run */

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		pr_info("Node %d CPUs:", node);

		count = 0;
		/*
		 * If we used a CPU iterator here we would miss printing
		 * the holes in the cpumap.
		 */
		for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
			if (cpumask_test_cpu(cpu,
					node_to_cpumask_map[node])) {
				/* Print only the first id of a run. */
				if (count == 0)
					pr_cont(" %u", cpu);
				++count;
			} else {
				/* A run just ended; close the range. */
				if (count > 1)
					pr_cont("-%u", cpu - 1);
				count = 0;
			}
		}

		/* Close a run that extends to the last possible CPU id. */
		if (count > 1)
			pr_cont("-%u", nr_cpu_ids - 1);
		pr_cont("\n");
	}
}
7814b703a23SAnton Blanchard 
/*
 * Initialize NODE_DATA for a node on the local memory.
 *
 * @nid:       node being set up
 * @start_pfn: first page frame owned by the node
 * @end_pfn:   one past the last page frame owned by the node
 *
 * Tries to place the pg_data_t on the node it describes; falls through
 * to wherever memblock finds room (reported if that differs from @nid).
 */
static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
{
	u64 spanned_pages = end_pfn - start_pfn;
	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
	u64 nd_pa;
	void *nd;
	int tnid;

	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
	nd = __va(nd_pa);

	/* report and initialize */
	pr_info("  NODE_DATA [mem %#010Lx-%#010Lx]\n",
		nd_pa, nd_pa + nd_size - 1);
	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
	if (tnid != nid)
		pr_info("    NODE_DATA(%d) on node %d\n", nid, tnid);

	node_data[nid] = nd;
	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
	NODE_DATA(nid)->node_id = nid;
	NODE_DATA(nid)->node_start_pfn = start_pfn;
	NODE_DATA(nid)->node_spanned_pages = spanned_pages;
}
8078f64e1f2SJon Tollefson 
/*
 * Mark as possible every node id the platform may ever use, as
 * advertised by the "ibm,max-associativity-domains" RTAS property at
 * the min_common_depth associativity level.  No-op if NUMA is not in
 * use or the /rtas node is absent.
 */
static void __init find_possible_nodes(void)
{
	struct device_node *rtas;
	u32 numnodes, i;

	if (min_common_depth <= 0)
		return;

	rtas = of_find_node_by_path("/rtas");
	if (!rtas)
		return;

	/* Index min_common_depth selects the node-level domain count. */
	if (of_property_read_u32_index(rtas,
				"ibm,max-associativity-domains",
				min_common_depth, &numnodes))
		goto out;

	for (i = 0; i < numnodes; i++) {
		if (!node_possible(i))
			node_set(i, node_possible_map);
	}

out:
	of_node_put(rtas);
}
833a346137eSMichael Bringmann 
/*
 * Discover the NUMA topology (or fall back to a single node) and set up
 * the per-CPU node mappings.  Runs early in boot, before initmem_init().
 */
void __init mem_topology_setup(void)
{
	int cpu;

	if (parse_numa_properties())
		setup_nonnuma();

	/*
	 * Modify the set of possible NUMA nodes to reflect information
	 * available about the set of online nodes, and the set of nodes
	 * that we expect to make use of for this platform's affinity
	 * calculations.
	 */
	nodes_and(node_possible_map, node_possible_map, node_online_map);

	find_possible_nodes();

	setup_node_to_cpumask_map();

	reset_numa_cpu_lookup_table();

	for_each_present_cpu(cpu)
		numa_setup_cpu(cpu);
}
8589bd9be00SNicholas Piggin 
/*
 * Set up NODE_DATA and sparsemem for every online node, and register
 * the CPU hotplug callbacks that keep the NUMA CPU maps current.
 * Runs after mem_topology_setup() has onlined the nodes.
 */
void __init initmem_init(void)
{
	int nid;

	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
	max_pfn = max_low_pfn;

	memblock_dump_all();

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn;

		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
		setup_node_data(nid, start_pfn, end_pfn);
		sparse_memory_present_with_active_regions(nid);
	}

	sparse_init();

	/*
	 * We need the numa_cpu_lookup_table to be accurate for all CPUs,
	 * even before we online them, so that we can use cpu_to_{node,mem}
	 * early in boot, cf. smp_prepare_cpus().
	 * _nocalls() + manual invocation is used because cpuhp is not yet
	 * initialized for the boot CPU.
	 */
	cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
				  ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
}
888ab1f9dacSPaul Mackerras 
889ab1f9dacSPaul Mackerras static int __init early_numa(char *p)
890ab1f9dacSPaul Mackerras {
891ab1f9dacSPaul Mackerras 	if (!p)
892ab1f9dacSPaul Mackerras 		return 0;
893ab1f9dacSPaul Mackerras 
894ab1f9dacSPaul Mackerras 	if (strstr(p, "off"))
895ab1f9dacSPaul Mackerras 		numa_enabled = 0;
896ab1f9dacSPaul Mackerras 
897ab1f9dacSPaul Mackerras 	if (strstr(p, "debug"))
898ab1f9dacSPaul Mackerras 		numa_debug = 1;
899ab1f9dacSPaul Mackerras 
9001daa6d08SBalbir Singh 	p = strstr(p, "fake=");
9011daa6d08SBalbir Singh 	if (p)
9021daa6d08SBalbir Singh 		cmdline = p + strlen("fake=");
9031daa6d08SBalbir Singh 
904ab1f9dacSPaul Mackerras 	return 0;
905ab1f9dacSPaul Mackerras }
906ab1f9dacSPaul Mackerras early_param("numa", early_numa);
907237a0989SMike Kravetz 
9082d73bae1SNishanth Aravamudan static bool topology_updates_enabled = true;
9092d73bae1SNishanth Aravamudan 
9102d73bae1SNishanth Aravamudan static int __init early_topology_updates(char *p)
9112d73bae1SNishanth Aravamudan {
9122d73bae1SNishanth Aravamudan 	if (!p)
9132d73bae1SNishanth Aravamudan 		return 0;
9142d73bae1SNishanth Aravamudan 
9152d73bae1SNishanth Aravamudan 	if (!strcmp(p, "off")) {
9162d73bae1SNishanth Aravamudan 		pr_info("Disabling topology updates\n");
9172d73bae1SNishanth Aravamudan 		topology_updates_enabled = false;
9182d73bae1SNishanth Aravamudan 	}
9192d73bae1SNishanth Aravamudan 
9202d73bae1SNishanth Aravamudan 	return 0;
9212d73bae1SNishanth Aravamudan }
9222d73bae1SNishanth Aravamudan early_param("topology_updates", early_topology_updates);
9232d73bae1SNishanth Aravamudan 
924237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Find the node associated with a hot added memory section for
 * memory represented in the device tree by the property
 * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
 *
 * Returns the node id, or -1 if no assigned LMB contains scn_addr.
 */
static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
{
	struct drmem_lmb *lmb;
	unsigned long lmb_size;
	int nid = -1;

	lmb_size = drmem_lmb_size();

	for_each_drmem_lmb(lmb) {
		/* skip this block if it is reserved or not assigned to
		 * this partition */
		if ((lmb->flags & DRCONF_MEM_RESERVED)
		    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
			continue;

		/* skip LMBs whose [base, base + lmb_size) misses scn_addr */
		if ((scn_addr < lmb->base_addr)
		    || (scn_addr >= (lmb->base_addr + lmb_size)))
			continue;

		nid = of_drconf_to_nid_single(lmb);
		break;
	}

	return nid;
}
9550db9360aSNathan Fontenot 
/*
 * Find the node associated with a hot added memory section for memory
 * represented in the device tree as a node (i.e. memory@XXXX) for
 * each memblock.
 *
 * Returns the node id, or -1 if no memory node's "reg" ranges contain
 * scn_addr.
 */
static int hot_add_node_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory;
	int nid = -1;

	for_each_node_by_type(memory, "memory") {
		unsigned long start, size;
		int ranges;
		const __be32 *memcell_buf;
		unsigned int len;

		memcell_buf = of_get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);

		while (ranges--) {
			/* order-sensitive reads advance memcell_buf */
			start = read_n_cells(n_mem_addr_cells, &memcell_buf);
			size = read_n_cells(n_mem_size_cells, &memcell_buf);

			if ((scn_addr < start) || (scn_addr >= (start + size)))
				continue;

			nid = of_node_to_nid_single(memory);
			break;
		}

		if (nid >= 0)
			break;
	}

	/* Drop the reference still held when we broke out of the loop. */
	of_node_put(memory);

	return nid;
}
998237a0989SMike Kravetz 
/*
 * Find the node associated with a hot added memory section.  Section
 * corresponds to a SPARSEMEM section, not an MEMBLOCK.  It is assumed that
 * sections are fully contained within a single MEMBLOCK.
 *
 * Always returns a usable node: falls back to first_online_node when
 * NUMA is off or no (possible) node claims the address.
 */
int hot_add_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory = NULL;
	int nid;

	if (!numa_enabled || (min_common_depth < 0))
		return first_online_node;

	/* Dynamic-reconfiguration memory takes precedence when present. */
	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory) {
		nid = hot_add_drconf_scn_to_nid(scn_addr);
		of_node_put(memory);
	} else {
		nid = hot_add_node_scn_to_nid(scn_addr);
	}

	if (nid < 0 || !node_possible(nid))
		nid = first_online_node;

	return nid;
}
10250f16ef7fSNathan Fontenot 
/*
 * Return the highest address drconfig memory hotplug could ever use:
 * the "ibm,lrdr-capacity" RTAS value if present, otherwise the end of
 * the dynamic-reconfiguration LMB space, otherwise 0.
 */
static u64 hot_add_drconf_memory_max(void)
{
	struct device_node *memory = NULL;
	struct device_node *dn = NULL;
	const __be64 *lrdr = NULL;

	dn = of_find_node_by_path("/rtas");
	if (dn) {
		lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL);
		of_node_put(dn);
		if (lrdr)
			return be64_to_cpup(lrdr);
	}

	/* Node existence alone tells us drconf memory is in use. */
	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory) {
		of_node_put(memory);
		return drmem_lmb_memory_max();
	}
	return 0;
}
1047cd34206eSNishanth Aravamudan 
1048cd34206eSNishanth Aravamudan /*
1049cd34206eSNishanth Aravamudan  * memory_hotplug_max - return max address of memory that may be added
1050cd34206eSNishanth Aravamudan  *
1051cd34206eSNishanth Aravamudan  * This is currently only used on systems that support drconfig memory
1052cd34206eSNishanth Aravamudan  * hotplug.
1053cd34206eSNishanth Aravamudan  */
1054cd34206eSNishanth Aravamudan u64 memory_hotplug_max(void)
1055cd34206eSNishanth Aravamudan {
1056cd34206eSNishanth Aravamudan         return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
1057cd34206eSNishanth Aravamudan }
1058237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */
10599eff1a38SJesse Larrew 
1060bd03403aSJesse Larrew /* Virtual Processor Home Node (VPHN) support */
106139bf990eSJesse Larrew #ifdef CONFIG_PPC_SPLPAR
10624b6cfb2aSGreg Kurz 
10634b6cfb2aSGreg Kurz #include "vphn.h"
10644b6cfb2aSGreg Kurz 
/* One pending CPU-to-node move, processed under stop_machine(). */
struct topology_update_data {
	struct topology_update_data *next;	/* singly linked list of updates */
	unsigned int cpu;			/* CPU being moved */
	int old_nid;				/* node the CPU is leaving */
	int new_nid;				/* node the CPU is joining */
};
107130c05350SNathan Fontenot 
1072cee5405dSMichael Bringmann #define TOPOLOGY_DEF_TIMER_SECS	60
1073cee5405dSMichael Bringmann 
10745de16699SAnton Blanchard static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
10759eff1a38SJesse Larrew static cpumask_t cpu_associativity_changes_mask;
10769eff1a38SJesse Larrew static int vphn_enabled;
10775d88aa85SJesse Larrew static int prrn_enabled;
10785d88aa85SJesse Larrew static void reset_topology_timer(void);
1079cee5405dSMichael Bringmann static int topology_timer_secs = 1;
108017f444c0SMichael Bringmann static int topology_inited;
10819eff1a38SJesse Larrew 
10829eff1a38SJesse Larrew /*
1083cee5405dSMichael Bringmann  * Change polling interval for associativity changes.
1084cee5405dSMichael Bringmann  */
1085cee5405dSMichael Bringmann int timed_topology_update(int nsecs)
1086cee5405dSMichael Bringmann {
1087cee5405dSMichael Bringmann 	if (vphn_enabled) {
1088cee5405dSMichael Bringmann 		if (nsecs > 0)
1089cee5405dSMichael Bringmann 			topology_timer_secs = nsecs;
1090cee5405dSMichael Bringmann 		else
1091cee5405dSMichael Bringmann 			topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
1092cee5405dSMichael Bringmann 
1093cee5405dSMichael Bringmann 		reset_topology_timer();
1094cee5405dSMichael Bringmann 	}
1095cee5405dSMichael Bringmann 
1096cee5405dSMichael Bringmann 	return 0;
1097cee5405dSMichael Bringmann }
10989eff1a38SJesse Larrew 
/*
 * Store the current values of the associativity change counters in the
 * hypervisor.  Snapshotting them here means later comparisons only see
 * changes that happen from now on.
 */
static void setup_cpu_associativity_change_counters(void)
{
	int cpu;

	/* The VPHN feature supports a maximum of 8 reference points */
	BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);

	for_each_possible_cpu(cpu) {
		int i;
		u8 *counts = vphn_cpu_change_counts[cpu];
		/* volatile: the hypervisor updates these behind our back */
		volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;

		for (i = 0; i < distance_ref_points_depth; i++)
			counts[i] = hypervisor_counts[i];
	}
}
11199eff1a38SJesse Larrew 
/*
 * The hypervisor maintains a set of 8 associativity change counters in
 * the VPA of each cpu that correspond to the associativity levels in the
 * ibm,associativity-reference-points property. When an associativity
 * level changes, the corresponding counter is incremented.
 *
 * Set a bit in cpu_associativity_changes_mask for each cpu whose home
 * node associativity levels have changed.
 *
 * Returns the number of cpus with unhandled associativity changes.
 */
static int update_cpu_associativity_changes_mask(void)
{
	int cpu;
	cpumask_t *changes = &cpu_associativity_changes_mask;

	for_each_possible_cpu(cpu) {
		int i, changed = 0;
		u8 *counts = vphn_cpu_change_counts[cpu];
		/* volatile: updated asynchronously by the hypervisor */
		volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;

		for (i = 0; i < distance_ref_points_depth; i++) {
			if (hypervisor_counts[i] != counts[i]) {
				counts[i] = hypervisor_counts[i];
				changed = 1;
			}
		}
		if (changed) {
			/* Mark the whole core, then skip its remaining
			 * threads by jumping cpu to the last sibling. */
			cpumask_or(changes, changes, cpu_sibling_mask(cpu));
			cpu = cpu_last_thread_sibling(cpu);
		}
	}

	return cpumask_weight(changes);
}
11559eff1a38SJesse Larrew 
/*
 * Retrieve the new associativity information for a virtual processor's
 * home node.  Returns the H_HOME_NODE_ASSOCIATIVITY hcall status; the
 * unpacked buffer is only meaningful on H_SUCCESS (callers check rc).
 */
static long hcall_vphn(unsigned long cpu, __be32 *associativity)
{
	long rc;
	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
	u64 flags = 1;	/* flags == 1 requests vcpu (not pcpu) lookup */
	int hwcpu = get_hard_smp_processor_id(cpu);

	rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
	vphn_unpack_associativity(retbuf, associativity);

	return rc;
}
11729eff1a38SJesse Larrew 
/*
 * Wrapper around hcall_vphn() that reacts to the hypervisor status:
 * polling is stopped permanently if VPHN is unsupported or the hcall
 * hit a hardware fault, and the poll timer is reset to its default on
 * success.  Other statuses are passed through untouched.
 */
static long vphn_get_associativity(unsigned long cpu,
					__be32 *associativity)
{
	long rc;

	rc = hcall_vphn(cpu, associativity);

	switch (rc) {
	case H_FUNCTION:
		printk(KERN_INFO
			"VPHN is not supported. Disabling polling...\n");
		stop_topology_update();
		break;
	case H_HARDWARE:
		printk(KERN_ERR
			"hcall_vphn() experienced a hardware fault "
			"preventing VPHN. Disabling polling...\n");
		stop_topology_update();
		break;
	case H_SUCCESS:
		dbg("VPHN hcall succeeded. Reset polling...\n");
		timed_topology_update(0);
		break;
	}

	return rc;
}
12009eff1a38SJesse Larrew 
1201e67e02a5SMichael Bringmann int find_and_online_cpu_nid(int cpu)
1202ea05ba7cSMichael Bringmann {
1203ea05ba7cSMichael Bringmann 	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
1204ea05ba7cSMichael Bringmann 	int new_nid;
1205ea05ba7cSMichael Bringmann 
1206ea05ba7cSMichael Bringmann 	/* Use associativity from first thread for all siblings */
1207*2483ef05SSrikar Dronamraju 	if (vphn_get_associativity(cpu, associativity))
1208*2483ef05SSrikar Dronamraju 		return cpu_to_node(cpu);
1209*2483ef05SSrikar Dronamraju 
1210ea05ba7cSMichael Bringmann 	new_nid = associativity_to_nid(associativity);
1211ea05ba7cSMichael Bringmann 	if (new_nid < 0 || !node_possible(new_nid))
1212ea05ba7cSMichael Bringmann 		new_nid = first_online_node;
1213ea05ba7cSMichael Bringmann 
1214ea05ba7cSMichael Bringmann 	if (NODE_DATA(new_nid) == NULL) {
1215ea05ba7cSMichael Bringmann #ifdef CONFIG_MEMORY_HOTPLUG
1216ea05ba7cSMichael Bringmann 		/*
1217ea05ba7cSMichael Bringmann 		 * Need to ensure that NODE_DATA is initialized for a node from
1218ea05ba7cSMichael Bringmann 		 * available memory (see memblock_alloc_try_nid). If unable to
1219ea05ba7cSMichael Bringmann 		 * init the node, then default to nearest node that has memory
1220ea05ba7cSMichael Bringmann 		 * installed.
1221ea05ba7cSMichael Bringmann 		 */
1222ea05ba7cSMichael Bringmann 		if (try_online_node(new_nid))
1223ea05ba7cSMichael Bringmann 			new_nid = first_online_node;
1224ea05ba7cSMichael Bringmann #else
1225ea05ba7cSMichael Bringmann 		/*
1226ea05ba7cSMichael Bringmann 		 * Default to using the nearest node that has memory installed.
1227ea05ba7cSMichael Bringmann 		 * Otherwise, it would be necessary to patch the kernel MM code
1228ea05ba7cSMichael Bringmann 		 * to deal with more memoryless-node error conditions.
1229ea05ba7cSMichael Bringmann 		 */
1230ea05ba7cSMichael Bringmann 		new_nid = first_online_node;
1231ea05ba7cSMichael Bringmann #endif
1232ea05ba7cSMichael Bringmann 	}
1233ea05ba7cSMichael Bringmann 
1234e67e02a5SMichael Bringmann 	pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__,
1235e67e02a5SMichael Bringmann 		cpu, new_nid);
1236ea05ba7cSMichael Bringmann 	return new_nid;
1237ea05ba7cSMichael Bringmann }
1238ea05ba7cSMichael Bringmann 
/*
 * Update the CPU maps and sysfs entries for a single CPU when its NUMA
 * characteristics change. This function doesn't perform any locking and is
 * only safe to call from stop_machine().
 *
 * @data: head of a topology_update_data list shared by all CPUs; each
 *        CPU applies only the entries that name it.
 */
static int update_cpu_topology(void *data)
{
	struct topology_update_data *update;
	unsigned long cpu;

	if (!data)
		return -EINVAL;

	cpu = smp_processor_id();

	for (update = data; update; update = update->next) {
		int new_nid = update->new_nid;
		if (cpu != update->cpu)
			continue;

		/* Re-home this CPU: maps, per-cpu node/mem, and vDSO. */
		unmap_cpu_from_node(cpu);
		map_cpu_to_node(cpu, new_nid);
		set_cpu_numa_node(cpu, new_nid);
		set_cpu_numa_mem(cpu, local_memory_node(new_nid));
		vdso_getcpu_init();
	}

	return 0;
}
126830c05350SNathan Fontenot 
1269d4edc5b6SSrivatsa S. Bhat static int update_lookup_table(void *data)
1270d4edc5b6SSrivatsa S. Bhat {
1271d4edc5b6SSrivatsa S. Bhat 	struct topology_update_data *update;
1272d4edc5b6SSrivatsa S. Bhat 
1273d4edc5b6SSrivatsa S. Bhat 	if (!data)
1274d4edc5b6SSrivatsa S. Bhat 		return -EINVAL;
1275d4edc5b6SSrivatsa S. Bhat 
1276d4edc5b6SSrivatsa S. Bhat 	/*
1277d4edc5b6SSrivatsa S. Bhat 	 * Upon topology update, the numa-cpu lookup table needs to be updated
1278d4edc5b6SSrivatsa S. Bhat 	 * for all threads in the core, including offline CPUs, to ensure that
1279d4edc5b6SSrivatsa S. Bhat 	 * future hotplug operations respect the cpu-to-node associativity
1280d4edc5b6SSrivatsa S. Bhat 	 * properly.
1281d4edc5b6SSrivatsa S. Bhat 	 */
1282d4edc5b6SSrivatsa S. Bhat 	for (update = data; update; update = update->next) {
1283d4edc5b6SSrivatsa S. Bhat 		int nid, base, j;
1284d4edc5b6SSrivatsa S. Bhat 
1285d4edc5b6SSrivatsa S. Bhat 		nid = update->new_nid;
1286d4edc5b6SSrivatsa S. Bhat 		base = cpu_first_thread_sibling(update->cpu);
1287d4edc5b6SSrivatsa S. Bhat 
1288d4edc5b6SSrivatsa S. Bhat 		for (j = 0; j < threads_per_core; j++) {
1289d4edc5b6SSrivatsa S. Bhat 			update_numa_cpu_lookup_table(base + j, nid);
1290d4edc5b6SSrivatsa S. Bhat 		}
1291d4edc5b6SSrivatsa S. Bhat 	}
1292d4edc5b6SSrivatsa S. Bhat 
1293d4edc5b6SSrivatsa S. Bhat 	return 0;
1294d4edc5b6SSrivatsa S. Bhat }
1295d4edc5b6SSrivatsa S. Bhat 
129630c05350SNathan Fontenot /*
12979eff1a38SJesse Larrew  * Update the node maps and sysfs entries for each cpu whose home node
129879c5fcebSJesse Larrew  * has changed. Returns 1 when the topology has changed, and 0 otherwise.
12993e401f7aSThiago Jung Bauermann  *
13003e401f7aSThiago Jung Bauermann  * cpus_locked says whether we already hold cpu_hotplug_lock.
13019eff1a38SJesse Larrew  */
13023e401f7aSThiago Jung Bauermann int numa_update_cpu_topology(bool cpus_locked)
13039eff1a38SJesse Larrew {
13043be7db6aSRobert Jennings 	unsigned int cpu, sibling, changed = 0;
130530c05350SNathan Fontenot 	struct topology_update_data *updates, *ud;
1306176bbf14SJesse Larrew 	cpumask_t updated_cpus;
13078a25a2fdSKay Sievers 	struct device *dev;
13083be7db6aSRobert Jennings 	int weight, new_nid, i = 0;
130930c05350SNathan Fontenot 
13102ea62630SSrikar Dronamraju 	if (!prrn_enabled && !vphn_enabled && topology_inited)
13112d73bae1SNishanth Aravamudan 		return 0;
13122d73bae1SNishanth Aravamudan 
131330c05350SNathan Fontenot 	weight = cpumask_weight(&cpu_associativity_changes_mask);
131430c05350SNathan Fontenot 	if (!weight)
131530c05350SNathan Fontenot 		return 0;
131630c05350SNathan Fontenot 
13176396bb22SKees Cook 	updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL);
131830c05350SNathan Fontenot 	if (!updates)
131930c05350SNathan Fontenot 		return 0;
13209eff1a38SJesse Larrew 
1321176bbf14SJesse Larrew 	cpumask_clear(&updated_cpus);
13229eff1a38SJesse Larrew 
1323104699c0SKOSAKI Motohiro 	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
13243be7db6aSRobert Jennings 		/*
13253be7db6aSRobert Jennings 		 * If siblings aren't flagged for changes, updates list
13263be7db6aSRobert Jennings 		 * will be too short. Skip on this update and set for next
13273be7db6aSRobert Jennings 		 * update.
13283be7db6aSRobert Jennings 		 */
13293be7db6aSRobert Jennings 		if (!cpumask_subset(cpu_sibling_mask(cpu),
13303be7db6aSRobert Jennings 					&cpu_associativity_changes_mask)) {
13313be7db6aSRobert Jennings 			pr_info("Sibling bits not set for associativity "
13323be7db6aSRobert Jennings 					"change, cpu%d\n", cpu);
13333be7db6aSRobert Jennings 			cpumask_or(&cpu_associativity_changes_mask,
13343be7db6aSRobert Jennings 					&cpu_associativity_changes_mask,
13353be7db6aSRobert Jennings 					cpu_sibling_mask(cpu));
13363be7db6aSRobert Jennings 			cpu = cpu_last_thread_sibling(cpu);
13373be7db6aSRobert Jennings 			continue;
13383be7db6aSRobert Jennings 		}
13393be7db6aSRobert Jennings 
1340ea05ba7cSMichael Bringmann 		new_nid = find_and_online_cpu_nid(cpu);
13419eff1a38SJesse Larrew 
13423be7db6aSRobert Jennings 		if (new_nid == numa_cpu_lookup_table[cpu]) {
13433be7db6aSRobert Jennings 			cpumask_andnot(&cpu_associativity_changes_mask,
13443be7db6aSRobert Jennings 					&cpu_associativity_changes_mask,
13453be7db6aSRobert Jennings 					cpu_sibling_mask(cpu));
134617f444c0SMichael Bringmann 			dbg("Assoc chg gives same node %d for cpu%d\n",
134717f444c0SMichael Bringmann 					new_nid, cpu);
13483be7db6aSRobert Jennings 			cpu = cpu_last_thread_sibling(cpu);
13493be7db6aSRobert Jennings 			continue;
13503be7db6aSRobert Jennings 		}
13519eff1a38SJesse Larrew 
13523be7db6aSRobert Jennings 		for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
13533be7db6aSRobert Jennings 			ud = &updates[i++];
13548bc93149SMichael Bringmann 			ud->next = &updates[i];
13553be7db6aSRobert Jennings 			ud->cpu = sibling;
13563be7db6aSRobert Jennings 			ud->new_nid = new_nid;
13573be7db6aSRobert Jennings 			ud->old_nid = numa_cpu_lookup_table[sibling];
13583be7db6aSRobert Jennings 			cpumask_set_cpu(sibling, &updated_cpus);
135930c05350SNathan Fontenot 		}
13603be7db6aSRobert Jennings 		cpu = cpu_last_thread_sibling(cpu);
13613be7db6aSRobert Jennings 	}
13629eff1a38SJesse Larrew 
13638bc93149SMichael Bringmann 	/*
13648bc93149SMichael Bringmann 	 * Prevent processing of 'updates' from overflowing array
13658bc93149SMichael Bringmann 	 * where last entry filled in a 'next' pointer.
13668bc93149SMichael Bringmann 	 */
13678bc93149SMichael Bringmann 	if (i)
13688bc93149SMichael Bringmann 		updates[i-1].next = NULL;
13698bc93149SMichael Bringmann 
13702d73bae1SNishanth Aravamudan 	pr_debug("Topology update for the following CPUs:\n");
13712d73bae1SNishanth Aravamudan 	if (cpumask_weight(&updated_cpus)) {
13722d73bae1SNishanth Aravamudan 		for (ud = &updates[0]; ud; ud = ud->next) {
13732d73bae1SNishanth Aravamudan 			pr_debug("cpu %d moving from node %d "
13742d73bae1SNishanth Aravamudan 					  "to %d\n", ud->cpu,
13752d73bae1SNishanth Aravamudan 					  ud->old_nid, ud->new_nid);
13762d73bae1SNishanth Aravamudan 		}
13772d73bae1SNishanth Aravamudan 	}
13782d73bae1SNishanth Aravamudan 
13799a013361SMichael Wang 	/*
13809a013361SMichael Wang 	 * In cases where we have nothing to update (because the updates list
13819a013361SMichael Wang 	 * is too short or because the new topology is same as the old one),
13829a013361SMichael Wang 	 * skip invoking update_cpu_topology() via stop-machine(). This is
13839a013361SMichael Wang 	 * necessary (and not just a fast-path optimization) since stop-machine
13849a013361SMichael Wang 	 * can end up electing a random CPU to run update_cpu_topology(), and
13859a013361SMichael Wang 	 * thus trick us into setting up incorrect cpu-node mappings (since
13869a013361SMichael Wang 	 * 'updates' is kzalloc()'ed).
13879a013361SMichael Wang 	 *
13889a013361SMichael Wang 	 * And for the similar reason, we will skip all the following updating.
13899a013361SMichael Wang 	 */
13909a013361SMichael Wang 	if (!cpumask_weight(&updated_cpus))
13919a013361SMichael Wang 		goto out;
13929a013361SMichael Wang 
13933e401f7aSThiago Jung Bauermann 	if (cpus_locked)
13943e401f7aSThiago Jung Bauermann 		stop_machine_cpuslocked(update_cpu_topology, &updates[0],
13953e401f7aSThiago Jung Bauermann 					&updated_cpus);
13963e401f7aSThiago Jung Bauermann 	else
1397176bbf14SJesse Larrew 		stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
139830c05350SNathan Fontenot 
1399d4edc5b6SSrivatsa S. Bhat 	/*
1400d4edc5b6SSrivatsa S. Bhat 	 * Update the numa-cpu lookup table with the new mappings, even for
1401d4edc5b6SSrivatsa S. Bhat 	 * offline CPUs. It is best to perform this update from the stop-
1402d4edc5b6SSrivatsa S. Bhat 	 * machine context.
1403d4edc5b6SSrivatsa S. Bhat 	 */
14043e401f7aSThiago Jung Bauermann 	if (cpus_locked)
14053e401f7aSThiago Jung Bauermann 		stop_machine_cpuslocked(update_lookup_table, &updates[0],
14063e401f7aSThiago Jung Bauermann 					cpumask_of(raw_smp_processor_id()));
14073e401f7aSThiago Jung Bauermann 	else
1408d4edc5b6SSrivatsa S. Bhat 		stop_machine(update_lookup_table, &updates[0],
1409d4edc5b6SSrivatsa S. Bhat 			     cpumask_of(raw_smp_processor_id()));
1410d4edc5b6SSrivatsa S. Bhat 
141130c05350SNathan Fontenot 	for (ud = &updates[0]; ud; ud = ud->next) {
1412dd023217SNathan Fontenot 		unregister_cpu_under_node(ud->cpu, ud->old_nid);
1413dd023217SNathan Fontenot 		register_cpu_under_node(ud->cpu, ud->new_nid);
1414dd023217SNathan Fontenot 
141530c05350SNathan Fontenot 		dev = get_cpu_device(ud->cpu);
14168a25a2fdSKay Sievers 		if (dev)
14178a25a2fdSKay Sievers 			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
141830c05350SNathan Fontenot 		cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
141979c5fcebSJesse Larrew 		changed = 1;
14209eff1a38SJesse Larrew 	}
14219eff1a38SJesse Larrew 
14229a013361SMichael Wang out:
142330c05350SNathan Fontenot 	kfree(updates);
142479c5fcebSJesse Larrew 	return changed;
14259eff1a38SJesse Larrew }
14269eff1a38SJesse Larrew 
/*
 * Arch hook for the scheduler's topology rebuild. Passes cpus_locked=true,
 * i.e. it relies on the caller already holding cpu_hotplug_lock (see the
 * comment on numa_update_cpu_topology()).
 */
int arch_update_cpu_topology(void)
{
	return numa_update_cpu_topology(true);
}
14313e401f7aSThiago Jung Bauermann 
/* Deferred work: rebuild the scheduler domains after a topology change. */
static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}
static DECLARE_WORK(topology_work, topology_work_fn);
14379eff1a38SJesse Larrew 
/* Queue the sched-domain rebuild work on the system workqueue. */
static void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}
14429eff1a38SJesse Larrew 
/*
 * Periodic VPHN poll. Schedules a topology update if any CPU's
 * associativity changed, and re-arms itself while VPHN is enabled.
 */
static void topology_timer_fn(struct timer_list *unused)
{
	if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
		topology_schedule_update();
	else if (vphn_enabled) {
		if (update_cpu_associativity_changes_mask() > 0)
			topology_schedule_update();
		reset_topology_timer();
	}
}
static struct timer_list topology_timer;
14549eff1a38SJesse Larrew 
/* Re-arm the VPHN poll timer; no-op when VPHN polling is disabled. */
static void reset_topology_timer(void)
{
	if (vphn_enabled)
		mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
}
14609eff1a38SJesse Larrew 
1461601abdc3SNathan Fontenot #ifdef CONFIG_SMP
1462601abdc3SNathan Fontenot 
/*
 * Flag all sibling threads of @core_id for a topology re-check and
 * re-arm the poll timer so the change is picked up.
 */
static void stage_topology_update(int core_id)
{
	cpumask_or(&cpu_associativity_changes_mask,
		&cpu_associativity_changes_mask, cpu_sibling_mask(core_id));
	reset_topology_timer();
}
14695d88aa85SJesse Larrew 
14705d88aa85SJesse Larrew static int dt_update_callback(struct notifier_block *nb,
14715d88aa85SJesse Larrew 				unsigned long action, void *data)
14725d88aa85SJesse Larrew {
1473f5242e5aSGrant Likely 	struct of_reconfig_data *update = data;
14745d88aa85SJesse Larrew 	int rc = NOTIFY_DONE;
14755d88aa85SJesse Larrew 
14765d88aa85SJesse Larrew 	switch (action) {
14775d88aa85SJesse Larrew 	case OF_RECONFIG_UPDATE_PROPERTY:
147830c05350SNathan Fontenot 		if (!of_prop_cmp(update->dn->type, "cpu") &&
147930c05350SNathan Fontenot 		    !of_prop_cmp(update->prop->name, "ibm,associativity")) {
14805d88aa85SJesse Larrew 			u32 core_id;
14815d88aa85SJesse Larrew 			of_property_read_u32(update->dn, "reg", &core_id);
14825d88aa85SJesse Larrew 			stage_topology_update(core_id);
14835d88aa85SJesse Larrew 			rc = NOTIFY_OK;
14845d88aa85SJesse Larrew 		}
14855d88aa85SJesse Larrew 		break;
14865d88aa85SJesse Larrew 	}
14875d88aa85SJesse Larrew 
14885d88aa85SJesse Larrew 	return rc;
14895d88aa85SJesse Larrew }
14905d88aa85SJesse Larrew 
/* Notifier registered for device-tree property updates (PRRN). */
static struct notifier_block dt_update_nb = {
	.notifier_call = dt_update_callback,
};
14945d88aa85SJesse Larrew 
1495601abdc3SNathan Fontenot #endif
1496601abdc3SNathan Fontenot 
14979eff1a38SJesse Larrew /*
14985d88aa85SJesse Larrew  * Start polling for associativity changes.
14999eff1a38SJesse Larrew  */
15009eff1a38SJesse Larrew int start_topology_update(void)
15019eff1a38SJesse Larrew {
15029eff1a38SJesse Larrew 	int rc = 0;
15039eff1a38SJesse Larrew 
15045d88aa85SJesse Larrew 	if (firmware_has_feature(FW_FEATURE_PRRN)) {
15055d88aa85SJesse Larrew 		if (!prrn_enabled) {
15065d88aa85SJesse Larrew 			prrn_enabled = 1;
1507601abdc3SNathan Fontenot #ifdef CONFIG_SMP
15085d88aa85SJesse Larrew 			rc = of_reconfig_notifier_register(&dt_update_nb);
1509601abdc3SNathan Fontenot #endif
15105d88aa85SJesse Larrew 		}
1511a3496e91SMichael Bringmann 	}
1512a3496e91SMichael Bringmann 	if (firmware_has_feature(FW_FEATURE_VPHN) &&
1513f13c13a0SAnton Blanchard 		   lppaca_shared_proc(get_lppaca())) {
15145d88aa85SJesse Larrew 		if (!vphn_enabled) {
15159eff1a38SJesse Larrew 			vphn_enabled = 1;
15169eff1a38SJesse Larrew 			setup_cpu_associativity_change_counters();
1517df7e828cSKees Cook 			timer_setup(&topology_timer, topology_timer_fn,
1518df7e828cSKees Cook 				    TIMER_DEFERRABLE);
15195d88aa85SJesse Larrew 			reset_topology_timer();
15205d88aa85SJesse Larrew 		}
15219eff1a38SJesse Larrew 	}
15229eff1a38SJesse Larrew 
15239eff1a38SJesse Larrew 	return rc;
15249eff1a38SJesse Larrew }
15259eff1a38SJesse Larrew 
15269eff1a38SJesse Larrew /*
15279eff1a38SJesse Larrew  * Disable polling for VPHN associativity changes.
15289eff1a38SJesse Larrew  */
15299eff1a38SJesse Larrew int stop_topology_update(void)
15309eff1a38SJesse Larrew {
15315d88aa85SJesse Larrew 	int rc = 0;
15325d88aa85SJesse Larrew 
15335d88aa85SJesse Larrew 	if (prrn_enabled) {
15345d88aa85SJesse Larrew 		prrn_enabled = 0;
1535601abdc3SNathan Fontenot #ifdef CONFIG_SMP
15365d88aa85SJesse Larrew 		rc = of_reconfig_notifier_unregister(&dt_update_nb);
1537601abdc3SNathan Fontenot #endif
1538a3496e91SMichael Bringmann 	}
1539a3496e91SMichael Bringmann 	if (vphn_enabled) {
15409eff1a38SJesse Larrew 		vphn_enabled = 0;
15415d88aa85SJesse Larrew 		rc = del_timer_sync(&topology_timer);
15429eff1a38SJesse Larrew 	}
15435d88aa85SJesse Larrew 
15445d88aa85SJesse Larrew 	return rc;
1545ab1f9dacSPaul Mackerras }
1546e04fa612SNathan Fontenot 
/* Report whether PRRN event handling is currently enabled. */
int prrn_is_enabled(void)
{
	return prrn_enabled;
}
1551e04fa612SNathan Fontenot 
/*
 * On shared-processor LPARs, force a full topology refresh at boot by
 * flagging every CPU as changed and running the update synchronously
 * (without cpu_hotplug_lock held).
 */
void __init shared_proc_topology_init(void)
{
	if (lppaca_shared_proc(get_lppaca())) {
		bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
			    nr_cpumask_bits);
		numa_update_cpu_topology(false);
	}
}
15602ea62630SSrikar Dronamraju 
1561e04fa612SNathan Fontenot static int topology_read(struct seq_file *file, void *v)
1562e04fa612SNathan Fontenot {
1563e04fa612SNathan Fontenot 	if (vphn_enabled || prrn_enabled)
1564e04fa612SNathan Fontenot 		seq_puts(file, "on\n");
1565e04fa612SNathan Fontenot 	else
1566e04fa612SNathan Fontenot 		seq_puts(file, "off\n");
1567e04fa612SNathan Fontenot 
1568e04fa612SNathan Fontenot 	return 0;
1569e04fa612SNathan Fontenot }
1570e04fa612SNathan Fontenot 
/* Open handler for /proc/powerpc/topology_updates. */
static int topology_open(struct inode *inode, struct file *file)
{
	return single_open(file, topology_read, NULL);
}
1575e04fa612SNathan Fontenot 
1576e04fa612SNathan Fontenot static ssize_t topology_write(struct file *file, const char __user *buf,
1577e04fa612SNathan Fontenot 			      size_t count, loff_t *off)
1578e04fa612SNathan Fontenot {
1579e04fa612SNathan Fontenot 	char kbuf[4]; /* "on" or "off" plus null. */
1580e04fa612SNathan Fontenot 	int read_len;
1581e04fa612SNathan Fontenot 
1582e04fa612SNathan Fontenot 	read_len = count < 3 ? count : 3;
1583e04fa612SNathan Fontenot 	if (copy_from_user(kbuf, buf, read_len))
1584e04fa612SNathan Fontenot 		return -EINVAL;
1585e04fa612SNathan Fontenot 
1586e04fa612SNathan Fontenot 	kbuf[read_len] = '\0';
1587e04fa612SNathan Fontenot 
1588e04fa612SNathan Fontenot 	if (!strncmp(kbuf, "on", 2))
1589e04fa612SNathan Fontenot 		start_topology_update();
1590e04fa612SNathan Fontenot 	else if (!strncmp(kbuf, "off", 3))
1591e04fa612SNathan Fontenot 		stop_topology_update();
1592e04fa612SNathan Fontenot 	else
1593e04fa612SNathan Fontenot 		return -EINVAL;
1594e04fa612SNathan Fontenot 
1595e04fa612SNathan Fontenot 	return count;
1596e04fa612SNathan Fontenot }
1597e04fa612SNathan Fontenot 
/* File operations for /proc/powerpc/topology_updates. */
static const struct file_operations topology_ops = {
	.read = seq_read,
	.write = topology_write,
	.open = topology_open,
	.release = single_release
};
1604e04fa612SNathan Fontenot 
/*
 * Boot-time init: start topology polling (unless disabled on the command
 * line), schedule an initial update if VPHN came up, and create the
 * /proc control file. Returns -ENOMEM if the proc entry cannot be made.
 */
static int topology_update_init(void)
{
	/* Do not poll for changes if disabled at boot */
	if (topology_updates_enabled)
		start_topology_update();

	if (vphn_enabled)
		topology_schedule_update();

	if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
		return -ENOMEM;

	topology_inited = 1;
	return 0;
}
device_initcall(topology_update_init);
162139bf990eSJesse Larrew #endif /* CONFIG_PPC_SPLPAR */
1622