1ab1f9dacSPaul Mackerras /* 2ab1f9dacSPaul Mackerras * pSeries NUMA support 3ab1f9dacSPaul Mackerras * 4ab1f9dacSPaul Mackerras * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM 5ab1f9dacSPaul Mackerras * 6ab1f9dacSPaul Mackerras * This program is free software; you can redistribute it and/or 7ab1f9dacSPaul Mackerras * modify it under the terms of the GNU General Public License 8ab1f9dacSPaul Mackerras * as published by the Free Software Foundation; either version 9ab1f9dacSPaul Mackerras * 2 of the License, or (at your option) any later version. 10ab1f9dacSPaul Mackerras */ 112d73bae1SNishanth Aravamudan #define pr_fmt(fmt) "numa: " fmt 122d73bae1SNishanth Aravamudan 13ab1f9dacSPaul Mackerras #include <linux/threads.h> 14ab1f9dacSPaul Mackerras #include <linux/bootmem.h> 15ab1f9dacSPaul Mackerras #include <linux/init.h> 16ab1f9dacSPaul Mackerras #include <linux/mm.h> 17ab1f9dacSPaul Mackerras #include <linux/mmzone.h> 184b16f8e2SPaul Gortmaker #include <linux/export.h> 19ab1f9dacSPaul Mackerras #include <linux/nodemask.h> 20ab1f9dacSPaul Mackerras #include <linux/cpu.h> 21ab1f9dacSPaul Mackerras #include <linux/notifier.h> 2295f72d1eSYinghai Lu #include <linux/memblock.h> 236df1646eSMichael Ellerman #include <linux/of.h> 2406eccea6SDave Hansen #include <linux/pfn.h> 259eff1a38SJesse Larrew #include <linux/cpuset.h> 269eff1a38SJesse Larrew #include <linux/node.h> 2730c05350SNathan Fontenot #include <linux/stop_machine.h> 28e04fa612SNathan Fontenot #include <linux/proc_fs.h> 29e04fa612SNathan Fontenot #include <linux/seq_file.h> 30e04fa612SNathan Fontenot #include <linux/uaccess.h> 31191a7120SLinus Torvalds #include <linux/slab.h> 323be7db6aSRobert Jennings #include <asm/cputhreads.h> 3345fb6ceaSAnton Blanchard #include <asm/sparsemem.h> 34d9b2b2a2SDavid S. Miller #include <asm/prom.h> 352249ca9dSPaul Mackerras #include <asm/smp.h> 36d4edc5b6SSrivatsa S. Bhat #include <asm/cputhreads.h> 37d4edc5b6SSrivatsa S. Bhat #include <asm/topology.h> 389eff1a38SJesse Larrew #include <asm/firmware.h> 399eff1a38SJesse Larrew #include <asm/paca.h> 4039bf990eSJesse Larrew #include <asm/hvcall.h> 41ae3a197eSDavid Howells #include <asm/setup.h> 42176bbf14SJesse Larrew #include <asm/vdso.h> 43514a9cb3SNathan Fontenot #include <asm/drmem.h> 44ab1f9dacSPaul Mackerras 45ab1f9dacSPaul Mackerras static int numa_enabled = 1; 46ab1f9dacSPaul Mackerras 471daa6d08SBalbir Singh static char *cmdline __initdata; 481daa6d08SBalbir Singh 49ab1f9dacSPaul Mackerras static int numa_debug; 50ab1f9dacSPaul Mackerras #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } 51ab1f9dacSPaul Mackerras 5245fb6ceaSAnton Blanchard int numa_cpu_lookup_table[NR_CPUS]; 5325863de0SAnton Blanchard cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; 54ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES]; 5545fb6ceaSAnton Blanchard 5645fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpu_lookup_table); 5725863de0SAnton Blanchard EXPORT_SYMBOL(node_to_cpumask_map); 5845fb6ceaSAnton Blanchard EXPORT_SYMBOL(node_data); 5945fb6ceaSAnton Blanchard 60ab1f9dacSPaul Mackerras static int min_common_depth; 61237a0989SMike Kravetz static int n_mem_addr_cells, n_mem_size_cells; 6241eab6f8SAnton Blanchard static int form1_affinity; 6341eab6f8SAnton Blanchard 6441eab6f8SAnton Blanchard #define MAX_DISTANCE_REF_POINTS 4 6541eab6f8SAnton Blanchard static int distance_ref_points_depth; 66b08a2a12SAlistair Popple static const __be32 *distance_ref_points; 6741eab6f8SAnton Blanchard static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; 68ab1f9dacSPaul Mackerras 6925863de0SAnton Blanchard /* 7025863de0SAnton Blanchard * Allocate node_to_cpumask_map based on number of available nodes 7125863de0SAnton Blanchard * Requires node_possible_map to be valid. 7225863de0SAnton Blanchard * 739512938bSWanlong Gao * Note: cpumask_of_node() is not valid until after this is done. 7425863de0SAnton Blanchard */ 7525863de0SAnton Blanchard static void __init setup_node_to_cpumask_map(void) 7625863de0SAnton Blanchard { 77f9d531b8SCody P Schafer unsigned int node; 7825863de0SAnton Blanchard 7925863de0SAnton Blanchard /* setup nr_node_ids if not done yet */ 80f9d531b8SCody P Schafer if (nr_node_ids == MAX_NUMNODES) 81f9d531b8SCody P Schafer setup_nr_node_ids(); 8225863de0SAnton Blanchard 8325863de0SAnton Blanchard /* allocate the map */ 84c118baf8SRaghavendra K T for_each_node(node) 8525863de0SAnton Blanchard alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); 8625863de0SAnton Blanchard 8725863de0SAnton Blanchard /* cpumask_of_node() will now work */ 8825863de0SAnton Blanchard dbg("Node to cpumask map for %d nodes\n", nr_node_ids); 8925863de0SAnton Blanchard } 9025863de0SAnton Blanchard 9155671f3cSStephen Rothwell static int __init fake_numa_create_new_node(unsigned long end_pfn, 921daa6d08SBalbir Singh unsigned int *nid) 931daa6d08SBalbir Singh { 941daa6d08SBalbir Singh unsigned long long mem; 951daa6d08SBalbir Singh char *p = cmdline; 961daa6d08SBalbir Singh static unsigned int fake_nid; 971daa6d08SBalbir Singh static unsigned long long curr_boundary; 981daa6d08SBalbir Singh 991daa6d08SBalbir Singh /* 1001daa6d08SBalbir Singh * Modify node id, iff we started creating NUMA nodes 1011daa6d08SBalbir Singh * We want to continue from where we left of the last time 1021daa6d08SBalbir Singh */ 1031daa6d08SBalbir Singh if (fake_nid) 1041daa6d08SBalbir Singh *nid = fake_nid; 1051daa6d08SBalbir Singh /* 1061daa6d08SBalbir Singh * In case there are no more arguments to parse, the 1071daa6d08SBalbir Singh * node_id should be the same as the last fake node id 1081daa6d08SBalbir Singh * (we've handled this above). 1091daa6d08SBalbir Singh */ 1101daa6d08SBalbir Singh if (!p) 1111daa6d08SBalbir Singh return 0; 1121daa6d08SBalbir Singh 1131daa6d08SBalbir Singh mem = memparse(p, &p); 1141daa6d08SBalbir Singh if (!mem) 1151daa6d08SBalbir Singh return 0; 1161daa6d08SBalbir Singh 1171daa6d08SBalbir Singh if (mem < curr_boundary) 1181daa6d08SBalbir Singh return 0; 1191daa6d08SBalbir Singh 1201daa6d08SBalbir Singh curr_boundary = mem; 1211daa6d08SBalbir Singh 1221daa6d08SBalbir Singh if ((end_pfn << PAGE_SHIFT) > mem) { 1231daa6d08SBalbir Singh /* 1241daa6d08SBalbir Singh * Skip commas and spaces 1251daa6d08SBalbir Singh */ 1261daa6d08SBalbir Singh while (*p == ',' || *p == ' ' || *p == '\t') 1271daa6d08SBalbir Singh p++; 1281daa6d08SBalbir Singh 1291daa6d08SBalbir Singh cmdline = p; 1301daa6d08SBalbir Singh fake_nid++; 1311daa6d08SBalbir Singh *nid = fake_nid; 1321daa6d08SBalbir Singh dbg("created new fake_node with id %d\n", fake_nid); 1331daa6d08SBalbir Singh return 1; 1341daa6d08SBalbir Singh } 1351daa6d08SBalbir Singh return 0; 1361daa6d08SBalbir Singh } 1371daa6d08SBalbir Singh 138d4edc5b6SSrivatsa S. Bhat static void reset_numa_cpu_lookup_table(void) 139d4edc5b6SSrivatsa S. Bhat { 140d4edc5b6SSrivatsa S. Bhat unsigned int cpu; 141d4edc5b6SSrivatsa S. Bhat 142d4edc5b6SSrivatsa S. Bhat for_each_possible_cpu(cpu) 143d4edc5b6SSrivatsa S. Bhat numa_cpu_lookup_table[cpu] = -1; 144d4edc5b6SSrivatsa S. Bhat } 145d4edc5b6SSrivatsa S. Bhat 146d4edc5b6SSrivatsa S. Bhat static void map_cpu_to_node(int cpu, int node) 147d4edc5b6SSrivatsa S. Bhat { 148d4edc5b6SSrivatsa S. Bhat update_numa_cpu_lookup_table(cpu, node); 14945fb6ceaSAnton Blanchard 150bf4b85b0SNathan Lynch dbg("adding cpu %d to node %d\n", cpu, node); 151bf4b85b0SNathan Lynch 15225863de0SAnton Blanchard if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) 15325863de0SAnton Blanchard cpumask_set_cpu(cpu, node_to_cpumask_map[node]); 154ab1f9dacSPaul Mackerras } 155ab1f9dacSPaul Mackerras 15639bf990eSJesse Larrew #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR) 157ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu) 158ab1f9dacSPaul Mackerras { 159ab1f9dacSPaul Mackerras int node = numa_cpu_lookup_table[cpu]; 160ab1f9dacSPaul Mackerras 161ab1f9dacSPaul Mackerras dbg("removing cpu %lu from node %d\n", cpu, node); 162ab1f9dacSPaul Mackerras 16325863de0SAnton Blanchard if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { 164429f4d8dSAnton Blanchard cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); 165ab1f9dacSPaul Mackerras } else { 166ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", 167ab1f9dacSPaul Mackerras cpu, node); 168ab1f9dacSPaul Mackerras } 169ab1f9dacSPaul Mackerras } 17039bf990eSJesse Larrew #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ 171ab1f9dacSPaul Mackerras 172ab1f9dacSPaul Mackerras /* must hold reference to node during call */ 173b08a2a12SAlistair Popple static const __be32 *of_get_associativity(struct device_node *dev) 174ab1f9dacSPaul Mackerras { 175e2eb6392SStephen Rothwell return of_get_property(dev, "ibm,associativity", NULL); 176ab1f9dacSPaul Mackerras } 177ab1f9dacSPaul Mackerras 17841eab6f8SAnton Blanchard int __node_distance(int a, int b) 17941eab6f8SAnton Blanchard { 18041eab6f8SAnton Blanchard int i; 18141eab6f8SAnton Blanchard int distance = LOCAL_DISTANCE; 18241eab6f8SAnton Blanchard 18341eab6f8SAnton Blanchard if (!form1_affinity) 1847122beeeSVaidyanathan Srinivasan return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE); 18541eab6f8SAnton Blanchard 18641eab6f8SAnton Blanchard for (i = 0; i < distance_ref_points_depth; i++) { 18741eab6f8SAnton Blanchard if (distance_lookup_table[a][i] == distance_lookup_table[b][i]) 18841eab6f8SAnton Blanchard break; 18941eab6f8SAnton Blanchard 19041eab6f8SAnton Blanchard /* Double the distance for each NUMA level */ 19141eab6f8SAnton Blanchard distance *= 2; 19241eab6f8SAnton Blanchard } 19341eab6f8SAnton Blanchard 19441eab6f8SAnton Blanchard return distance; 19541eab6f8SAnton Blanchard } 19612c743ebSMike Qiu EXPORT_SYMBOL(__node_distance); 19741eab6f8SAnton Blanchard 19841eab6f8SAnton Blanchard static void initialize_distance_lookup_table(int nid, 199b08a2a12SAlistair Popple const __be32 *associativity) 20041eab6f8SAnton Blanchard { 20141eab6f8SAnton Blanchard int i; 20241eab6f8SAnton Blanchard 20341eab6f8SAnton Blanchard if (!form1_affinity) 20441eab6f8SAnton Blanchard return; 20541eab6f8SAnton Blanchard 20641eab6f8SAnton Blanchard for (i = 0; i < distance_ref_points_depth; i++) { 207b08a2a12SAlistair Popple const __be32 *entry; 208b08a2a12SAlistair Popple 2091d805440SNikunj A Dadhania entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1]; 210b08a2a12SAlistair Popple distance_lookup_table[nid][i] = of_read_number(entry, 1); 21141eab6f8SAnton Blanchard } 21241eab6f8SAnton Blanchard } 21341eab6f8SAnton Blanchard 214482ec7c4SNathan Lynch /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa 215482ec7c4SNathan Lynch * info is found. 216482ec7c4SNathan Lynch */ 217b08a2a12SAlistair Popple static int associativity_to_nid(const __be32 *associativity) 218ab1f9dacSPaul Mackerras { 219482ec7c4SNathan Lynch int nid = -1; 220ab1f9dacSPaul Mackerras 221ab1f9dacSPaul Mackerras if (min_common_depth == -1) 222482ec7c4SNathan Lynch goto out; 223ab1f9dacSPaul Mackerras 224b08a2a12SAlistair Popple if (of_read_number(associativity, 1) >= min_common_depth) 225b08a2a12SAlistair Popple nid = of_read_number(&associativity[min_common_depth], 1); 226bc16a759SNathan Lynch 227bc16a759SNathan Lynch /* POWER4 LPAR uses 0xffff as invalid node */ 228482ec7c4SNathan Lynch if (nid == 0xffff || nid >= MAX_NUMNODES) 229482ec7c4SNathan Lynch nid = -1; 23041eab6f8SAnton Blanchard 231b08a2a12SAlistair Popple if (nid > 0 && 2321d805440SNikunj A Dadhania of_read_number(associativity, 1) >= distance_ref_points_depth) { 2331d805440SNikunj A Dadhania /* 2341d805440SNikunj A Dadhania * Skip the length field and send start of associativity array 2351d805440SNikunj A Dadhania */ 2361d805440SNikunj A Dadhania initialize_distance_lookup_table(nid, associativity + 1); 2371d805440SNikunj A Dadhania } 23841eab6f8SAnton Blanchard 239482ec7c4SNathan Lynch out: 240cf950b7aSNathan Lynch return nid; 241ab1f9dacSPaul Mackerras } 242ab1f9dacSPaul Mackerras 2439eff1a38SJesse Larrew /* Returns the nid associated with the given device tree node, 2449eff1a38SJesse Larrew * or -1 if not found. 2459eff1a38SJesse Larrew */ 2469eff1a38SJesse Larrew static int of_node_to_nid_single(struct device_node *device) 2479eff1a38SJesse Larrew { 2489eff1a38SJesse Larrew int nid = -1; 249b08a2a12SAlistair Popple const __be32 *tmp; 2509eff1a38SJesse Larrew 2519eff1a38SJesse Larrew tmp = of_get_associativity(device); 2529eff1a38SJesse Larrew if (tmp) 2539eff1a38SJesse Larrew nid = associativity_to_nid(tmp); 2549eff1a38SJesse Larrew return nid; 2559eff1a38SJesse Larrew } 2569eff1a38SJesse Larrew 257953039c8SJeremy Kerr /* Walk the device tree upwards, looking for an associativity id */ 258953039c8SJeremy Kerr int of_node_to_nid(struct device_node *device) 259953039c8SJeremy Kerr { 260953039c8SJeremy Kerr int nid = -1; 261953039c8SJeremy Kerr 262953039c8SJeremy Kerr of_node_get(device); 263953039c8SJeremy Kerr while (device) { 264953039c8SJeremy Kerr nid = of_node_to_nid_single(device); 265953039c8SJeremy Kerr if (nid != -1) 266953039c8SJeremy Kerr break; 267953039c8SJeremy Kerr 2681def3758SChristophe Jaillet device = of_get_next_parent(device); 269953039c8SJeremy Kerr } 270953039c8SJeremy Kerr of_node_put(device); 271953039c8SJeremy Kerr 272953039c8SJeremy Kerr return nid; 273953039c8SJeremy Kerr } 274be9ba9ffSShailendra Singh EXPORT_SYMBOL(of_node_to_nid); 275953039c8SJeremy Kerr 276ab1f9dacSPaul Mackerras static int __init find_min_common_depth(void) 277ab1f9dacSPaul Mackerras { 27841eab6f8SAnton Blanchard int depth; 279e70606ebSMichael Ellerman struct device_node *root; 280ab1f9dacSPaul Mackerras 2811c8ee733SDipankar Sarma if (firmware_has_feature(FW_FEATURE_OPAL)) 2821c8ee733SDipankar Sarma root = of_find_node_by_path("/ibm,opal"); 2831c8ee733SDipankar Sarma else 284e70606ebSMichael Ellerman root = of_find_node_by_path("/rtas"); 285e70606ebSMichael Ellerman if (!root) 286e70606ebSMichael Ellerman root = of_find_node_by_path("/"); 287ab1f9dacSPaul Mackerras 288ab1f9dacSPaul Mackerras /* 28941eab6f8SAnton Blanchard * This property is a set of 32-bit integers, each representing 29041eab6f8SAnton Blanchard * an index into the ibm,associativity nodes. 29141eab6f8SAnton Blanchard * 29241eab6f8SAnton Blanchard * With form 0 affinity the first integer is for an SMP configuration 29341eab6f8SAnton Blanchard * (should be all 0's) and the second is for a normal NUMA 29441eab6f8SAnton Blanchard * configuration. We have only one level of NUMA. 29541eab6f8SAnton Blanchard * 29641eab6f8SAnton Blanchard * With form 1 affinity the first integer is the most significant 29741eab6f8SAnton Blanchard * NUMA boundary and the following are progressively less significant 29841eab6f8SAnton Blanchard * boundaries. There can be more than one level of NUMA. 299ab1f9dacSPaul Mackerras */ 300e70606ebSMichael Ellerman distance_ref_points = of_get_property(root, 30141eab6f8SAnton Blanchard "ibm,associativity-reference-points", 30241eab6f8SAnton Blanchard &distance_ref_points_depth); 303ab1f9dacSPaul Mackerras 30441eab6f8SAnton Blanchard if (!distance_ref_points) { 30541eab6f8SAnton Blanchard dbg("NUMA: ibm,associativity-reference-points not found.\n"); 30641eab6f8SAnton Blanchard goto err; 30741eab6f8SAnton Blanchard } 30841eab6f8SAnton Blanchard 30941eab6f8SAnton Blanchard distance_ref_points_depth /= sizeof(int); 31041eab6f8SAnton Blanchard 3118002b0c5SNathan Fontenot if (firmware_has_feature(FW_FEATURE_OPAL) || 3128002b0c5SNathan Fontenot firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) { 313bc8449ccSAnton Blanchard dbg("Using form 1 affinity\n"); 31441eab6f8SAnton Blanchard form1_affinity = 1; 3154b83c330SAnton Blanchard } 3165b958a7eSGavin Shan 31741eab6f8SAnton Blanchard if (form1_affinity) { 318b08a2a12SAlistair Popple depth = of_read_number(distance_ref_points, 1); 319ab1f9dacSPaul Mackerras } else { 32041eab6f8SAnton Blanchard if (distance_ref_points_depth < 2) { 32141eab6f8SAnton Blanchard printk(KERN_WARNING "NUMA: " 32241eab6f8SAnton Blanchard "short ibm,associativity-reference-points\n"); 32341eab6f8SAnton Blanchard goto err; 324ab1f9dacSPaul Mackerras } 325ab1f9dacSPaul Mackerras 326b08a2a12SAlistair Popple depth = of_read_number(&distance_ref_points[1], 1); 32741eab6f8SAnton Blanchard } 32841eab6f8SAnton Blanchard 32941eab6f8SAnton Blanchard /* 33041eab6f8SAnton Blanchard * Warn and cap if the hardware supports more than 33141eab6f8SAnton Blanchard * MAX_DISTANCE_REF_POINTS domains. 33241eab6f8SAnton Blanchard */ 33341eab6f8SAnton Blanchard if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) { 33441eab6f8SAnton Blanchard printk(KERN_WARNING "NUMA: distance array capped at " 33541eab6f8SAnton Blanchard "%d entries\n", MAX_DISTANCE_REF_POINTS); 33641eab6f8SAnton Blanchard distance_ref_points_depth = MAX_DISTANCE_REF_POINTS; 33741eab6f8SAnton Blanchard } 33841eab6f8SAnton Blanchard 339e70606ebSMichael Ellerman of_node_put(root); 340ab1f9dacSPaul Mackerras return depth; 34141eab6f8SAnton Blanchard 34241eab6f8SAnton Blanchard err: 343e70606ebSMichael Ellerman of_node_put(root); 34441eab6f8SAnton Blanchard return -1; 345ab1f9dacSPaul Mackerras } 346ab1f9dacSPaul Mackerras 34784c9fdd1SMike Kravetz static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells) 348ab1f9dacSPaul Mackerras { 349ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 350ab1f9dacSPaul Mackerras 351ab1f9dacSPaul Mackerras memory = of_find_node_by_type(memory, "memory"); 35254c23310SPaul Mackerras if (!memory) 35384c9fdd1SMike Kravetz panic("numa.c: No memory nodes found!"); 35454c23310SPaul Mackerras 355a8bda5ddSStephen Rothwell *n_addr_cells = of_n_addr_cells(memory); 3569213feeaSStephen Rothwell *n_size_cells = of_n_size_cells(memory); 35784c9fdd1SMike Kravetz of_node_put(memory); 358ab1f9dacSPaul Mackerras } 359ab1f9dacSPaul Mackerras 360b08a2a12SAlistair Popple static unsigned long read_n_cells(int n, const __be32 **buf) 361ab1f9dacSPaul Mackerras { 362ab1f9dacSPaul Mackerras unsigned long result = 0; 363ab1f9dacSPaul Mackerras 364ab1f9dacSPaul Mackerras while (n--) { 365b08a2a12SAlistair Popple result = (result << 32) | of_read_number(*buf, 1); 366ab1f9dacSPaul Mackerras (*buf)++; 367ab1f9dacSPaul Mackerras } 368ab1f9dacSPaul Mackerras return result; 369ab1f9dacSPaul Mackerras } 370ab1f9dacSPaul Mackerras 3718342681dSNathan Fontenot struct assoc_arrays { 3728342681dSNathan Fontenot u32 n_arrays; 3738342681dSNathan Fontenot u32 array_sz; 374b08a2a12SAlistair Popple const __be32 *arrays; 3758342681dSNathan Fontenot }; 3768342681dSNathan Fontenot 3778342681dSNathan Fontenot /* 37825985edcSLucas De Marchi * Retrieve and validate the list of associativity arrays for drconf 3798342681dSNathan Fontenot * memory from the ibm,associativity-lookup-arrays property of the 3808342681dSNathan Fontenot * device tree.. 3818342681dSNathan Fontenot * 3828342681dSNathan Fontenot * The layout of the ibm,associativity-lookup-arrays property is a number N 3838342681dSNathan Fontenot * indicating the number of associativity arrays, followed by a number M 3848342681dSNathan Fontenot * indicating the size of each associativity array, followed by a list 3858342681dSNathan Fontenot * of N associativity arrays. 3868342681dSNathan Fontenot */ 38735f80debSNathan Fontenot static int of_get_assoc_arrays(struct assoc_arrays *aa) 3888342681dSNathan Fontenot { 38935f80debSNathan Fontenot struct device_node *memory; 390b08a2a12SAlistair Popple const __be32 *prop; 3918342681dSNathan Fontenot u32 len; 3928342681dSNathan Fontenot 39335f80debSNathan Fontenot memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); 39435f80debSNathan Fontenot if (!memory) 3958342681dSNathan Fontenot return -1; 3968342681dSNathan Fontenot 39735f80debSNathan Fontenot prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len); 39835f80debSNathan Fontenot if (!prop || len < 2 * sizeof(unsigned int)) { 39935f80debSNathan Fontenot of_node_put(memory); 40035f80debSNathan Fontenot return -1; 40135f80debSNathan Fontenot } 40235f80debSNathan Fontenot 403b08a2a12SAlistair Popple aa->n_arrays = of_read_number(prop++, 1); 404b08a2a12SAlistair Popple aa->array_sz = of_read_number(prop++, 1); 4058342681dSNathan Fontenot 40635f80debSNathan Fontenot of_node_put(memory); 40735f80debSNathan Fontenot 40842b2aa86SJustin P. Mattock /* Now that we know the number of arrays and size of each array, 4098342681dSNathan Fontenot * revalidate the size of the property read in. 4108342681dSNathan Fontenot */ 4118342681dSNathan Fontenot if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int)) 4128342681dSNathan Fontenot return -1; 4138342681dSNathan Fontenot 4148342681dSNathan Fontenot aa->arrays = prop; 4158342681dSNathan Fontenot return 0; 4168342681dSNathan Fontenot } 4178342681dSNathan Fontenot 4188342681dSNathan Fontenot /* 4198342681dSNathan Fontenot * This is like of_node_to_nid_single() for memory represented in the 4208342681dSNathan Fontenot * ibm,dynamic-reconfiguration-memory node. 4218342681dSNathan Fontenot */ 422514a9cb3SNathan Fontenot static int of_drconf_to_nid_single(struct drmem_lmb *lmb) 4238342681dSNathan Fontenot { 424b88fc309SNathan Fontenot struct assoc_arrays aa = { .arrays = NULL }; 4258342681dSNathan Fontenot int default_nid = 0; 4268342681dSNathan Fontenot int nid = default_nid; 427b88fc309SNathan Fontenot int rc, index; 4288342681dSNathan Fontenot 429b88fc309SNathan Fontenot rc = of_get_assoc_arrays(&aa); 430b88fc309SNathan Fontenot if (rc) 431b88fc309SNathan Fontenot return default_nid; 432b88fc309SNathan Fontenot 433b88fc309SNathan Fontenot if (min_common_depth > 0 && min_common_depth <= aa.array_sz && 434514a9cb3SNathan Fontenot !(lmb->flags & DRCONF_MEM_AI_INVALID) && 435514a9cb3SNathan Fontenot lmb->aa_index < aa.n_arrays) { 436514a9cb3SNathan Fontenot index = lmb->aa_index * aa.array_sz + min_common_depth - 1; 437b88fc309SNathan Fontenot nid = of_read_number(&aa.arrays[index], 1); 4388342681dSNathan Fontenot 4398342681dSNathan Fontenot if (nid == 0xffff || nid >= MAX_NUMNODES) 4408342681dSNathan Fontenot nid = default_nid; 4411d805440SNikunj A Dadhania 4421d805440SNikunj A Dadhania if (nid > 0) { 443514a9cb3SNathan Fontenot index = lmb->aa_index * aa.array_sz; 4441d805440SNikunj A Dadhania initialize_distance_lookup_table(nid, 445b88fc309SNathan Fontenot &aa.arrays[index]); 4461d805440SNikunj A Dadhania } 4478342681dSNathan Fontenot } 4488342681dSNathan Fontenot 4498342681dSNathan Fontenot return nid; 4508342681dSNathan Fontenot } 4518342681dSNathan Fontenot 452ab1f9dacSPaul Mackerras /* 453ab1f9dacSPaul Mackerras * Figure out to which domain a cpu belongs and stick it there. 454ab1f9dacSPaul Mackerras * Return the id of the domain used. 455ab1f9dacSPaul Mackerras */ 456061d19f2SPaul Gortmaker static int numa_setup_cpu(unsigned long lcpu) 457ab1f9dacSPaul Mackerras { 458297cf502SLi Zhong int nid = -1; 459d4edc5b6SSrivatsa S. Bhat struct device_node *cpu; 460d4edc5b6SSrivatsa S. Bhat 461d4edc5b6SSrivatsa S. Bhat /* 462d4edc5b6SSrivatsa S. Bhat * If a valid cpu-to-node mapping is already available, use it 463d4edc5b6SSrivatsa S. Bhat * directly instead of querying the firmware, since it represents 464d4edc5b6SSrivatsa S. Bhat * the most recent mapping notified to us by the platform (eg: VPHN). 465d4edc5b6SSrivatsa S. Bhat */ 466d4edc5b6SSrivatsa S. Bhat if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) { 467d4edc5b6SSrivatsa S. Bhat map_cpu_to_node(lcpu, nid); 468d4edc5b6SSrivatsa S. Bhat return nid; 469d4edc5b6SSrivatsa S. Bhat } 470d4edc5b6SSrivatsa S. Bhat 471d4edc5b6SSrivatsa S. Bhat cpu = of_get_cpu_node(lcpu, NULL); 472ab1f9dacSPaul Mackerras 473ab1f9dacSPaul Mackerras if (!cpu) { 474ab1f9dacSPaul Mackerras WARN_ON(1); 475297cf502SLi Zhong if (cpu_present(lcpu)) 476297cf502SLi Zhong goto out_present; 477297cf502SLi Zhong else 478ab1f9dacSPaul Mackerras goto out; 479ab1f9dacSPaul Mackerras } 480ab1f9dacSPaul Mackerras 481953039c8SJeremy Kerr nid = of_node_to_nid_single(cpu); 482ab1f9dacSPaul Mackerras 483297cf502SLi Zhong out_present: 484ea05ba7cSMichael Bringmann if (nid < 0 || !node_possible(nid)) 48572c33688SH Hartley Sweeten nid = first_online_node; 486297cf502SLi Zhong 487cf950b7aSNathan Lynch map_cpu_to_node(lcpu, nid); 488ab1f9dacSPaul Mackerras of_node_put(cpu); 489297cf502SLi Zhong out: 490cf950b7aSNathan Lynch return nid; 491ab1f9dacSPaul Mackerras } 492ab1f9dacSPaul Mackerras 49368fb18aaSSrivatsa S. Bhat static void verify_cpu_node_mapping(int cpu, int node) 49468fb18aaSSrivatsa S. Bhat { 49568fb18aaSSrivatsa S. Bhat int base, sibling, i; 49668fb18aaSSrivatsa S. Bhat 49768fb18aaSSrivatsa S. Bhat /* Verify that all the threads in the core belong to the same node */ 49868fb18aaSSrivatsa S. Bhat base = cpu_first_thread_sibling(cpu); 49968fb18aaSSrivatsa S. Bhat 50068fb18aaSSrivatsa S. Bhat for (i = 0; i < threads_per_core; i++) { 50168fb18aaSSrivatsa S. Bhat sibling = base + i; 50268fb18aaSSrivatsa S. Bhat 50368fb18aaSSrivatsa S. Bhat if (sibling == cpu || cpu_is_offline(sibling)) 50468fb18aaSSrivatsa S. Bhat continue; 50568fb18aaSSrivatsa S. Bhat 50668fb18aaSSrivatsa S. Bhat if (cpu_to_node(sibling) != node) { 50768fb18aaSSrivatsa S. Bhat WARN(1, "CPU thread siblings %d and %d don't belong" 50868fb18aaSSrivatsa S. Bhat " to the same node!\n", cpu, sibling); 50968fb18aaSSrivatsa S. Bhat break; 51068fb18aaSSrivatsa S. Bhat } 51168fb18aaSSrivatsa S. Bhat } 51268fb18aaSSrivatsa S. Bhat } 51368fb18aaSSrivatsa S. Bhat 514bdab88e0SSebastian Andrzej Siewior /* Must run before sched domains notifier. */ 515bdab88e0SSebastian Andrzej Siewior static int ppc_numa_cpu_prepare(unsigned int cpu) 516ab1f9dacSPaul Mackerras { 517bdab88e0SSebastian Andrzej Siewior int nid; 518ab1f9dacSPaul Mackerras 519bdab88e0SSebastian Andrzej Siewior nid = numa_setup_cpu(cpu); 520bdab88e0SSebastian Andrzej Siewior verify_cpu_node_mapping(cpu, nid); 521bdab88e0SSebastian Andrzej Siewior return 0; 522ab1f9dacSPaul Mackerras } 523bdab88e0SSebastian Andrzej Siewior 524bdab88e0SSebastian Andrzej Siewior static int ppc_numa_cpu_dead(unsigned int cpu) 525bdab88e0SSebastian Andrzej Siewior { 526bdab88e0SSebastian Andrzej Siewior #ifdef CONFIG_HOTPLUG_CPU 527bdab88e0SSebastian Andrzej Siewior unmap_cpu_from_node(cpu); 528bdab88e0SSebastian Andrzej Siewior #endif 529bdab88e0SSebastian Andrzej Siewior return 0; 530ab1f9dacSPaul Mackerras } 531ab1f9dacSPaul Mackerras 532ab1f9dacSPaul Mackerras /* 533ab1f9dacSPaul Mackerras * Check and possibly modify a memory region to enforce the memory limit. 534ab1f9dacSPaul Mackerras * 535ab1f9dacSPaul Mackerras * Returns the size the region should have to enforce the memory limit. 536ab1f9dacSPaul Mackerras * This will either be the original value of size, a truncated value, 537ab1f9dacSPaul Mackerras * or zero. If the returned value of size is 0 the region should be 53825985edcSLucas De Marchi * discarded as it lies wholly above the memory limit. 539ab1f9dacSPaul Mackerras */ 54045fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start, 54145fb6ceaSAnton Blanchard unsigned long size) 542ab1f9dacSPaul Mackerras { 543ab1f9dacSPaul Mackerras /* 54495f72d1eSYinghai Lu * We use memblock_end_of_DRAM() in here instead of memory_limit because 545ab1f9dacSPaul Mackerras * we've already adjusted it for the limit and it takes care of 546fe55249dSMilton Miller * having memory holes below the limit. Also, in the case of 547fe55249dSMilton Miller * iommu_is_off, memory_limit is not set but is implicitly enforced. 548ab1f9dacSPaul Mackerras */ 549ab1f9dacSPaul Mackerras 55095f72d1eSYinghai Lu if (start + size <= memblock_end_of_DRAM()) 551ab1f9dacSPaul Mackerras return size; 552ab1f9dacSPaul Mackerras 55395f72d1eSYinghai Lu if (start >= memblock_end_of_DRAM()) 554ab1f9dacSPaul Mackerras return 0; 555ab1f9dacSPaul Mackerras 55695f72d1eSYinghai Lu return memblock_end_of_DRAM() - start; 557ab1f9dacSPaul Mackerras } 558ab1f9dacSPaul Mackerras 5590204568aSPaul Mackerras /* 560cf00085dSChandru * Reads the counter for a given entry in 561cf00085dSChandru * linux,drconf-usable-memory property 562cf00085dSChandru */ 563b08a2a12SAlistair Popple static inline int __init read_usm_ranges(const __be32 **usm) 564cf00085dSChandru { 565cf00085dSChandru /* 5663fdfd990SBenjamin Herrenschmidt * For each lmb in ibm,dynamic-memory a corresponding 567cf00085dSChandru * entry in linux,drconf-usable-memory property contains 568cf00085dSChandru * a counter followed by that many (base, size) duple. 569cf00085dSChandru * read the counter from linux,drconf-usable-memory 570cf00085dSChandru */ 571cf00085dSChandru return read_n_cells(n_mem_size_cells, usm); 572cf00085dSChandru } 573cf00085dSChandru 574cf00085dSChandru /* 5750204568aSPaul Mackerras * Extract NUMA information from the ibm,dynamic-reconfiguration-memory 5760204568aSPaul Mackerras * node. This assumes n_mem_{addr,size}_cells have been set. 5770204568aSPaul Mackerras */ 578514a9cb3SNathan Fontenot static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, 579514a9cb3SNathan Fontenot const __be32 **usm) 5800204568aSPaul Mackerras { 581514a9cb3SNathan Fontenot unsigned int ranges, is_kexec_kdump = 0; 582514a9cb3SNathan Fontenot unsigned long base, size, sz; 5838342681dSNathan Fontenot int nid; 5840204568aSPaul Mackerras 585514a9cb3SNathan Fontenot /* 586514a9cb3SNathan Fontenot * Skip this block if the reserved bit is set in flags (0x80) 587514a9cb3SNathan Fontenot * or if the block is not assigned to this partition (0x8) 588514a9cb3SNathan Fontenot */ 589514a9cb3SNathan Fontenot if ((lmb->flags & DRCONF_MEM_RESERVED) 590514a9cb3SNathan Fontenot || !(lmb->flags & DRCONF_MEM_ASSIGNED)) 5910204568aSPaul Mackerras return; 5920204568aSPaul Mackerras 593514a9cb3SNathan Fontenot if (*usm) 594cf00085dSChandru is_kexec_kdump = 1; 595cf00085dSChandru 596514a9cb3SNathan Fontenot base = lmb->base_addr; 597514a9cb3SNathan Fontenot size = drmem_lmb_size(); 598cf00085dSChandru ranges = 1; 5998342681dSNathan Fontenot 600cf00085dSChandru if (is_kexec_kdump) { 601514a9cb3SNathan Fontenot ranges = read_usm_ranges(usm); 602cf00085dSChandru if (!ranges) /* there are no (base, size) duple */ 603514a9cb3SNathan Fontenot return; 604cf00085dSChandru } 605514a9cb3SNathan Fontenot 606cf00085dSChandru do { 607cf00085dSChandru if (is_kexec_kdump) { 608514a9cb3SNathan Fontenot base = read_n_cells(n_mem_addr_cells, usm); 609514a9cb3SNathan Fontenot size = read_n_cells(n_mem_size_cells, usm); 610cf00085dSChandru } 611514a9cb3SNathan Fontenot 612514a9cb3SNathan Fontenot nid = of_drconf_to_nid_single(lmb); 613514a9cb3SNathan Fontenot fake_numa_create_new_node(((base + size) >> PAGE_SHIFT), 614cf00085dSChandru &nid); 615cf00085dSChandru node_set_online(nid); 616cf00085dSChandru sz = numa_enforce_memory_limit(base, size); 617cf00085dSChandru if (sz) 618514a9cb3SNathan Fontenot memblock_set_node(base, sz, &memblock.memory, nid); 619cf00085dSChandru } while (--ranges); 6200204568aSPaul Mackerras } 6210204568aSPaul Mackerras 622ab1f9dacSPaul Mackerras static int __init parse_numa_properties(void) 623ab1f9dacSPaul Mackerras { 62494db7c5eSAnton Blanchard struct device_node *memory; 625482ec7c4SNathan Lynch int default_nid = 0; 626ab1f9dacSPaul Mackerras unsigned long i; 627ab1f9dacSPaul Mackerras 628ab1f9dacSPaul Mackerras if (numa_enabled == 0) { 629ab1f9dacSPaul Mackerras printk(KERN_WARNING "NUMA disabled by user\n"); 630ab1f9dacSPaul Mackerras return -1; 631ab1f9dacSPaul Mackerras } 632ab1f9dacSPaul Mackerras 633ab1f9dacSPaul Mackerras min_common_depth = find_min_common_depth(); 634ab1f9dacSPaul Mackerras 635ab1f9dacSPaul Mackerras if (min_common_depth < 0) 636ab1f9dacSPaul Mackerras return min_common_depth; 637ab1f9dacSPaul Mackerras 638bf4b85b0SNathan Lynch dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); 639bf4b85b0SNathan Lynch 640ab1f9dacSPaul Mackerras /* 641482ec7c4SNathan Lynch * Even though we connect cpus to numa domains later in SMP 642482ec7c4SNathan Lynch * init, we need to know the node ids now. This is because 643482ec7c4SNathan Lynch * each node to be onlined must have NODE_DATA etc backing it. 644ab1f9dacSPaul Mackerras */ 645482ec7c4SNathan Lynch for_each_present_cpu(i) { 646dfbe93a2SAnton Blanchard struct device_node *cpu; 647cf950b7aSNathan Lynch int nid; 648ab1f9dacSPaul Mackerras 6498b16cd23SMilton Miller cpu = of_get_cpu_node(i, NULL); 650482ec7c4SNathan Lynch BUG_ON(!cpu); 651953039c8SJeremy Kerr nid = of_node_to_nid_single(cpu); 652ab1f9dacSPaul Mackerras of_node_put(cpu); 653ab1f9dacSPaul Mackerras 654482ec7c4SNathan Lynch /* 655482ec7c4SNathan Lynch * Don't fall back to default_nid yet -- we will plug 656482ec7c4SNathan Lynch * cpus into nodes once the memory scan has discovered 657482ec7c4SNathan Lynch * the topology. 658482ec7c4SNathan Lynch */ 659482ec7c4SNathan Lynch if (nid < 0) 660482ec7c4SNathan Lynch continue; 661482ec7c4SNathan Lynch node_set_online(nid); 662ab1f9dacSPaul Mackerras } 663ab1f9dacSPaul Mackerras 664237a0989SMike Kravetz get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); 66594db7c5eSAnton Blanchard 66694db7c5eSAnton Blanchard for_each_node_by_type(memory, "memory") { 667ab1f9dacSPaul Mackerras unsigned long start; 668ab1f9dacSPaul Mackerras unsigned long size; 669cf950b7aSNathan Lynch int nid; 670ab1f9dacSPaul Mackerras int ranges; 671b08a2a12SAlistair Popple const __be32 *memcell_buf; 672ab1f9dacSPaul Mackerras unsigned int len; 673ab1f9dacSPaul Mackerras 674e2eb6392SStephen Rothwell memcell_buf = of_get_property(memory, 675ba759485SMichael Ellerman "linux,usable-memory", &len); 676ba759485SMichael Ellerman if (!memcell_buf || len <= 0) 677e2eb6392SStephen Rothwell memcell_buf = of_get_property(memory, "reg", &len); 678ab1f9dacSPaul Mackerras if (!memcell_buf || len <= 0) 679ab1f9dacSPaul Mackerras continue; 680ab1f9dacSPaul Mackerras 681cc5d0189SBenjamin Herrenschmidt /* ranges in cell */ 682cc5d0189SBenjamin Herrenschmidt ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); 683ab1f9dacSPaul Mackerras new_range: 684ab1f9dacSPaul Mackerras /* these are order-sensitive, and modify the buffer pointer */ 685237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 686237a0989SMike Kravetz size = read_n_cells(n_mem_size_cells, &memcell_buf); 687ab1f9dacSPaul Mackerras 688482ec7c4SNathan Lynch /* 689482ec7c4SNathan Lynch * Assumption: either all memory nodes or none will 690482ec7c4SNathan Lynch * have associativity properties. If none, then 691482ec7c4SNathan Lynch * everything goes to default_nid. 692482ec7c4SNathan Lynch */ 693953039c8SJeremy Kerr nid = of_node_to_nid_single(memory); 694482ec7c4SNathan Lynch if (nid < 0) 695482ec7c4SNathan Lynch nid = default_nid; 6961daa6d08SBalbir Singh 6971daa6d08SBalbir Singh fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); 698482ec7c4SNathan Lynch node_set_online(nid); 699ab1f9dacSPaul Mackerras 7007656cd8eSReza Arbab size = numa_enforce_memory_limit(start, size); 7017656cd8eSReza Arbab if (size) 702e7e8de59STang Chen memblock_set_node(start, size, &memblock.memory, nid); 703ab1f9dacSPaul Mackerras 704ab1f9dacSPaul Mackerras if (--ranges) 705ab1f9dacSPaul Mackerras goto new_range; 706ab1f9dacSPaul Mackerras } 707ab1f9dacSPaul Mackerras 7080204568aSPaul Mackerras /* 709dfbe93a2SAnton Blanchard * Now do the same thing for each MEMBLOCK listed in the 710dfbe93a2SAnton Blanchard * ibm,dynamic-memory property in the 711dfbe93a2SAnton Blanchard * ibm,dynamic-reconfiguration-memory node. 7120204568aSPaul Mackerras */ 7130204568aSPaul Mackerras memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); 714514a9cb3SNathan Fontenot if (memory) { 715514a9cb3SNathan Fontenot walk_drmem_lmbs(memory, numa_setup_drmem_lmb); 716514a9cb3SNathan Fontenot of_node_put(memory); 717514a9cb3SNathan Fontenot } 7180204568aSPaul Mackerras 719ab1f9dacSPaul Mackerras return 0; 720ab1f9dacSPaul Mackerras } 721ab1f9dacSPaul Mackerras 722ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void) 723ab1f9dacSPaul Mackerras { 72495f72d1eSYinghai Lu unsigned long top_of_ram = memblock_end_of_DRAM(); 72595f72d1eSYinghai Lu unsigned long total_ram = memblock_phys_mem_size(); 726c67c3cb4SMel Gorman unsigned long start_pfn, end_pfn; 72728be7072SBenjamin Herrenschmidt unsigned int nid = 0; 72828be7072SBenjamin Herrenschmidt struct memblock_region *reg; 729ab1f9dacSPaul Mackerras 730e110b281SOlof Johansson printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", 731ab1f9dacSPaul Mackerras top_of_ram, total_ram); 732e110b281SOlof Johansson printk(KERN_DEBUG "Memory hole size: %ldMB\n", 733ab1f9dacSPaul Mackerras (top_of_ram - total_ram) >> 20); 734ab1f9dacSPaul Mackerras 73528be7072SBenjamin Herrenschmidt for_each_memblock(memory, reg) { 736c7fc2de0SYinghai Lu start_pfn = memblock_region_memory_base_pfn(reg); 737c7fc2de0SYinghai Lu end_pfn = memblock_region_memory_end_pfn(reg); 7381daa6d08SBalbir Singh 7391daa6d08SBalbir Singh fake_numa_create_new_node(end_pfn, &nid); 7401d7cfe18STejun Heo memblock_set_node(PFN_PHYS(start_pfn), 741e7e8de59STang Chen PFN_PHYS(end_pfn - start_pfn), 742e7e8de59STang Chen &memblock.memory, nid); 7431daa6d08SBalbir Singh node_set_online(nid); 744c67c3cb4SMel Gorman } 745ab1f9dacSPaul Mackerras } 746ab1f9dacSPaul Mackerras 7474b703a23SAnton Blanchard void __init dump_numa_cpu_topology(void) 7484b703a23SAnton Blanchard { 7494b703a23SAnton Blanchard unsigned int node; 7504b703a23SAnton Blanchard unsigned int cpu, count; 7514b703a23SAnton Blanchard 7524b703a23SAnton Blanchard if (min_common_depth == -1 || !numa_enabled) 7534b703a23SAnton Blanchard return; 7544b703a23SAnton Blanchard 7554b703a23SAnton Blanchard for_each_online_node(node) { 7568467801cSAneesh Kumar K.V pr_info("Node %d CPUs:", node); 7574b703a23SAnton Blanchard 7584b703a23SAnton Blanchard count = 0; 7594b703a23SAnton Blanchard /* 7604b703a23SAnton Blanchard * If we used a CPU iterator here we would miss printing 7614b703a23SAnton Blanchard * the holes in the cpumap. 7624b703a23SAnton Blanchard */ 76325863de0SAnton Blanchard for (cpu = 0; cpu < nr_cpu_ids; cpu++) { 76425863de0SAnton Blanchard if (cpumask_test_cpu(cpu, 76525863de0SAnton Blanchard node_to_cpumask_map[node])) { 7664b703a23SAnton Blanchard if (count == 0) 7678467801cSAneesh Kumar K.V pr_cont(" %u", cpu); 7684b703a23SAnton Blanchard ++count; 7694b703a23SAnton Blanchard } else { 7704b703a23SAnton Blanchard if (count > 1) 7718467801cSAneesh Kumar K.V pr_cont("-%u", cpu - 1); 7724b703a23SAnton Blanchard count = 0; 7734b703a23SAnton Blanchard } 7744b703a23SAnton Blanchard } 7754b703a23SAnton Blanchard 7764b703a23SAnton Blanchard if (count > 1) 7778467801cSAneesh Kumar K.V pr_cont("-%u", nr_cpu_ids - 1); 7788467801cSAneesh Kumar K.V pr_cont("\n"); 7794b703a23SAnton Blanchard } 7804b703a23SAnton Blanchard } 7814b703a23SAnton Blanchard 78210239733SAnton Blanchard /* Initialize NODE_DATA for a node on the local memory */ 78310239733SAnton Blanchard static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) 784ab1f9dacSPaul Mackerras { 78510239733SAnton Blanchard u64 spanned_pages = end_pfn - start_pfn; 78610239733SAnton Blanchard const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); 78710239733SAnton Blanchard u64 nd_pa; 78810239733SAnton Blanchard void *nd; 78910239733SAnton Blanchard int tnid; 790ab1f9dacSPaul Mackerras 79110239733SAnton Blanchard nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); 79210239733SAnton Blanchard nd = __va(nd_pa); 793ab1f9dacSPaul Mackerras 79410239733SAnton Blanchard /* report and initialize */ 79510239733SAnton Blanchard pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n", 79610239733SAnton Blanchard nd_pa, nd_pa + nd_size - 1); 79710239733SAnton Blanchard tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 79810239733SAnton Blanchard if (tnid != nid) 79910239733SAnton Blanchard pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid); 8008f64e1f2SJon Tollefson 80110239733SAnton Blanchard node_data[nid] = nd; 80210239733SAnton Blanchard memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); 80310239733SAnton Blanchard NODE_DATA(nid)->node_id = nid; 80410239733SAnton Blanchard NODE_DATA(nid)->node_start_pfn = start_pfn; 80510239733SAnton Blanchard NODE_DATA(nid)->node_spanned_pages = spanned_pages; 806ab1f9dacSPaul Mackerras } 8078f64e1f2SJon Tollefson 808a346137eSMichael Bringmann static void __init find_possible_nodes(void) 809a346137eSMichael Bringmann { 810a346137eSMichael Bringmann struct device_node *rtas; 811a346137eSMichael Bringmann u32 numnodes, i; 812a346137eSMichael Bringmann 813a346137eSMichael Bringmann if (min_common_depth <= 0) 814a346137eSMichael Bringmann return; 815a346137eSMichael Bringmann 816a346137eSMichael Bringmann rtas = of_find_node_by_path("/rtas"); 817a346137eSMichael Bringmann if (!rtas) 818a346137eSMichael Bringmann return; 819a346137eSMichael Bringmann 820a346137eSMichael Bringmann if (of_property_read_u32_index(rtas, 821a346137eSMichael Bringmann "ibm,max-associativity-domains", 822a346137eSMichael Bringmann min_common_depth, &numnodes)) 823a346137eSMichael Bringmann goto out; 824a346137eSMichael Bringmann 825a346137eSMichael Bringmann for (i = 0; i < numnodes; i++) { 826ea05ba7cSMichael Bringmann if (!node_possible(i)) 827a346137eSMichael Bringmann node_set(i, node_possible_map); 828a346137eSMichael Bringmann } 829a346137eSMichael Bringmann 830a346137eSMichael Bringmann out: 831a346137eSMichael Bringmann of_node_put(rtas); 832a346137eSMichael Bringmann } 833a346137eSMichael Bringmann 8349bd9be00SNicholas Piggin void __init mem_topology_setup(void) 8354a618669SDave Hansen { 8369bd9be00SNicholas Piggin int cpu; 8374a618669SDave Hansen 8384a618669SDave Hansen if (parse_numa_properties()) 8394a618669SDave Hansen setup_nonnuma(); 8404a618669SDave Hansen 8413af229f2SNishanth Aravamudan /* 842a346137eSMichael Bringmann * Modify the set of possible NUMA nodes to reflect information 843a346137eSMichael Bringmann * available about the set of online nodes, and the set of nodes 844a346137eSMichael Bringmann * that we expect to make use of for this platform's affinity 845a346137eSMichael Bringmann * calculations. 8463af229f2SNishanth Aravamudan */ 8473af229f2SNishanth Aravamudan nodes_and(node_possible_map, node_possible_map, node_online_map); 8483af229f2SNishanth Aravamudan 849a346137eSMichael Bringmann find_possible_nodes(); 850a346137eSMichael Bringmann 8519bd9be00SNicholas Piggin setup_node_to_cpumask_map(); 8529bd9be00SNicholas Piggin 8539bd9be00SNicholas Piggin reset_numa_cpu_lookup_table(); 8549bd9be00SNicholas Piggin 8559bd9be00SNicholas Piggin for_each_present_cpu(cpu) 8569bd9be00SNicholas Piggin numa_setup_cpu(cpu); 8579bd9be00SNicholas Piggin } 8589bd9be00SNicholas Piggin 8599bd9be00SNicholas Piggin void __init initmem_init(void) 8609bd9be00SNicholas Piggin { 8619bd9be00SNicholas Piggin int nid; 8629bd9be00SNicholas Piggin 8639bd9be00SNicholas Piggin max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; 8649bd9be00SNicholas Piggin max_pfn = max_low_pfn; 8659bd9be00SNicholas Piggin 8669bd9be00SNicholas Piggin memblock_dump_all(); 8679bd9be00SNicholas Piggin 8684a618669SDave Hansen for_each_online_node(nid) { 8694a618669SDave Hansen unsigned long start_pfn, end_pfn; 8704a618669SDave Hansen 8714a618669SDave Hansen get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); 87210239733SAnton Blanchard setup_node_data(nid, start_pfn, end_pfn); 8738f64e1f2SJon Tollefson sparse_memory_present_with_active_regions(nid); 874ab1f9dacSPaul Mackerras } 875d3f6204aSBenjamin Herrenschmidt 87621098b9eSAnton Blanchard sparse_init(); 87725863de0SAnton Blanchard 8782fabf084SNishanth Aravamudan /* 8792fabf084SNishanth Aravamudan * We need the numa_cpu_lookup_table to be accurate for all CPUs, 8802fabf084SNishanth Aravamudan * even before we online them, so that we can use cpu_to_{node,mem} 8812fabf084SNishanth Aravamudan * early in boot, cf. smp_prepare_cpus(). 882bdab88e0SSebastian Andrzej Siewior * _nocalls() + manual invocation is used because cpuhp is not yet 883bdab88e0SSebastian Andrzej Siewior * initialized for the boot CPU. 8842fabf084SNishanth Aravamudan */ 88573c1b41eSThomas Gleixner cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare", 886bdab88e0SSebastian Andrzej Siewior ppc_numa_cpu_prepare, ppc_numa_cpu_dead); 8874a618669SDave Hansen } 888ab1f9dacSPaul Mackerras 889ab1f9dacSPaul Mackerras static int __init early_numa(char *p) 890ab1f9dacSPaul Mackerras { 891ab1f9dacSPaul Mackerras if (!p) 892ab1f9dacSPaul Mackerras return 0; 893ab1f9dacSPaul Mackerras 894ab1f9dacSPaul Mackerras if (strstr(p, "off")) 895ab1f9dacSPaul Mackerras numa_enabled = 0; 896ab1f9dacSPaul Mackerras 897ab1f9dacSPaul Mackerras if (strstr(p, "debug")) 898ab1f9dacSPaul Mackerras numa_debug = 1; 899ab1f9dacSPaul Mackerras 9001daa6d08SBalbir Singh p = strstr(p, "fake="); 9011daa6d08SBalbir Singh if (p) 9021daa6d08SBalbir Singh cmdline = p + strlen("fake="); 9031daa6d08SBalbir Singh 904ab1f9dacSPaul Mackerras return 0; 905ab1f9dacSPaul Mackerras } 906ab1f9dacSPaul Mackerras early_param("numa", early_numa); 907237a0989SMike Kravetz 9082d73bae1SNishanth Aravamudan static bool topology_updates_enabled = true; 9092d73bae1SNishanth Aravamudan 9102d73bae1SNishanth Aravamudan static int __init early_topology_updates(char *p) 9112d73bae1SNishanth Aravamudan { 9122d73bae1SNishanth Aravamudan if (!p) 9132d73bae1SNishanth Aravamudan return 0; 9142d73bae1SNishanth Aravamudan 9152d73bae1SNishanth Aravamudan if (!strcmp(p, "off")) { 9162d73bae1SNishanth Aravamudan pr_info("Disabling topology updates\n"); 9172d73bae1SNishanth Aravamudan topology_updates_enabled = false; 9182d73bae1SNishanth Aravamudan } 9192d73bae1SNishanth Aravamudan 9202d73bae1SNishanth Aravamudan return 0; 9212d73bae1SNishanth Aravamudan } 9222d73bae1SNishanth Aravamudan early_param("topology_updates", early_topology_updates); 9232d73bae1SNishanth Aravamudan 924237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG 925237a0989SMike Kravetz /* 9260f16ef7fSNathan Fontenot * Find the node associated with a hot added memory section for 9270f16ef7fSNathan Fontenot * memory represented in the device tree by the property 9280f16ef7fSNathan Fontenot * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory. 9290db9360aSNathan Fontenot */ 930514a9cb3SNathan Fontenot static int hot_add_drconf_scn_to_nid(unsigned long scn_addr) 9310db9360aSNathan Fontenot { 932514a9cb3SNathan Fontenot struct drmem_lmb *lmb; 9333fdfd990SBenjamin Herrenschmidt unsigned long lmb_size; 9340f16ef7fSNathan Fontenot int nid = -1; 9350db9360aSNathan Fontenot 936514a9cb3SNathan Fontenot lmb_size = drmem_lmb_size(); 9370db9360aSNathan Fontenot 938514a9cb3SNathan Fontenot for_each_drmem_lmb(lmb) { 9390db9360aSNathan Fontenot /* skip this block if it is reserved or not assigned to 9400db9360aSNathan Fontenot * this partition */ 941514a9cb3SNathan Fontenot if ((lmb->flags & DRCONF_MEM_RESERVED) 942514a9cb3SNathan Fontenot || !(lmb->flags & DRCONF_MEM_ASSIGNED)) 9430db9360aSNathan Fontenot continue; 9440db9360aSNathan Fontenot 945514a9cb3SNathan Fontenot if ((scn_addr < lmb->base_addr) 946514a9cb3SNathan Fontenot || (scn_addr >= (lmb->base_addr + lmb_size))) 9470f16ef7fSNathan Fontenot continue; 9480db9360aSNathan Fontenot 949514a9cb3SNathan Fontenot nid = of_drconf_to_nid_single(lmb); 9500f16ef7fSNathan Fontenot break; 9510db9360aSNathan Fontenot } 9520db9360aSNathan Fontenot 9530f16ef7fSNathan Fontenot return nid; 9540db9360aSNathan Fontenot } 9550db9360aSNathan Fontenot 9560db9360aSNathan Fontenot /* 9570f16ef7fSNathan Fontenot * Find the node associated with a hot added memory section for memory 9580f16ef7fSNathan Fontenot * represented in the device tree as a node (i.e. memory@XXXX) for 95995f72d1eSYinghai Lu * each memblock. 960237a0989SMike Kravetz */ 961ec32dd66SRobert Jennings static int hot_add_node_scn_to_nid(unsigned long scn_addr) 962237a0989SMike Kravetz { 96394db7c5eSAnton Blanchard struct device_node *memory; 9640f16ef7fSNathan Fontenot int nid = -1; 965237a0989SMike Kravetz 96694db7c5eSAnton Blanchard for_each_node_by_type(memory, "memory") { 967237a0989SMike Kravetz unsigned long start, size; 968b226e462SMike Kravetz int ranges; 969b08a2a12SAlistair Popple const __be32 *memcell_buf; 970237a0989SMike Kravetz unsigned int len; 971237a0989SMike Kravetz 972e2eb6392SStephen Rothwell memcell_buf = of_get_property(memory, "reg", &len); 973237a0989SMike Kravetz if (!memcell_buf || len <= 0) 974237a0989SMike Kravetz continue; 975237a0989SMike Kravetz 976cc5d0189SBenjamin Herrenschmidt /* ranges in cell */ 977cc5d0189SBenjamin Herrenschmidt ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); 9780f16ef7fSNathan Fontenot 9790f16ef7fSNathan Fontenot while (ranges--) { 980237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 981237a0989SMike Kravetz size = read_n_cells(n_mem_size_cells, &memcell_buf); 982237a0989SMike Kravetz 9830f16ef7fSNathan Fontenot if ((scn_addr < start) || (scn_addr >= (start + size))) 9840f16ef7fSNathan Fontenot continue; 9850f16ef7fSNathan Fontenot 9860f16ef7fSNathan Fontenot nid = of_node_to_nid_single(memory); 9870f16ef7fSNathan Fontenot break; 9880f16ef7fSNathan Fontenot } 9890f16ef7fSNathan Fontenot 9900f16ef7fSNathan Fontenot if (nid >= 0) 9910f16ef7fSNathan Fontenot break; 9920f16ef7fSNathan Fontenot } 9930f16ef7fSNathan Fontenot 99460831842SAnton Blanchard of_node_put(memory); 99560831842SAnton Blanchard 9960db9360aSNathan Fontenot return nid; 997237a0989SMike Kravetz } 998237a0989SMike Kravetz 9990f16ef7fSNathan Fontenot /* 10000f16ef7fSNathan Fontenot * Find the node associated with a hot added memory section. Section 100195f72d1eSYinghai Lu * corresponds to a SPARSEMEM section, not an MEMBLOCK. It is assumed that 100295f72d1eSYinghai Lu * sections are fully contained within a single MEMBLOCK. 10030f16ef7fSNathan Fontenot */ 10040f16ef7fSNathan Fontenot int hot_add_scn_to_nid(unsigned long scn_addr) 10050f16ef7fSNathan Fontenot { 10060f16ef7fSNathan Fontenot struct device_node *memory = NULL; 10074a3bac4eSReza Arbab int nid; 10080f16ef7fSNathan Fontenot 10090f16ef7fSNathan Fontenot if (!numa_enabled || (min_common_depth < 0)) 101072c33688SH Hartley Sweeten return first_online_node; 10110f16ef7fSNathan Fontenot 10120f16ef7fSNathan Fontenot memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); 10130f16ef7fSNathan Fontenot if (memory) { 1014514a9cb3SNathan Fontenot nid = hot_add_drconf_scn_to_nid(scn_addr); 10150f16ef7fSNathan Fontenot of_node_put(memory); 10160f16ef7fSNathan Fontenot } else { 10170f16ef7fSNathan Fontenot nid = hot_add_node_scn_to_nid(scn_addr); 1018237a0989SMike Kravetz } 10190f16ef7fSNathan Fontenot 10202a8628d4SReza Arbab if (nid < 0 || !node_possible(nid)) 102172c33688SH Hartley Sweeten nid = first_online_node; 10220f16ef7fSNathan Fontenot 10230f16ef7fSNathan Fontenot return nid; 10240f16ef7fSNathan Fontenot } 10250f16ef7fSNathan Fontenot 1026cd34206eSNishanth Aravamudan static u64 hot_add_drconf_memory_max(void) 1027cd34206eSNishanth Aravamudan { 1028cd34206eSNishanth Aravamudan struct device_node *memory = NULL; 102945b64ee6SBharata B Rao struct device_node *dn = NULL; 103045b64ee6SBharata B Rao const __be64 *lrdr = NULL; 103145b64ee6SBharata B Rao 103245b64ee6SBharata B Rao dn = of_find_node_by_path("/rtas"); 103345b64ee6SBharata B Rao if (dn) { 103445b64ee6SBharata B Rao lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL); 103545b64ee6SBharata B Rao of_node_put(dn); 103645b64ee6SBharata B Rao if (lrdr) 103745b64ee6SBharata B Rao return be64_to_cpup(lrdr); 103845b64ee6SBharata B Rao } 1039cd34206eSNishanth Aravamudan 1040cd34206eSNishanth Aravamudan memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); 1041cd34206eSNishanth Aravamudan if (memory) { 1042cd34206eSNishanth Aravamudan of_node_put(memory); 1043514a9cb3SNathan Fontenot return drmem_lmb_memory_max(); 1044cd34206eSNishanth Aravamudan } 104545b64ee6SBharata B Rao return 0; 1046cd34206eSNishanth Aravamudan } 1047cd34206eSNishanth Aravamudan 1048cd34206eSNishanth Aravamudan /* 1049cd34206eSNishanth Aravamudan * memory_hotplug_max - return max address of memory that may be added 1050cd34206eSNishanth Aravamudan * 1051cd34206eSNishanth Aravamudan * This is currently only used on systems that support drconfig memory 1052cd34206eSNishanth Aravamudan * hotplug. 1053cd34206eSNishanth Aravamudan */ 1054cd34206eSNishanth Aravamudan u64 memory_hotplug_max(void) 1055cd34206eSNishanth Aravamudan { 1056cd34206eSNishanth Aravamudan return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM()); 1057cd34206eSNishanth Aravamudan } 1058237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */ 10599eff1a38SJesse Larrew 1060bd03403aSJesse Larrew /* Virtual Processor Home Node (VPHN) support */ 106139bf990eSJesse Larrew #ifdef CONFIG_PPC_SPLPAR 10624b6cfb2aSGreg Kurz 10634b6cfb2aSGreg Kurz #include "vphn.h" 10644b6cfb2aSGreg Kurz 106530c05350SNathan Fontenot struct topology_update_data { 106630c05350SNathan Fontenot struct topology_update_data *next; 106730c05350SNathan Fontenot unsigned int cpu; 106830c05350SNathan Fontenot int old_nid; 106930c05350SNathan Fontenot int new_nid; 107030c05350SNathan Fontenot }; 107130c05350SNathan Fontenot 1072cee5405dSMichael Bringmann #define TOPOLOGY_DEF_TIMER_SECS 60 1073cee5405dSMichael Bringmann 10745de16699SAnton Blanchard static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; 10759eff1a38SJesse Larrew static cpumask_t cpu_associativity_changes_mask; 10769eff1a38SJesse Larrew static int vphn_enabled; 10775d88aa85SJesse Larrew static int prrn_enabled; 10785d88aa85SJesse Larrew static void reset_topology_timer(void); 1079cee5405dSMichael Bringmann static int topology_timer_secs = 1; 108017f444c0SMichael Bringmann static int topology_inited; 10819eff1a38SJesse Larrew 10829eff1a38SJesse Larrew /* 1083cee5405dSMichael Bringmann * Change polling interval for associativity changes. 1084cee5405dSMichael Bringmann */ 1085cee5405dSMichael Bringmann int timed_topology_update(int nsecs) 1086cee5405dSMichael Bringmann { 1087cee5405dSMichael Bringmann if (vphn_enabled) { 1088cee5405dSMichael Bringmann if (nsecs > 0) 1089cee5405dSMichael Bringmann topology_timer_secs = nsecs; 1090cee5405dSMichael Bringmann else 1091cee5405dSMichael Bringmann topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS; 1092cee5405dSMichael Bringmann 1093cee5405dSMichael Bringmann reset_topology_timer(); 1094cee5405dSMichael Bringmann } 1095cee5405dSMichael Bringmann 1096cee5405dSMichael Bringmann return 0; 1097cee5405dSMichael Bringmann } 10989eff1a38SJesse Larrew 10999eff1a38SJesse Larrew /* 11009eff1a38SJesse Larrew * Store the current values of the associativity change counters in the 11019eff1a38SJesse Larrew * hypervisor. 11029eff1a38SJesse Larrew */ 11039eff1a38SJesse Larrew static void setup_cpu_associativity_change_counters(void) 11049eff1a38SJesse Larrew { 1105cd9d6cc7SJesse Larrew int cpu; 11069eff1a38SJesse Larrew 11075de16699SAnton Blanchard /* The VPHN feature supports a maximum of 8 reference points */ 11085de16699SAnton Blanchard BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8); 11095de16699SAnton Blanchard 11109eff1a38SJesse Larrew for_each_possible_cpu(cpu) { 1111cd9d6cc7SJesse Larrew int i; 11129eff1a38SJesse Larrew u8 *counts = vphn_cpu_change_counts[cpu]; 1113499dcd41SNicholas Piggin volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; 11149eff1a38SJesse Larrew 11155de16699SAnton Blanchard for (i = 0; i < distance_ref_points_depth; i++) 11169eff1a38SJesse Larrew counts[i] = hypervisor_counts[i]; 11179eff1a38SJesse Larrew } 11189eff1a38SJesse Larrew } 11199eff1a38SJesse Larrew 11209eff1a38SJesse Larrew /* 11219eff1a38SJesse Larrew * The hypervisor maintains a set of 8 associativity change counters in 11229eff1a38SJesse Larrew * the VPA of each cpu that correspond to the associativity levels in the 11239eff1a38SJesse Larrew * ibm,associativity-reference-points property. When an associativity 11249eff1a38SJesse Larrew * level changes, the corresponding counter is incremented. 11259eff1a38SJesse Larrew * 11269eff1a38SJesse Larrew * Set a bit in cpu_associativity_changes_mask for each cpu whose home 11279eff1a38SJesse Larrew * node associativity levels have changed. 11289eff1a38SJesse Larrew * 11299eff1a38SJesse Larrew * Returns the number of cpus with unhandled associativity changes. 11309eff1a38SJesse Larrew */ 11319eff1a38SJesse Larrew static int update_cpu_associativity_changes_mask(void) 11329eff1a38SJesse Larrew { 11335d88aa85SJesse Larrew int cpu; 11349eff1a38SJesse Larrew cpumask_t *changes = &cpu_associativity_changes_mask; 11359eff1a38SJesse Larrew 11369eff1a38SJesse Larrew for_each_possible_cpu(cpu) { 11379eff1a38SJesse Larrew int i, changed = 0; 11389eff1a38SJesse Larrew u8 *counts = vphn_cpu_change_counts[cpu]; 1139499dcd41SNicholas Piggin volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; 11409eff1a38SJesse Larrew 11415de16699SAnton Blanchard for (i = 0; i < distance_ref_points_depth; i++) { 1142d69043e8SAnton Blanchard if (hypervisor_counts[i] != counts[i]) { 11439eff1a38SJesse Larrew counts[i] = hypervisor_counts[i]; 11449eff1a38SJesse Larrew changed = 1; 11459eff1a38SJesse Larrew } 11469eff1a38SJesse Larrew } 11479eff1a38SJesse Larrew if (changed) { 11483be7db6aSRobert Jennings cpumask_or(changes, changes, cpu_sibling_mask(cpu)); 11493be7db6aSRobert Jennings cpu = cpu_last_thread_sibling(cpu); 11509eff1a38SJesse Larrew } 11519eff1a38SJesse Larrew } 11529eff1a38SJesse Larrew 11535d88aa85SJesse Larrew return cpumask_weight(changes); 11549eff1a38SJesse Larrew } 11559eff1a38SJesse Larrew 11569eff1a38SJesse Larrew /* 11579eff1a38SJesse Larrew * Retrieve the new associativity information for a virtual processor's 11589eff1a38SJesse Larrew * home node. 11599eff1a38SJesse Larrew */ 1160b08a2a12SAlistair Popple static long hcall_vphn(unsigned long cpu, __be32 *associativity) 11619eff1a38SJesse Larrew { 1162cd9d6cc7SJesse Larrew long rc; 11639eff1a38SJesse Larrew long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 11649eff1a38SJesse Larrew u64 flags = 1; 11659eff1a38SJesse Larrew int hwcpu = get_hard_smp_processor_id(cpu); 11669eff1a38SJesse Larrew 11679eff1a38SJesse Larrew rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); 11689eff1a38SJesse Larrew vphn_unpack_associativity(retbuf, associativity); 11699eff1a38SJesse Larrew 11709eff1a38SJesse Larrew return rc; 11719eff1a38SJesse Larrew } 11729eff1a38SJesse Larrew 11739eff1a38SJesse Larrew static long vphn_get_associativity(unsigned long cpu, 1174b08a2a12SAlistair Popple __be32 *associativity) 11759eff1a38SJesse Larrew { 1176cd9d6cc7SJesse Larrew long rc; 11779eff1a38SJesse Larrew 11789eff1a38SJesse Larrew rc = hcall_vphn(cpu, associativity); 11799eff1a38SJesse Larrew 11809eff1a38SJesse Larrew switch (rc) { 11819eff1a38SJesse Larrew case H_FUNCTION: 11829eff1a38SJesse Larrew printk(KERN_INFO 11839eff1a38SJesse Larrew "VPHN is not supported. Disabling polling...\n"); 11849eff1a38SJesse Larrew stop_topology_update(); 11859eff1a38SJesse Larrew break; 11869eff1a38SJesse Larrew case H_HARDWARE: 11879eff1a38SJesse Larrew printk(KERN_ERR 11889eff1a38SJesse Larrew "hcall_vphn() experienced a hardware fault " 11899eff1a38SJesse Larrew "preventing VPHN. Disabling polling...\n"); 11909eff1a38SJesse Larrew stop_topology_update(); 119117f444c0SMichael Bringmann break; 119217f444c0SMichael Bringmann case H_SUCCESS: 119317f444c0SMichael Bringmann dbg("VPHN hcall succeeded. Reset polling...\n"); 1194cee5405dSMichael Bringmann timed_topology_update(0); 119517f444c0SMichael Bringmann break; 11969eff1a38SJesse Larrew } 11979eff1a38SJesse Larrew 11989eff1a38SJesse Larrew return rc; 11999eff1a38SJesse Larrew } 12009eff1a38SJesse Larrew 1201e67e02a5SMichael Bringmann int find_and_online_cpu_nid(int cpu) 1202ea05ba7cSMichael Bringmann { 1203ea05ba7cSMichael Bringmann __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; 1204ea05ba7cSMichael Bringmann int new_nid; 1205ea05ba7cSMichael Bringmann 1206ea05ba7cSMichael Bringmann /* Use associativity from first thread for all siblings */ 1207ea05ba7cSMichael Bringmann vphn_get_associativity(cpu, associativity); 1208ea05ba7cSMichael Bringmann new_nid = associativity_to_nid(associativity); 1209ea05ba7cSMichael Bringmann if (new_nid < 0 || !node_possible(new_nid)) 1210ea05ba7cSMichael Bringmann new_nid = first_online_node; 1211ea05ba7cSMichael Bringmann 1212ea05ba7cSMichael Bringmann if (NODE_DATA(new_nid) == NULL) { 1213ea05ba7cSMichael Bringmann #ifdef CONFIG_MEMORY_HOTPLUG 1214ea05ba7cSMichael Bringmann /* 1215ea05ba7cSMichael Bringmann * Need to ensure that NODE_DATA is initialized for a node from 1216ea05ba7cSMichael Bringmann * available memory (see memblock_alloc_try_nid). If unable to 1217ea05ba7cSMichael Bringmann * init the node, then default to nearest node that has memory 1218ea05ba7cSMichael Bringmann * installed. 1219ea05ba7cSMichael Bringmann */ 1220ea05ba7cSMichael Bringmann if (try_online_node(new_nid)) 1221ea05ba7cSMichael Bringmann new_nid = first_online_node; 1222ea05ba7cSMichael Bringmann #else 1223ea05ba7cSMichael Bringmann /* 1224ea05ba7cSMichael Bringmann * Default to using the nearest node that has memory installed. 1225ea05ba7cSMichael Bringmann * Otherwise, it would be necessary to patch the kernel MM code 1226ea05ba7cSMichael Bringmann * to deal with more memoryless-node error conditions. 1227ea05ba7cSMichael Bringmann */ 1228ea05ba7cSMichael Bringmann new_nid = first_online_node; 1229ea05ba7cSMichael Bringmann #endif 1230ea05ba7cSMichael Bringmann } 1231ea05ba7cSMichael Bringmann 1232e67e02a5SMichael Bringmann pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__, 1233e67e02a5SMichael Bringmann cpu, new_nid); 1234ea05ba7cSMichael Bringmann return new_nid; 1235ea05ba7cSMichael Bringmann } 1236ea05ba7cSMichael Bringmann 12379eff1a38SJesse Larrew /* 123830c05350SNathan Fontenot * Update the CPU maps and sysfs entries for a single CPU when its NUMA 123930c05350SNathan Fontenot * characteristics change. This function doesn't perform any locking and is 124030c05350SNathan Fontenot * only safe to call from stop_machine(). 124130c05350SNathan Fontenot */ 124230c05350SNathan Fontenot static int update_cpu_topology(void *data) 124330c05350SNathan Fontenot { 124430c05350SNathan Fontenot struct topology_update_data *update; 124530c05350SNathan Fontenot unsigned long cpu; 124630c05350SNathan Fontenot 124730c05350SNathan Fontenot if (!data) 124830c05350SNathan Fontenot return -EINVAL; 124930c05350SNathan Fontenot 12503be7db6aSRobert Jennings cpu = smp_processor_id(); 125130c05350SNathan Fontenot 125230c05350SNathan Fontenot for (update = data; update; update = update->next) { 12532c0a33f9SNishanth Aravamudan int new_nid = update->new_nid; 125430c05350SNathan Fontenot if (cpu != update->cpu) 125530c05350SNathan Fontenot continue; 125630c05350SNathan Fontenot 125749f8d8c0SNishanth Aravamudan unmap_cpu_from_node(cpu); 12582c0a33f9SNishanth Aravamudan map_cpu_to_node(cpu, new_nid); 12592c0a33f9SNishanth Aravamudan set_cpu_numa_node(cpu, new_nid); 12602c0a33f9SNishanth Aravamudan set_cpu_numa_mem(cpu, local_memory_node(new_nid)); 1261176bbf14SJesse Larrew vdso_getcpu_init(); 126230c05350SNathan Fontenot } 126330c05350SNathan Fontenot 126430c05350SNathan Fontenot return 0; 126530c05350SNathan Fontenot } 126630c05350SNathan Fontenot 1267d4edc5b6SSrivatsa S. Bhat static int update_lookup_table(void *data) 1268d4edc5b6SSrivatsa S. Bhat { 1269d4edc5b6SSrivatsa S. Bhat struct topology_update_data *update; 1270d4edc5b6SSrivatsa S. Bhat 1271d4edc5b6SSrivatsa S. Bhat if (!data) 1272d4edc5b6SSrivatsa S. Bhat return -EINVAL; 1273d4edc5b6SSrivatsa S. Bhat 1274d4edc5b6SSrivatsa S. Bhat /* 1275d4edc5b6SSrivatsa S. Bhat * Upon topology update, the numa-cpu lookup table needs to be updated 1276d4edc5b6SSrivatsa S. Bhat * for all threads in the core, including offline CPUs, to ensure that 1277d4edc5b6SSrivatsa S. Bhat * future hotplug operations respect the cpu-to-node associativity 1278d4edc5b6SSrivatsa S. Bhat * properly. 1279d4edc5b6SSrivatsa S. Bhat */ 1280d4edc5b6SSrivatsa S. Bhat for (update = data; update; update = update->next) { 1281d4edc5b6SSrivatsa S. Bhat int nid, base, j; 1282d4edc5b6SSrivatsa S. Bhat 1283d4edc5b6SSrivatsa S. Bhat nid = update->new_nid; 1284d4edc5b6SSrivatsa S. Bhat base = cpu_first_thread_sibling(update->cpu); 1285d4edc5b6SSrivatsa S. Bhat 1286d4edc5b6SSrivatsa S. Bhat for (j = 0; j < threads_per_core; j++) { 1287d4edc5b6SSrivatsa S. Bhat update_numa_cpu_lookup_table(base + j, nid); 1288d4edc5b6SSrivatsa S. Bhat } 1289d4edc5b6SSrivatsa S. Bhat } 1290d4edc5b6SSrivatsa S. Bhat 1291d4edc5b6SSrivatsa S. Bhat return 0; 1292d4edc5b6SSrivatsa S. Bhat } 1293d4edc5b6SSrivatsa S. Bhat 129430c05350SNathan Fontenot /* 12959eff1a38SJesse Larrew * Update the node maps and sysfs entries for each cpu whose home node 129679c5fcebSJesse Larrew * has changed. Returns 1 when the topology has changed, and 0 otherwise. 12973e401f7aSThiago Jung Bauermann * 12983e401f7aSThiago Jung Bauermann * cpus_locked says whether we already hold cpu_hotplug_lock. 12999eff1a38SJesse Larrew */ 13003e401f7aSThiago Jung Bauermann int numa_update_cpu_topology(bool cpus_locked) 13019eff1a38SJesse Larrew { 13023be7db6aSRobert Jennings unsigned int cpu, sibling, changed = 0; 130330c05350SNathan Fontenot struct topology_update_data *updates, *ud; 1304176bbf14SJesse Larrew cpumask_t updated_cpus; 13058a25a2fdSKay Sievers struct device *dev; 13063be7db6aSRobert Jennings int weight, new_nid, i = 0; 130730c05350SNathan Fontenot 13082ea62630SSrikar Dronamraju if (!prrn_enabled && !vphn_enabled && topology_inited) 13092d73bae1SNishanth Aravamudan return 0; 13102d73bae1SNishanth Aravamudan 131130c05350SNathan Fontenot weight = cpumask_weight(&cpu_associativity_changes_mask); 131230c05350SNathan Fontenot if (!weight) 131330c05350SNathan Fontenot return 0; 131430c05350SNathan Fontenot 13156396bb22SKees Cook updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL); 131630c05350SNathan Fontenot if (!updates) 131730c05350SNathan Fontenot return 0; 13189eff1a38SJesse Larrew 1319176bbf14SJesse Larrew cpumask_clear(&updated_cpus); 13209eff1a38SJesse Larrew 1321104699c0SKOSAKI Motohiro for_each_cpu(cpu, &cpu_associativity_changes_mask) { 13223be7db6aSRobert Jennings /* 13233be7db6aSRobert Jennings * If siblings aren't flagged for changes, updates list 13243be7db6aSRobert Jennings * will be too short. Skip on this update and set for next 13253be7db6aSRobert Jennings * update. 13263be7db6aSRobert Jennings */ 13273be7db6aSRobert Jennings if (!cpumask_subset(cpu_sibling_mask(cpu), 13283be7db6aSRobert Jennings &cpu_associativity_changes_mask)) { 13293be7db6aSRobert Jennings pr_info("Sibling bits not set for associativity " 13303be7db6aSRobert Jennings "change, cpu%d\n", cpu); 13313be7db6aSRobert Jennings cpumask_or(&cpu_associativity_changes_mask, 13323be7db6aSRobert Jennings &cpu_associativity_changes_mask, 13333be7db6aSRobert Jennings cpu_sibling_mask(cpu)); 13343be7db6aSRobert Jennings cpu = cpu_last_thread_sibling(cpu); 13353be7db6aSRobert Jennings continue; 13363be7db6aSRobert Jennings } 13373be7db6aSRobert Jennings 1338ea05ba7cSMichael Bringmann new_nid = find_and_online_cpu_nid(cpu); 13399eff1a38SJesse Larrew 13403be7db6aSRobert Jennings if (new_nid == numa_cpu_lookup_table[cpu]) { 13413be7db6aSRobert Jennings cpumask_andnot(&cpu_associativity_changes_mask, 13423be7db6aSRobert Jennings &cpu_associativity_changes_mask, 13433be7db6aSRobert Jennings cpu_sibling_mask(cpu)); 134417f444c0SMichael Bringmann dbg("Assoc chg gives same node %d for cpu%d\n", 134517f444c0SMichael Bringmann new_nid, cpu); 13463be7db6aSRobert Jennings cpu = cpu_last_thread_sibling(cpu); 13473be7db6aSRobert Jennings continue; 13483be7db6aSRobert Jennings } 13499eff1a38SJesse Larrew 13503be7db6aSRobert Jennings for_each_cpu(sibling, cpu_sibling_mask(cpu)) { 13513be7db6aSRobert Jennings ud = &updates[i++]; 13528bc93149SMichael Bringmann ud->next = &updates[i]; 13533be7db6aSRobert Jennings ud->cpu = sibling; 13543be7db6aSRobert Jennings ud->new_nid = new_nid; 13553be7db6aSRobert Jennings ud->old_nid = numa_cpu_lookup_table[sibling]; 13563be7db6aSRobert Jennings cpumask_set_cpu(sibling, &updated_cpus); 135730c05350SNathan Fontenot } 13583be7db6aSRobert Jennings cpu = cpu_last_thread_sibling(cpu); 13593be7db6aSRobert Jennings } 13609eff1a38SJesse Larrew 13618bc93149SMichael Bringmann /* 13628bc93149SMichael Bringmann * Prevent processing of 'updates' from overflowing array 13638bc93149SMichael Bringmann * where last entry filled in a 'next' pointer. 13648bc93149SMichael Bringmann */ 13658bc93149SMichael Bringmann if (i) 13668bc93149SMichael Bringmann updates[i-1].next = NULL; 13678bc93149SMichael Bringmann 13682d73bae1SNishanth Aravamudan pr_debug("Topology update for the following CPUs:\n"); 13692d73bae1SNishanth Aravamudan if (cpumask_weight(&updated_cpus)) { 13702d73bae1SNishanth Aravamudan for (ud = &updates[0]; ud; ud = ud->next) { 13712d73bae1SNishanth Aravamudan pr_debug("cpu %d moving from node %d " 13722d73bae1SNishanth Aravamudan "to %d\n", ud->cpu, 13732d73bae1SNishanth Aravamudan ud->old_nid, ud->new_nid); 13742d73bae1SNishanth Aravamudan } 13752d73bae1SNishanth Aravamudan } 13762d73bae1SNishanth Aravamudan 13779a013361SMichael Wang /* 13789a013361SMichael Wang * In cases where we have nothing to update (because the updates list 13799a013361SMichael Wang * is too short or because the new topology is same as the old one), 13809a013361SMichael Wang * skip invoking update_cpu_topology() via stop-machine(). This is 13819a013361SMichael Wang * necessary (and not just a fast-path optimization) since stop-machine 13829a013361SMichael Wang * can end up electing a random CPU to run update_cpu_topology(), and 13839a013361SMichael Wang * thus trick us into setting up incorrect cpu-node mappings (since 13849a013361SMichael Wang * 'updates' is kzalloc()'ed). 13859a013361SMichael Wang * 13869a013361SMichael Wang * And for the similar reason, we will skip all the following updating. 13879a013361SMichael Wang */ 13889a013361SMichael Wang if (!cpumask_weight(&updated_cpus)) 13899a013361SMichael Wang goto out; 13909a013361SMichael Wang 13913e401f7aSThiago Jung Bauermann if (cpus_locked) 13923e401f7aSThiago Jung Bauermann stop_machine_cpuslocked(update_cpu_topology, &updates[0], 13933e401f7aSThiago Jung Bauermann &updated_cpus); 13943e401f7aSThiago Jung Bauermann else 1395176bbf14SJesse Larrew stop_machine(update_cpu_topology, &updates[0], &updated_cpus); 139630c05350SNathan Fontenot 1397d4edc5b6SSrivatsa S. Bhat /* 1398d4edc5b6SSrivatsa S. Bhat * Update the numa-cpu lookup table with the new mappings, even for 1399d4edc5b6SSrivatsa S. Bhat * offline CPUs. It is best to perform this update from the stop- 1400d4edc5b6SSrivatsa S. Bhat * machine context. 1401d4edc5b6SSrivatsa S. Bhat */ 14023e401f7aSThiago Jung Bauermann if (cpus_locked) 14033e401f7aSThiago Jung Bauermann stop_machine_cpuslocked(update_lookup_table, &updates[0], 14043e401f7aSThiago Jung Bauermann cpumask_of(raw_smp_processor_id())); 14053e401f7aSThiago Jung Bauermann else 1406d4edc5b6SSrivatsa S. Bhat stop_machine(update_lookup_table, &updates[0], 1407d4edc5b6SSrivatsa S. Bhat cpumask_of(raw_smp_processor_id())); 1408d4edc5b6SSrivatsa S. Bhat 140930c05350SNathan Fontenot for (ud = &updates[0]; ud; ud = ud->next) { 1410dd023217SNathan Fontenot unregister_cpu_under_node(ud->cpu, ud->old_nid); 1411dd023217SNathan Fontenot register_cpu_under_node(ud->cpu, ud->new_nid); 1412dd023217SNathan Fontenot 141330c05350SNathan Fontenot dev = get_cpu_device(ud->cpu); 14148a25a2fdSKay Sievers if (dev) 14158a25a2fdSKay Sievers kobject_uevent(&dev->kobj, KOBJ_CHANGE); 141630c05350SNathan Fontenot cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask); 141779c5fcebSJesse Larrew changed = 1; 14189eff1a38SJesse Larrew } 14199eff1a38SJesse Larrew 14209a013361SMichael Wang out: 142130c05350SNathan Fontenot kfree(updates); 142279c5fcebSJesse Larrew return changed; 14239eff1a38SJesse Larrew } 14249eff1a38SJesse Larrew 14253e401f7aSThiago Jung Bauermann int arch_update_cpu_topology(void) 14263e401f7aSThiago Jung Bauermann { 14273e401f7aSThiago Jung Bauermann return numa_update_cpu_topology(true); 14283e401f7aSThiago Jung Bauermann } 14293e401f7aSThiago Jung Bauermann 14309eff1a38SJesse Larrew static void topology_work_fn(struct work_struct *work) 14319eff1a38SJesse Larrew { 14329eff1a38SJesse Larrew rebuild_sched_domains(); 14339eff1a38SJesse Larrew } 14349eff1a38SJesse Larrew static DECLARE_WORK(topology_work, topology_work_fn); 14359eff1a38SJesse Larrew 1436ec32dd66SRobert Jennings static void topology_schedule_update(void) 14379eff1a38SJesse Larrew { 14389eff1a38SJesse Larrew schedule_work(&topology_work); 14399eff1a38SJesse Larrew } 14409eff1a38SJesse Larrew 1441df7e828cSKees Cook static void topology_timer_fn(struct timer_list *unused) 14429eff1a38SJesse Larrew { 14435d88aa85SJesse Larrew if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask)) 14445d88aa85SJesse Larrew topology_schedule_update(); 14455d88aa85SJesse Larrew else if (vphn_enabled) { 14469eff1a38SJesse Larrew if (update_cpu_associativity_changes_mask() > 0) 14479eff1a38SJesse Larrew topology_schedule_update(); 14485d88aa85SJesse Larrew reset_topology_timer(); 14495d88aa85SJesse Larrew } 14509eff1a38SJesse Larrew } 1451df7e828cSKees Cook static struct timer_list topology_timer; 14529eff1a38SJesse Larrew 14535d88aa85SJesse Larrew static void reset_topology_timer(void) 14549eff1a38SJesse Larrew { 1455*8604895aSMichael Bringmann if (vphn_enabled) 14565b0e2cb0SLinus Torvalds mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); 14579eff1a38SJesse Larrew } 14589eff1a38SJesse Larrew 1459601abdc3SNathan Fontenot #ifdef CONFIG_SMP 1460601abdc3SNathan Fontenot 14615d88aa85SJesse Larrew static void stage_topology_update(int core_id) 14625d88aa85SJesse Larrew { 14635d88aa85SJesse Larrew cpumask_or(&cpu_associativity_changes_mask, 14645d88aa85SJesse Larrew &cpu_associativity_changes_mask, cpu_sibling_mask(core_id)); 14655d88aa85SJesse Larrew reset_topology_timer(); 14665d88aa85SJesse Larrew } 14675d88aa85SJesse Larrew 14685d88aa85SJesse Larrew static int dt_update_callback(struct notifier_block *nb, 14695d88aa85SJesse Larrew unsigned long action, void *data) 14705d88aa85SJesse Larrew { 1471f5242e5aSGrant Likely struct of_reconfig_data *update = data; 14725d88aa85SJesse Larrew int rc = NOTIFY_DONE; 14735d88aa85SJesse Larrew 14745d88aa85SJesse Larrew switch (action) { 14755d88aa85SJesse Larrew case OF_RECONFIG_UPDATE_PROPERTY: 147630c05350SNathan Fontenot if (!of_prop_cmp(update->dn->type, "cpu") && 147730c05350SNathan Fontenot !of_prop_cmp(update->prop->name, "ibm,associativity")) { 14785d88aa85SJesse Larrew u32 core_id; 14795d88aa85SJesse Larrew of_property_read_u32(update->dn, "reg", &core_id); 14805d88aa85SJesse Larrew stage_topology_update(core_id); 14815d88aa85SJesse Larrew rc = NOTIFY_OK; 14825d88aa85SJesse Larrew } 14835d88aa85SJesse Larrew break; 14845d88aa85SJesse Larrew } 14855d88aa85SJesse Larrew 14865d88aa85SJesse Larrew return rc; 14875d88aa85SJesse Larrew } 14885d88aa85SJesse Larrew 14895d88aa85SJesse Larrew static struct notifier_block dt_update_nb = { 14905d88aa85SJesse Larrew .notifier_call = dt_update_callback, 14915d88aa85SJesse Larrew }; 14925d88aa85SJesse Larrew 1493601abdc3SNathan Fontenot #endif 1494601abdc3SNathan Fontenot 14959eff1a38SJesse Larrew /* 14965d88aa85SJesse Larrew * Start polling for associativity changes. 14979eff1a38SJesse Larrew */ 14989eff1a38SJesse Larrew int start_topology_update(void) 14999eff1a38SJesse Larrew { 15009eff1a38SJesse Larrew int rc = 0; 15019eff1a38SJesse Larrew 15025d88aa85SJesse Larrew if (firmware_has_feature(FW_FEATURE_PRRN)) { 15035d88aa85SJesse Larrew if (!prrn_enabled) { 15045d88aa85SJesse Larrew prrn_enabled = 1; 1505601abdc3SNathan Fontenot #ifdef CONFIG_SMP 15065d88aa85SJesse Larrew rc = of_reconfig_notifier_register(&dt_update_nb); 1507601abdc3SNathan Fontenot #endif 15085d88aa85SJesse Larrew } 1509a3496e91SMichael Bringmann } 1510a3496e91SMichael Bringmann if (firmware_has_feature(FW_FEATURE_VPHN) && 1511f13c13a0SAnton Blanchard lppaca_shared_proc(get_lppaca())) { 15125d88aa85SJesse Larrew if (!vphn_enabled) { 15139eff1a38SJesse Larrew vphn_enabled = 1; 15149eff1a38SJesse Larrew setup_cpu_associativity_change_counters(); 1515df7e828cSKees Cook timer_setup(&topology_timer, topology_timer_fn, 1516df7e828cSKees Cook TIMER_DEFERRABLE); 15175d88aa85SJesse Larrew reset_topology_timer(); 15185d88aa85SJesse Larrew } 15199eff1a38SJesse Larrew } 15209eff1a38SJesse Larrew 15219eff1a38SJesse Larrew return rc; 15229eff1a38SJesse Larrew } 15239eff1a38SJesse Larrew 15249eff1a38SJesse Larrew /* 15259eff1a38SJesse Larrew * Disable polling for VPHN associativity changes. 15269eff1a38SJesse Larrew */ 15279eff1a38SJesse Larrew int stop_topology_update(void) 15289eff1a38SJesse Larrew { 15295d88aa85SJesse Larrew int rc = 0; 15305d88aa85SJesse Larrew 15315d88aa85SJesse Larrew if (prrn_enabled) { 15325d88aa85SJesse Larrew prrn_enabled = 0; 1533601abdc3SNathan Fontenot #ifdef CONFIG_SMP 15345d88aa85SJesse Larrew rc = of_reconfig_notifier_unregister(&dt_update_nb); 1535601abdc3SNathan Fontenot #endif 1536a3496e91SMichael Bringmann } 1537a3496e91SMichael Bringmann if (vphn_enabled) { 15389eff1a38SJesse Larrew vphn_enabled = 0; 15395d88aa85SJesse Larrew rc = del_timer_sync(&topology_timer); 15409eff1a38SJesse Larrew } 15415d88aa85SJesse Larrew 15425d88aa85SJesse Larrew return rc; 1543ab1f9dacSPaul Mackerras } 1544e04fa612SNathan Fontenot 1545e04fa612SNathan Fontenot int prrn_is_enabled(void) 1546e04fa612SNathan Fontenot { 1547e04fa612SNathan Fontenot return prrn_enabled; 1548e04fa612SNathan Fontenot } 1549e04fa612SNathan Fontenot 15502ea62630SSrikar Dronamraju void __init shared_proc_topology_init(void) 15512ea62630SSrikar Dronamraju { 15522ea62630SSrikar Dronamraju if (lppaca_shared_proc(get_lppaca())) { 15532ea62630SSrikar Dronamraju bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask), 15542ea62630SSrikar Dronamraju nr_cpumask_bits); 15552ea62630SSrikar Dronamraju numa_update_cpu_topology(false); 15562ea62630SSrikar Dronamraju } 15572ea62630SSrikar Dronamraju } 15582ea62630SSrikar Dronamraju 1559e04fa612SNathan Fontenot static int topology_read(struct seq_file *file, void *v) 1560e04fa612SNathan Fontenot { 1561e04fa612SNathan Fontenot if (vphn_enabled || prrn_enabled) 1562e04fa612SNathan Fontenot seq_puts(file, "on\n"); 1563e04fa612SNathan Fontenot else 1564e04fa612SNathan Fontenot seq_puts(file, "off\n"); 1565e04fa612SNathan Fontenot 1566e04fa612SNathan Fontenot return 0; 1567e04fa612SNathan Fontenot } 1568e04fa612SNathan Fontenot 1569e04fa612SNathan Fontenot static int topology_open(struct inode *inode, struct file *file) 1570e04fa612SNathan Fontenot { 1571e04fa612SNathan Fontenot return single_open(file, topology_read, NULL); 1572e04fa612SNathan Fontenot } 1573e04fa612SNathan Fontenot 1574e04fa612SNathan Fontenot static ssize_t topology_write(struct file *file, const char __user *buf, 1575e04fa612SNathan Fontenot size_t count, loff_t *off) 1576e04fa612SNathan Fontenot { 1577e04fa612SNathan Fontenot char kbuf[4]; /* "on" or "off" plus null. */ 1578e04fa612SNathan Fontenot int read_len; 1579e04fa612SNathan Fontenot 1580e04fa612SNathan Fontenot read_len = count < 3 ? count : 3; 1581e04fa612SNathan Fontenot if (copy_from_user(kbuf, buf, read_len)) 1582e04fa612SNathan Fontenot return -EINVAL; 1583e04fa612SNathan Fontenot 1584e04fa612SNathan Fontenot kbuf[read_len] = '\0'; 1585e04fa612SNathan Fontenot 1586e04fa612SNathan Fontenot if (!strncmp(kbuf, "on", 2)) 1587e04fa612SNathan Fontenot start_topology_update(); 1588e04fa612SNathan Fontenot else if (!strncmp(kbuf, "off", 3)) 1589e04fa612SNathan Fontenot stop_topology_update(); 1590e04fa612SNathan Fontenot else 1591e04fa612SNathan Fontenot return -EINVAL; 1592e04fa612SNathan Fontenot 1593e04fa612SNathan Fontenot return count; 1594e04fa612SNathan Fontenot } 1595e04fa612SNathan Fontenot 1596e04fa612SNathan Fontenot static const struct file_operations topology_ops = { 1597e04fa612SNathan Fontenot .read = seq_read, 1598e04fa612SNathan Fontenot .write = topology_write, 1599e04fa612SNathan Fontenot .open = topology_open, 1600e04fa612SNathan Fontenot .release = single_release 1601e04fa612SNathan Fontenot }; 1602e04fa612SNathan Fontenot 1603e04fa612SNathan Fontenot static int topology_update_init(void) 1604e04fa612SNathan Fontenot { 16052d73bae1SNishanth Aravamudan /* Do not poll for changes if disabled at boot */ 16062d73bae1SNishanth Aravamudan if (topology_updates_enabled) 1607e04fa612SNathan Fontenot start_topology_update(); 16082d73bae1SNishanth Aravamudan 160917f444c0SMichael Bringmann if (vphn_enabled) 161017f444c0SMichael Bringmann topology_schedule_update(); 161117f444c0SMichael Bringmann 16122d15b9b4SNishanth Aravamudan if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops)) 16132d15b9b4SNishanth Aravamudan return -ENOMEM; 1614e04fa612SNathan Fontenot 161517f444c0SMichael Bringmann topology_inited = 1; 1616e04fa612SNathan Fontenot return 0; 1617e04fa612SNathan Fontenot } 1618e04fa612SNathan Fontenot device_initcall(topology_update_init); 161939bf990eSJesse Larrew #endif /* CONFIG_PPC_SPLPAR */ 1620