1ab1f9dacSPaul Mackerras /* 2ab1f9dacSPaul Mackerras * pSeries NUMA support 3ab1f9dacSPaul Mackerras * 4ab1f9dacSPaul Mackerras * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM 5ab1f9dacSPaul Mackerras * 6ab1f9dacSPaul Mackerras * This program is free software; you can redistribute it and/or 7ab1f9dacSPaul Mackerras * modify it under the terms of the GNU General Public License 8ab1f9dacSPaul Mackerras * as published by the Free Software Foundation; either version 9ab1f9dacSPaul Mackerras * 2 of the License, or (at your option) any later version. 10ab1f9dacSPaul Mackerras */ 112d73bae1SNishanth Aravamudan #define pr_fmt(fmt) "numa: " fmt 122d73bae1SNishanth Aravamudan 13ab1f9dacSPaul Mackerras #include <linux/threads.h> 14ab1f9dacSPaul Mackerras #include <linux/bootmem.h> 15ab1f9dacSPaul Mackerras #include <linux/init.h> 16ab1f9dacSPaul Mackerras #include <linux/mm.h> 17ab1f9dacSPaul Mackerras #include <linux/mmzone.h> 184b16f8e2SPaul Gortmaker #include <linux/export.h> 19ab1f9dacSPaul Mackerras #include <linux/nodemask.h> 20ab1f9dacSPaul Mackerras #include <linux/cpu.h> 21ab1f9dacSPaul Mackerras #include <linux/notifier.h> 2295f72d1eSYinghai Lu #include <linux/memblock.h> 236df1646eSMichael Ellerman #include <linux/of.h> 2406eccea6SDave Hansen #include <linux/pfn.h> 259eff1a38SJesse Larrew #include <linux/cpuset.h> 269eff1a38SJesse Larrew #include <linux/node.h> 2730c05350SNathan Fontenot #include <linux/stop_machine.h> 28e04fa612SNathan Fontenot #include <linux/proc_fs.h> 29e04fa612SNathan Fontenot #include <linux/seq_file.h> 30e04fa612SNathan Fontenot #include <linux/uaccess.h> 31191a7120SLinus Torvalds #include <linux/slab.h> 323be7db6aSRobert Jennings #include <asm/cputhreads.h> 3345fb6ceaSAnton Blanchard #include <asm/sparsemem.h> 34d9b2b2a2SDavid S. Miller #include <asm/prom.h> 352249ca9dSPaul Mackerras #include <asm/smp.h> 36d4edc5b6SSrivatsa S. Bhat #include <asm/cputhreads.h> 37d4edc5b6SSrivatsa S. 
Bhat #include <asm/topology.h> 389eff1a38SJesse Larrew #include <asm/firmware.h> 399eff1a38SJesse Larrew #include <asm/paca.h> 4039bf990eSJesse Larrew #include <asm/hvcall.h> 41ae3a197eSDavid Howells #include <asm/setup.h> 42176bbf14SJesse Larrew #include <asm/vdso.h> 43ab1f9dacSPaul Mackerras 44ab1f9dacSPaul Mackerras static int numa_enabled = 1; 45ab1f9dacSPaul Mackerras 461daa6d08SBalbir Singh static char *cmdline __initdata; 471daa6d08SBalbir Singh 48ab1f9dacSPaul Mackerras static int numa_debug; 49ab1f9dacSPaul Mackerras #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } 50ab1f9dacSPaul Mackerras 5145fb6ceaSAnton Blanchard int numa_cpu_lookup_table[NR_CPUS]; 5225863de0SAnton Blanchard cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; 53ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES]; 5445fb6ceaSAnton Blanchard 5545fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpu_lookup_table); 5625863de0SAnton Blanchard EXPORT_SYMBOL(node_to_cpumask_map); 5745fb6ceaSAnton Blanchard EXPORT_SYMBOL(node_data); 5845fb6ceaSAnton Blanchard 59ab1f9dacSPaul Mackerras static int min_common_depth; 60237a0989SMike Kravetz static int n_mem_addr_cells, n_mem_size_cells; 6141eab6f8SAnton Blanchard static int form1_affinity; 6241eab6f8SAnton Blanchard 6341eab6f8SAnton Blanchard #define MAX_DISTANCE_REF_POINTS 4 6441eab6f8SAnton Blanchard static int distance_ref_points_depth; 65b08a2a12SAlistair Popple static const __be32 *distance_ref_points; 6641eab6f8SAnton Blanchard static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; 67ab1f9dacSPaul Mackerras 6825863de0SAnton Blanchard /* 6925863de0SAnton Blanchard * Allocate node_to_cpumask_map based on number of available nodes 7025863de0SAnton Blanchard * Requires node_possible_map to be valid. 7125863de0SAnton Blanchard * 729512938bSWanlong Gao * Note: cpumask_of_node() is not valid until after this is done. 
7325863de0SAnton Blanchard */ 7425863de0SAnton Blanchard static void __init setup_node_to_cpumask_map(void) 7525863de0SAnton Blanchard { 76f9d531b8SCody P Schafer unsigned int node; 7725863de0SAnton Blanchard 7825863de0SAnton Blanchard /* setup nr_node_ids if not done yet */ 79f9d531b8SCody P Schafer if (nr_node_ids == MAX_NUMNODES) 80f9d531b8SCody P Schafer setup_nr_node_ids(); 8125863de0SAnton Blanchard 8225863de0SAnton Blanchard /* allocate the map */ 83c118baf8SRaghavendra K T for_each_node(node) 8425863de0SAnton Blanchard alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); 8525863de0SAnton Blanchard 8625863de0SAnton Blanchard /* cpumask_of_node() will now work */ 8725863de0SAnton Blanchard dbg("Node to cpumask map for %d nodes\n", nr_node_ids); 8825863de0SAnton Blanchard } 8925863de0SAnton Blanchard 9055671f3cSStephen Rothwell static int __init fake_numa_create_new_node(unsigned long end_pfn, 911daa6d08SBalbir Singh unsigned int *nid) 921daa6d08SBalbir Singh { 931daa6d08SBalbir Singh unsigned long long mem; 941daa6d08SBalbir Singh char *p = cmdline; 951daa6d08SBalbir Singh static unsigned int fake_nid; 961daa6d08SBalbir Singh static unsigned long long curr_boundary; 971daa6d08SBalbir Singh 981daa6d08SBalbir Singh /* 991daa6d08SBalbir Singh * Modify node id, iff we started creating NUMA nodes 1001daa6d08SBalbir Singh * We want to continue from where we left of the last time 1011daa6d08SBalbir Singh */ 1021daa6d08SBalbir Singh if (fake_nid) 1031daa6d08SBalbir Singh *nid = fake_nid; 1041daa6d08SBalbir Singh /* 1051daa6d08SBalbir Singh * In case there are no more arguments to parse, the 1061daa6d08SBalbir Singh * node_id should be the same as the last fake node id 1071daa6d08SBalbir Singh * (we've handled this above). 
1081daa6d08SBalbir Singh */ 1091daa6d08SBalbir Singh if (!p) 1101daa6d08SBalbir Singh return 0; 1111daa6d08SBalbir Singh 1121daa6d08SBalbir Singh mem = memparse(p, &p); 1131daa6d08SBalbir Singh if (!mem) 1141daa6d08SBalbir Singh return 0; 1151daa6d08SBalbir Singh 1161daa6d08SBalbir Singh if (mem < curr_boundary) 1171daa6d08SBalbir Singh return 0; 1181daa6d08SBalbir Singh 1191daa6d08SBalbir Singh curr_boundary = mem; 1201daa6d08SBalbir Singh 1211daa6d08SBalbir Singh if ((end_pfn << PAGE_SHIFT) > mem) { 1221daa6d08SBalbir Singh /* 1231daa6d08SBalbir Singh * Skip commas and spaces 1241daa6d08SBalbir Singh */ 1251daa6d08SBalbir Singh while (*p == ',' || *p == ' ' || *p == '\t') 1261daa6d08SBalbir Singh p++; 1271daa6d08SBalbir Singh 1281daa6d08SBalbir Singh cmdline = p; 1291daa6d08SBalbir Singh fake_nid++; 1301daa6d08SBalbir Singh *nid = fake_nid; 1311daa6d08SBalbir Singh dbg("created new fake_node with id %d\n", fake_nid); 1321daa6d08SBalbir Singh return 1; 1331daa6d08SBalbir Singh } 1341daa6d08SBalbir Singh return 0; 1351daa6d08SBalbir Singh } 1361daa6d08SBalbir Singh 137d4edc5b6SSrivatsa S. Bhat static void reset_numa_cpu_lookup_table(void) 138d4edc5b6SSrivatsa S. Bhat { 139d4edc5b6SSrivatsa S. Bhat unsigned int cpu; 140d4edc5b6SSrivatsa S. Bhat 141d4edc5b6SSrivatsa S. Bhat for_each_possible_cpu(cpu) 142d4edc5b6SSrivatsa S. Bhat numa_cpu_lookup_table[cpu] = -1; 143d4edc5b6SSrivatsa S. Bhat } 144d4edc5b6SSrivatsa S. Bhat 145d4edc5b6SSrivatsa S. Bhat static void update_numa_cpu_lookup_table(unsigned int cpu, int node) 146ab1f9dacSPaul Mackerras { 147ab1f9dacSPaul Mackerras numa_cpu_lookup_table[cpu] = node; 148d4edc5b6SSrivatsa S. Bhat } 149d4edc5b6SSrivatsa S. Bhat 150d4edc5b6SSrivatsa S. Bhat static void map_cpu_to_node(int cpu, int node) 151d4edc5b6SSrivatsa S. Bhat { 152d4edc5b6SSrivatsa S. 
Bhat update_numa_cpu_lookup_table(cpu, node); 15345fb6ceaSAnton Blanchard 154bf4b85b0SNathan Lynch dbg("adding cpu %d to node %d\n", cpu, node); 155bf4b85b0SNathan Lynch 15625863de0SAnton Blanchard if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) 15725863de0SAnton Blanchard cpumask_set_cpu(cpu, node_to_cpumask_map[node]); 158ab1f9dacSPaul Mackerras } 159ab1f9dacSPaul Mackerras 16039bf990eSJesse Larrew #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR) 161ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu) 162ab1f9dacSPaul Mackerras { 163ab1f9dacSPaul Mackerras int node = numa_cpu_lookup_table[cpu]; 164ab1f9dacSPaul Mackerras 165ab1f9dacSPaul Mackerras dbg("removing cpu %lu from node %d\n", cpu, node); 166ab1f9dacSPaul Mackerras 16725863de0SAnton Blanchard if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { 168429f4d8dSAnton Blanchard cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); 169ab1f9dacSPaul Mackerras } else { 170ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", 171ab1f9dacSPaul Mackerras cpu, node); 172ab1f9dacSPaul Mackerras } 173ab1f9dacSPaul Mackerras } 17439bf990eSJesse Larrew #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ 175ab1f9dacSPaul Mackerras 176ab1f9dacSPaul Mackerras /* must hold reference to node during call */ 177b08a2a12SAlistair Popple static const __be32 *of_get_associativity(struct device_node *dev) 178ab1f9dacSPaul Mackerras { 179e2eb6392SStephen Rothwell return of_get_property(dev, "ibm,associativity", NULL); 180ab1f9dacSPaul Mackerras } 181ab1f9dacSPaul Mackerras 182cf00085dSChandru /* 183cf00085dSChandru * Returns the property linux,drconf-usable-memory if 184cf00085dSChandru * it exists (the property exists only in kexec/kdump kernels, 185cf00085dSChandru * added by kexec-tools) 186cf00085dSChandru */ 187b08a2a12SAlistair Popple static const __be32 *of_get_usable_memory(struct device_node *memory) 188cf00085dSChandru { 189b08a2a12SAlistair Popple 
const __be32 *prop; 190cf00085dSChandru u32 len; 191cf00085dSChandru prop = of_get_property(memory, "linux,drconf-usable-memory", &len); 192cf00085dSChandru if (!prop || len < sizeof(unsigned int)) 193ec32dd66SRobert Jennings return NULL; 194cf00085dSChandru return prop; 195cf00085dSChandru } 196cf00085dSChandru 19741eab6f8SAnton Blanchard int __node_distance(int a, int b) 19841eab6f8SAnton Blanchard { 19941eab6f8SAnton Blanchard int i; 20041eab6f8SAnton Blanchard int distance = LOCAL_DISTANCE; 20141eab6f8SAnton Blanchard 20241eab6f8SAnton Blanchard if (!form1_affinity) 2037122beeeSVaidyanathan Srinivasan return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE); 20441eab6f8SAnton Blanchard 20541eab6f8SAnton Blanchard for (i = 0; i < distance_ref_points_depth; i++) { 20641eab6f8SAnton Blanchard if (distance_lookup_table[a][i] == distance_lookup_table[b][i]) 20741eab6f8SAnton Blanchard break; 20841eab6f8SAnton Blanchard 20941eab6f8SAnton Blanchard /* Double the distance for each NUMA level */ 21041eab6f8SAnton Blanchard distance *= 2; 21141eab6f8SAnton Blanchard } 21241eab6f8SAnton Blanchard 21341eab6f8SAnton Blanchard return distance; 21441eab6f8SAnton Blanchard } 21512c743ebSMike Qiu EXPORT_SYMBOL(__node_distance); 21641eab6f8SAnton Blanchard 21741eab6f8SAnton Blanchard static void initialize_distance_lookup_table(int nid, 218b08a2a12SAlistair Popple const __be32 *associativity) 21941eab6f8SAnton Blanchard { 22041eab6f8SAnton Blanchard int i; 22141eab6f8SAnton Blanchard 22241eab6f8SAnton Blanchard if (!form1_affinity) 22341eab6f8SAnton Blanchard return; 22441eab6f8SAnton Blanchard 22541eab6f8SAnton Blanchard for (i = 0; i < distance_ref_points_depth; i++) { 226b08a2a12SAlistair Popple const __be32 *entry; 227b08a2a12SAlistair Popple 2281d805440SNikunj A Dadhania entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1]; 229b08a2a12SAlistair Popple distance_lookup_table[nid][i] = of_read_number(entry, 1); 23041eab6f8SAnton Blanchard } 23141eab6f8SAnton Blanchard } 

/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
 * info is found.
 */
static int associativity_to_nid(const __be32 *associativity)
{
	int nid = -1;

	/* min_common_depth == -1 means NUMA setup failed or was disabled */
	if (min_common_depth == -1)
		goto out;

	if (of_read_number(associativity, 1) >= min_common_depth)
		nid = of_read_number(&associativity[min_common_depth], 1);

	/* POWER4 LPAR uses 0xffff as invalid node */
	if (nid == 0xffff || nid >= MAX_NUMNODES)
		nid = -1;

	if (nid > 0 &&
		of_read_number(associativity, 1) >= distance_ref_points_depth) {
		/*
		 * Skip the length field and send start of associativity array
		 */
		initialize_distance_lookup_table(nid, associativity + 1);
	}

out:
	return nid;
}

/* Returns the nid associated with the given device tree node,
 * or -1 if not found.
 */
static int of_node_to_nid_single(struct device_node *device)
{
	int nid = -1;
	const __be32 *tmp;

	tmp = of_get_associativity(device);
	if (tmp)
		nid = associativity_to_nid(tmp);
	return nid;
}

/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
	int nid = -1;

	of_node_get(device);
	while (device) {
		nid = of_node_to_nid_single(device);
		if (nid != -1)
			break;

		/* of_get_next_parent() drops the ref on @device for us */
		device = of_get_next_parent(device);
	}
	of_node_put(device);

	return nid;
}
EXPORT_SYMBOL(of_node_to_nid);

/*
 * Determine the associativity depth to use for node lookups from the
 * ibm,associativity-reference-points property, and detect form 1
 * affinity as a side effect.  Returns the depth, or -1 on error.
 */
static int __init find_min_common_depth(void)
{
	int depth;
	struct device_node *root;

	if (firmware_has_feature(FW_FEATURE_OPAL))
		root = of_find_node_by_path("/ibm,opal");
	else
		root = of_find_node_by_path("/rtas");
	if (!root)
		root = of_find_node_by_path("/");

	/*
	 * This property is a set of 32-bit integers, each representing
	 * an index into the ibm,associativity nodes.
	 *
	 * With form 0 affinity the first integer is for an SMP configuration
	 * (should be all 0's) and the second is for a normal NUMA
	 * configuration. We have only one level of NUMA.
	 *
	 * With form 1 affinity the first integer is the most significant
	 * NUMA boundary and the following are progressively less significant
	 * boundaries. There can be more than one level of NUMA.
	 */
	distance_ref_points = of_get_property(root,
					"ibm,associativity-reference-points",
					&distance_ref_points_depth);

	if (!distance_ref_points) {
		dbg("NUMA: ibm,associativity-reference-points not found.\n");
		goto err;
	}

	/* of_get_property() returned a byte length; convert to cell count */
	distance_ref_points_depth /= sizeof(int);

	if (firmware_has_feature(FW_FEATURE_OPAL) ||
	    firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) {
		dbg("Using form 1 affinity\n");
		form1_affinity = 1;
	}

	if (form1_affinity) {
		depth = of_read_number(distance_ref_points, 1);
	} else {
		if (distance_ref_points_depth < 2) {
			printk(KERN_WARNING "NUMA: "
				"short ibm,associativity-reference-points\n");
			goto err;
		}

		depth = of_read_number(&distance_ref_points[1], 1);
	}

	/*
	 * Warn and cap if the hardware supports more than
	 * MAX_DISTANCE_REF_POINTS domains.
	 */
	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
		printk(KERN_WARNING "NUMA: distance array capped at "
			"%d entries\n", MAX_DISTANCE_REF_POINTS);
		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
	}

	of_node_put(root);
	return depth;

err:
	of_node_put(root);
	return -1;
}

/*
 * Read #address-cells / #size-cells from the first memory node.
 * Panics if no memory node exists, since nothing can work without one.
 */
static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
	struct device_node *memory = NULL;

	memory = of_find_node_by_type(memory, "memory");
	if (!memory)
		panic("numa.c: No memory nodes found!");

	*n_addr_cells = of_n_addr_cells(memory);
	*n_size_cells = of_n_size_cells(memory);
	of_node_put(memory);
}

/*
 * Concatenate @n big-endian 32-bit cells from *@buf into one value and
 * advance *@buf past them.
 * NOTE(review): the << 32 assumes unsigned long is 64-bit (true on ppc64);
 * on a 32-bit build this shift would be undefined — confirm callers.
 */
static unsigned long read_n_cells(int n, const __be32 **buf)
{
	unsigned long result = 0;

	while (n--) {
		result = (result << 32) | of_read_number(*buf, 1);
		(*buf)++;
	}
	return result;
}

/*
 * Read the next memblock list entry from the ibm,dynamic-memory property
 * and return the information in the provided of_drconf_cell structure.
 */
static void read_drconf_cell(struct of_drconf_cell *drmem, const __be32 **cellp)
{
	const __be32 *cp;

	drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);

	cp = *cellp;
	drmem->drc_index = of_read_number(cp, 1);
	drmem->reserved = of_read_number(&cp[1], 1);
	drmem->aa_index = of_read_number(&cp[2], 1);
	drmem->flags = of_read_number(&cp[3], 1);

	/* base_addr consumed n_mem_addr_cells; the 4 fields above, 4 cells */
	*cellp = cp + 4;
}

/*
 * Retrieve and validate the ibm,dynamic-memory property of the device tree.
 *
 * The layout of the ibm,dynamic-memory property is a number N of memblock
 * list entries followed by N memblock list entries.  Each memblock list entry
 * contains information as laid out in the of_drconf_cell struct above.
 *
 * Returns the entry count and points *@dm at the first entry, or 0 if the
 * property is absent or too short.
 */
static int of_get_drconf_memory(struct device_node *memory, const __be32 **dm)
{
	const __be32 *prop;
	u32 len, entries;

	prop = of_get_property(memory, "ibm,dynamic-memory", &len);
	if (!prop || len < sizeof(unsigned int))
		return 0;

	entries = of_read_number(prop++, 1);

	/* Now that we know the number of entries, revalidate the size
	 * of the property read in to ensure we have everything
	 */
	if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
		return 0;

	*dm = prop;
	return entries;
}

/*
 * Retrieve and validate the ibm,lmb-size property for drconf memory
 * from the device tree.  Returns 0 if the property is absent/short.
 */
static u64 of_get_lmb_size(struct device_node *memory)
{
	const __be32 *prop;
	u32 len;

	prop = of_get_property(memory, "ibm,lmb-size", &len);
	if (!prop || len < sizeof(unsigned int))
		return 0;

	return read_n_cells(n_mem_size_cells, &prop);
}

/* Parsed view of ibm,associativity-lookup-arrays (see below). */
struct assoc_arrays {
	u32 n_arrays;	/* number of associativity arrays */
	u32 array_sz;	/* cells per array */
	const __be32 *arrays;	/* start of the packed arrays */
};

/*
 * Retrieve and validate the list of associativity arrays for drconf
 * memory from the ibm,associativity-lookup-arrays property of the
 * device tree..
 *
 * The layout of the ibm,associativity-lookup-arrays property is a number N
 * indicating the number of associativity arrays, followed by a number M
 * indicating the size of each associativity array, followed by a list
 * of N associativity arrays.
 *
 * Returns 0 on success, -1 if the property is absent or undersized.
 */
static int of_get_assoc_arrays(struct device_node *memory,
			       struct assoc_arrays *aa)
{
	const __be32 *prop;
	u32 len;

	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
	if (!prop || len < 2 * sizeof(unsigned int))
		return -1;

	aa->n_arrays = of_read_number(prop++, 1);
	aa->array_sz = of_read_number(prop++, 1);

	/* Now that we know the number of arrays and size of each array,
	 * revalidate the size of the property read in.
	 */
	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
		return -1;

	aa->arrays = prop;
	return 0;
}

/*
 * This is like of_node_to_nid_single() for memory represented in the
 * ibm,dynamic-reconfiguration-memory node.  Falls back to node 0 when
 * the associativity index is invalid or out of range.
 */
static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
				   struct assoc_arrays *aa)
{
	int default_nid = 0;
	int nid = default_nid;
	int index;

	if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
	    !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
	    drmem->aa_index < aa->n_arrays) {
		index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
		nid = of_read_number(&aa->arrays[index], 1);

		if (nid == 0xffff || nid >= MAX_NUMNODES)
			nid = default_nid;

		if (nid > 0) {
			index = drmem->aa_index * aa->array_sz;
			initialize_distance_lookup_table(nid,
							&aa->arrays[index]);
		}
	}

	return nid;
}

/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int numa_setup_cpu(unsigned long lcpu)
{
	int nid = -1;
	struct device_node *cpu;

	/*
	 * If a valid cpu-to-node mapping is already available, use it
	 * directly instead of querying the firmware, since it represents
	 * the most recent mapping notified to us by the platform (eg: VPHN).
	 */
	if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) {
		map_cpu_to_node(lcpu, nid);
		return nid;
	}

	cpu = of_get_cpu_node(lcpu, NULL);

	if (!cpu) {
		WARN_ON(1);
		/* a present cpu still needs some node; fall back below */
		if (cpu_present(lcpu))
			goto out_present;
		else
			goto out;
	}

	nid = of_node_to_nid_single(cpu);

out_present:
	if (nid < 0 || !node_online(nid))
		nid = first_online_node;

	map_cpu_to_node(lcpu, nid);
	of_node_put(cpu);
out:
	return nid;
}

/*
 * Warn (once per mismatch) if any online sibling thread of @cpu's core
 * is mapped to a different node than @node.
 */
static void verify_cpu_node_mapping(int cpu, int node)
{
	int base, sibling, i;

	/* Verify that all the threads in the core belong to the same node */
	base = cpu_first_thread_sibling(cpu);

	for (i = 0; i < threads_per_core; i++) {
		sibling = base + i;

		if (sibling == cpu || cpu_is_offline(sibling))
			continue;

		if (cpu_to_node(sibling) != node) {
			WARN(1, "CPU thread siblings %d and %d don't belong"
				" to the same node!\n", cpu, sibling);
			break;
		}
	}
}

/* Must run before sched domains notifier. */
static int ppc_numa_cpu_prepare(unsigned int cpu)
{
	int nid;

	nid = numa_setup_cpu(cpu);
	verify_cpu_node_mapping(cpu, nid);
	return 0;
}

/* Hotplug callback: drop a dead cpu from its node's cpumask. */
static int ppc_numa_cpu_dead(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
	unmap_cpu_from_node(cpu);
#endif
	return 0;
}

/*
 * Check and possibly modify a memory region to enforce the memory limit.
 *
 * Returns the size the region should have to enforce the memory limit.
 * This will either be the original value of size, a truncated value,
 * or zero. If the returned value of size is 0 the region should be
 * discarded as it lies wholly above the memory limit.
 */
static unsigned long __init numa_enforce_memory_limit(unsigned long start,
						      unsigned long size)
{
	/*
	 * We use memblock_end_of_DRAM() in here instead of memory_limit because
	 * we've already adjusted it for the limit and it takes care of
	 * having memory holes below the limit.  Also, in the case of
	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
	 */

	if (start + size <= memblock_end_of_DRAM())
		return size;

	if (start >= memblock_end_of_DRAM())
		return 0;

	return memblock_end_of_DRAM() - start;
}

/*
 * Reads the counter for a given entry in
 * linux,drconf-usable-memory property
 */
static inline int __init read_usm_ranges(const __be32 **usm)
{
	/*
	 * For each lmb in ibm,dynamic-memory a corresponding
	 * entry in linux,drconf-usable-memory property contains
	 * a counter followed by that many (base, size) duple.
	 * read the counter from linux,drconf-usable-memory
	 */
	return read_n_cells(n_mem_size_cells, usm);
}

/*
 * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
 * node. This assumes n_mem_{addr,size}_cells have been set.
 */
static void __init parse_drconf_memory(struct device_node *memory)
{
	const __be32 *uninitialized_var(dm), *usm;
	unsigned int n, rc, ranges, is_kexec_kdump = 0;
	unsigned long lmb_size, base, size, sz;
	int nid;
	struct assoc_arrays aa = { .arrays = NULL };

	n = of_get_drconf_memory(memory, &dm);
	if (!n)
		return;

	lmb_size = of_get_lmb_size(memory);
	if (!lmb_size)
		return;

	rc = of_get_assoc_arrays(memory, &aa);
	if (rc)
		return;

	/* check if this is a kexec/kdump kernel */
	usm = of_get_usable_memory(memory);
	if (usm != NULL)
		is_kexec_kdump = 1;

	for (; n != 0; --n) {
		struct of_drconf_cell drmem;

		read_drconf_cell(&drmem, &dm);

		/* skip this block if the reserved bit is set in flags (0x80)
		   or if the block is not assigned to this partition (0x8) */
		if ((drmem.flags & DRCONF_MEM_RESERVED)
		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
			continue;

		base = drmem.base_addr;
		size = lmb_size;
		ranges = 1;

		if (is_kexec_kdump) {
			ranges = read_usm_ranges(&usm);
			if (!ranges) /* there are no (base, size) duple */
				continue;
		}
		do {
			if (is_kexec_kdump) {
				base = read_n_cells(n_mem_addr_cells, &usm);
				size = read_n_cells(n_mem_size_cells, &usm);
			}
			nid = of_drconf_to_nid_single(&drmem, &aa);
			fake_numa_create_new_node(
				((base + size) >> PAGE_SHIFT),
				&nid);
			node_set_online(nid);
			sz = numa_enforce_memory_limit(base, size);
			if (sz)
				memblock_set_node(base, sz,
						  &memblock.memory, nid);
		} while (--ranges);
	}
}

static int __init parse_numa_properties(void)
{
	struct device_node *memory;
	int default_nid = 0;
	unsigned long i;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

	min_common_depth = find_min_common_depth();

	if (min_common_depth < 0)
		return min_common_depth;

	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);

	/*
	 * Even though we connect cpus to numa domains later in SMP
	 * init, we need to know
the node ids now. This is because 732482ec7c4SNathan Lynch * each node to be onlined must have NODE_DATA etc backing it. 733ab1f9dacSPaul Mackerras */ 734482ec7c4SNathan Lynch for_each_present_cpu(i) { 735dfbe93a2SAnton Blanchard struct device_node *cpu; 736cf950b7aSNathan Lynch int nid; 737ab1f9dacSPaul Mackerras 7388b16cd23SMilton Miller cpu = of_get_cpu_node(i, NULL); 739482ec7c4SNathan Lynch BUG_ON(!cpu); 740953039c8SJeremy Kerr nid = of_node_to_nid_single(cpu); 741ab1f9dacSPaul Mackerras of_node_put(cpu); 742ab1f9dacSPaul Mackerras 743482ec7c4SNathan Lynch /* 744482ec7c4SNathan Lynch * Don't fall back to default_nid yet -- we will plug 745482ec7c4SNathan Lynch * cpus into nodes once the memory scan has discovered 746482ec7c4SNathan Lynch * the topology. 747482ec7c4SNathan Lynch */ 748482ec7c4SNathan Lynch if (nid < 0) 749482ec7c4SNathan Lynch continue; 750482ec7c4SNathan Lynch node_set_online(nid); 751ab1f9dacSPaul Mackerras } 752ab1f9dacSPaul Mackerras 753237a0989SMike Kravetz get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); 75494db7c5eSAnton Blanchard 75594db7c5eSAnton Blanchard for_each_node_by_type(memory, "memory") { 756ab1f9dacSPaul Mackerras unsigned long start; 757ab1f9dacSPaul Mackerras unsigned long size; 758cf950b7aSNathan Lynch int nid; 759ab1f9dacSPaul Mackerras int ranges; 760b08a2a12SAlistair Popple const __be32 *memcell_buf; 761ab1f9dacSPaul Mackerras unsigned int len; 762ab1f9dacSPaul Mackerras 763e2eb6392SStephen Rothwell memcell_buf = of_get_property(memory, 764ba759485SMichael Ellerman "linux,usable-memory", &len); 765ba759485SMichael Ellerman if (!memcell_buf || len <= 0) 766e2eb6392SStephen Rothwell memcell_buf = of_get_property(memory, "reg", &len); 767ab1f9dacSPaul Mackerras if (!memcell_buf || len <= 0) 768ab1f9dacSPaul Mackerras continue; 769ab1f9dacSPaul Mackerras 770cc5d0189SBenjamin Herrenschmidt /* ranges in cell */ 771cc5d0189SBenjamin Herrenschmidt ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); 
772ab1f9dacSPaul Mackerras new_range: 773ab1f9dacSPaul Mackerras /* these are order-sensitive, and modify the buffer pointer */ 774237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 775237a0989SMike Kravetz size = read_n_cells(n_mem_size_cells, &memcell_buf); 776ab1f9dacSPaul Mackerras 777482ec7c4SNathan Lynch /* 778482ec7c4SNathan Lynch * Assumption: either all memory nodes or none will 779482ec7c4SNathan Lynch * have associativity properties. If none, then 780482ec7c4SNathan Lynch * everything goes to default_nid. 781482ec7c4SNathan Lynch */ 782953039c8SJeremy Kerr nid = of_node_to_nid_single(memory); 783482ec7c4SNathan Lynch if (nid < 0) 784482ec7c4SNathan Lynch nid = default_nid; 7851daa6d08SBalbir Singh 7861daa6d08SBalbir Singh fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); 787482ec7c4SNathan Lynch node_set_online(nid); 788ab1f9dacSPaul Mackerras 7897656cd8eSReza Arbab size = numa_enforce_memory_limit(start, size); 7907656cd8eSReza Arbab if (size) 791e7e8de59STang Chen memblock_set_node(start, size, &memblock.memory, nid); 792ab1f9dacSPaul Mackerras 793ab1f9dacSPaul Mackerras if (--ranges) 794ab1f9dacSPaul Mackerras goto new_range; 795ab1f9dacSPaul Mackerras } 796ab1f9dacSPaul Mackerras 7970204568aSPaul Mackerras /* 798dfbe93a2SAnton Blanchard * Now do the same thing for each MEMBLOCK listed in the 799dfbe93a2SAnton Blanchard * ibm,dynamic-memory property in the 800dfbe93a2SAnton Blanchard * ibm,dynamic-reconfiguration-memory node. 
8010204568aSPaul Mackerras */ 8020204568aSPaul Mackerras memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); 8030204568aSPaul Mackerras if (memory) 8040204568aSPaul Mackerras parse_drconf_memory(memory); 8050204568aSPaul Mackerras 806ab1f9dacSPaul Mackerras return 0; 807ab1f9dacSPaul Mackerras } 808ab1f9dacSPaul Mackerras 809ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void) 810ab1f9dacSPaul Mackerras { 81195f72d1eSYinghai Lu unsigned long top_of_ram = memblock_end_of_DRAM(); 81295f72d1eSYinghai Lu unsigned long total_ram = memblock_phys_mem_size(); 813c67c3cb4SMel Gorman unsigned long start_pfn, end_pfn; 81428be7072SBenjamin Herrenschmidt unsigned int nid = 0; 81528be7072SBenjamin Herrenschmidt struct memblock_region *reg; 816ab1f9dacSPaul Mackerras 817e110b281SOlof Johansson printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", 818ab1f9dacSPaul Mackerras top_of_ram, total_ram); 819e110b281SOlof Johansson printk(KERN_DEBUG "Memory hole size: %ldMB\n", 820ab1f9dacSPaul Mackerras (top_of_ram - total_ram) >> 20); 821ab1f9dacSPaul Mackerras 82228be7072SBenjamin Herrenschmidt for_each_memblock(memory, reg) { 823c7fc2de0SYinghai Lu start_pfn = memblock_region_memory_base_pfn(reg); 824c7fc2de0SYinghai Lu end_pfn = memblock_region_memory_end_pfn(reg); 8251daa6d08SBalbir Singh 8261daa6d08SBalbir Singh fake_numa_create_new_node(end_pfn, &nid); 8271d7cfe18STejun Heo memblock_set_node(PFN_PHYS(start_pfn), 828e7e8de59STang Chen PFN_PHYS(end_pfn - start_pfn), 829e7e8de59STang Chen &memblock.memory, nid); 8301daa6d08SBalbir Singh node_set_online(nid); 831c67c3cb4SMel Gorman } 832ab1f9dacSPaul Mackerras } 833ab1f9dacSPaul Mackerras 8344b703a23SAnton Blanchard void __init dump_numa_cpu_topology(void) 8354b703a23SAnton Blanchard { 8364b703a23SAnton Blanchard unsigned int node; 8374b703a23SAnton Blanchard unsigned int cpu, count; 8384b703a23SAnton Blanchard 8394b703a23SAnton Blanchard if (min_common_depth == -1 || !numa_enabled) 8404b703a23SAnton 
Blanchard return; 8414b703a23SAnton Blanchard 8424b703a23SAnton Blanchard for_each_online_node(node) { 8438467801cSAneesh Kumar K.V pr_info("Node %d CPUs:", node); 8444b703a23SAnton Blanchard 8454b703a23SAnton Blanchard count = 0; 8464b703a23SAnton Blanchard /* 8474b703a23SAnton Blanchard * If we used a CPU iterator here we would miss printing 8484b703a23SAnton Blanchard * the holes in the cpumap. 8494b703a23SAnton Blanchard */ 85025863de0SAnton Blanchard for (cpu = 0; cpu < nr_cpu_ids; cpu++) { 85125863de0SAnton Blanchard if (cpumask_test_cpu(cpu, 85225863de0SAnton Blanchard node_to_cpumask_map[node])) { 8534b703a23SAnton Blanchard if (count == 0) 8548467801cSAneesh Kumar K.V pr_cont(" %u", cpu); 8554b703a23SAnton Blanchard ++count; 8564b703a23SAnton Blanchard } else { 8574b703a23SAnton Blanchard if (count > 1) 8588467801cSAneesh Kumar K.V pr_cont("-%u", cpu - 1); 8594b703a23SAnton Blanchard count = 0; 8604b703a23SAnton Blanchard } 8614b703a23SAnton Blanchard } 8624b703a23SAnton Blanchard 8634b703a23SAnton Blanchard if (count > 1) 8648467801cSAneesh Kumar K.V pr_cont("-%u", nr_cpu_ids - 1); 8658467801cSAneesh Kumar K.V pr_cont("\n"); 8664b703a23SAnton Blanchard } 8674b703a23SAnton Blanchard } 8684b703a23SAnton Blanchard 86910239733SAnton Blanchard /* Initialize NODE_DATA for a node on the local memory */ 87010239733SAnton Blanchard static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) 871ab1f9dacSPaul Mackerras { 87210239733SAnton Blanchard u64 spanned_pages = end_pfn - start_pfn; 87310239733SAnton Blanchard const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); 87410239733SAnton Blanchard u64 nd_pa; 87510239733SAnton Blanchard void *nd; 87610239733SAnton Blanchard int tnid; 877ab1f9dacSPaul Mackerras 87810239733SAnton Blanchard nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); 87910239733SAnton Blanchard nd = __va(nd_pa); 880ab1f9dacSPaul Mackerras 88110239733SAnton Blanchard /* report and initialize */ 
88210239733SAnton Blanchard pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n", 88310239733SAnton Blanchard nd_pa, nd_pa + nd_size - 1); 88410239733SAnton Blanchard tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 88510239733SAnton Blanchard if (tnid != nid) 88610239733SAnton Blanchard pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid); 8878f64e1f2SJon Tollefson 88810239733SAnton Blanchard node_data[nid] = nd; 88910239733SAnton Blanchard memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); 89010239733SAnton Blanchard NODE_DATA(nid)->node_id = nid; 89110239733SAnton Blanchard NODE_DATA(nid)->node_start_pfn = start_pfn; 89210239733SAnton Blanchard NODE_DATA(nid)->node_spanned_pages = spanned_pages; 893ab1f9dacSPaul Mackerras } 8948f64e1f2SJon Tollefson 89510239733SAnton Blanchard void __init initmem_init(void) 8964a618669SDave Hansen { 8972fabf084SNishanth Aravamudan int nid, cpu; 8984a618669SDave Hansen 89995f72d1eSYinghai Lu max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; 9004a618669SDave Hansen max_pfn = max_low_pfn; 9014a618669SDave Hansen 9024a618669SDave Hansen if (parse_numa_properties()) 9034a618669SDave Hansen setup_nonnuma(); 9044a618669SDave Hansen 90510239733SAnton Blanchard memblock_dump_all(); 90610239733SAnton Blanchard 9073af229f2SNishanth Aravamudan /* 9083af229f2SNishanth Aravamudan * Reduce the possible NUMA nodes to the online NUMA nodes, 9093af229f2SNishanth Aravamudan * since we do not support node hotplug. This ensures that we 9103af229f2SNishanth Aravamudan * lower the maximum NUMA node ID to what is actually present. 
9113af229f2SNishanth Aravamudan */ 9123af229f2SNishanth Aravamudan nodes_and(node_possible_map, node_possible_map, node_online_map); 9133af229f2SNishanth Aravamudan 9144a618669SDave Hansen for_each_online_node(nid) { 9154a618669SDave Hansen unsigned long start_pfn, end_pfn; 9164a618669SDave Hansen 9174a618669SDave Hansen get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); 91810239733SAnton Blanchard setup_node_data(nid, start_pfn, end_pfn); 9198f64e1f2SJon Tollefson sparse_memory_present_with_active_regions(nid); 920ab1f9dacSPaul Mackerras } 921d3f6204aSBenjamin Herrenschmidt 92221098b9eSAnton Blanchard sparse_init(); 92325863de0SAnton Blanchard 92425863de0SAnton Blanchard setup_node_to_cpumask_map(); 92525863de0SAnton Blanchard 926d4edc5b6SSrivatsa S. Bhat reset_numa_cpu_lookup_table(); 927bdab88e0SSebastian Andrzej Siewior 9282fabf084SNishanth Aravamudan /* 9292fabf084SNishanth Aravamudan * We need the numa_cpu_lookup_table to be accurate for all CPUs, 9302fabf084SNishanth Aravamudan * even before we online them, so that we can use cpu_to_{node,mem} 9312fabf084SNishanth Aravamudan * early in boot, cf. smp_prepare_cpus(). 932bdab88e0SSebastian Andrzej Siewior * _nocalls() + manual invocation is used because cpuhp is not yet 933bdab88e0SSebastian Andrzej Siewior * initialized for the boot CPU. 
9342fabf084SNishanth Aravamudan */ 93573c1b41eSThomas Gleixner cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare", 936bdab88e0SSebastian Andrzej Siewior ppc_numa_cpu_prepare, ppc_numa_cpu_dead); 937bdab88e0SSebastian Andrzej Siewior for_each_present_cpu(cpu) 938bdab88e0SSebastian Andrzej Siewior numa_setup_cpu(cpu); 9394a618669SDave Hansen } 940ab1f9dacSPaul Mackerras 941ab1f9dacSPaul Mackerras static int __init early_numa(char *p) 942ab1f9dacSPaul Mackerras { 943ab1f9dacSPaul Mackerras if (!p) 944ab1f9dacSPaul Mackerras return 0; 945ab1f9dacSPaul Mackerras 946ab1f9dacSPaul Mackerras if (strstr(p, "off")) 947ab1f9dacSPaul Mackerras numa_enabled = 0; 948ab1f9dacSPaul Mackerras 949ab1f9dacSPaul Mackerras if (strstr(p, "debug")) 950ab1f9dacSPaul Mackerras numa_debug = 1; 951ab1f9dacSPaul Mackerras 9521daa6d08SBalbir Singh p = strstr(p, "fake="); 9531daa6d08SBalbir Singh if (p) 9541daa6d08SBalbir Singh cmdline = p + strlen("fake="); 9551daa6d08SBalbir Singh 956ab1f9dacSPaul Mackerras return 0; 957ab1f9dacSPaul Mackerras } 958ab1f9dacSPaul Mackerras early_param("numa", early_numa); 959237a0989SMike Kravetz 9602d73bae1SNishanth Aravamudan static bool topology_updates_enabled = true; 9612d73bae1SNishanth Aravamudan 9622d73bae1SNishanth Aravamudan static int __init early_topology_updates(char *p) 9632d73bae1SNishanth Aravamudan { 9642d73bae1SNishanth Aravamudan if (!p) 9652d73bae1SNishanth Aravamudan return 0; 9662d73bae1SNishanth Aravamudan 9672d73bae1SNishanth Aravamudan if (!strcmp(p, "off")) { 9682d73bae1SNishanth Aravamudan pr_info("Disabling topology updates\n"); 9692d73bae1SNishanth Aravamudan topology_updates_enabled = false; 9702d73bae1SNishanth Aravamudan } 9712d73bae1SNishanth Aravamudan 9722d73bae1SNishanth Aravamudan return 0; 9732d73bae1SNishanth Aravamudan } 9742d73bae1SNishanth Aravamudan early_param("topology_updates", early_topology_updates); 9752d73bae1SNishanth Aravamudan 976237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG 
977237a0989SMike Kravetz /* 9780f16ef7fSNathan Fontenot * Find the node associated with a hot added memory section for 9790f16ef7fSNathan Fontenot * memory represented in the device tree by the property 9800f16ef7fSNathan Fontenot * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory. 9810db9360aSNathan Fontenot */ 9820db9360aSNathan Fontenot static int hot_add_drconf_scn_to_nid(struct device_node *memory, 9830db9360aSNathan Fontenot unsigned long scn_addr) 9840db9360aSNathan Fontenot { 985b08a2a12SAlistair Popple const __be32 *dm; 9860f16ef7fSNathan Fontenot unsigned int drconf_cell_cnt, rc; 9873fdfd990SBenjamin Herrenschmidt unsigned long lmb_size; 9880db9360aSNathan Fontenot struct assoc_arrays aa; 9890f16ef7fSNathan Fontenot int nid = -1; 9900db9360aSNathan Fontenot 9910f16ef7fSNathan Fontenot drconf_cell_cnt = of_get_drconf_memory(memory, &dm); 9920f16ef7fSNathan Fontenot if (!drconf_cell_cnt) 9930f16ef7fSNathan Fontenot return -1; 9940db9360aSNathan Fontenot 9953fdfd990SBenjamin Herrenschmidt lmb_size = of_get_lmb_size(memory); 9963fdfd990SBenjamin Herrenschmidt if (!lmb_size) 9970f16ef7fSNathan Fontenot return -1; 9980db9360aSNathan Fontenot 9990db9360aSNathan Fontenot rc = of_get_assoc_arrays(memory, &aa); 10000db9360aSNathan Fontenot if (rc) 10010f16ef7fSNathan Fontenot return -1; 10020db9360aSNathan Fontenot 10030f16ef7fSNathan Fontenot for (; drconf_cell_cnt != 0; --drconf_cell_cnt) { 10040db9360aSNathan Fontenot struct of_drconf_cell drmem; 10050db9360aSNathan Fontenot 10060db9360aSNathan Fontenot read_drconf_cell(&drmem, &dm); 10070db9360aSNathan Fontenot 10080db9360aSNathan Fontenot /* skip this block if it is reserved or not assigned to 10090db9360aSNathan Fontenot * this partition */ 10100db9360aSNathan Fontenot if ((drmem.flags & DRCONF_MEM_RESERVED) 10110db9360aSNathan Fontenot || !(drmem.flags & DRCONF_MEM_ASSIGNED)) 10120db9360aSNathan Fontenot continue; 10130db9360aSNathan Fontenot 10140f16ef7fSNathan Fontenot if ((scn_addr < drmem.base_addr) 
10153fdfd990SBenjamin Herrenschmidt || (scn_addr >= (drmem.base_addr + lmb_size))) 10160f16ef7fSNathan Fontenot continue; 10170db9360aSNathan Fontenot 10180f16ef7fSNathan Fontenot nid = of_drconf_to_nid_single(&drmem, &aa); 10190f16ef7fSNathan Fontenot break; 10200db9360aSNathan Fontenot } 10210db9360aSNathan Fontenot 10220f16ef7fSNathan Fontenot return nid; 10230db9360aSNathan Fontenot } 10240db9360aSNathan Fontenot 10250db9360aSNathan Fontenot /* 10260f16ef7fSNathan Fontenot * Find the node associated with a hot added memory section for memory 10270f16ef7fSNathan Fontenot * represented in the device tree as a node (i.e. memory@XXXX) for 102895f72d1eSYinghai Lu * each memblock. 1029237a0989SMike Kravetz */ 1030ec32dd66SRobert Jennings static int hot_add_node_scn_to_nid(unsigned long scn_addr) 1031237a0989SMike Kravetz { 103294db7c5eSAnton Blanchard struct device_node *memory; 10330f16ef7fSNathan Fontenot int nid = -1; 1034237a0989SMike Kravetz 103594db7c5eSAnton Blanchard for_each_node_by_type(memory, "memory") { 1036237a0989SMike Kravetz unsigned long start, size; 1037b226e462SMike Kravetz int ranges; 1038b08a2a12SAlistair Popple const __be32 *memcell_buf; 1039237a0989SMike Kravetz unsigned int len; 1040237a0989SMike Kravetz 1041e2eb6392SStephen Rothwell memcell_buf = of_get_property(memory, "reg", &len); 1042237a0989SMike Kravetz if (!memcell_buf || len <= 0) 1043237a0989SMike Kravetz continue; 1044237a0989SMike Kravetz 1045cc5d0189SBenjamin Herrenschmidt /* ranges in cell */ 1046cc5d0189SBenjamin Herrenschmidt ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); 10470f16ef7fSNathan Fontenot 10480f16ef7fSNathan Fontenot while (ranges--) { 1049237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 1050237a0989SMike Kravetz size = read_n_cells(n_mem_size_cells, &memcell_buf); 1051237a0989SMike Kravetz 10520f16ef7fSNathan Fontenot if ((scn_addr < start) || (scn_addr >= (start + size))) 10530f16ef7fSNathan Fontenot continue; 
10540f16ef7fSNathan Fontenot 10550f16ef7fSNathan Fontenot nid = of_node_to_nid_single(memory); 10560f16ef7fSNathan Fontenot break; 10570f16ef7fSNathan Fontenot } 10580f16ef7fSNathan Fontenot 10590f16ef7fSNathan Fontenot if (nid >= 0) 10600f16ef7fSNathan Fontenot break; 10610f16ef7fSNathan Fontenot } 10620f16ef7fSNathan Fontenot 106360831842SAnton Blanchard of_node_put(memory); 106460831842SAnton Blanchard 10650db9360aSNathan Fontenot return nid; 1066237a0989SMike Kravetz } 1067237a0989SMike Kravetz 10680f16ef7fSNathan Fontenot /* 10690f16ef7fSNathan Fontenot * Find the node associated with a hot added memory section. Section 107095f72d1eSYinghai Lu * corresponds to a SPARSEMEM section, not an MEMBLOCK. It is assumed that 107195f72d1eSYinghai Lu * sections are fully contained within a single MEMBLOCK. 10720f16ef7fSNathan Fontenot */ 10730f16ef7fSNathan Fontenot int hot_add_scn_to_nid(unsigned long scn_addr) 10740f16ef7fSNathan Fontenot { 10750f16ef7fSNathan Fontenot struct device_node *memory = NULL; 10764a3bac4eSReza Arbab int nid; 10770f16ef7fSNathan Fontenot 10780f16ef7fSNathan Fontenot if (!numa_enabled || (min_common_depth < 0)) 107972c33688SH Hartley Sweeten return first_online_node; 10800f16ef7fSNathan Fontenot 10810f16ef7fSNathan Fontenot memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); 10820f16ef7fSNathan Fontenot if (memory) { 10830f16ef7fSNathan Fontenot nid = hot_add_drconf_scn_to_nid(memory, scn_addr); 10840f16ef7fSNathan Fontenot of_node_put(memory); 10850f16ef7fSNathan Fontenot } else { 10860f16ef7fSNathan Fontenot nid = hot_add_node_scn_to_nid(scn_addr); 1087237a0989SMike Kravetz } 10880f16ef7fSNathan Fontenot 10892a8628d4SReza Arbab if (nid < 0 || !node_possible(nid)) 109072c33688SH Hartley Sweeten nid = first_online_node; 10910f16ef7fSNathan Fontenot 10920f16ef7fSNathan Fontenot return nid; 10930f16ef7fSNathan Fontenot } 10940f16ef7fSNathan Fontenot 1095cd34206eSNishanth Aravamudan static u64 hot_add_drconf_memory_max(void) 
1096cd34206eSNishanth Aravamudan { 1097cd34206eSNishanth Aravamudan struct device_node *memory = NULL; 109845b64ee6SBharata B Rao struct device_node *dn = NULL; 1099cd34206eSNishanth Aravamudan unsigned int drconf_cell_cnt = 0; 1100cd34206eSNishanth Aravamudan u64 lmb_size = 0; 1101ec32dd66SRobert Jennings const __be32 *dm = NULL; 110245b64ee6SBharata B Rao const __be64 *lrdr = NULL; 110345b64ee6SBharata B Rao struct of_drconf_cell drmem; 110445b64ee6SBharata B Rao 110545b64ee6SBharata B Rao dn = of_find_node_by_path("/rtas"); 110645b64ee6SBharata B Rao if (dn) { 110745b64ee6SBharata B Rao lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL); 110845b64ee6SBharata B Rao of_node_put(dn); 110945b64ee6SBharata B Rao if (lrdr) 111045b64ee6SBharata B Rao return be64_to_cpup(lrdr); 111145b64ee6SBharata B Rao } 1112cd34206eSNishanth Aravamudan 1113cd34206eSNishanth Aravamudan memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); 1114cd34206eSNishanth Aravamudan if (memory) { 1115cd34206eSNishanth Aravamudan drconf_cell_cnt = of_get_drconf_memory(memory, &dm); 1116cd34206eSNishanth Aravamudan lmb_size = of_get_lmb_size(memory); 111745b64ee6SBharata B Rao 111845b64ee6SBharata B Rao /* Advance to the last cell, each cell has 6 32 bit integers */ 111945b64ee6SBharata B Rao dm += (drconf_cell_cnt - 1) * 6; 112045b64ee6SBharata B Rao read_drconf_cell(&drmem, &dm); 1121cd34206eSNishanth Aravamudan of_node_put(memory); 112245b64ee6SBharata B Rao return drmem.base_addr + lmb_size; 1123cd34206eSNishanth Aravamudan } 112445b64ee6SBharata B Rao return 0; 1125cd34206eSNishanth Aravamudan } 1126cd34206eSNishanth Aravamudan 1127cd34206eSNishanth Aravamudan /* 1128cd34206eSNishanth Aravamudan * memory_hotplug_max - return max address of memory that may be added 1129cd34206eSNishanth Aravamudan * 1130cd34206eSNishanth Aravamudan * This is currently only used on systems that support drconfig memory 1131cd34206eSNishanth Aravamudan * hotplug. 
1132cd34206eSNishanth Aravamudan */ 1133cd34206eSNishanth Aravamudan u64 memory_hotplug_max(void) 1134cd34206eSNishanth Aravamudan { 1135cd34206eSNishanth Aravamudan return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM()); 1136cd34206eSNishanth Aravamudan } 1137237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */ 11389eff1a38SJesse Larrew 1139bd03403aSJesse Larrew /* Virtual Processor Home Node (VPHN) support */ 114039bf990eSJesse Larrew #ifdef CONFIG_PPC_SPLPAR 11414b6cfb2aSGreg Kurz 11424b6cfb2aSGreg Kurz #include "vphn.h" 11434b6cfb2aSGreg Kurz 114430c05350SNathan Fontenot struct topology_update_data { 114530c05350SNathan Fontenot struct topology_update_data *next; 114630c05350SNathan Fontenot unsigned int cpu; 114730c05350SNathan Fontenot int old_nid; 114830c05350SNathan Fontenot int new_nid; 114930c05350SNathan Fontenot }; 115030c05350SNathan Fontenot 11515de16699SAnton Blanchard static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; 11529eff1a38SJesse Larrew static cpumask_t cpu_associativity_changes_mask; 11539eff1a38SJesse Larrew static int vphn_enabled; 11545d88aa85SJesse Larrew static int prrn_enabled; 11555d88aa85SJesse Larrew static void reset_topology_timer(void); 11569eff1a38SJesse Larrew 11579eff1a38SJesse Larrew /* 11589eff1a38SJesse Larrew * Store the current values of the associativity change counters in the 11599eff1a38SJesse Larrew * hypervisor. 
11609eff1a38SJesse Larrew */ 11619eff1a38SJesse Larrew static void setup_cpu_associativity_change_counters(void) 11629eff1a38SJesse Larrew { 1163cd9d6cc7SJesse Larrew int cpu; 11649eff1a38SJesse Larrew 11655de16699SAnton Blanchard /* The VPHN feature supports a maximum of 8 reference points */ 11665de16699SAnton Blanchard BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8); 11675de16699SAnton Blanchard 11689eff1a38SJesse Larrew for_each_possible_cpu(cpu) { 1169cd9d6cc7SJesse Larrew int i; 11709eff1a38SJesse Larrew u8 *counts = vphn_cpu_change_counts[cpu]; 11719eff1a38SJesse Larrew volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; 11729eff1a38SJesse Larrew 11735de16699SAnton Blanchard for (i = 0; i < distance_ref_points_depth; i++) 11749eff1a38SJesse Larrew counts[i] = hypervisor_counts[i]; 11759eff1a38SJesse Larrew } 11769eff1a38SJesse Larrew } 11779eff1a38SJesse Larrew 11789eff1a38SJesse Larrew /* 11799eff1a38SJesse Larrew * The hypervisor maintains a set of 8 associativity change counters in 11809eff1a38SJesse Larrew * the VPA of each cpu that correspond to the associativity levels in the 11819eff1a38SJesse Larrew * ibm,associativity-reference-points property. When an associativity 11829eff1a38SJesse Larrew * level changes, the corresponding counter is incremented. 11839eff1a38SJesse Larrew * 11849eff1a38SJesse Larrew * Set a bit in cpu_associativity_changes_mask for each cpu whose home 11859eff1a38SJesse Larrew * node associativity levels have changed. 11869eff1a38SJesse Larrew * 11879eff1a38SJesse Larrew * Returns the number of cpus with unhandled associativity changes. 
11889eff1a38SJesse Larrew */ 11899eff1a38SJesse Larrew static int update_cpu_associativity_changes_mask(void) 11909eff1a38SJesse Larrew { 11915d88aa85SJesse Larrew int cpu; 11929eff1a38SJesse Larrew cpumask_t *changes = &cpu_associativity_changes_mask; 11939eff1a38SJesse Larrew 11949eff1a38SJesse Larrew for_each_possible_cpu(cpu) { 11959eff1a38SJesse Larrew int i, changed = 0; 11969eff1a38SJesse Larrew u8 *counts = vphn_cpu_change_counts[cpu]; 11979eff1a38SJesse Larrew volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; 11989eff1a38SJesse Larrew 11995de16699SAnton Blanchard for (i = 0; i < distance_ref_points_depth; i++) { 1200d69043e8SAnton Blanchard if (hypervisor_counts[i] != counts[i]) { 12019eff1a38SJesse Larrew counts[i] = hypervisor_counts[i]; 12029eff1a38SJesse Larrew changed = 1; 12039eff1a38SJesse Larrew } 12049eff1a38SJesse Larrew } 12059eff1a38SJesse Larrew if (changed) { 12063be7db6aSRobert Jennings cpumask_or(changes, changes, cpu_sibling_mask(cpu)); 12073be7db6aSRobert Jennings cpu = cpu_last_thread_sibling(cpu); 12089eff1a38SJesse Larrew } 12099eff1a38SJesse Larrew } 12109eff1a38SJesse Larrew 12115d88aa85SJesse Larrew return cpumask_weight(changes); 12129eff1a38SJesse Larrew } 12139eff1a38SJesse Larrew 12149eff1a38SJesse Larrew /* 12159eff1a38SJesse Larrew * Retrieve the new associativity information for a virtual processor's 12169eff1a38SJesse Larrew * home node. 
12179eff1a38SJesse Larrew */ 1218b08a2a12SAlistair Popple static long hcall_vphn(unsigned long cpu, __be32 *associativity) 12199eff1a38SJesse Larrew { 1220cd9d6cc7SJesse Larrew long rc; 12219eff1a38SJesse Larrew long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 12229eff1a38SJesse Larrew u64 flags = 1; 12239eff1a38SJesse Larrew int hwcpu = get_hard_smp_processor_id(cpu); 12249eff1a38SJesse Larrew 12259eff1a38SJesse Larrew rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); 12269eff1a38SJesse Larrew vphn_unpack_associativity(retbuf, associativity); 12279eff1a38SJesse Larrew 12289eff1a38SJesse Larrew return rc; 12299eff1a38SJesse Larrew } 12309eff1a38SJesse Larrew 12319eff1a38SJesse Larrew static long vphn_get_associativity(unsigned long cpu, 1232b08a2a12SAlistair Popple __be32 *associativity) 12339eff1a38SJesse Larrew { 1234cd9d6cc7SJesse Larrew long rc; 12359eff1a38SJesse Larrew 12369eff1a38SJesse Larrew rc = hcall_vphn(cpu, associativity); 12379eff1a38SJesse Larrew 12389eff1a38SJesse Larrew switch (rc) { 12399eff1a38SJesse Larrew case H_FUNCTION: 12409eff1a38SJesse Larrew printk(KERN_INFO 12419eff1a38SJesse Larrew "VPHN is not supported. Disabling polling...\n"); 12429eff1a38SJesse Larrew stop_topology_update(); 12439eff1a38SJesse Larrew break; 12449eff1a38SJesse Larrew case H_HARDWARE: 12459eff1a38SJesse Larrew printk(KERN_ERR 12469eff1a38SJesse Larrew "hcall_vphn() experienced a hardware fault " 12479eff1a38SJesse Larrew "preventing VPHN. Disabling polling...\n"); 12489eff1a38SJesse Larrew stop_topology_update(); 12499eff1a38SJesse Larrew } 12509eff1a38SJesse Larrew 12519eff1a38SJesse Larrew return rc; 12529eff1a38SJesse Larrew } 12539eff1a38SJesse Larrew 12549eff1a38SJesse Larrew /* 125530c05350SNathan Fontenot * Update the CPU maps and sysfs entries for a single CPU when its NUMA 125630c05350SNathan Fontenot * characteristics change. This function doesn't perform any locking and is 125730c05350SNathan Fontenot * only safe to call from stop_machine(). 
125830c05350SNathan Fontenot */ 125930c05350SNathan Fontenot static int update_cpu_topology(void *data) 126030c05350SNathan Fontenot { 126130c05350SNathan Fontenot struct topology_update_data *update; 126230c05350SNathan Fontenot unsigned long cpu; 126330c05350SNathan Fontenot 126430c05350SNathan Fontenot if (!data) 126530c05350SNathan Fontenot return -EINVAL; 126630c05350SNathan Fontenot 12673be7db6aSRobert Jennings cpu = smp_processor_id(); 126830c05350SNathan Fontenot 126930c05350SNathan Fontenot for (update = data; update; update = update->next) { 12702c0a33f9SNishanth Aravamudan int new_nid = update->new_nid; 127130c05350SNathan Fontenot if (cpu != update->cpu) 127230c05350SNathan Fontenot continue; 127330c05350SNathan Fontenot 127449f8d8c0SNishanth Aravamudan unmap_cpu_from_node(cpu); 12752c0a33f9SNishanth Aravamudan map_cpu_to_node(cpu, new_nid); 12762c0a33f9SNishanth Aravamudan set_cpu_numa_node(cpu, new_nid); 12772c0a33f9SNishanth Aravamudan set_cpu_numa_mem(cpu, local_memory_node(new_nid)); 1278176bbf14SJesse Larrew vdso_getcpu_init(); 127930c05350SNathan Fontenot } 128030c05350SNathan Fontenot 128130c05350SNathan Fontenot return 0; 128230c05350SNathan Fontenot } 128330c05350SNathan Fontenot 1284d4edc5b6SSrivatsa S. Bhat static int update_lookup_table(void *data) 1285d4edc5b6SSrivatsa S. Bhat { 1286d4edc5b6SSrivatsa S. Bhat struct topology_update_data *update; 1287d4edc5b6SSrivatsa S. Bhat 1288d4edc5b6SSrivatsa S. Bhat if (!data) 1289d4edc5b6SSrivatsa S. Bhat return -EINVAL; 1290d4edc5b6SSrivatsa S. Bhat 1291d4edc5b6SSrivatsa S. Bhat /* 1292d4edc5b6SSrivatsa S. Bhat * Upon topology update, the numa-cpu lookup table needs to be updated 1293d4edc5b6SSrivatsa S. Bhat * for all threads in the core, including offline CPUs, to ensure that 1294d4edc5b6SSrivatsa S. Bhat * future hotplug operations respect the cpu-to-node associativity 1295d4edc5b6SSrivatsa S. Bhat * properly. 1296d4edc5b6SSrivatsa S. Bhat */ 1297d4edc5b6SSrivatsa S. 
Bhat for (update = data; update; update = update->next) { 1298d4edc5b6SSrivatsa S. Bhat int nid, base, j; 1299d4edc5b6SSrivatsa S. Bhat 1300d4edc5b6SSrivatsa S. Bhat nid = update->new_nid; 1301d4edc5b6SSrivatsa S. Bhat base = cpu_first_thread_sibling(update->cpu); 1302d4edc5b6SSrivatsa S. Bhat 1303d4edc5b6SSrivatsa S. Bhat for (j = 0; j < threads_per_core; j++) { 1304d4edc5b6SSrivatsa S. Bhat update_numa_cpu_lookup_table(base + j, nid); 1305d4edc5b6SSrivatsa S. Bhat } 1306d4edc5b6SSrivatsa S. Bhat } 1307d4edc5b6SSrivatsa S. Bhat 1308d4edc5b6SSrivatsa S. Bhat return 0; 1309d4edc5b6SSrivatsa S. Bhat } 1310d4edc5b6SSrivatsa S. Bhat 131130c05350SNathan Fontenot /* 13129eff1a38SJesse Larrew * Update the node maps and sysfs entries for each cpu whose home node 131379c5fcebSJesse Larrew * has changed. Returns 1 when the topology has changed, and 0 otherwise. 1314*3e401f7aSThiago Jung Bauermann * 1315*3e401f7aSThiago Jung Bauermann * cpus_locked says whether we already hold cpu_hotplug_lock. 13169eff1a38SJesse Larrew */ 1317*3e401f7aSThiago Jung Bauermann int numa_update_cpu_topology(bool cpus_locked) 13189eff1a38SJesse Larrew { 13193be7db6aSRobert Jennings unsigned int cpu, sibling, changed = 0; 132030c05350SNathan Fontenot struct topology_update_data *updates, *ud; 1321b08a2a12SAlistair Popple __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; 1322176bbf14SJesse Larrew cpumask_t updated_cpus; 13238a25a2fdSKay Sievers struct device *dev; 13243be7db6aSRobert Jennings int weight, new_nid, i = 0; 132530c05350SNathan Fontenot 13262d73bae1SNishanth Aravamudan if (!prrn_enabled && !vphn_enabled) 13272d73bae1SNishanth Aravamudan return 0; 13282d73bae1SNishanth Aravamudan 132930c05350SNathan Fontenot weight = cpumask_weight(&cpu_associativity_changes_mask); 133030c05350SNathan Fontenot if (!weight) 133130c05350SNathan Fontenot return 0; 133230c05350SNathan Fontenot 133330c05350SNathan Fontenot updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL); 133430c05350SNathan Fontenot 
if (!updates) 133530c05350SNathan Fontenot return 0; 13369eff1a38SJesse Larrew 1337176bbf14SJesse Larrew cpumask_clear(&updated_cpus); 13389eff1a38SJesse Larrew 1339104699c0SKOSAKI Motohiro for_each_cpu(cpu, &cpu_associativity_changes_mask) { 13403be7db6aSRobert Jennings /* 13413be7db6aSRobert Jennings * If siblings aren't flagged for changes, updates list 13423be7db6aSRobert Jennings * will be too short. Skip on this update and set for next 13433be7db6aSRobert Jennings * update. 13443be7db6aSRobert Jennings */ 13453be7db6aSRobert Jennings if (!cpumask_subset(cpu_sibling_mask(cpu), 13463be7db6aSRobert Jennings &cpu_associativity_changes_mask)) { 13473be7db6aSRobert Jennings pr_info("Sibling bits not set for associativity " 13483be7db6aSRobert Jennings "change, cpu%d\n", cpu); 13493be7db6aSRobert Jennings cpumask_or(&cpu_associativity_changes_mask, 13503be7db6aSRobert Jennings &cpu_associativity_changes_mask, 13513be7db6aSRobert Jennings cpu_sibling_mask(cpu)); 13523be7db6aSRobert Jennings cpu = cpu_last_thread_sibling(cpu); 13533be7db6aSRobert Jennings continue; 13543be7db6aSRobert Jennings } 13553be7db6aSRobert Jennings 13563be7db6aSRobert Jennings /* Use associativity from first thread for all siblings */ 13579eff1a38SJesse Larrew vphn_get_associativity(cpu, associativity); 13583be7db6aSRobert Jennings new_nid = associativity_to_nid(associativity); 13593be7db6aSRobert Jennings if (new_nid < 0 || !node_online(new_nid)) 13603be7db6aSRobert Jennings new_nid = first_online_node; 13619eff1a38SJesse Larrew 13623be7db6aSRobert Jennings if (new_nid == numa_cpu_lookup_table[cpu]) { 13633be7db6aSRobert Jennings cpumask_andnot(&cpu_associativity_changes_mask, 13643be7db6aSRobert Jennings &cpu_associativity_changes_mask, 13653be7db6aSRobert Jennings cpu_sibling_mask(cpu)); 13663be7db6aSRobert Jennings cpu = cpu_last_thread_sibling(cpu); 13673be7db6aSRobert Jennings continue; 13683be7db6aSRobert Jennings } 13699eff1a38SJesse Larrew 13703be7db6aSRobert Jennings 
for_each_cpu(sibling, cpu_sibling_mask(cpu)) { 13713be7db6aSRobert Jennings ud = &updates[i++]; 13723be7db6aSRobert Jennings ud->cpu = sibling; 13733be7db6aSRobert Jennings ud->new_nid = new_nid; 13743be7db6aSRobert Jennings ud->old_nid = numa_cpu_lookup_table[sibling]; 13753be7db6aSRobert Jennings cpumask_set_cpu(sibling, &updated_cpus); 137630c05350SNathan Fontenot if (i < weight) 137730c05350SNathan Fontenot ud->next = &updates[i]; 137830c05350SNathan Fontenot } 13793be7db6aSRobert Jennings cpu = cpu_last_thread_sibling(cpu); 13803be7db6aSRobert Jennings } 13819eff1a38SJesse Larrew 13822d73bae1SNishanth Aravamudan pr_debug("Topology update for the following CPUs:\n"); 13832d73bae1SNishanth Aravamudan if (cpumask_weight(&updated_cpus)) { 13842d73bae1SNishanth Aravamudan for (ud = &updates[0]; ud; ud = ud->next) { 13852d73bae1SNishanth Aravamudan pr_debug("cpu %d moving from node %d " 13862d73bae1SNishanth Aravamudan "to %d\n", ud->cpu, 13872d73bae1SNishanth Aravamudan ud->old_nid, ud->new_nid); 13882d73bae1SNishanth Aravamudan } 13892d73bae1SNishanth Aravamudan } 13902d73bae1SNishanth Aravamudan 13919a013361SMichael Wang /* 13929a013361SMichael Wang * In cases where we have nothing to update (because the updates list 13939a013361SMichael Wang * is too short or because the new topology is same as the old one), 13949a013361SMichael Wang * skip invoking update_cpu_topology() via stop-machine(). This is 13959a013361SMichael Wang * necessary (and not just a fast-path optimization) since stop-machine 13969a013361SMichael Wang * can end up electing a random CPU to run update_cpu_topology(), and 13979a013361SMichael Wang * thus trick us into setting up incorrect cpu-node mappings (since 13989a013361SMichael Wang * 'updates' is kzalloc()'ed). 13999a013361SMichael Wang * 14009a013361SMichael Wang * And for the similar reason, we will skip all the following updating. 
14019a013361SMichael Wang */ 14029a013361SMichael Wang if (!cpumask_weight(&updated_cpus)) 14039a013361SMichael Wang goto out; 14049a013361SMichael Wang 1405*3e401f7aSThiago Jung Bauermann if (cpus_locked) 1406*3e401f7aSThiago Jung Bauermann stop_machine_cpuslocked(update_cpu_topology, &updates[0], 1407*3e401f7aSThiago Jung Bauermann &updated_cpus); 1408*3e401f7aSThiago Jung Bauermann else 1409176bbf14SJesse Larrew stop_machine(update_cpu_topology, &updates[0], &updated_cpus); 141030c05350SNathan Fontenot 1411d4edc5b6SSrivatsa S. Bhat /* 1412d4edc5b6SSrivatsa S. Bhat * Update the numa-cpu lookup table with the new mappings, even for 1413d4edc5b6SSrivatsa S. Bhat * offline CPUs. It is best to perform this update from the stop- 1414d4edc5b6SSrivatsa S. Bhat * machine context. 1415d4edc5b6SSrivatsa S. Bhat */ 1416*3e401f7aSThiago Jung Bauermann if (cpus_locked) 1417*3e401f7aSThiago Jung Bauermann stop_machine_cpuslocked(update_lookup_table, &updates[0], 1418*3e401f7aSThiago Jung Bauermann cpumask_of(raw_smp_processor_id())); 1419*3e401f7aSThiago Jung Bauermann else 1420d4edc5b6SSrivatsa S. Bhat stop_machine(update_lookup_table, &updates[0], 1421d4edc5b6SSrivatsa S. Bhat cpumask_of(raw_smp_processor_id())); 1422d4edc5b6SSrivatsa S. 
Bhat 142330c05350SNathan Fontenot for (ud = &updates[0]; ud; ud = ud->next) { 1424dd023217SNathan Fontenot unregister_cpu_under_node(ud->cpu, ud->old_nid); 1425dd023217SNathan Fontenot register_cpu_under_node(ud->cpu, ud->new_nid); 1426dd023217SNathan Fontenot 142730c05350SNathan Fontenot dev = get_cpu_device(ud->cpu); 14288a25a2fdSKay Sievers if (dev) 14298a25a2fdSKay Sievers kobject_uevent(&dev->kobj, KOBJ_CHANGE); 143030c05350SNathan Fontenot cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask); 143179c5fcebSJesse Larrew changed = 1; 14329eff1a38SJesse Larrew } 14339eff1a38SJesse Larrew 14349a013361SMichael Wang out: 143530c05350SNathan Fontenot kfree(updates); 143679c5fcebSJesse Larrew return changed; 14379eff1a38SJesse Larrew } 14389eff1a38SJesse Larrew 1439*3e401f7aSThiago Jung Bauermann int arch_update_cpu_topology(void) 1440*3e401f7aSThiago Jung Bauermann { 1441*3e401f7aSThiago Jung Bauermann lockdep_assert_cpus_held(); 1442*3e401f7aSThiago Jung Bauermann return numa_update_cpu_topology(true); 1443*3e401f7aSThiago Jung Bauermann } 1444*3e401f7aSThiago Jung Bauermann 14459eff1a38SJesse Larrew static void topology_work_fn(struct work_struct *work) 14469eff1a38SJesse Larrew { 14479eff1a38SJesse Larrew rebuild_sched_domains(); 14489eff1a38SJesse Larrew } 14499eff1a38SJesse Larrew static DECLARE_WORK(topology_work, topology_work_fn); 14509eff1a38SJesse Larrew 1451ec32dd66SRobert Jennings static void topology_schedule_update(void) 14529eff1a38SJesse Larrew { 14539eff1a38SJesse Larrew schedule_work(&topology_work); 14549eff1a38SJesse Larrew } 14559eff1a38SJesse Larrew 14569eff1a38SJesse Larrew static void topology_timer_fn(unsigned long ignored) 14579eff1a38SJesse Larrew { 14585d88aa85SJesse Larrew if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask)) 14595d88aa85SJesse Larrew topology_schedule_update(); 14605d88aa85SJesse Larrew else if (vphn_enabled) { 14619eff1a38SJesse Larrew if (update_cpu_associativity_changes_mask() > 0) 
14629eff1a38SJesse Larrew topology_schedule_update(); 14635d88aa85SJesse Larrew reset_topology_timer(); 14645d88aa85SJesse Larrew } 14659eff1a38SJesse Larrew } 14669eff1a38SJesse Larrew static struct timer_list topology_timer = 14679eff1a38SJesse Larrew TIMER_INITIALIZER(topology_timer_fn, 0, 0); 14689eff1a38SJesse Larrew 14695d88aa85SJesse Larrew static void reset_topology_timer(void) 14709eff1a38SJesse Larrew { 14719eff1a38SJesse Larrew topology_timer.data = 0; 14729eff1a38SJesse Larrew topology_timer.expires = jiffies + 60 * HZ; 14735d88aa85SJesse Larrew mod_timer(&topology_timer, topology_timer.expires); 14749eff1a38SJesse Larrew } 14759eff1a38SJesse Larrew 1476601abdc3SNathan Fontenot #ifdef CONFIG_SMP 1477601abdc3SNathan Fontenot 14785d88aa85SJesse Larrew static void stage_topology_update(int core_id) 14795d88aa85SJesse Larrew { 14805d88aa85SJesse Larrew cpumask_or(&cpu_associativity_changes_mask, 14815d88aa85SJesse Larrew &cpu_associativity_changes_mask, cpu_sibling_mask(core_id)); 14825d88aa85SJesse Larrew reset_topology_timer(); 14835d88aa85SJesse Larrew } 14845d88aa85SJesse Larrew 14855d88aa85SJesse Larrew static int dt_update_callback(struct notifier_block *nb, 14865d88aa85SJesse Larrew unsigned long action, void *data) 14875d88aa85SJesse Larrew { 1488f5242e5aSGrant Likely struct of_reconfig_data *update = data; 14895d88aa85SJesse Larrew int rc = NOTIFY_DONE; 14905d88aa85SJesse Larrew 14915d88aa85SJesse Larrew switch (action) { 14925d88aa85SJesse Larrew case OF_RECONFIG_UPDATE_PROPERTY: 149330c05350SNathan Fontenot if (!of_prop_cmp(update->dn->type, "cpu") && 149430c05350SNathan Fontenot !of_prop_cmp(update->prop->name, "ibm,associativity")) { 14955d88aa85SJesse Larrew u32 core_id; 14965d88aa85SJesse Larrew of_property_read_u32(update->dn, "reg", &core_id); 14975d88aa85SJesse Larrew stage_topology_update(core_id); 14985d88aa85SJesse Larrew rc = NOTIFY_OK; 14995d88aa85SJesse Larrew } 15005d88aa85SJesse Larrew break; 15015d88aa85SJesse Larrew } 
15025d88aa85SJesse Larrew 15035d88aa85SJesse Larrew return rc; 15045d88aa85SJesse Larrew } 15055d88aa85SJesse Larrew 15065d88aa85SJesse Larrew static struct notifier_block dt_update_nb = { 15075d88aa85SJesse Larrew .notifier_call = dt_update_callback, 15085d88aa85SJesse Larrew }; 15095d88aa85SJesse Larrew 1510601abdc3SNathan Fontenot #endif 1511601abdc3SNathan Fontenot 15129eff1a38SJesse Larrew /* 15135d88aa85SJesse Larrew * Start polling for associativity changes. 15149eff1a38SJesse Larrew */ 15159eff1a38SJesse Larrew int start_topology_update(void) 15169eff1a38SJesse Larrew { 15179eff1a38SJesse Larrew int rc = 0; 15189eff1a38SJesse Larrew 15195d88aa85SJesse Larrew if (firmware_has_feature(FW_FEATURE_PRRN)) { 15205d88aa85SJesse Larrew if (!prrn_enabled) { 15215d88aa85SJesse Larrew prrn_enabled = 1; 15225d88aa85SJesse Larrew vphn_enabled = 0; 1523601abdc3SNathan Fontenot #ifdef CONFIG_SMP 15245d88aa85SJesse Larrew rc = of_reconfig_notifier_register(&dt_update_nb); 1525601abdc3SNathan Fontenot #endif 15265d88aa85SJesse Larrew } 1527b7abef04SJesse Larrew } else if (firmware_has_feature(FW_FEATURE_VPHN) && 1528f13c13a0SAnton Blanchard lppaca_shared_proc(get_lppaca())) { 15295d88aa85SJesse Larrew if (!vphn_enabled) { 15305d88aa85SJesse Larrew prrn_enabled = 0; 15319eff1a38SJesse Larrew vphn_enabled = 1; 15329eff1a38SJesse Larrew setup_cpu_associativity_change_counters(); 15339eff1a38SJesse Larrew init_timer_deferrable(&topology_timer); 15345d88aa85SJesse Larrew reset_topology_timer(); 15355d88aa85SJesse Larrew } 15369eff1a38SJesse Larrew } 15379eff1a38SJesse Larrew 15389eff1a38SJesse Larrew return rc; 15399eff1a38SJesse Larrew } 15409eff1a38SJesse Larrew 15419eff1a38SJesse Larrew /* 15429eff1a38SJesse Larrew * Disable polling for VPHN associativity changes. 
15439eff1a38SJesse Larrew */ 15449eff1a38SJesse Larrew int stop_topology_update(void) 15459eff1a38SJesse Larrew { 15465d88aa85SJesse Larrew int rc = 0; 15475d88aa85SJesse Larrew 15485d88aa85SJesse Larrew if (prrn_enabled) { 15495d88aa85SJesse Larrew prrn_enabled = 0; 1550601abdc3SNathan Fontenot #ifdef CONFIG_SMP 15515d88aa85SJesse Larrew rc = of_reconfig_notifier_unregister(&dt_update_nb); 1552601abdc3SNathan Fontenot #endif 15535d88aa85SJesse Larrew } else if (vphn_enabled) { 15549eff1a38SJesse Larrew vphn_enabled = 0; 15555d88aa85SJesse Larrew rc = del_timer_sync(&topology_timer); 15569eff1a38SJesse Larrew } 15575d88aa85SJesse Larrew 15585d88aa85SJesse Larrew return rc; 1559ab1f9dacSPaul Mackerras } 1560e04fa612SNathan Fontenot 1561e04fa612SNathan Fontenot int prrn_is_enabled(void) 1562e04fa612SNathan Fontenot { 1563e04fa612SNathan Fontenot return prrn_enabled; 1564e04fa612SNathan Fontenot } 1565e04fa612SNathan Fontenot 1566e04fa612SNathan Fontenot static int topology_read(struct seq_file *file, void *v) 1567e04fa612SNathan Fontenot { 1568e04fa612SNathan Fontenot if (vphn_enabled || prrn_enabled) 1569e04fa612SNathan Fontenot seq_puts(file, "on\n"); 1570e04fa612SNathan Fontenot else 1571e04fa612SNathan Fontenot seq_puts(file, "off\n"); 1572e04fa612SNathan Fontenot 1573e04fa612SNathan Fontenot return 0; 1574e04fa612SNathan Fontenot } 1575e04fa612SNathan Fontenot 1576e04fa612SNathan Fontenot static int topology_open(struct inode *inode, struct file *file) 1577e04fa612SNathan Fontenot { 1578e04fa612SNathan Fontenot return single_open(file, topology_read, NULL); 1579e04fa612SNathan Fontenot } 1580e04fa612SNathan Fontenot 1581e04fa612SNathan Fontenot static ssize_t topology_write(struct file *file, const char __user *buf, 1582e04fa612SNathan Fontenot size_t count, loff_t *off) 1583e04fa612SNathan Fontenot { 1584e04fa612SNathan Fontenot char kbuf[4]; /* "on" or "off" plus null. 
*/ 1585e04fa612SNathan Fontenot int read_len; 1586e04fa612SNathan Fontenot 1587e04fa612SNathan Fontenot read_len = count < 3 ? count : 3; 1588e04fa612SNathan Fontenot if (copy_from_user(kbuf, buf, read_len)) 1589e04fa612SNathan Fontenot return -EINVAL; 1590e04fa612SNathan Fontenot 1591e04fa612SNathan Fontenot kbuf[read_len] = '\0'; 1592e04fa612SNathan Fontenot 1593e04fa612SNathan Fontenot if (!strncmp(kbuf, "on", 2)) 1594e04fa612SNathan Fontenot start_topology_update(); 1595e04fa612SNathan Fontenot else if (!strncmp(kbuf, "off", 3)) 1596e04fa612SNathan Fontenot stop_topology_update(); 1597e04fa612SNathan Fontenot else 1598e04fa612SNathan Fontenot return -EINVAL; 1599e04fa612SNathan Fontenot 1600e04fa612SNathan Fontenot return count; 1601e04fa612SNathan Fontenot } 1602e04fa612SNathan Fontenot 1603e04fa612SNathan Fontenot static const struct file_operations topology_ops = { 1604e04fa612SNathan Fontenot .read = seq_read, 1605e04fa612SNathan Fontenot .write = topology_write, 1606e04fa612SNathan Fontenot .open = topology_open, 1607e04fa612SNathan Fontenot .release = single_release 1608e04fa612SNathan Fontenot }; 1609e04fa612SNathan Fontenot 1610e04fa612SNathan Fontenot static int topology_update_init(void) 1611e04fa612SNathan Fontenot { 16122d73bae1SNishanth Aravamudan /* Do not poll for changes if disabled at boot */ 16132d73bae1SNishanth Aravamudan if (topology_updates_enabled) 1614e04fa612SNathan Fontenot start_topology_update(); 16152d73bae1SNishanth Aravamudan 16162d15b9b4SNishanth Aravamudan if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops)) 16172d15b9b4SNishanth Aravamudan return -ENOMEM; 1618e04fa612SNathan Fontenot 1619e04fa612SNathan Fontenot return 0; 1620e04fa612SNathan Fontenot } 1621e04fa612SNathan Fontenot device_initcall(topology_update_init); 162239bf990eSJesse Larrew #endif /* CONFIG_PPC_SPLPAR */ 1623