/*
 * pSeries NUMA support
 *
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/lmb.h>
#include <linux/of.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/system.h>
#include <asm/smp.h>

static int numa_enabled = 1;

static char *cmdline __initdata;

static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }

int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);

static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;
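/*
 * Example (boundary values illustrative): booting with "numa=fake=1G,4G"
 * makes the function below carve fake node boundaries at 1GB and 4GB;
 * each time the memory scan crosses one of the memparse()d boundaries,
 * a new fake node id is handed out.
 */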
static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
						unsigned int *nid)
{
	unsigned long long mem;
	char *p = cmdline;
	static unsigned int fake_nid;
	static unsigned long long curr_boundary;

	/*
	 * Modify node id, iff we started creating NUMA nodes
	 * We want to continue from where we left off the last time
	 */
	if (fake_nid)
		*nid = fake_nid;
	/*
	 * In case there are no more arguments to parse, the
	 * node_id should be the same as the last fake node id
	 * (we've handled this above).
	 */
	if (!p)
		return 0;

	mem = memparse(p, &p);
	if (!mem)
		return 0;

	if (mem < curr_boundary)
		return 0;

	curr_boundary = mem;

	if ((end_pfn << PAGE_SHIFT) > mem) {
		/*
		 * Skip commas and spaces
		 */
		while (*p == ',' || *p == ' ' || *p == '\t')
			p++;

		cmdline = p;
		fake_nid++;
		*nid = fake_nid;
		dbg("created new fake_node with id %d\n", fake_nid);
		return 1;
	}
	return 0;
}

/*
 * get_active_region_work_fn - A helper function for get_node_active_region
 *	Returns datax set to the start_pfn and end_pfn if they contain
 *	the initial value of datax->start_pfn between them
 * @start_pfn: start page (inclusive) of region to check
 * @end_pfn: end page (exclusive) of region to check
 * @datax: comes in with ->start_pfn set to the value to search for and
 *	goes out with the active range if it contains it
 * Returns 1 if the search value is in range, else 0
 */
static int __init get_active_region_work_fn(unsigned long start_pfn,
					unsigned long end_pfn, void *datax)
{
	struct node_active_region *data;
	data = (struct node_active_region *)datax;

	if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) {
		data->start_pfn = start_pfn;
		data->end_pfn = end_pfn;
		return 1;
	}
	return 0;
}
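/*
 * Example (illustrative values): searching for pfn 0x500 against
 * active ranges [0x0, 0x100) and [0x400, 0x800) matches the second
 * range, so datax comes back as {start_pfn = 0x400, end_pfn = 0x800}.
 */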
/*
 * get_node_active_region - Return active region containing start_pfn
 * Active range returned is empty if none found.
 * @start_pfn: The page to return the region for.
 * @node_ar: Returned set to the active region containing start_pfn
 */
static void __init get_node_active_region(unsigned long start_pfn,
					  struct node_active_region *node_ar)
{
	int nid = early_pfn_to_nid(start_pfn);

	node_ar->nid = nid;
	node_ar->start_pfn = start_pfn;
	node_ar->end_pfn = start_pfn;
	work_with_active_regions(nid, get_active_region_work_fn, node_ar);
}

static void __cpuinit map_cpu_to_node(int cpu, int node)
{
	numa_cpu_lookup_table[cpu] = node;

	dbg("adding cpu %d to node %d\n", cpu, node);

	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
		cpu_set(cpu, numa_cpumask_lookup_table[node]);
}

#ifdef CONFIG_HOTPLUG_CPU
static void unmap_cpu_from_node(unsigned long cpu)
{
	int node = numa_cpu_lookup_table[cpu];

	dbg("removing cpu %lu from node %d\n", cpu, node);

	if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
		cpu_clear(cpu, numa_cpumask_lookup_table[node]);
	} else {
		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
		       cpu, node);
	}
}
#endif /* CONFIG_HOTPLUG_CPU */

static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
{
	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
	struct device_node *cpu_node = NULL;
	const unsigned int *interrupt_server, *reg;
	int len;

	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
		/* Try interrupt server first */
		interrupt_server = of_get_property(cpu_node,
					"ibm,ppc-interrupt-server#s", &len);

		len = len / sizeof(u32);

		if (interrupt_server && (len > 0)) {
			while (len--) {
				if (interrupt_server[len] == hw_cpuid)
					return cpu_node;
			}
		} else {
			reg = of_get_property(cpu_node, "reg", &len);
			if (reg && (len > 0) && (reg[0] == hw_cpuid))
				return cpu_node;
		}
	}

	return NULL;
}

/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
{
	return of_get_property(dev, "ibm,associativity", NULL);
}

/*
 * Returns the property linux,drconf-usable-memory if
 * it exists (the property exists only in kexec/kdump kernels,
 * added by kexec-tools)
 */
static const u32 *of_get_usable_memory(struct device_node *memory)
{
	const u32 *prop;
	u32 len;
	prop = of_get_property(memory, "linux,drconf-usable-memory", &len);
	if (!prop || len < sizeof(unsigned int))
		return 0;
	return prop;
}

/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
 * info is found.
 */
static int of_node_to_nid_single(struct device_node *device)
{
	int nid = -1;
	const unsigned int *tmp;

	if (min_common_depth == -1)
		goto out;

	tmp = of_get_associativity(device);
	if (!tmp)
		goto out;

	if (tmp[0] >= min_common_depth)
		nid = tmp[min_common_depth];

	/* POWER4 LPAR uses 0xffff as invalid node */
	if (nid == 0xffff || nid >= MAX_NUMNODES)
		nid = -1;
out:
	return nid;
}

/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
	struct device_node *tmp;
	int nid = -1;

	of_node_get(device);
	while (device) {
		nid = of_node_to_nid_single(device);
		if (nid != -1)
			break;

		tmp = device;
		device = of_get_parent(tmp);
		of_node_put(tmp);
	}
	of_node_put(device);

	return nid;
}
EXPORT_SYMBOL_GPL(of_node_to_nid);
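/*
 * Example (illustrative): with min_common_depth == 4, an
 * "ibm,associativity" property of <5 0 0 0 2 1> reports tmp[0] == 5
 * levels, so of_node_to_nid_single() above returns tmp[4] == 2.
 */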
/*
 * In theory, the "ibm,associativity" property may contain multiple
 * associativity lists because a resource may be multiply connected
 * into the machine.  This resource then has different associativity
 * characteristics relative to its multiple connections.  We ignore
 * this for now.  We also assume that all cpu and memory sets have
 * their distances represented at a common level.  This won't be
 * true for hierarchical NUMA.
 *
 * In any case the ibm,associativity-reference-points should give
 * the correct depth for a normal NUMA system.
 *
 * - Dave Hansen <haveblue@us.ibm.com>
 */
static int __init find_min_common_depth(void)
{
	int depth;
	const unsigned int *ref_points;
	struct device_node *rtas_root;
	unsigned int len;

	rtas_root = of_find_node_by_path("/rtas");

	if (!rtas_root)
		return -1;

	/*
	 * this property is 2 32-bit integers, each representing a level of
	 * depth in the associativity nodes.  The first is for an SMP
	 * configuration (should be all 0's) and the second is for a normal
	 * NUMA configuration.
	 */
	ref_points = of_get_property(rtas_root,
			"ibm,associativity-reference-points", &len);

	if (ref_points && (len >= 2 * sizeof(unsigned int))) {
		depth = ref_points[1];
	} else {
		dbg("NUMA: ibm,associativity-reference-points not found.\n");
		depth = -1;
	}
	of_node_put(rtas_root);

	return depth;
}

static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
	struct device_node *memory = NULL;

	memory = of_find_node_by_type(memory, "memory");
	if (!memory)
		panic("numa.c: No memory nodes found!");

	*n_addr_cells = of_n_addr_cells(memory);
	*n_size_cells = of_n_size_cells(memory);
	of_node_put(memory);
}

static unsigned long __devinit read_n_cells(int n, const unsigned int **buf)
{
	unsigned long result = 0;

	while (n--) {
		result = (result << 32) | **buf;
		(*buf)++;
	}
	return result;
}
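/*
 * Example: with n == 2, two cells <0x00000001 0x00000000> combine
 * into the 64-bit value 0x100000000 (4GB), most significant cell
 * first.
 */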
struct of_drconf_cell {
	u64	base_addr;
	u32	drc_index;
	u32	reserved;
	u32	aa_index;
	u32	flags;
};

#define DRCONF_MEM_ASSIGNED	0x00000008
#define DRCONF_MEM_AI_INVALID	0x00000040
#define DRCONF_MEM_RESERVED	0x00000080

/*
 * Read the next lmb list entry from the ibm,dynamic-memory property
 * and return the information in the provided of_drconf_cell structure.
 */
static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
{
	const u32 *cp;

	drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);

	cp = *cellp;
	drmem->drc_index = cp[0];
	drmem->reserved = cp[1];
	drmem->aa_index = cp[2];
	drmem->flags = cp[3];

	*cellp = cp + 4;
}

/*
 * Retrieve and validate the ibm,dynamic-memory property of the device tree.
 *
 * The layout of the ibm,dynamic-memory property is a count N followed
 * by N lmb list entries.  Each lmb list entry contains information as
 * laid out in the of_drconf_cell struct above.
 */
static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
{
	const u32 *prop;
	u32 len, entries;

	prop = of_get_property(memory, "ibm,dynamic-memory", &len);
	if (!prop || len < sizeof(unsigned int))
		return 0;

	entries = *prop++;

	/* Now that we know the number of entries, revalidate the size
	 * of the property read in to ensure we have everything
	 */
	if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
		return 0;

	*dm = prop;
	return entries;
}
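/*
 * Layout example (assuming n_mem_addr_cells == 2): with N == 2 the
 * property contains <2> followed by two entries of
 * <addr-hi addr-lo drc-index reserved aa-index flags>, i.e.
 * 1 + N * (n_mem_addr_cells + 4) cells in total, which is exactly
 * what the length check above revalidates.
 */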
/*
 * Retrieve and validate the ibm,lmb-size property for drconf memory
 * from the device tree.
 */
static u64 of_get_lmb_size(struct device_node *memory)
{
	const u32 *prop;
	u32 len;

	prop = of_get_property(memory, "ibm,lmb-size", &len);
	if (!prop || len < sizeof(unsigned int))
		return 0;

	return read_n_cells(n_mem_size_cells, &prop);
}

struct assoc_arrays {
	u32	n_arrays;
	u32	array_sz;
	const u32 *arrays;
};

/*
 * Retrieve and validate the list of associativity arrays for drconf
 * memory from the ibm,associativity-lookup-arrays property of the
 * device tree.
 *
 * The layout of the ibm,associativity-lookup-arrays property is a number N
 * indicating the number of associativity arrays, followed by a number M
 * indicating the size of each associativity array, followed by a list
 * of N associativity arrays.
 */
static int of_get_assoc_arrays(struct device_node *memory,
			       struct assoc_arrays *aa)
{
	const u32 *prop;
	u32 len;

	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
	if (!prop || len < 2 * sizeof(unsigned int))
		return -1;

	aa->n_arrays = *prop++;
	aa->array_sz = *prop++;

	/* Now that we know the number of arrays and size of each array,
	 * revalidate the size of the property read in.
	 */
	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
		return -1;

	aa->arrays = prop;
	return 0;
}
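/*
 * Example (illustrative): N == 2 arrays of size M == 4 are laid out
 * as <2 4 a00 a01 a02 a03 a10 a11 a12 a13>; aa->arrays points at a00,
 * so entry j of array i is aa->arrays[i * aa->array_sz + j].
 */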
/*
 * This is like of_node_to_nid_single() for memory represented in the
 * ibm,dynamic-reconfiguration-memory node.
 */
static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
				   struct assoc_arrays *aa)
{
	int default_nid = 0;
	int nid = default_nid;
	int index;

	if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
	    !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
	    drmem->aa_index < aa->n_arrays) {
		index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
		nid = aa->arrays[index];

		if (nid == 0xffff || nid >= MAX_NUMNODES)
			nid = default_nid;
	}

	return nid;
}

/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int __cpuinit numa_setup_cpu(unsigned long lcpu)
{
	int nid = 0;
	struct device_node *cpu = find_cpu_node(lcpu);

	if (!cpu) {
		WARN_ON(1);
		goto out;
	}

	nid = of_node_to_nid_single(cpu);

	if (nid < 0 || !node_online(nid))
		nid = any_online_node(NODE_MASK_ALL);
out:
	map_cpu_to_node(lcpu, nid);

	of_node_put(cpu);

	return nid;
}

static int __cpuinit cpu_numa_callback(struct notifier_block *nfb,
				       unsigned long action,
				       void *hcpu)
{
	unsigned long lcpu = (unsigned long)hcpu;
	int ret = NOTIFY_DONE;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		numa_setup_cpu(lcpu);
		ret = NOTIFY_OK;
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		unmap_cpu_from_node(lcpu);
		ret = NOTIFY_OK;
		break;
#endif
	}
	return ret;
}
/*
 * Check and possibly modify a memory region to enforce the memory limit.
 *
 * Returns the size the region should have to enforce the memory limit.
 * This will either be the original value of size, a truncated value,
 * or zero. If the returned value of size is 0 the region should be
 * discarded as it lies wholly above the memory limit.
 */
static unsigned long __init numa_enforce_memory_limit(unsigned long start,
						      unsigned long size)
{
	/*
	 * We use lmb_end_of_DRAM() in here instead of memory_limit because
	 * we've already adjusted it for the limit and it takes care of
	 * having memory holes below the limit.  Also, in the case of
	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
	 */

	if (start + size <= lmb_end_of_DRAM())
		return size;

	if (start >= lmb_end_of_DRAM())
		return 0;

	return lmb_end_of_DRAM() - start;
}

/*
 * Reads the counter for a given entry in
 * linux,drconf-usable-memory property
 */
static inline int __init read_usm_ranges(const u32 **usm)
{
	/*
	 * For each lmb in ibm,dynamic-memory a corresponding
	 * entry in linux,drconf-usable-memory property contains
	 * a counter followed by that many (base, size) pairs.
	 * Read the counter from linux,drconf-usable-memory.
	 */
	return read_n_cells(n_mem_size_cells, usm);
}
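/*
 * Example (illustrative): a linux,drconf-usable-memory entry of
 * <2> <base0 size0> <base1 size1> means the corresponding lmb
 * contributes two usable (base, size) ranges.
 */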
/*
 * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
 * node.  This assumes n_mem_{addr,size}_cells have been set.
 */
static void __init parse_drconf_memory(struct device_node *memory)
{
	const u32 *dm, *usm;
	unsigned int n, rc, ranges, is_kexec_kdump = 0;
	unsigned long lmb_size, base, size, sz;
	int nid;
	struct assoc_arrays aa;

	n = of_get_drconf_memory(memory, &dm);
	if (!n)
		return;

	lmb_size = of_get_lmb_size(memory);
	if (!lmb_size)
		return;

	rc = of_get_assoc_arrays(memory, &aa);
	if (rc)
		return;

	/* check if this is a kexec/kdump kernel */
	usm = of_get_usable_memory(memory);
	if (usm != NULL)
		is_kexec_kdump = 1;

	for (; n != 0; --n) {
		struct of_drconf_cell drmem;

		read_drconf_cell(&drmem, &dm);

		/* skip this block if the reserved bit is set in flags (0x80)
		   or if the block is not assigned to this partition (0x8) */
		if ((drmem.flags & DRCONF_MEM_RESERVED)
		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
			continue;

		base = drmem.base_addr;
		size = lmb_size;
		ranges = 1;

		if (is_kexec_kdump) {
			ranges = read_usm_ranges(&usm);
			if (!ranges) /* there are no (base, size) pairs */
				continue;
		}
		do {
			if (is_kexec_kdump) {
				base = read_n_cells(n_mem_addr_cells, &usm);
				size = read_n_cells(n_mem_size_cells, &usm);
			}
			nid = of_drconf_to_nid_single(&drmem, &aa);
			fake_numa_create_new_node(
				((base + size) >> PAGE_SHIFT), &nid);
			node_set_online(nid);
			sz = numa_enforce_memory_limit(base, size);
			if (sz)
				add_active_range(nid, base >> PAGE_SHIFT,
						 (base >> PAGE_SHIFT)
						 + (sz >> PAGE_SHIFT));
		} while (--ranges);
	}
}

static int __init parse_numa_properties(void)
{
	struct device_node *cpu = NULL;
	struct device_node *memory = NULL;
	int default_nid = 0;
	unsigned long i;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

	min_common_depth = find_min_common_depth();

	if (min_common_depth < 0)
		return min_common_depth;

	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);

	/*
	 * Even though we connect cpus to numa domains later in SMP
	 * init, we need to know the node ids now. This is because
	 * each node to be onlined must have NODE_DATA etc backing it.
	 */
	for_each_present_cpu(i) {
		int nid;

		cpu = find_cpu_node(i);
		BUG_ON(!cpu);
		nid = of_node_to_nid_single(cpu);
		of_node_put(cpu);

		/*
		 * Don't fall back to default_nid yet -- we will plug
		 * cpus into nodes once the memory scan has discovered
		 * the topology.
		 */
		if (nid < 0)
			continue;
		node_set_online(nid);
	}

	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
	memory = NULL;
	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start;
		unsigned long size;
		int nid;
		int ranges;
		const unsigned int *memcell_buf;
		unsigned int len;

		memcell_buf = of_get_property(memory,
			"linux,usable-memory", &len);
		if (!memcell_buf || len <= 0)
			memcell_buf = of_get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
new_range:
		/* these are order-sensitive, and modify the buffer pointer */
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);

		/*
		 * Assumption: either all memory nodes or none will
		 * have associativity properties.  If none, then
		 * everything goes to default_nid.
		 */
		nid = of_node_to_nid_single(memory);
		if (nid < 0)
			nid = default_nid;

		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
		node_set_online(nid);

		if (!(size = numa_enforce_memory_limit(start, size))) {
			if (--ranges)
				goto new_range;
			else
				continue;
		}

		add_active_range(nid, start >> PAGE_SHIFT,
				(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));

		if (--ranges)
			goto new_range;
	}

	/*
	 * Now do the same thing for each LMB listed in the ibm,dynamic-memory
	 * property in the ibm,dynamic-reconfiguration-memory node.
	 */
	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory)
		parse_drconf_memory(memory);

	return 0;
}

static void __init setup_nonnuma(void)
{
	unsigned long top_of_ram = lmb_end_of_DRAM();
	unsigned long total_ram = lmb_phys_mem_size();
	unsigned long start_pfn, end_pfn;
	unsigned int i, nid = 0;

	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
	       top_of_ram, total_ram);
	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
	       (top_of_ram - total_ram) >> 20);

	for (i = 0; i < lmb.memory.cnt; ++i) {
		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);

		fake_numa_create_new_node(end_pfn, &nid);
		add_active_range(nid, start_pfn, end_pfn);
		node_set_online(nid);
	}
}
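/*
 * Example output (illustrative): "Node 0 CPUs: 0-3 8-11".  Contiguous
 * cpu ids are collapsed into ranges, which is why the loop below also
 * has to visit the holes in the cpumap.
 */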
void __init dump_numa_cpu_topology(void)
{
	unsigned int node;
	unsigned int cpu, count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		printk(KERN_DEBUG "Node %d CPUs:", node);

		count = 0;
		/*
		 * If we used a CPU iterator here we would miss printing
		 * the holes in the cpumap.
		 */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
				if (count == 0)
					printk(" %u", cpu);
				++count;
			} else {
				if (count > 1)
					printk("-%u", cpu - 1);
				count = 0;
			}
		}

		if (count > 1)
			printk("-%u", NR_CPUS - 1);
		printk("\n");
	}
}

static void __init dump_numa_memory_topology(void)
{
	unsigned int node;
	unsigned int count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		unsigned long i;

		printk(KERN_DEBUG "Node %d Memory:", node);

		count = 0;

		for (i = 0; i < lmb_end_of_DRAM();
		     i += (1 << SECTION_SIZE_BITS)) {
			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
				if (count == 0)
					printk(" 0x%lx", i);
				++count;
			} else {
				if (count > 0)
					printk("-0x%lx", i);
				count = 0;
			}
		}

		if (count > 0)
			printk("-0x%lx", i);
		printk("\n");
	}
}

/*
 * Allocate some memory, satisfying the lmb or bootmem allocator where
 * required. nid is the preferred node and end is the physical address of
 * the highest address in the node.
 *
 * Returns the physical address of the memory.
 */
static void __init *careful_allocation(int nid, unsigned long size,
				       unsigned long align,
				       unsigned long end_pfn)
{
	int new_nid;
	unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);

	/* retry over all memory */
	if (!ret)
		ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());

	if (!ret)
		panic("numa.c: cannot allocate %lu bytes on node %d",
		      size, nid);

	/*
	 * If the memory came from a previously allocated node, we must
	 * retry with the bootmem allocator.
	 */
	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
	if (new_nid < nid) {
		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
				size, align, 0);

		if (!ret)
			panic("numa.c: cannot allocate %lu bytes on node %d",
			      size, new_nid);

		ret = __pa(ret);

		dbg("alloc_bootmem %lx %lx\n", ret, size);
	}

	return (void *)ret;
}
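/*
 * Note: careful_allocation() returns a physical address; callers such
 * as do_init_bootmem() below convert it with __va() before touching
 * the memory.
 */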
static struct notifier_block __cpuinitdata ppc64_numa_nb = {
	.notifier_call = cpu_numa_callback,
	.priority = 1 /* Must run before sched domains notifier. */
};

static void mark_reserved_regions_for_nid(int nid)
{
	struct pglist_data *node = NODE_DATA(nid);
	int i;

	for (i = 0; i < lmb.reserved.cnt; i++) {
		unsigned long physbase = lmb.reserved.region[i].base;
		unsigned long size = lmb.reserved.region[i].size;
		unsigned long start_pfn = physbase >> PAGE_SHIFT;
		unsigned long end_pfn = ((physbase + size) >> PAGE_SHIFT);
		struct node_active_region node_ar;
		unsigned long node_end_pfn = node->node_start_pfn +
					     node->node_spanned_pages;

		/*
		 * Check to make sure that this lmb.reserved area is
		 * within the bounds of the node that we care about.
		 * Checking the nid of the start and end points is not
		 * sufficient because the reserved area could span the
		 * entire node.
		 */
		if (end_pfn <= node->node_start_pfn ||
		    start_pfn >= node_end_pfn)
			continue;

		get_node_active_region(start_pfn, &node_ar);
		while (start_pfn < end_pfn &&
			node_ar.start_pfn < node_ar.end_pfn) {
			unsigned long reserve_size = size;
			/*
			 * if reserved region extends past active region
			 * then trim size to active region
			 */
			if (end_pfn > node_ar.end_pfn)
				reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
					- (start_pfn << PAGE_SHIFT);
			dbg("reserve_bootmem %lx %lx nid=%d\n", physbase,
				reserve_size, node_ar.nid);
			reserve_bootmem_node(NODE_DATA(node_ar.nid), physbase,
						reserve_size, BOOTMEM_DEFAULT);
			/*
			 * if reserved region is contained in the active region
			 * then done.
			 */
			if (end_pfn <= node_ar.end_pfn)
				break;

			/*
			 * reserved region extends past the active region
			 * get the next active region that contains this
			 * reserved region
			 */
			start_pfn = node_ar.end_pfn;
			physbase = start_pfn << PAGE_SHIFT;
			size = size - reserve_size;
			get_node_active_region(start_pfn, &node_ar);
		}
	}
}

void __init do_init_bootmem(void)
{
	int nid;

	min_low_pfn = 0;
	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
	max_pfn = max_low_pfn;

	if (parse_numa_properties())
		setup_nonnuma();
	else
		dump_numa_memory_topology();

	register_cpu_notifier(&ppc64_numa_nb);
	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
			  (void *)(unsigned long)boot_cpuid);

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn;
		unsigned long bootmem_paddr;
		unsigned long bootmap_pages;

		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);

		/*
		 * Allocate the node structure node local if possible
		 *
		 * Be careful moving this around, as it relies on all
		 * previous nodes' bootmem to be initialized and have
		 * all reserved areas marked.
		 */
		NODE_DATA(nid) = careful_allocation(nid,
					sizeof(struct pglist_data),
					SMP_CACHE_BYTES, end_pfn);
		NODE_DATA(nid) = __va(NODE_DATA(nid));
		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));

		dbg("node %d\n", nid);
		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));

		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
		NODE_DATA(nid)->node_start_pfn = start_pfn;
		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;

		if (NODE_DATA(nid)->node_spanned_pages == 0)
			continue;

		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);

		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
		bootmem_paddr = (unsigned long)careful_allocation(nid,
					bootmap_pages << PAGE_SHIFT,
					PAGE_SIZE, end_pfn);
		memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);

		dbg("bootmap_paddr = %lx\n", bootmem_paddr);

		init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
				  start_pfn, end_pfn);

		free_bootmem_with_active_regions(nid, end_pfn);
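		/*
		 * Bitmap sizing example (illustrative): a node spanning
		 * 4GB of 4KB pages needs a 2^20-bit bootmem bitmap, i.e.
		 * 128KB, so bootmem_bootmap_pages() above returns 32.
		 */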
		/*
		 * Be very careful about moving this around.  Future
		 * calls to careful_allocation() depend on this getting
		 * done correctly.
		 */
		mark_reserved_regions_for_nid(nid);
		sparse_memory_present_with_active_regions(nid);
	}
}

void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];
	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT;
	free_area_init_nodes(max_zone_pfns);
}

static int __init early_numa(char *p)
{
	if (!p)
		return 0;

	if (strstr(p, "off"))
		numa_enabled = 0;

	if (strstr(p, "debug"))
		numa_debug = 1;

	p = strstr(p, "fake=");
	if (p)
		cmdline = p + strlen("fake=");

	return 0;
}
early_param("numa", early_numa);

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Validate the node associated with the memory section we are
 * trying to add.
 */
int valid_hot_add_scn(int *nid, unsigned long start, u32 lmb_size,
		      unsigned long scn_addr)
{
	nodemask_t nodes;

	if (*nid < 0 || !node_online(*nid))
		*nid = any_online_node(NODE_MASK_ALL);

	if ((scn_addr >= start) && (scn_addr < (start + lmb_size))) {
		nodes_setall(nodes);
		while (NODE_DATA(*nid)->node_spanned_pages == 0) {
			node_clear(*nid, nodes);
			*nid = any_online_node(nodes);
		}

		return 1;
	}

	return 0;
}
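/*
 * Example (illustrative): for an lmb at start == 0x10000000 with
 * lmb_size == 0x1000000, a section address of 0x10800000 falls inside
 * [start, start + lmb_size), so the function above returns 1 after
 * possibly steering *nid to a node that actually spans pages.
 */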
/*
 * Find the node associated with a hot added memory section represented
 * by the ibm,dynamic-reconfiguration-memory node.
 */
static int hot_add_drconf_scn_to_nid(struct device_node *memory,
				     unsigned long scn_addr)
{
	const u32 *dm;
	unsigned int n, rc;
	unsigned long lmb_size;
	int default_nid = any_online_node(NODE_MASK_ALL);
	int nid;
	struct assoc_arrays aa;

	n = of_get_drconf_memory(memory, &dm);
	if (!n)
		return default_nid;

	lmb_size = of_get_lmb_size(memory);
	if (!lmb_size)
		return default_nid;

	rc = of_get_assoc_arrays(memory, &aa);
	if (rc)
		return default_nid;

	for (; n != 0; --n) {
		struct of_drconf_cell drmem;

		read_drconf_cell(&drmem, &dm);

		/* skip this block if it is reserved or not assigned to
		 * this partition */
		if ((drmem.flags & DRCONF_MEM_RESERVED)
		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
			continue;

		nid = of_drconf_to_nid_single(&drmem, &aa);

		if (valid_hot_add_scn(&nid, drmem.base_addr, lmb_size,
				      scn_addr))
			return nid;
	}

	BUG();	/* section address should be found above */
	return 0;
}

/*
 * Find the node associated with a hot added memory section.  Section
 * corresponds to a SPARSEMEM section, not an LMB.  It is assumed that
 * sections are fully contained within a single LMB.
 */
int hot_add_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory = NULL;
	int nid;

	if (!numa_enabled || (min_common_depth < 0))
		return any_online_node(NODE_MASK_ALL);

	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory) {
		nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
		of_node_put(memory);
		return nid;
	}

	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start, size;
		int ranges;
		const unsigned int *memcell_buf;
		unsigned int len;

		memcell_buf = of_get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
ha_new_range:
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);
		nid = of_node_to_nid_single(memory);

		if (valid_hot_add_scn(&nid, start, size, scn_addr)) {
			of_node_put(memory);
			return nid;
		}

		if (--ranges)	/* process all ranges in cell */
			goto ha_new_range;
	}
	BUG();	/* section address should be found above */
	return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */