1ab1f9dacSPaul Mackerras /* 2ab1f9dacSPaul Mackerras * pSeries NUMA support 3ab1f9dacSPaul Mackerras * 4ab1f9dacSPaul Mackerras * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM 5ab1f9dacSPaul Mackerras * 6ab1f9dacSPaul Mackerras * This program is free software; you can redistribute it and/or 7ab1f9dacSPaul Mackerras * modify it under the terms of the GNU General Public License 8ab1f9dacSPaul Mackerras * as published by the Free Software Foundation; either version 9ab1f9dacSPaul Mackerras * 2 of the License, or (at your option) any later version. 10ab1f9dacSPaul Mackerras */ 11ab1f9dacSPaul Mackerras #include <linux/threads.h> 12ab1f9dacSPaul Mackerras #include <linux/bootmem.h> 13ab1f9dacSPaul Mackerras #include <linux/init.h> 14ab1f9dacSPaul Mackerras #include <linux/mm.h> 15ab1f9dacSPaul Mackerras #include <linux/mmzone.h> 16ab1f9dacSPaul Mackerras #include <linux/module.h> 17ab1f9dacSPaul Mackerras #include <linux/nodemask.h> 18ab1f9dacSPaul Mackerras #include <linux/cpu.h> 19ab1f9dacSPaul Mackerras #include <linux/notifier.h> 20d9b2b2a2SDavid S. Miller #include <linux/lmb.h> 216df1646eSMichael Ellerman #include <linux/of.h> 2245fb6ceaSAnton Blanchard #include <asm/sparsemem.h> 23d9b2b2a2SDavid S. Miller #include <asm/prom.h> 24cf00a8d1SPaul Mackerras #include <asm/system.h> 252249ca9dSPaul Mackerras #include <asm/smp.h> 26ab1f9dacSPaul Mackerras 27ab1f9dacSPaul Mackerras static int numa_enabled = 1; 28ab1f9dacSPaul Mackerras 291daa6d08SBalbir Singh static char *cmdline __initdata; 301daa6d08SBalbir Singh 31ab1f9dacSPaul Mackerras static int numa_debug; 32ab1f9dacSPaul Mackerras #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } 33ab1f9dacSPaul Mackerras 3445fb6ceaSAnton Blanchard int numa_cpu_lookup_table[NR_CPUS]; 35ab1f9dacSPaul Mackerras cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; 36ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES]; 3745fb6ceaSAnton Blanchard 3845fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpu_lookup_table); 3945fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpumask_lookup_table); 4045fb6ceaSAnton Blanchard EXPORT_SYMBOL(node_data); 4145fb6ceaSAnton Blanchard 42ab1f9dacSPaul Mackerras static int min_common_depth; 43237a0989SMike Kravetz static int n_mem_addr_cells, n_mem_size_cells; 44ab1f9dacSPaul Mackerras 451daa6d08SBalbir Singh static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn, 461daa6d08SBalbir Singh unsigned int *nid) 471daa6d08SBalbir Singh { 481daa6d08SBalbir Singh unsigned long long mem; 491daa6d08SBalbir Singh char *p = cmdline; 501daa6d08SBalbir Singh static unsigned int fake_nid; 511daa6d08SBalbir Singh static unsigned long long curr_boundary; 521daa6d08SBalbir Singh 531daa6d08SBalbir Singh /* 541daa6d08SBalbir Singh * Modify node id, iff we started creating NUMA nodes 551daa6d08SBalbir Singh * We want to continue from where we left of the last time 561daa6d08SBalbir Singh */ 571daa6d08SBalbir Singh if (fake_nid) 581daa6d08SBalbir Singh *nid = fake_nid; 591daa6d08SBalbir Singh /* 601daa6d08SBalbir Singh * In case there are no more arguments to parse, the 611daa6d08SBalbir Singh * node_id should be the same as the last fake node id 621daa6d08SBalbir Singh * (we've handled this above). 631daa6d08SBalbir Singh */ 641daa6d08SBalbir Singh if (!p) 651daa6d08SBalbir Singh return 0; 661daa6d08SBalbir Singh 671daa6d08SBalbir Singh mem = memparse(p, &p); 681daa6d08SBalbir Singh if (!mem) 691daa6d08SBalbir Singh return 0; 701daa6d08SBalbir Singh 711daa6d08SBalbir Singh if (mem < curr_boundary) 721daa6d08SBalbir Singh return 0; 731daa6d08SBalbir Singh 741daa6d08SBalbir Singh curr_boundary = mem; 751daa6d08SBalbir Singh 761daa6d08SBalbir Singh if ((end_pfn << PAGE_SHIFT) > mem) { 771daa6d08SBalbir Singh /* 781daa6d08SBalbir Singh * Skip commas and spaces 791daa6d08SBalbir Singh */ 801daa6d08SBalbir Singh while (*p == ',' || *p == ' ' || *p == '\t') 811daa6d08SBalbir Singh p++; 821daa6d08SBalbir Singh 831daa6d08SBalbir Singh cmdline = p; 841daa6d08SBalbir Singh fake_nid++; 851daa6d08SBalbir Singh *nid = fake_nid; 861daa6d08SBalbir Singh dbg("created new fake_node with id %d\n", fake_nid); 871daa6d08SBalbir Singh return 1; 881daa6d08SBalbir Singh } 891daa6d08SBalbir Singh return 0; 901daa6d08SBalbir Singh } 911daa6d08SBalbir Singh 92*8f64e1f2SJon Tollefson /* 93*8f64e1f2SJon Tollefson * get_active_region_work_fn - A helper function for get_node_active_region 94*8f64e1f2SJon Tollefson * Returns datax set to the start_pfn and end_pfn if they contain 95*8f64e1f2SJon Tollefson * the initial value of datax->start_pfn between them 96*8f64e1f2SJon Tollefson * @start_pfn: start page(inclusive) of region to check 97*8f64e1f2SJon Tollefson * @end_pfn: end page(exclusive) of region to check 98*8f64e1f2SJon Tollefson * @datax: comes in with ->start_pfn set to value to search for and 99*8f64e1f2SJon Tollefson * goes out with active range if it contains it 100*8f64e1f2SJon Tollefson * Returns 1 if search value is in range else 0 101*8f64e1f2SJon Tollefson */ 102*8f64e1f2SJon Tollefson static int __init get_active_region_work_fn(unsigned long start_pfn, 103*8f64e1f2SJon Tollefson unsigned long end_pfn, void *datax) 104*8f64e1f2SJon Tollefson { 105*8f64e1f2SJon Tollefson struct node_active_region *data; 106*8f64e1f2SJon Tollefson data = (struct node_active_region *)datax; 107*8f64e1f2SJon Tollefson 108*8f64e1f2SJon Tollefson if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) { 109*8f64e1f2SJon Tollefson data->start_pfn = start_pfn; 110*8f64e1f2SJon Tollefson data->end_pfn = end_pfn; 111*8f64e1f2SJon Tollefson return 1; 112*8f64e1f2SJon Tollefson } 113*8f64e1f2SJon Tollefson return 0; 114*8f64e1f2SJon Tollefson 115*8f64e1f2SJon Tollefson } 116*8f64e1f2SJon Tollefson 117*8f64e1f2SJon Tollefson /* 118*8f64e1f2SJon Tollefson * get_node_active_region - Return active region containing start_pfn 119*8f64e1f2SJon Tollefson * @start_pfn: The page to return the region for. 120*8f64e1f2SJon Tollefson * @node_ar: Returned set to the active region containing start_pfn 121*8f64e1f2SJon Tollefson */ 122*8f64e1f2SJon Tollefson static void __init get_node_active_region(unsigned long start_pfn, 123*8f64e1f2SJon Tollefson struct node_active_region *node_ar) 124*8f64e1f2SJon Tollefson { 125*8f64e1f2SJon Tollefson int nid = early_pfn_to_nid(start_pfn); 126*8f64e1f2SJon Tollefson 127*8f64e1f2SJon Tollefson node_ar->nid = nid; 128*8f64e1f2SJon Tollefson node_ar->start_pfn = start_pfn; 129*8f64e1f2SJon Tollefson work_with_active_regions(nid, get_active_region_work_fn, node_ar); 130*8f64e1f2SJon Tollefson } 131*8f64e1f2SJon Tollefson 1322e5ce39dSNathan Lynch static void __cpuinit map_cpu_to_node(int cpu, int node) 133ab1f9dacSPaul Mackerras { 134ab1f9dacSPaul Mackerras numa_cpu_lookup_table[cpu] = node; 13545fb6ceaSAnton Blanchard 136bf4b85b0SNathan Lynch dbg("adding cpu %d to node %d\n", cpu, node); 137bf4b85b0SNathan Lynch 13845fb6ceaSAnton Blanchard if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) 139ab1f9dacSPaul Mackerras cpu_set(cpu, numa_cpumask_lookup_table[node]); 140ab1f9dacSPaul Mackerras } 141ab1f9dacSPaul Mackerras 142ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU 143ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu) 144ab1f9dacSPaul Mackerras { 145ab1f9dacSPaul Mackerras int node = numa_cpu_lookup_table[cpu]; 146ab1f9dacSPaul Mackerras 147ab1f9dacSPaul Mackerras dbg("removing cpu %lu from node %d\n", cpu, node); 148ab1f9dacSPaul Mackerras 149ab1f9dacSPaul Mackerras if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { 150ab1f9dacSPaul Mackerras cpu_clear(cpu, numa_cpumask_lookup_table[node]); 151ab1f9dacSPaul Mackerras } else { 152ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", 153ab1f9dacSPaul Mackerras cpu, node); 154ab1f9dacSPaul Mackerras } 155ab1f9dacSPaul Mackerras } 156ab1f9dacSPaul Mackerras #endif /* CONFIG_HOTPLUG_CPU */ 157ab1f9dacSPaul Mackerras 1582e5ce39dSNathan Lynch static struct device_node * __cpuinit find_cpu_node(unsigned int cpu) 159ab1f9dacSPaul Mackerras { 160ab1f9dacSPaul Mackerras unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); 161ab1f9dacSPaul Mackerras struct device_node *cpu_node = NULL; 162a7f67bdfSJeremy Kerr const unsigned int *interrupt_server, *reg; 163ab1f9dacSPaul Mackerras int len; 164ab1f9dacSPaul Mackerras 165ab1f9dacSPaul Mackerras while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) { 166ab1f9dacSPaul Mackerras /* Try interrupt server first */ 167e2eb6392SStephen Rothwell interrupt_server = of_get_property(cpu_node, 168ab1f9dacSPaul Mackerras "ibm,ppc-interrupt-server#s", &len); 169ab1f9dacSPaul Mackerras 170ab1f9dacSPaul Mackerras len = len / sizeof(u32); 171ab1f9dacSPaul Mackerras 172ab1f9dacSPaul Mackerras if (interrupt_server && (len > 0)) { 173ab1f9dacSPaul Mackerras while (len--) { 174ab1f9dacSPaul Mackerras if (interrupt_server[len] == hw_cpuid) 175ab1f9dacSPaul Mackerras return cpu_node; 176ab1f9dacSPaul Mackerras } 177ab1f9dacSPaul Mackerras } else { 178e2eb6392SStephen Rothwell reg = of_get_property(cpu_node, "reg", &len); 179ab1f9dacSPaul Mackerras if (reg && (len > 0) && (reg[0] == hw_cpuid)) 180ab1f9dacSPaul Mackerras return cpu_node; 181ab1f9dacSPaul Mackerras } 182ab1f9dacSPaul Mackerras } 183ab1f9dacSPaul Mackerras 184ab1f9dacSPaul Mackerras return NULL; 185ab1f9dacSPaul Mackerras } 186ab1f9dacSPaul Mackerras 187ab1f9dacSPaul Mackerras /* must hold reference to node during call */ 188a7f67bdfSJeremy Kerr static const int *of_get_associativity(struct device_node *dev) 189ab1f9dacSPaul Mackerras { 190e2eb6392SStephen Rothwell return of_get_property(dev, "ibm,associativity", NULL); 191ab1f9dacSPaul Mackerras } 192ab1f9dacSPaul Mackerras 193cf00085dSChandru /* 194cf00085dSChandru * Returns the property linux,drconf-usable-memory if 195cf00085dSChandru * it exists (the property exists only in kexec/kdump kernels, 196cf00085dSChandru * added by kexec-tools) 197cf00085dSChandru */ 198cf00085dSChandru static const u32 *of_get_usable_memory(struct device_node *memory) 199cf00085dSChandru { 200cf00085dSChandru const u32 *prop; 201cf00085dSChandru u32 len; 202cf00085dSChandru prop = of_get_property(memory, "linux,drconf-usable-memory", &len); 203cf00085dSChandru if (!prop || len < sizeof(unsigned int)) 204cf00085dSChandru return 0; 205cf00085dSChandru return prop; 206cf00085dSChandru } 207cf00085dSChandru 208482ec7c4SNathan Lynch /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa 209482ec7c4SNathan Lynch * info is found. 210482ec7c4SNathan Lynch */ 211953039c8SJeremy Kerr static int of_node_to_nid_single(struct device_node *device) 212ab1f9dacSPaul Mackerras { 213482ec7c4SNathan Lynch int nid = -1; 214a7f67bdfSJeremy Kerr const unsigned int *tmp; 215ab1f9dacSPaul Mackerras 216ab1f9dacSPaul Mackerras if (min_common_depth == -1) 217482ec7c4SNathan Lynch goto out; 218ab1f9dacSPaul Mackerras 219ab1f9dacSPaul Mackerras tmp = of_get_associativity(device); 220482ec7c4SNathan Lynch if (!tmp) 221482ec7c4SNathan Lynch goto out; 222482ec7c4SNathan Lynch 223482ec7c4SNathan Lynch if (tmp[0] >= min_common_depth) 224cf950b7aSNathan Lynch nid = tmp[min_common_depth]; 225bc16a759SNathan Lynch 226bc16a759SNathan Lynch /* POWER4 LPAR uses 0xffff as invalid node */ 227482ec7c4SNathan Lynch if (nid == 0xffff || nid >= MAX_NUMNODES) 228482ec7c4SNathan Lynch nid = -1; 229482ec7c4SNathan Lynch out: 230cf950b7aSNathan Lynch return nid; 231ab1f9dacSPaul Mackerras } 232ab1f9dacSPaul Mackerras 233953039c8SJeremy Kerr /* Walk the device tree upwards, looking for an associativity id */ 234953039c8SJeremy Kerr int of_node_to_nid(struct device_node *device) 235953039c8SJeremy Kerr { 236953039c8SJeremy Kerr struct device_node *tmp; 237953039c8SJeremy Kerr int nid = -1; 238953039c8SJeremy Kerr 239953039c8SJeremy Kerr of_node_get(device); 240953039c8SJeremy Kerr while (device) { 241953039c8SJeremy Kerr nid = of_node_to_nid_single(device); 242953039c8SJeremy Kerr if (nid != -1) 243953039c8SJeremy Kerr break; 244953039c8SJeremy Kerr 245953039c8SJeremy Kerr tmp = device; 246953039c8SJeremy Kerr device = of_get_parent(tmp); 247953039c8SJeremy Kerr of_node_put(tmp); 248953039c8SJeremy Kerr } 249953039c8SJeremy Kerr of_node_put(device); 250953039c8SJeremy Kerr 251953039c8SJeremy Kerr return nid; 252953039c8SJeremy Kerr } 253953039c8SJeremy Kerr EXPORT_SYMBOL_GPL(of_node_to_nid); 254953039c8SJeremy Kerr 255ab1f9dacSPaul Mackerras /* 256ab1f9dacSPaul Mackerras * In theory, the "ibm,associativity" property may contain multiple 257ab1f9dacSPaul Mackerras * associativity lists because a resource may be multiply connected 258ab1f9dacSPaul Mackerras * into the machine. This resource then has different associativity 259ab1f9dacSPaul Mackerras * characteristics relative to its multiple connections. We ignore 260ab1f9dacSPaul Mackerras * this for now. We also assume that all cpu and memory sets have 261ab1f9dacSPaul Mackerras * their distances represented at a common level. This won't be 2621b3c3714SUwe Kleine-König * true for hierarchical NUMA. 263ab1f9dacSPaul Mackerras * 264ab1f9dacSPaul Mackerras * In any case the ibm,associativity-reference-points should give 265ab1f9dacSPaul Mackerras * the correct depth for a normal NUMA system. 266ab1f9dacSPaul Mackerras * 267ab1f9dacSPaul Mackerras * - Dave Hansen <haveblue@us.ibm.com> 268ab1f9dacSPaul Mackerras */ 269ab1f9dacSPaul Mackerras static int __init find_min_common_depth(void) 270ab1f9dacSPaul Mackerras { 271ab1f9dacSPaul Mackerras int depth; 272a7f67bdfSJeremy Kerr const unsigned int *ref_points; 273ab1f9dacSPaul Mackerras struct device_node *rtas_root; 274ab1f9dacSPaul Mackerras unsigned int len; 275ab1f9dacSPaul Mackerras 276ab1f9dacSPaul Mackerras rtas_root = of_find_node_by_path("/rtas"); 277ab1f9dacSPaul Mackerras 278ab1f9dacSPaul Mackerras if (!rtas_root) 279ab1f9dacSPaul Mackerras return -1; 280ab1f9dacSPaul Mackerras 281ab1f9dacSPaul Mackerras /* 282ab1f9dacSPaul Mackerras * this property is 2 32-bit integers, each representing a level of 283ab1f9dacSPaul Mackerras * depth in the associativity nodes. The first is for an SMP 284ab1f9dacSPaul Mackerras * configuration (should be all 0's) and the second is for a normal 285ab1f9dacSPaul Mackerras * NUMA configuration. 286ab1f9dacSPaul Mackerras */ 287e2eb6392SStephen Rothwell ref_points = of_get_property(rtas_root, 288ab1f9dacSPaul Mackerras "ibm,associativity-reference-points", &len); 289ab1f9dacSPaul Mackerras 290ab1f9dacSPaul Mackerras if ((len >= 1) && ref_points) { 291ab1f9dacSPaul Mackerras depth = ref_points[1]; 292ab1f9dacSPaul Mackerras } else { 293bf4b85b0SNathan Lynch dbg("NUMA: ibm,associativity-reference-points not found.\n"); 294ab1f9dacSPaul Mackerras depth = -1; 295ab1f9dacSPaul Mackerras } 296ab1f9dacSPaul Mackerras of_node_put(rtas_root); 297ab1f9dacSPaul Mackerras 298ab1f9dacSPaul Mackerras return depth; 299ab1f9dacSPaul Mackerras } 300ab1f9dacSPaul Mackerras 30184c9fdd1SMike Kravetz static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells) 302ab1f9dacSPaul Mackerras { 303ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 304ab1f9dacSPaul Mackerras 305ab1f9dacSPaul Mackerras memory = of_find_node_by_type(memory, "memory"); 30654c23310SPaul Mackerras if (!memory) 30784c9fdd1SMike Kravetz panic("numa.c: No memory nodes found!"); 30854c23310SPaul Mackerras 309a8bda5ddSStephen Rothwell *n_addr_cells = of_n_addr_cells(memory); 3109213feeaSStephen Rothwell *n_size_cells = of_n_size_cells(memory); 31184c9fdd1SMike Kravetz of_node_put(memory); 312ab1f9dacSPaul Mackerras } 313ab1f9dacSPaul Mackerras 314a7f67bdfSJeremy Kerr static unsigned long __devinit read_n_cells(int n, const unsigned int **buf) 315ab1f9dacSPaul Mackerras { 316ab1f9dacSPaul Mackerras unsigned long result = 0; 317ab1f9dacSPaul Mackerras 318ab1f9dacSPaul Mackerras while (n--) { 319ab1f9dacSPaul Mackerras result = (result << 32) | **buf; 320ab1f9dacSPaul Mackerras (*buf)++; 321ab1f9dacSPaul Mackerras } 322ab1f9dacSPaul Mackerras return result; 323ab1f9dacSPaul Mackerras } 324ab1f9dacSPaul Mackerras 3258342681dSNathan Fontenot struct of_drconf_cell { 3268342681dSNathan Fontenot u64 base_addr; 3278342681dSNathan Fontenot u32 drc_index; 3288342681dSNathan Fontenot u32 reserved; 3298342681dSNathan Fontenot u32 aa_index; 3308342681dSNathan Fontenot u32 flags; 3318342681dSNathan Fontenot }; 3328342681dSNathan Fontenot 3338342681dSNathan Fontenot #define DRCONF_MEM_ASSIGNED 0x00000008 3348342681dSNathan Fontenot #define DRCONF_MEM_AI_INVALID 0x00000040 3358342681dSNathan Fontenot #define DRCONF_MEM_RESERVED 0x00000080 3368342681dSNathan Fontenot 3378342681dSNathan Fontenot /* 3388342681dSNathan Fontenot * Read the next lmb list entry from the ibm,dynamic-memory property 3398342681dSNathan Fontenot * and return the information in the provided of_drconf_cell structure. 3408342681dSNathan Fontenot */ 3418342681dSNathan Fontenot static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp) 3428342681dSNathan Fontenot { 3438342681dSNathan Fontenot const u32 *cp; 3448342681dSNathan Fontenot 3458342681dSNathan Fontenot drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp); 3468342681dSNathan Fontenot 3478342681dSNathan Fontenot cp = *cellp; 3488342681dSNathan Fontenot drmem->drc_index = cp[0]; 3498342681dSNathan Fontenot drmem->reserved = cp[1]; 3508342681dSNathan Fontenot drmem->aa_index = cp[2]; 3518342681dSNathan Fontenot drmem->flags = cp[3]; 3528342681dSNathan Fontenot 3538342681dSNathan Fontenot *cellp = cp + 4; 3548342681dSNathan Fontenot } 3558342681dSNathan Fontenot 3568342681dSNathan Fontenot /* 3578342681dSNathan Fontenot * Retreive and validate the ibm,dynamic-memory property of the device tree. 3588342681dSNathan Fontenot * 3598342681dSNathan Fontenot * The layout of the ibm,dynamic-memory property is a number N of lmb 3608342681dSNathan Fontenot * list entries followed by N lmb list entries. Each lmb list entry 3618342681dSNathan Fontenot * contains information as layed out in the of_drconf_cell struct above. 3628342681dSNathan Fontenot */ 3638342681dSNathan Fontenot static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) 3648342681dSNathan Fontenot { 3658342681dSNathan Fontenot const u32 *prop; 3668342681dSNathan Fontenot u32 len, entries; 3678342681dSNathan Fontenot 3688342681dSNathan Fontenot prop = of_get_property(memory, "ibm,dynamic-memory", &len); 3698342681dSNathan Fontenot if (!prop || len < sizeof(unsigned int)) 3708342681dSNathan Fontenot return 0; 3718342681dSNathan Fontenot 3728342681dSNathan Fontenot entries = *prop++; 3738342681dSNathan Fontenot 3748342681dSNathan Fontenot /* Now that we know the number of entries, revalidate the size 3758342681dSNathan Fontenot * of the property read in to ensure we have everything 3768342681dSNathan Fontenot */ 3778342681dSNathan Fontenot if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int)) 3788342681dSNathan Fontenot return 0; 3798342681dSNathan Fontenot 3808342681dSNathan Fontenot *dm = prop; 3818342681dSNathan Fontenot return entries; 3828342681dSNathan Fontenot } 3838342681dSNathan Fontenot 3848342681dSNathan Fontenot /* 3858342681dSNathan Fontenot * Retreive and validate the ibm,lmb-size property for drconf memory 3868342681dSNathan Fontenot * from the device tree. 3878342681dSNathan Fontenot */ 3888342681dSNathan Fontenot static u64 of_get_lmb_size(struct device_node *memory) 3898342681dSNathan Fontenot { 3908342681dSNathan Fontenot const u32 *prop; 3918342681dSNathan Fontenot u32 len; 3928342681dSNathan Fontenot 3938342681dSNathan Fontenot prop = of_get_property(memory, "ibm,lmb-size", &len); 3948342681dSNathan Fontenot if (!prop || len < sizeof(unsigned int)) 3958342681dSNathan Fontenot return 0; 3968342681dSNathan Fontenot 3978342681dSNathan Fontenot return read_n_cells(n_mem_size_cells, &prop); 3988342681dSNathan Fontenot } 3998342681dSNathan Fontenot 4008342681dSNathan Fontenot struct assoc_arrays { 4018342681dSNathan Fontenot u32 n_arrays; 4028342681dSNathan Fontenot u32 array_sz; 4038342681dSNathan Fontenot const u32 *arrays; 4048342681dSNathan Fontenot }; 4058342681dSNathan Fontenot 4068342681dSNathan Fontenot /* 4078342681dSNathan Fontenot * Retreive and validate the list of associativity arrays for drconf 4088342681dSNathan Fontenot * memory from the ibm,associativity-lookup-arrays property of the 4098342681dSNathan Fontenot * device tree.. 4108342681dSNathan Fontenot * 4118342681dSNathan Fontenot * The layout of the ibm,associativity-lookup-arrays property is a number N 4128342681dSNathan Fontenot * indicating the number of associativity arrays, followed by a number M 4138342681dSNathan Fontenot * indicating the size of each associativity array, followed by a list 4148342681dSNathan Fontenot * of N associativity arrays. 4158342681dSNathan Fontenot */ 4168342681dSNathan Fontenot static int of_get_assoc_arrays(struct device_node *memory, 4178342681dSNathan Fontenot struct assoc_arrays *aa) 4188342681dSNathan Fontenot { 4198342681dSNathan Fontenot const u32 *prop; 4208342681dSNathan Fontenot u32 len; 4218342681dSNathan Fontenot 4228342681dSNathan Fontenot prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len); 4238342681dSNathan Fontenot if (!prop || len < 2 * sizeof(unsigned int)) 4248342681dSNathan Fontenot return -1; 4258342681dSNathan Fontenot 4268342681dSNathan Fontenot aa->n_arrays = *prop++; 4278342681dSNathan Fontenot aa->array_sz = *prop++; 4288342681dSNathan Fontenot 4298342681dSNathan Fontenot /* Now that we know the number of arrrays and size of each array, 4308342681dSNathan Fontenot * revalidate the size of the property read in. 4318342681dSNathan Fontenot */ 4328342681dSNathan Fontenot if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int)) 4338342681dSNathan Fontenot return -1; 4348342681dSNathan Fontenot 4358342681dSNathan Fontenot aa->arrays = prop; 4368342681dSNathan Fontenot return 0; 4378342681dSNathan Fontenot } 4388342681dSNathan Fontenot 4398342681dSNathan Fontenot /* 4408342681dSNathan Fontenot * This is like of_node_to_nid_single() for memory represented in the 4418342681dSNathan Fontenot * ibm,dynamic-reconfiguration-memory node. 4428342681dSNathan Fontenot */ 4438342681dSNathan Fontenot static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, 4448342681dSNathan Fontenot struct assoc_arrays *aa) 4458342681dSNathan Fontenot { 4468342681dSNathan Fontenot int default_nid = 0; 4478342681dSNathan Fontenot int nid = default_nid; 4488342681dSNathan Fontenot int index; 4498342681dSNathan Fontenot 4508342681dSNathan Fontenot if (min_common_depth > 0 && min_common_depth <= aa->array_sz && 4518342681dSNathan Fontenot !(drmem->flags & DRCONF_MEM_AI_INVALID) && 4528342681dSNathan Fontenot drmem->aa_index < aa->n_arrays) { 4538342681dSNathan Fontenot index = drmem->aa_index * aa->array_sz + min_common_depth - 1; 4548342681dSNathan Fontenot nid = aa->arrays[index]; 4558342681dSNathan Fontenot 4568342681dSNathan Fontenot if (nid == 0xffff || nid >= MAX_NUMNODES) 4578342681dSNathan Fontenot nid = default_nid; 4588342681dSNathan Fontenot } 4598342681dSNathan Fontenot 4608342681dSNathan Fontenot return nid; 4618342681dSNathan Fontenot } 4628342681dSNathan Fontenot 463ab1f9dacSPaul Mackerras /* 464ab1f9dacSPaul Mackerras * Figure out to which domain a cpu belongs and stick it there. 465ab1f9dacSPaul Mackerras * Return the id of the domain used. 466ab1f9dacSPaul Mackerras */ 4672e5ce39dSNathan Lynch static int __cpuinit numa_setup_cpu(unsigned long lcpu) 468ab1f9dacSPaul Mackerras { 469cf950b7aSNathan Lynch int nid = 0; 470ab1f9dacSPaul Mackerras struct device_node *cpu = find_cpu_node(lcpu); 471ab1f9dacSPaul Mackerras 472ab1f9dacSPaul Mackerras if (!cpu) { 473ab1f9dacSPaul Mackerras WARN_ON(1); 474ab1f9dacSPaul Mackerras goto out; 475ab1f9dacSPaul Mackerras } 476ab1f9dacSPaul Mackerras 477953039c8SJeremy Kerr nid = of_node_to_nid_single(cpu); 478ab1f9dacSPaul Mackerras 479482ec7c4SNathan Lynch if (nid < 0 || !node_online(nid)) 480482ec7c4SNathan Lynch nid = any_online_node(NODE_MASK_ALL); 481ab1f9dacSPaul Mackerras out: 482cf950b7aSNathan Lynch map_cpu_to_node(lcpu, nid); 483ab1f9dacSPaul Mackerras 484ab1f9dacSPaul Mackerras of_node_put(cpu); 485ab1f9dacSPaul Mackerras 486cf950b7aSNathan Lynch return nid; 487ab1f9dacSPaul Mackerras } 488ab1f9dacSPaul Mackerras 48974b85f37SChandra Seetharaman static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, 490ab1f9dacSPaul Mackerras unsigned long action, 491ab1f9dacSPaul Mackerras void *hcpu) 492ab1f9dacSPaul Mackerras { 493ab1f9dacSPaul Mackerras unsigned long lcpu = (unsigned long)hcpu; 494ab1f9dacSPaul Mackerras int ret = NOTIFY_DONE; 495ab1f9dacSPaul Mackerras 496ab1f9dacSPaul Mackerras switch (action) { 497ab1f9dacSPaul Mackerras case CPU_UP_PREPARE: 4988bb78442SRafael J. Wysocki case CPU_UP_PREPARE_FROZEN: 499ab1f9dacSPaul Mackerras numa_setup_cpu(lcpu); 500ab1f9dacSPaul Mackerras ret = NOTIFY_OK; 501ab1f9dacSPaul Mackerras break; 502ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU 503ab1f9dacSPaul Mackerras case CPU_DEAD: 5048bb78442SRafael J. Wysocki case CPU_DEAD_FROZEN: 505ab1f9dacSPaul Mackerras case CPU_UP_CANCELED: 5068bb78442SRafael J. Wysocki case CPU_UP_CANCELED_FROZEN: 507ab1f9dacSPaul Mackerras unmap_cpu_from_node(lcpu); 508ab1f9dacSPaul Mackerras break; 509ab1f9dacSPaul Mackerras ret = NOTIFY_OK; 510ab1f9dacSPaul Mackerras #endif 511ab1f9dacSPaul Mackerras } 512ab1f9dacSPaul Mackerras return ret; 513ab1f9dacSPaul Mackerras } 514ab1f9dacSPaul Mackerras 515ab1f9dacSPaul Mackerras /* 516ab1f9dacSPaul Mackerras * Check and possibly modify a memory region to enforce the memory limit. 517ab1f9dacSPaul Mackerras * 518ab1f9dacSPaul Mackerras * Returns the size the region should have to enforce the memory limit. 519ab1f9dacSPaul Mackerras * This will either be the original value of size, a truncated value, 520ab1f9dacSPaul Mackerras * or zero. If the returned value of size is 0 the region should be 521ab1f9dacSPaul Mackerras * discarded as it lies wholy above the memory limit. 522ab1f9dacSPaul Mackerras */ 52345fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start, 52445fb6ceaSAnton Blanchard unsigned long size) 525ab1f9dacSPaul Mackerras { 526ab1f9dacSPaul Mackerras /* 527ab1f9dacSPaul Mackerras * We use lmb_end_of_DRAM() in here instead of memory_limit because 528ab1f9dacSPaul Mackerras * we've already adjusted it for the limit and it takes care of 529ab1f9dacSPaul Mackerras * having memory holes below the limit. 530ab1f9dacSPaul Mackerras */ 531ab1f9dacSPaul Mackerras 532ab1f9dacSPaul Mackerras if (! memory_limit) 533ab1f9dacSPaul Mackerras return size; 534ab1f9dacSPaul Mackerras 535ab1f9dacSPaul Mackerras if (start + size <= lmb_end_of_DRAM()) 536ab1f9dacSPaul Mackerras return size; 537ab1f9dacSPaul Mackerras 538ab1f9dacSPaul Mackerras if (start >= lmb_end_of_DRAM()) 539ab1f9dacSPaul Mackerras return 0; 540ab1f9dacSPaul Mackerras 541ab1f9dacSPaul Mackerras return lmb_end_of_DRAM() - start; 542ab1f9dacSPaul Mackerras } 543ab1f9dacSPaul Mackerras 5440204568aSPaul Mackerras /* 545cf00085dSChandru * Reads the counter for a given entry in 546cf00085dSChandru * linux,drconf-usable-memory property 547cf00085dSChandru */ 548cf00085dSChandru static inline int __init read_usm_ranges(const u32 **usm) 549cf00085dSChandru { 550cf00085dSChandru /* 551cf00085dSChandru * For each lmb in ibm,dynamic-memory a corresponding 552cf00085dSChandru * entry in linux,drconf-usable-memory property contains 553cf00085dSChandru * a counter followed by that many (base, size) duple. 554cf00085dSChandru * read the counter from linux,drconf-usable-memory 555cf00085dSChandru */ 556cf00085dSChandru return read_n_cells(n_mem_size_cells, usm); 557cf00085dSChandru } 558cf00085dSChandru 559cf00085dSChandru /* 5600204568aSPaul Mackerras * Extract NUMA information from the ibm,dynamic-reconfiguration-memory 5610204568aSPaul Mackerras * node. This assumes n_mem_{addr,size}_cells have been set. 5620204568aSPaul Mackerras */ 5630204568aSPaul Mackerras static void __init parse_drconf_memory(struct device_node *memory) 5640204568aSPaul Mackerras { 565cf00085dSChandru const u32 *dm, *usm; 566cf00085dSChandru unsigned int n, rc, ranges, is_kexec_kdump = 0; 567cf00085dSChandru unsigned long lmb_size, base, size, sz; 5688342681dSNathan Fontenot int nid; 5698342681dSNathan Fontenot struct assoc_arrays aa; 5700204568aSPaul Mackerras 5718342681dSNathan Fontenot n = of_get_drconf_memory(memory, &dm); 5728342681dSNathan Fontenot if (!n) 5730204568aSPaul Mackerras return; 5740204568aSPaul Mackerras 5758342681dSNathan Fontenot lmb_size = of_get_lmb_size(memory); 5768342681dSNathan Fontenot if (!lmb_size) 5778342681dSNathan Fontenot return; 5788342681dSNathan Fontenot 5798342681dSNathan Fontenot rc = of_get_assoc_arrays(memory, &aa); 5808342681dSNathan Fontenot if (rc) 5810204568aSPaul Mackerras return; 5820204568aSPaul Mackerras 583cf00085dSChandru /* check if this is a kexec/kdump kernel */ 584cf00085dSChandru usm = of_get_usable_memory(memory); 585cf00085dSChandru if (usm != NULL) 586cf00085dSChandru is_kexec_kdump = 1; 587cf00085dSChandru 5880204568aSPaul Mackerras for (; n != 0; --n) { 5898342681dSNathan Fontenot struct of_drconf_cell drmem; 5901daa6d08SBalbir Singh 5918342681dSNathan Fontenot read_drconf_cell(&drmem, &dm); 5928342681dSNathan Fontenot 5938342681dSNathan Fontenot /* skip this block if the reserved bit is set in flags (0x80) 5948342681dSNathan Fontenot or if the block is not assigned to this partition (0x8) */ 5958342681dSNathan Fontenot if ((drmem.flags & DRCONF_MEM_RESERVED) 5968342681dSNathan Fontenot || !(drmem.flags & DRCONF_MEM_ASSIGNED)) 5978342681dSNathan Fontenot continue; 5988342681dSNathan Fontenot 599cf00085dSChandru base = drmem.base_addr; 600cf00085dSChandru size = lmb_size; 601cf00085dSChandru ranges = 1; 6028342681dSNathan Fontenot 603cf00085dSChandru if (is_kexec_kdump) { 604cf00085dSChandru ranges = read_usm_ranges(&usm); 605cf00085dSChandru if (!ranges) /* there are no (base, size) duple */ 6060204568aSPaul Mackerras continue; 607cf00085dSChandru } 608cf00085dSChandru do { 609cf00085dSChandru if (is_kexec_kdump) { 610cf00085dSChandru base = read_n_cells(n_mem_addr_cells, &usm); 611cf00085dSChandru size = read_n_cells(n_mem_size_cells, &usm); 612cf00085dSChandru } 613cf00085dSChandru nid = of_drconf_to_nid_single(&drmem, &aa); 614cf00085dSChandru fake_numa_create_new_node( 615cf00085dSChandru ((base + size) >> PAGE_SHIFT), 616cf00085dSChandru &nid); 617cf00085dSChandru node_set_online(nid); 618cf00085dSChandru sz = numa_enforce_memory_limit(base, size); 619cf00085dSChandru if (sz) 620cf00085dSChandru add_active_range(nid, base >> PAGE_SHIFT, 621cf00085dSChandru (base >> PAGE_SHIFT) 622cf00085dSChandru + (sz >> PAGE_SHIFT)); 623cf00085dSChandru } while (--ranges); 6240204568aSPaul Mackerras } 6250204568aSPaul Mackerras } 6260204568aSPaul Mackerras 627ab1f9dacSPaul Mackerras static int __init parse_numa_properties(void) 628ab1f9dacSPaul Mackerras { 629ab1f9dacSPaul Mackerras struct device_node *cpu = NULL; 630ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 631482ec7c4SNathan Lynch int default_nid = 0; 632ab1f9dacSPaul Mackerras unsigned long i; 633ab1f9dacSPaul Mackerras 634ab1f9dacSPaul Mackerras if (numa_enabled == 0) { 635ab1f9dacSPaul Mackerras printk(KERN_WARNING "NUMA disabled by user\n"); 636ab1f9dacSPaul Mackerras return -1; 637ab1f9dacSPaul Mackerras } 638ab1f9dacSPaul Mackerras 639ab1f9dacSPaul Mackerras min_common_depth = find_min_common_depth(); 640ab1f9dacSPaul Mackerras 641ab1f9dacSPaul Mackerras if (min_common_depth < 0) 642ab1f9dacSPaul Mackerras return min_common_depth; 643ab1f9dacSPaul Mackerras 644bf4b85b0SNathan Lynch dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); 645bf4b85b0SNathan Lynch 646ab1f9dacSPaul Mackerras /* 647482ec7c4SNathan Lynch * Even though we connect cpus to numa domains later in SMP 648482ec7c4SNathan Lynch * init, we need to know the node ids now. This is because 649482ec7c4SNathan Lynch * each node to be onlined must have NODE_DATA etc backing it. 650ab1f9dacSPaul Mackerras */ 651482ec7c4SNathan Lynch for_each_present_cpu(i) { 652cf950b7aSNathan Lynch int nid; 653ab1f9dacSPaul Mackerras 654ab1f9dacSPaul Mackerras cpu = find_cpu_node(i); 655482ec7c4SNathan Lynch BUG_ON(!cpu); 656953039c8SJeremy Kerr nid = of_node_to_nid_single(cpu); 657ab1f9dacSPaul Mackerras of_node_put(cpu); 658ab1f9dacSPaul Mackerras 659482ec7c4SNathan Lynch /* 660482ec7c4SNathan Lynch * Don't fall back to default_nid yet -- we will plug 661482ec7c4SNathan Lynch * cpus into nodes once the memory scan has discovered 662482ec7c4SNathan Lynch * the topology. 663482ec7c4SNathan Lynch */ 664482ec7c4SNathan Lynch if (nid < 0) 665482ec7c4SNathan Lynch continue; 666482ec7c4SNathan Lynch node_set_online(nid); 667ab1f9dacSPaul Mackerras } 668ab1f9dacSPaul Mackerras 669237a0989SMike Kravetz get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); 670ab1f9dacSPaul Mackerras memory = NULL; 671ab1f9dacSPaul Mackerras while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 672ab1f9dacSPaul Mackerras unsigned long start; 673ab1f9dacSPaul Mackerras unsigned long size; 674cf950b7aSNathan Lynch int nid; 675ab1f9dacSPaul Mackerras int ranges; 676a7f67bdfSJeremy Kerr const unsigned int *memcell_buf; 677ab1f9dacSPaul Mackerras unsigned int len; 678ab1f9dacSPaul Mackerras 679e2eb6392SStephen Rothwell memcell_buf = of_get_property(memory, 680ba759485SMichael Ellerman "linux,usable-memory", &len); 681ba759485SMichael Ellerman if (!memcell_buf || len <= 0) 682e2eb6392SStephen Rothwell memcell_buf = of_get_property(memory, "reg", &len); 683ab1f9dacSPaul Mackerras if (!memcell_buf || len <= 0) 684ab1f9dacSPaul Mackerras continue; 685ab1f9dacSPaul Mackerras 686cc5d0189SBenjamin Herrenschmidt /* ranges in cell */ 687cc5d0189SBenjamin Herrenschmidt ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); 688ab1f9dacSPaul Mackerras new_range: 689ab1f9dacSPaul Mackerras /* these are order-sensitive, and modify the buffer pointer */ 690237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 691237a0989SMike Kravetz size = read_n_cells(n_mem_size_cells, &memcell_buf); 692ab1f9dacSPaul Mackerras 693482ec7c4SNathan Lynch /* 694482ec7c4SNathan Lynch * Assumption: either all memory nodes or none will 695482ec7c4SNathan Lynch * have associativity properties. If none, then 696482ec7c4SNathan Lynch * everything goes to default_nid. 697482ec7c4SNathan Lynch */ 698953039c8SJeremy Kerr nid = of_node_to_nid_single(memory); 699482ec7c4SNathan Lynch if (nid < 0) 700482ec7c4SNathan Lynch nid = default_nid; 7011daa6d08SBalbir Singh 7021daa6d08SBalbir Singh fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); 703482ec7c4SNathan Lynch node_set_online(nid); 704ab1f9dacSPaul Mackerras 705ab1f9dacSPaul Mackerras if (!(size = numa_enforce_memory_limit(start, size))) { 706ab1f9dacSPaul Mackerras if (--ranges) 707ab1f9dacSPaul Mackerras goto new_range; 708ab1f9dacSPaul Mackerras else 709ab1f9dacSPaul Mackerras continue; 710ab1f9dacSPaul Mackerras } 711ab1f9dacSPaul Mackerras 712c67c3cb4SMel Gorman add_active_range(nid, start >> PAGE_SHIFT, 713c67c3cb4SMel Gorman (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT)); 714ab1f9dacSPaul Mackerras 715ab1f9dacSPaul Mackerras if (--ranges) 716ab1f9dacSPaul Mackerras goto new_range; 717ab1f9dacSPaul Mackerras } 718ab1f9dacSPaul Mackerras 7190204568aSPaul Mackerras /* 7200204568aSPaul Mackerras * Now do the same thing for each LMB listed in the ibm,dynamic-memory 7210204568aSPaul Mackerras * property in the ibm,dynamic-reconfiguration-memory node. 7220204568aSPaul Mackerras */ 7230204568aSPaul Mackerras memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); 7240204568aSPaul Mackerras if (memory) 7250204568aSPaul Mackerras parse_drconf_memory(memory); 7260204568aSPaul Mackerras 727ab1f9dacSPaul Mackerras return 0; 728ab1f9dacSPaul Mackerras } 729ab1f9dacSPaul Mackerras 730ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void) 731ab1f9dacSPaul Mackerras { 732ab1f9dacSPaul Mackerras unsigned long top_of_ram = lmb_end_of_DRAM(); 733ab1f9dacSPaul Mackerras unsigned long total_ram = lmb_phys_mem_size(); 734c67c3cb4SMel Gorman unsigned long start_pfn, end_pfn; 7351daa6d08SBalbir Singh unsigned int i, nid = 0; 736ab1f9dacSPaul Mackerras 737e110b281SOlof Johansson printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", 738ab1f9dacSPaul Mackerras top_of_ram, total_ram); 739e110b281SOlof Johansson printk(KERN_DEBUG "Memory hole size: %ldMB\n", 740ab1f9dacSPaul Mackerras (top_of_ram - total_ram) >> 20); 741ab1f9dacSPaul Mackerras 742c67c3cb4SMel Gorman for (i = 0; i < lmb.memory.cnt; ++i) { 743c67c3cb4SMel Gorman start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; 744c67c3cb4SMel Gorman end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); 7451daa6d08SBalbir Singh 7461daa6d08SBalbir Singh fake_numa_create_new_node(end_pfn, &nid); 7471daa6d08SBalbir Singh add_active_range(nid, start_pfn, end_pfn); 7481daa6d08SBalbir Singh node_set_online(nid); 749c67c3cb4SMel Gorman } 750ab1f9dacSPaul Mackerras } 751ab1f9dacSPaul Mackerras 7524b703a23SAnton Blanchard void __init dump_numa_cpu_topology(void) 7534b703a23SAnton Blanchard { 7544b703a23SAnton Blanchard unsigned int node; 7554b703a23SAnton Blanchard unsigned int cpu, count; 7564b703a23SAnton Blanchard 7574b703a23SAnton Blanchard if (min_common_depth == -1 || !numa_enabled) 7584b703a23SAnton Blanchard return; 7594b703a23SAnton Blanchard 7604b703a23SAnton Blanchard for_each_online_node(node) { 761e110b281SOlof Johansson printk(KERN_DEBUG "Node %d CPUs:", node); 7624b703a23SAnton Blanchard 7634b703a23SAnton Blanchard count = 0; 7644b703a23SAnton Blanchard /* 7654b703a23SAnton Blanchard * If we used a CPU iterator here we would miss printing 7664b703a23SAnton Blanchard * the holes in the cpumap. 7674b703a23SAnton Blanchard */ 7684b703a23SAnton Blanchard for (cpu = 0; cpu < NR_CPUS; cpu++) { 7694b703a23SAnton Blanchard if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { 7704b703a23SAnton Blanchard if (count == 0) 7714b703a23SAnton Blanchard printk(" %u", cpu); 7724b703a23SAnton Blanchard ++count; 7734b703a23SAnton Blanchard } else { 7744b703a23SAnton Blanchard if (count > 1) 7754b703a23SAnton Blanchard printk("-%u", cpu - 1); 7764b703a23SAnton Blanchard count = 0; 7774b703a23SAnton Blanchard } 7784b703a23SAnton Blanchard } 7794b703a23SAnton Blanchard 7804b703a23SAnton Blanchard if (count > 1) 7814b703a23SAnton Blanchard printk("-%u", NR_CPUS - 1); 7824b703a23SAnton Blanchard printk("\n"); 7834b703a23SAnton Blanchard } 7844b703a23SAnton Blanchard } 7854b703a23SAnton Blanchard 7864b703a23SAnton Blanchard static void __init dump_numa_memory_topology(void) 787ab1f9dacSPaul Mackerras { 788ab1f9dacSPaul Mackerras unsigned int node; 789ab1f9dacSPaul Mackerras unsigned int count; 790ab1f9dacSPaul Mackerras 791ab1f9dacSPaul Mackerras if (min_common_depth == -1 || !numa_enabled) 792ab1f9dacSPaul Mackerras return; 793ab1f9dacSPaul Mackerras 794ab1f9dacSPaul Mackerras for_each_online_node(node) { 795ab1f9dacSPaul Mackerras unsigned long i; 796ab1f9dacSPaul Mackerras 797e110b281SOlof Johansson printk(KERN_DEBUG "Node %d Memory:", node); 798ab1f9dacSPaul Mackerras 799ab1f9dacSPaul Mackerras count = 0; 800ab1f9dacSPaul Mackerras 80145fb6ceaSAnton Blanchard for (i = 0; i < lmb_end_of_DRAM(); 80245fb6ceaSAnton Blanchard i += (1 << SECTION_SIZE_BITS)) { 80345fb6ceaSAnton Blanchard if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { 804ab1f9dacSPaul Mackerras if (count == 0) 805ab1f9dacSPaul Mackerras printk(" 0x%lx", i); 806ab1f9dacSPaul Mackerras ++count; 807ab1f9dacSPaul Mackerras } else { 808ab1f9dacSPaul Mackerras if (count > 0) 809ab1f9dacSPaul Mackerras printk("-0x%lx", i); 810ab1f9dacSPaul Mackerras count = 0; 811ab1f9dacSPaul Mackerras } 812ab1f9dacSPaul Mackerras } 813ab1f9dacSPaul Mackerras 814ab1f9dacSPaul Mackerras if (count > 0) 815ab1f9dacSPaul Mackerras printk("-0x%lx", i); 816ab1f9dacSPaul Mackerras printk("\n"); 817ab1f9dacSPaul Mackerras } 818ab1f9dacSPaul Mackerras } 819ab1f9dacSPaul Mackerras 820ab1f9dacSPaul Mackerras /* 821ab1f9dacSPaul Mackerras * Allocate some memory, satisfying the lmb or bootmem allocator where 822ab1f9dacSPaul Mackerras * required. nid is the preferred node and end is the physical address of 823ab1f9dacSPaul Mackerras * the highest address in the node. 824ab1f9dacSPaul Mackerras * 825ab1f9dacSPaul Mackerras * Returns the physical address of the memory. 826ab1f9dacSPaul Mackerras */ 82745fb6ceaSAnton Blanchard static void __init *careful_allocation(int nid, unsigned long size, 82845fb6ceaSAnton Blanchard unsigned long align, 82945fb6ceaSAnton Blanchard unsigned long end_pfn) 830ab1f9dacSPaul Mackerras { 83145fb6ceaSAnton Blanchard int new_nid; 832d7a5b2ffSMichael Ellerman unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); 833ab1f9dacSPaul Mackerras 834ab1f9dacSPaul Mackerras /* retry over all memory */ 835ab1f9dacSPaul Mackerras if (!ret) 836d7a5b2ffSMichael Ellerman ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM()); 837ab1f9dacSPaul Mackerras 838ab1f9dacSPaul Mackerras if (!ret) 839ab1f9dacSPaul Mackerras panic("numa.c: cannot allocate %lu bytes on node %d", 840ab1f9dacSPaul Mackerras size, nid); 841ab1f9dacSPaul Mackerras 842ab1f9dacSPaul Mackerras /* 843ab1f9dacSPaul Mackerras * If the memory came from a previously allocated node, we must 844ab1f9dacSPaul Mackerras * retry with the bootmem allocator. 845ab1f9dacSPaul Mackerras */ 84645fb6ceaSAnton Blanchard new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT); 84745fb6ceaSAnton Blanchard if (new_nid < nid) { 84845fb6ceaSAnton Blanchard ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid), 849ab1f9dacSPaul Mackerras size, align, 0); 850ab1f9dacSPaul Mackerras 851ab1f9dacSPaul Mackerras if (!ret) 852ab1f9dacSPaul Mackerras panic("numa.c: cannot allocate %lu bytes on node %d", 85345fb6ceaSAnton Blanchard size, new_nid); 854ab1f9dacSPaul Mackerras 85545fb6ceaSAnton Blanchard ret = __pa(ret); 856ab1f9dacSPaul Mackerras 857ab1f9dacSPaul Mackerras dbg("alloc_bootmem %lx %lx\n", ret, size); 858ab1f9dacSPaul Mackerras } 859ab1f9dacSPaul Mackerras 86045fb6ceaSAnton Blanchard return (void *)ret; 861ab1f9dacSPaul Mackerras } 862ab1f9dacSPaul Mackerras 86374b85f37SChandra Seetharaman static struct notifier_block __cpuinitdata ppc64_numa_nb = { 86474b85f37SChandra Seetharaman .notifier_call = cpu_numa_callback, 86574b85f37SChandra Seetharaman .priority = 1 /* Must run before sched domains notifier. */ 86674b85f37SChandra Seetharaman }; 86774b85f37SChandra Seetharaman 868ab1f9dacSPaul Mackerras void __init do_init_bootmem(void) 869ab1f9dacSPaul Mackerras { 870ab1f9dacSPaul Mackerras int nid; 87145fb6ceaSAnton Blanchard unsigned int i; 872ab1f9dacSPaul Mackerras 873ab1f9dacSPaul Mackerras min_low_pfn = 0; 874ab1f9dacSPaul Mackerras max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; 875ab1f9dacSPaul Mackerras max_pfn = max_low_pfn; 876ab1f9dacSPaul Mackerras 877ab1f9dacSPaul Mackerras if (parse_numa_properties()) 878ab1f9dacSPaul Mackerras setup_nonnuma(); 879ab1f9dacSPaul Mackerras else 8804b703a23SAnton Blanchard dump_numa_memory_topology(); 881ab1f9dacSPaul Mackerras 882ab1f9dacSPaul Mackerras register_cpu_notifier(&ppc64_numa_nb); 8832b261227SNathan Lynch cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, 8842b261227SNathan Lynch (void *)(unsigned long)boot_cpuid); 885ab1f9dacSPaul Mackerras 886ab1f9dacSPaul Mackerras for_each_online_node(nid) { 887c67c3cb4SMel Gorman unsigned long start_pfn, end_pfn; 888ab1f9dacSPaul Mackerras unsigned long bootmem_paddr; 889ab1f9dacSPaul Mackerras unsigned long bootmap_pages; 890ab1f9dacSPaul Mackerras 891c67c3cb4SMel Gorman get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); 892ab1f9dacSPaul Mackerras 893ab1f9dacSPaul Mackerras /* Allocate the node structure node local if possible */ 89445fb6ceaSAnton Blanchard NODE_DATA(nid) = careful_allocation(nid, 895ab1f9dacSPaul Mackerras sizeof(struct pglist_data), 89645fb6ceaSAnton Blanchard SMP_CACHE_BYTES, end_pfn); 89745fb6ceaSAnton Blanchard NODE_DATA(nid) = __va(NODE_DATA(nid)); 898ab1f9dacSPaul Mackerras memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 899ab1f9dacSPaul Mackerras 900ab1f9dacSPaul Mackerras dbg("node %d\n", nid); 901ab1f9dacSPaul Mackerras dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); 902ab1f9dacSPaul Mackerras 903b61bfa3cSJohannes Weiner NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; 90445fb6ceaSAnton Blanchard NODE_DATA(nid)->node_start_pfn = start_pfn; 90545fb6ceaSAnton Blanchard NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; 906ab1f9dacSPaul Mackerras 907ab1f9dacSPaul Mackerras if (NODE_DATA(nid)->node_spanned_pages == 0) 908ab1f9dacSPaul Mackerras continue; 909ab1f9dacSPaul Mackerras 91045fb6ceaSAnton Blanchard dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); 91145fb6ceaSAnton Blanchard dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); 912ab1f9dacSPaul Mackerras 91345fb6ceaSAnton Blanchard bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 91445fb6ceaSAnton Blanchard bootmem_paddr = (unsigned long)careful_allocation(nid, 915ab1f9dacSPaul Mackerras bootmap_pages << PAGE_SHIFT, 91645fb6ceaSAnton Blanchard PAGE_SIZE, end_pfn); 91745fb6ceaSAnton Blanchard memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT); 91845fb6ceaSAnton Blanchard 919ab1f9dacSPaul Mackerras dbg("bootmap_paddr = %lx\n", bootmem_paddr); 920ab1f9dacSPaul Mackerras 921ab1f9dacSPaul Mackerras init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, 92245fb6ceaSAnton Blanchard start_pfn, end_pfn); 923ab1f9dacSPaul Mackerras 924c67c3cb4SMel Gorman free_bootmem_with_active_regions(nid, end_pfn); 925*8f64e1f2SJon Tollefson } 926ab1f9dacSPaul Mackerras 927*8f64e1f2SJon Tollefson /* Mark reserved regions */ 928ab1f9dacSPaul Mackerras for (i = 0; i < lmb.reserved.cnt; i++) { 929ab1f9dacSPaul Mackerras unsigned long physbase = lmb.reserved.region[i].base; 930ab1f9dacSPaul Mackerras unsigned long size = lmb.reserved.region[i].size; 931*8f64e1f2SJon Tollefson unsigned long start_pfn = physbase >> PAGE_SHIFT; 932*8f64e1f2SJon Tollefson unsigned long end_pfn = ((physbase + size) >> PAGE_SHIFT); 933*8f64e1f2SJon Tollefson struct node_active_region node_ar; 934ab1f9dacSPaul Mackerras 935*8f64e1f2SJon Tollefson get_node_active_region(start_pfn, &node_ar); 936*8f64e1f2SJon Tollefson while (start_pfn < end_pfn) { 937*8f64e1f2SJon Tollefson /* 938*8f64e1f2SJon Tollefson * if reserved region extends past active region 939*8f64e1f2SJon Tollefson * then trim size to active region 940*8f64e1f2SJon Tollefson */ 941*8f64e1f2SJon Tollefson if (end_pfn > node_ar.end_pfn) 942*8f64e1f2SJon Tollefson size = (node_ar.end_pfn << PAGE_SHIFT) 943*8f64e1f2SJon Tollefson - (start_pfn << PAGE_SHIFT); 944*8f64e1f2SJon Tollefson dbg("reserve_bootmem %lx %lx nid=%d\n", physbase, size, 945*8f64e1f2SJon Tollefson node_ar.nid); 946*8f64e1f2SJon Tollefson reserve_bootmem_node(NODE_DATA(node_ar.nid), physbase, 94772a7fe39SBernhard Walle size, BOOTMEM_DEFAULT); 948*8f64e1f2SJon Tollefson /* 949*8f64e1f2SJon Tollefson * if reserved region is contained in the active region 950*8f64e1f2SJon Tollefson * then done. 951*8f64e1f2SJon Tollefson */ 952*8f64e1f2SJon Tollefson if (end_pfn <= node_ar.end_pfn) 953*8f64e1f2SJon Tollefson break; 954*8f64e1f2SJon Tollefson 955*8f64e1f2SJon Tollefson /* 956*8f64e1f2SJon Tollefson * reserved region extends past the active region 957*8f64e1f2SJon Tollefson * get next active region that contains this 958*8f64e1f2SJon Tollefson * reserved region 959*8f64e1f2SJon Tollefson */ 960*8f64e1f2SJon Tollefson start_pfn = node_ar.end_pfn; 961*8f64e1f2SJon Tollefson physbase = start_pfn << PAGE_SHIFT; 962*8f64e1f2SJon Tollefson get_node_active_region(start_pfn, &node_ar); 963ab1f9dacSPaul Mackerras } 964ab1f9dacSPaul Mackerras 965ab1f9dacSPaul Mackerras } 966*8f64e1f2SJon Tollefson 967*8f64e1f2SJon Tollefson for_each_online_node(nid) 968*8f64e1f2SJon Tollefson sparse_memory_present_with_active_regions(nid); 969ab1f9dacSPaul Mackerras } 970ab1f9dacSPaul Mackerras 971ab1f9dacSPaul Mackerras void __init paging_init(void) 972ab1f9dacSPaul Mackerras { 9736391af17SMel Gorman unsigned long max_zone_pfns[MAX_NR_ZONES]; 9746391af17SMel Gorman memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 9756391af17SMel Gorman max_zone_pfns[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT; 976c67c3cb4SMel Gorman free_area_init_nodes(max_zone_pfns); 977ab1f9dacSPaul Mackerras } 978ab1f9dacSPaul Mackerras 979ab1f9dacSPaul Mackerras static int __init early_numa(char *p) 980ab1f9dacSPaul Mackerras { 981ab1f9dacSPaul Mackerras if (!p) 982ab1f9dacSPaul Mackerras return 0; 983ab1f9dacSPaul Mackerras 984ab1f9dacSPaul Mackerras if (strstr(p, "off")) 985ab1f9dacSPaul Mackerras numa_enabled = 0; 986ab1f9dacSPaul Mackerras 987ab1f9dacSPaul Mackerras if (strstr(p, "debug")) 988ab1f9dacSPaul Mackerras numa_debug = 1; 989ab1f9dacSPaul Mackerras 9901daa6d08SBalbir Singh p = strstr(p, "fake="); 9911daa6d08SBalbir Singh if (p) 9921daa6d08SBalbir Singh cmdline = p + strlen("fake="); 9931daa6d08SBalbir Singh 994ab1f9dacSPaul Mackerras return 0; 995ab1f9dacSPaul Mackerras } 996ab1f9dacSPaul Mackerras early_param("numa", early_numa); 997237a0989SMike Kravetz 998237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG 999237a0989SMike Kravetz /* 10000db9360aSNathan Fontenot * Validate the node associated with the memory section we are 10010db9360aSNathan Fontenot * trying to add. 10020db9360aSNathan Fontenot */ 10030db9360aSNathan Fontenot int valid_hot_add_scn(int *nid, unsigned long start, u32 lmb_size, 10040db9360aSNathan Fontenot unsigned long scn_addr) 10050db9360aSNathan Fontenot { 10060db9360aSNathan Fontenot nodemask_t nodes; 10070db9360aSNathan Fontenot 10080db9360aSNathan Fontenot if (*nid < 0 || !node_online(*nid)) 10090db9360aSNathan Fontenot *nid = any_online_node(NODE_MASK_ALL); 10100db9360aSNathan Fontenot 10110db9360aSNathan Fontenot if ((scn_addr >= start) && (scn_addr < (start + lmb_size))) { 10120db9360aSNathan Fontenot nodes_setall(nodes); 10130db9360aSNathan Fontenot while (NODE_DATA(*nid)->node_spanned_pages == 0) { 10140db9360aSNathan Fontenot node_clear(*nid, nodes); 10150db9360aSNathan Fontenot *nid = any_online_node(nodes); 10160db9360aSNathan Fontenot } 10170db9360aSNathan Fontenot 10180db9360aSNathan Fontenot return 1; 10190db9360aSNathan Fontenot } 10200db9360aSNathan Fontenot 10210db9360aSNathan Fontenot return 0; 10220db9360aSNathan Fontenot } 10230db9360aSNathan Fontenot 10240db9360aSNathan Fontenot /* 10250db9360aSNathan Fontenot * Find the node associated with a hot added memory section represented 10260db9360aSNathan Fontenot * by the ibm,dynamic-reconfiguration-memory node. 10270db9360aSNathan Fontenot */ 10280db9360aSNathan Fontenot static int hot_add_drconf_scn_to_nid(struct device_node *memory, 10290db9360aSNathan Fontenot unsigned long scn_addr) 10300db9360aSNathan Fontenot { 10310db9360aSNathan Fontenot const u32 *dm; 10320db9360aSNathan Fontenot unsigned int n, rc; 10330db9360aSNathan Fontenot unsigned long lmb_size; 10340db9360aSNathan Fontenot int default_nid = any_online_node(NODE_MASK_ALL); 10350db9360aSNathan Fontenot int nid; 10360db9360aSNathan Fontenot struct assoc_arrays aa; 10370db9360aSNathan Fontenot 10380db9360aSNathan Fontenot n = of_get_drconf_memory(memory, &dm); 10390db9360aSNathan Fontenot if (!n) 10400db9360aSNathan Fontenot return default_nid;; 10410db9360aSNathan Fontenot 10420db9360aSNathan Fontenot lmb_size = of_get_lmb_size(memory); 10430db9360aSNathan Fontenot if (!lmb_size) 10440db9360aSNathan Fontenot return default_nid; 10450db9360aSNathan Fontenot 10460db9360aSNathan Fontenot rc = of_get_assoc_arrays(memory, &aa); 10470db9360aSNathan Fontenot if (rc) 10480db9360aSNathan Fontenot return default_nid; 10490db9360aSNathan Fontenot 10500db9360aSNathan Fontenot for (; n != 0; --n) { 10510db9360aSNathan Fontenot struct of_drconf_cell drmem; 10520db9360aSNathan Fontenot 10530db9360aSNathan Fontenot read_drconf_cell(&drmem, &dm); 10540db9360aSNathan Fontenot 10550db9360aSNathan Fontenot /* skip this block if it is reserved or not assigned to 10560db9360aSNathan Fontenot * this partition */ 10570db9360aSNathan Fontenot if ((drmem.flags & DRCONF_MEM_RESERVED) 10580db9360aSNathan Fontenot || !(drmem.flags & DRCONF_MEM_ASSIGNED)) 10590db9360aSNathan Fontenot continue; 10600db9360aSNathan Fontenot 10610db9360aSNathan Fontenot nid = of_drconf_to_nid_single(&drmem, &aa); 10620db9360aSNathan Fontenot 10630db9360aSNathan Fontenot if (valid_hot_add_scn(&nid, drmem.base_addr, lmb_size, 10640db9360aSNathan Fontenot scn_addr)) 10650db9360aSNathan Fontenot return nid; 10660db9360aSNathan Fontenot } 10670db9360aSNathan Fontenot 10680db9360aSNathan Fontenot BUG(); /* section address should be found above */ 10690db9360aSNathan Fontenot return 0; 10700db9360aSNathan Fontenot } 10710db9360aSNathan Fontenot 10720db9360aSNathan Fontenot /* 1073237a0989SMike Kravetz * Find the node associated with a hot added memory section. Section 1074237a0989SMike Kravetz * corresponds to a SPARSEMEM section, not an LMB. It is assumed that 1075237a0989SMike Kravetz * sections are fully contained within a single LMB. 1076237a0989SMike Kravetz */ 1077237a0989SMike Kravetz int hot_add_scn_to_nid(unsigned long scn_addr) 1078237a0989SMike Kravetz { 1079237a0989SMike Kravetz struct device_node *memory = NULL; 1080069007aeSAndrew Morton int nid; 1081237a0989SMike Kravetz 1082237a0989SMike Kravetz if (!numa_enabled || (min_common_depth < 0)) 10830db9360aSNathan Fontenot return any_online_node(NODE_MASK_ALL); 10840db9360aSNathan Fontenot 10850db9360aSNathan Fontenot memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); 10860db9360aSNathan Fontenot if (memory) { 10870db9360aSNathan Fontenot nid = hot_add_drconf_scn_to_nid(memory, scn_addr); 10880db9360aSNathan Fontenot of_node_put(memory); 10890db9360aSNathan Fontenot return nid; 10900db9360aSNathan Fontenot } 1091237a0989SMike Kravetz 1092237a0989SMike Kravetz while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 1093237a0989SMike Kravetz unsigned long start, size; 1094b226e462SMike Kravetz int ranges; 1095a7f67bdfSJeremy Kerr const unsigned int *memcell_buf; 1096237a0989SMike Kravetz unsigned int len; 1097237a0989SMike Kravetz 1098e2eb6392SStephen Rothwell memcell_buf = of_get_property(memory, "reg", &len); 1099237a0989SMike Kravetz if (!memcell_buf || len <= 0) 1100237a0989SMike Kravetz continue; 1101237a0989SMike Kravetz 1102cc5d0189SBenjamin Herrenschmidt /* ranges in cell */ 1103cc5d0189SBenjamin Herrenschmidt ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); 1104237a0989SMike Kravetz ha_new_range: 1105237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 1106237a0989SMike Kravetz size = read_n_cells(n_mem_size_cells, &memcell_buf); 1107953039c8SJeremy Kerr nid = of_node_to_nid_single(memory); 1108237a0989SMike Kravetz 11090db9360aSNathan Fontenot if (valid_hot_add_scn(&nid, start, size, scn_addr)) { 1110237a0989SMike Kravetz of_node_put(memory); 11110db9360aSNathan Fontenot return nid; 1112237a0989SMike Kravetz } 1113237a0989SMike Kravetz 1114237a0989SMike Kravetz if (--ranges) /* process all ranges in cell */ 1115237a0989SMike Kravetz goto ha_new_range; 1116237a0989SMike Kravetz } 1117237a0989SMike Kravetz BUG(); /* section address should be found above */ 1118069007aeSAndrew Morton return 0; 1119237a0989SMike Kravetz } 1120237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */ 1121