1ab1f9dacSPaul Mackerras /* 2ab1f9dacSPaul Mackerras * pSeries NUMA support 3ab1f9dacSPaul Mackerras * 4ab1f9dacSPaul Mackerras * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM 5ab1f9dacSPaul Mackerras * 6ab1f9dacSPaul Mackerras * This program is free software; you can redistribute it and/or 7ab1f9dacSPaul Mackerras * modify it under the terms of the GNU General Public License 8ab1f9dacSPaul Mackerras * as published by the Free Software Foundation; either version 9ab1f9dacSPaul Mackerras * 2 of the License, or (at your option) any later version. 10ab1f9dacSPaul Mackerras */ 11ab1f9dacSPaul Mackerras #include <linux/threads.h> 12ab1f9dacSPaul Mackerras #include <linux/bootmem.h> 13ab1f9dacSPaul Mackerras #include <linux/init.h> 14ab1f9dacSPaul Mackerras #include <linux/mm.h> 15ab1f9dacSPaul Mackerras #include <linux/mmzone.h> 16ab1f9dacSPaul Mackerras #include <linux/module.h> 17ab1f9dacSPaul Mackerras #include <linux/nodemask.h> 18ab1f9dacSPaul Mackerras #include <linux/cpu.h> 19ab1f9dacSPaul Mackerras #include <linux/notifier.h> 2045fb6ceaSAnton Blanchard #include <asm/sparsemem.h> 21ab1f9dacSPaul Mackerras #include <asm/lmb.h> 22cf00a8d1SPaul Mackerras #include <asm/system.h> 232249ca9dSPaul Mackerras #include <asm/smp.h> 24ab1f9dacSPaul Mackerras 25ab1f9dacSPaul Mackerras static int numa_enabled = 1; 26ab1f9dacSPaul Mackerras 27ab1f9dacSPaul Mackerras static int numa_debug; 28ab1f9dacSPaul Mackerras #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } 29ab1f9dacSPaul Mackerras 3045fb6ceaSAnton Blanchard int numa_cpu_lookup_table[NR_CPUS]; 31ab1f9dacSPaul Mackerras cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; 32ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES]; 3345fb6ceaSAnton Blanchard 3445fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpu_lookup_table); 3545fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpumask_lookup_table); 3645fb6ceaSAnton Blanchard EXPORT_SYMBOL(node_data); 3745fb6ceaSAnton Blanchard 3845fb6ceaSAnton Blanchard static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; 39ab1f9dacSPaul Mackerras static int min_common_depth; 40237a0989SMike Kravetz static int n_mem_addr_cells, n_mem_size_cells; 41ab1f9dacSPaul Mackerras 42ab1f9dacSPaul Mackerras /* 4345fb6ceaSAnton Blanchard * We need somewhere to store start/end/node for each region until we have 44ab1f9dacSPaul Mackerras * allocated the real node_data structures. 45ab1f9dacSPaul Mackerras */ 4645fb6ceaSAnton Blanchard #define MAX_REGIONS (MAX_LMB_REGIONS*2) 47ab1f9dacSPaul Mackerras static struct { 4845fb6ceaSAnton Blanchard unsigned long start_pfn; 4945fb6ceaSAnton Blanchard unsigned long end_pfn; 5045fb6ceaSAnton Blanchard int nid; 5145fb6ceaSAnton Blanchard } init_node_data[MAX_REGIONS] __initdata; 52ab1f9dacSPaul Mackerras 5345fb6ceaSAnton Blanchard int __init early_pfn_to_nid(unsigned long pfn) 5445fb6ceaSAnton Blanchard { 5545fb6ceaSAnton Blanchard unsigned int i; 5645fb6ceaSAnton Blanchard 5745fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 5845fb6ceaSAnton Blanchard unsigned long start_pfn = init_node_data[i].start_pfn; 5945fb6ceaSAnton Blanchard unsigned long end_pfn = init_node_data[i].end_pfn; 6045fb6ceaSAnton Blanchard 6145fb6ceaSAnton Blanchard if ((start_pfn <= pfn) && (pfn < end_pfn)) 6245fb6ceaSAnton Blanchard return init_node_data[i].nid; 6345fb6ceaSAnton Blanchard } 6445fb6ceaSAnton Blanchard 6545fb6ceaSAnton Blanchard return -1; 6645fb6ceaSAnton Blanchard } 6745fb6ceaSAnton Blanchard 6845fb6ceaSAnton Blanchard void __init add_region(unsigned int nid, unsigned long start_pfn, 6945fb6ceaSAnton Blanchard unsigned long pages) 7045fb6ceaSAnton Blanchard { 7145fb6ceaSAnton Blanchard unsigned int i; 7245fb6ceaSAnton Blanchard 7345fb6ceaSAnton Blanchard dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n", 7445fb6ceaSAnton Blanchard nid, start_pfn, pages); 7545fb6ceaSAnton Blanchard 7645fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 7745fb6ceaSAnton Blanchard if (init_node_data[i].nid != nid) 7845fb6ceaSAnton Blanchard continue; 7945fb6ceaSAnton Blanchard if (init_node_data[i].end_pfn == start_pfn) { 8045fb6ceaSAnton Blanchard init_node_data[i].end_pfn += pages; 8145fb6ceaSAnton Blanchard return; 8245fb6ceaSAnton Blanchard } 8345fb6ceaSAnton Blanchard if (init_node_data[i].start_pfn == (start_pfn + pages)) { 8445fb6ceaSAnton Blanchard init_node_data[i].start_pfn -= pages; 8545fb6ceaSAnton Blanchard return; 8645fb6ceaSAnton Blanchard } 8745fb6ceaSAnton Blanchard } 8845fb6ceaSAnton Blanchard 8945fb6ceaSAnton Blanchard /* 9045fb6ceaSAnton Blanchard * Leave last entry NULL so we dont iterate off the end (we use 9145fb6ceaSAnton Blanchard * entry.end_pfn to terminate the walk). 9245fb6ceaSAnton Blanchard */ 9345fb6ceaSAnton Blanchard if (i >= (MAX_REGIONS - 1)) { 9445fb6ceaSAnton Blanchard printk(KERN_ERR "WARNING: too many memory regions in " 9545fb6ceaSAnton Blanchard "numa code, truncating\n"); 9645fb6ceaSAnton Blanchard return; 9745fb6ceaSAnton Blanchard } 9845fb6ceaSAnton Blanchard 9945fb6ceaSAnton Blanchard init_node_data[i].start_pfn = start_pfn; 10045fb6ceaSAnton Blanchard init_node_data[i].end_pfn = start_pfn + pages; 10145fb6ceaSAnton Blanchard init_node_data[i].nid = nid; 10245fb6ceaSAnton Blanchard } 10345fb6ceaSAnton Blanchard 10445fb6ceaSAnton Blanchard /* We assume init_node_data has no overlapping regions */ 10545fb6ceaSAnton Blanchard void __init get_region(unsigned int nid, unsigned long *start_pfn, 10645fb6ceaSAnton Blanchard unsigned long *end_pfn, unsigned long *pages_present) 10745fb6ceaSAnton Blanchard { 10845fb6ceaSAnton Blanchard unsigned int i; 10945fb6ceaSAnton Blanchard 11045fb6ceaSAnton Blanchard *start_pfn = -1UL; 11145fb6ceaSAnton Blanchard *end_pfn = *pages_present = 0; 11245fb6ceaSAnton Blanchard 11345fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 11445fb6ceaSAnton Blanchard if (init_node_data[i].nid != nid) 11545fb6ceaSAnton Blanchard continue; 11645fb6ceaSAnton Blanchard 11745fb6ceaSAnton Blanchard *pages_present += init_node_data[i].end_pfn - 11845fb6ceaSAnton Blanchard init_node_data[i].start_pfn; 11945fb6ceaSAnton Blanchard 12045fb6ceaSAnton Blanchard if (init_node_data[i].start_pfn < *start_pfn) 12145fb6ceaSAnton Blanchard *start_pfn = init_node_data[i].start_pfn; 12245fb6ceaSAnton Blanchard 12345fb6ceaSAnton Blanchard if (init_node_data[i].end_pfn > *end_pfn) 12445fb6ceaSAnton Blanchard *end_pfn = init_node_data[i].end_pfn; 12545fb6ceaSAnton Blanchard } 12645fb6ceaSAnton Blanchard 12745fb6ceaSAnton Blanchard /* We didnt find a matching region, return start/end as 0 */ 12845fb6ceaSAnton Blanchard if (*start_pfn == -1UL) 1296d91bb93SMike Kravetz *start_pfn = 0; 13045fb6ceaSAnton Blanchard } 131ab1f9dacSPaul Mackerras 132ab1f9dacSPaul Mackerras static inline void map_cpu_to_node(int cpu, int node) 133ab1f9dacSPaul Mackerras { 134ab1f9dacSPaul Mackerras numa_cpu_lookup_table[cpu] = node; 13545fb6ceaSAnton Blanchard 13645fb6ceaSAnton Blanchard if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) 137ab1f9dacSPaul Mackerras cpu_set(cpu, numa_cpumask_lookup_table[node]); 138ab1f9dacSPaul Mackerras } 139ab1f9dacSPaul Mackerras 140ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU 141ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu) 142ab1f9dacSPaul Mackerras { 143ab1f9dacSPaul Mackerras int node = numa_cpu_lookup_table[cpu]; 144ab1f9dacSPaul Mackerras 145ab1f9dacSPaul Mackerras dbg("removing cpu %lu from node %d\n", cpu, node); 146ab1f9dacSPaul Mackerras 147ab1f9dacSPaul Mackerras if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { 148ab1f9dacSPaul Mackerras cpu_clear(cpu, numa_cpumask_lookup_table[node]); 149ab1f9dacSPaul Mackerras } else { 150ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", 151ab1f9dacSPaul Mackerras cpu, node); 152ab1f9dacSPaul Mackerras } 153ab1f9dacSPaul Mackerras } 154ab1f9dacSPaul Mackerras #endif /* CONFIG_HOTPLUG_CPU */ 155ab1f9dacSPaul Mackerras 15645fb6ceaSAnton Blanchard static struct device_node *find_cpu_node(unsigned int cpu) 157ab1f9dacSPaul Mackerras { 158ab1f9dacSPaul Mackerras unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); 159ab1f9dacSPaul Mackerras struct device_node *cpu_node = NULL; 160ab1f9dacSPaul Mackerras unsigned int *interrupt_server, *reg; 161ab1f9dacSPaul Mackerras int len; 162ab1f9dacSPaul Mackerras 163ab1f9dacSPaul Mackerras while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) { 164ab1f9dacSPaul Mackerras /* Try interrupt server first */ 165ab1f9dacSPaul Mackerras interrupt_server = (unsigned int *)get_property(cpu_node, 166ab1f9dacSPaul Mackerras "ibm,ppc-interrupt-server#s", &len); 167ab1f9dacSPaul Mackerras 168ab1f9dacSPaul Mackerras len = len / sizeof(u32); 169ab1f9dacSPaul Mackerras 170ab1f9dacSPaul Mackerras if (interrupt_server && (len > 0)) { 171ab1f9dacSPaul Mackerras while (len--) { 172ab1f9dacSPaul Mackerras if (interrupt_server[len] == hw_cpuid) 173ab1f9dacSPaul Mackerras return cpu_node; 174ab1f9dacSPaul Mackerras } 175ab1f9dacSPaul Mackerras } else { 176ab1f9dacSPaul Mackerras reg = (unsigned int *)get_property(cpu_node, 177ab1f9dacSPaul Mackerras "reg", &len); 178ab1f9dacSPaul Mackerras if (reg && (len > 0) && (reg[0] == hw_cpuid)) 179ab1f9dacSPaul Mackerras return cpu_node; 180ab1f9dacSPaul Mackerras } 181ab1f9dacSPaul Mackerras } 182ab1f9dacSPaul Mackerras 183ab1f9dacSPaul Mackerras return NULL; 184ab1f9dacSPaul Mackerras } 185ab1f9dacSPaul Mackerras 186ab1f9dacSPaul Mackerras /* must hold reference to node during call */ 187ab1f9dacSPaul Mackerras static int *of_get_associativity(struct device_node *dev) 188ab1f9dacSPaul Mackerras { 189ab1f9dacSPaul Mackerras return (unsigned int *)get_property(dev, "ibm,associativity", NULL); 190ab1f9dacSPaul Mackerras } 191ab1f9dacSPaul Mackerras 192ab1f9dacSPaul Mackerras static int of_node_numa_domain(struct device_node *device) 193ab1f9dacSPaul Mackerras { 194ab1f9dacSPaul Mackerras int numa_domain; 195ab1f9dacSPaul Mackerras unsigned int *tmp; 196ab1f9dacSPaul Mackerras 197ab1f9dacSPaul Mackerras if (min_common_depth == -1) 198ab1f9dacSPaul Mackerras return 0; 199ab1f9dacSPaul Mackerras 200ab1f9dacSPaul Mackerras tmp = of_get_associativity(device); 201ab1f9dacSPaul Mackerras if (tmp && (tmp[0] >= min_common_depth)) { 202ab1f9dacSPaul Mackerras numa_domain = tmp[min_common_depth]; 203ab1f9dacSPaul Mackerras } else { 204ab1f9dacSPaul Mackerras dbg("WARNING: no NUMA information for %s\n", 205ab1f9dacSPaul Mackerras device->full_name); 206ab1f9dacSPaul Mackerras numa_domain = 0; 207ab1f9dacSPaul Mackerras } 208ab1f9dacSPaul Mackerras return numa_domain; 209ab1f9dacSPaul Mackerras } 210ab1f9dacSPaul Mackerras 211ab1f9dacSPaul Mackerras /* 212ab1f9dacSPaul Mackerras * In theory, the "ibm,associativity" property may contain multiple 213ab1f9dacSPaul Mackerras * associativity lists because a resource may be multiply connected 214ab1f9dacSPaul Mackerras * into the machine. This resource then has different associativity 215ab1f9dacSPaul Mackerras * characteristics relative to its multiple connections. We ignore 216ab1f9dacSPaul Mackerras * this for now. We also assume that all cpu and memory sets have 217ab1f9dacSPaul Mackerras * their distances represented at a common level. This won't be 218ab1f9dacSPaul Mackerras * true for heirarchical NUMA. 219ab1f9dacSPaul Mackerras * 220ab1f9dacSPaul Mackerras * In any case the ibm,associativity-reference-points should give 221ab1f9dacSPaul Mackerras * the correct depth for a normal NUMA system. 222ab1f9dacSPaul Mackerras * 223ab1f9dacSPaul Mackerras * - Dave Hansen <haveblue@us.ibm.com> 224ab1f9dacSPaul Mackerras */ 225ab1f9dacSPaul Mackerras static int __init find_min_common_depth(void) 226ab1f9dacSPaul Mackerras { 227ab1f9dacSPaul Mackerras int depth; 228ab1f9dacSPaul Mackerras unsigned int *ref_points; 229ab1f9dacSPaul Mackerras struct device_node *rtas_root; 230ab1f9dacSPaul Mackerras unsigned int len; 231ab1f9dacSPaul Mackerras 232ab1f9dacSPaul Mackerras rtas_root = of_find_node_by_path("/rtas"); 233ab1f9dacSPaul Mackerras 234ab1f9dacSPaul Mackerras if (!rtas_root) 235ab1f9dacSPaul Mackerras return -1; 236ab1f9dacSPaul Mackerras 237ab1f9dacSPaul Mackerras /* 238ab1f9dacSPaul Mackerras * this property is 2 32-bit integers, each representing a level of 239ab1f9dacSPaul Mackerras * depth in the associativity nodes. The first is for an SMP 240ab1f9dacSPaul Mackerras * configuration (should be all 0's) and the second is for a normal 241ab1f9dacSPaul Mackerras * NUMA configuration. 242ab1f9dacSPaul Mackerras */ 243ab1f9dacSPaul Mackerras ref_points = (unsigned int *)get_property(rtas_root, 244ab1f9dacSPaul Mackerras "ibm,associativity-reference-points", &len); 245ab1f9dacSPaul Mackerras 246ab1f9dacSPaul Mackerras if ((len >= 1) && ref_points) { 247ab1f9dacSPaul Mackerras depth = ref_points[1]; 248ab1f9dacSPaul Mackerras } else { 249ab1f9dacSPaul Mackerras dbg("WARNING: could not find NUMA " 250ab1f9dacSPaul Mackerras "associativity reference point\n"); 251ab1f9dacSPaul Mackerras depth = -1; 252ab1f9dacSPaul Mackerras } 253ab1f9dacSPaul Mackerras of_node_put(rtas_root); 254ab1f9dacSPaul Mackerras 255ab1f9dacSPaul Mackerras return depth; 256ab1f9dacSPaul Mackerras } 257ab1f9dacSPaul Mackerras 25884c9fdd1SMike Kravetz static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells) 259ab1f9dacSPaul Mackerras { 260ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 261ab1f9dacSPaul Mackerras 262ab1f9dacSPaul Mackerras memory = of_find_node_by_type(memory, "memory"); 26354c23310SPaul Mackerras if (!memory) 26484c9fdd1SMike Kravetz panic("numa.c: No memory nodes found!"); 26554c23310SPaul Mackerras 26684c9fdd1SMike Kravetz *n_addr_cells = prom_n_addr_cells(memory); 26784c9fdd1SMike Kravetz *n_size_cells = prom_n_size_cells(memory); 26884c9fdd1SMike Kravetz of_node_put(memory); 269ab1f9dacSPaul Mackerras } 270ab1f9dacSPaul Mackerras 271237a0989SMike Kravetz static unsigned long __devinit read_n_cells(int n, unsigned int **buf) 272ab1f9dacSPaul Mackerras { 273ab1f9dacSPaul Mackerras unsigned long result = 0; 274ab1f9dacSPaul Mackerras 275ab1f9dacSPaul Mackerras while (n--) { 276ab1f9dacSPaul Mackerras result = (result << 32) | **buf; 277ab1f9dacSPaul Mackerras (*buf)++; 278ab1f9dacSPaul Mackerras } 279ab1f9dacSPaul Mackerras return result; 280ab1f9dacSPaul Mackerras } 281ab1f9dacSPaul Mackerras 282ab1f9dacSPaul Mackerras /* 283ab1f9dacSPaul Mackerras * Figure out to which domain a cpu belongs and stick it there. 284ab1f9dacSPaul Mackerras * Return the id of the domain used. 285ab1f9dacSPaul Mackerras */ 286ab1f9dacSPaul Mackerras static int numa_setup_cpu(unsigned long lcpu) 287ab1f9dacSPaul Mackerras { 288ab1f9dacSPaul Mackerras int numa_domain = 0; 289ab1f9dacSPaul Mackerras struct device_node *cpu = find_cpu_node(lcpu); 290ab1f9dacSPaul Mackerras 291ab1f9dacSPaul Mackerras if (!cpu) { 292ab1f9dacSPaul Mackerras WARN_ON(1); 293ab1f9dacSPaul Mackerras goto out; 294ab1f9dacSPaul Mackerras } 295ab1f9dacSPaul Mackerras 296ab1f9dacSPaul Mackerras numa_domain = of_node_numa_domain(cpu); 297ab1f9dacSPaul Mackerras 298ab1f9dacSPaul Mackerras if (numa_domain >= num_online_nodes()) { 299ab1f9dacSPaul Mackerras /* 300ab1f9dacSPaul Mackerras * POWER4 LPAR uses 0xffff as invalid node, 301ab1f9dacSPaul Mackerras * dont warn in this case. 302ab1f9dacSPaul Mackerras */ 303ab1f9dacSPaul Mackerras if (numa_domain != 0xffff) 304ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: cpu %ld " 305ab1f9dacSPaul Mackerras "maps to invalid NUMA node %d\n", 306ab1f9dacSPaul Mackerras lcpu, numa_domain); 307ab1f9dacSPaul Mackerras numa_domain = 0; 308ab1f9dacSPaul Mackerras } 309ab1f9dacSPaul Mackerras out: 310ab1f9dacSPaul Mackerras node_set_online(numa_domain); 311ab1f9dacSPaul Mackerras 312ab1f9dacSPaul Mackerras map_cpu_to_node(lcpu, numa_domain); 313ab1f9dacSPaul Mackerras 314ab1f9dacSPaul Mackerras of_node_put(cpu); 315ab1f9dacSPaul Mackerras 316ab1f9dacSPaul Mackerras return numa_domain; 317ab1f9dacSPaul Mackerras } 318ab1f9dacSPaul Mackerras 319ab1f9dacSPaul Mackerras static int cpu_numa_callback(struct notifier_block *nfb, 320ab1f9dacSPaul Mackerras unsigned long action, 321ab1f9dacSPaul Mackerras void *hcpu) 322ab1f9dacSPaul Mackerras { 323ab1f9dacSPaul Mackerras unsigned long lcpu = (unsigned long)hcpu; 324ab1f9dacSPaul Mackerras int ret = NOTIFY_DONE; 325ab1f9dacSPaul Mackerras 326ab1f9dacSPaul Mackerras switch (action) { 327ab1f9dacSPaul Mackerras case CPU_UP_PREPARE: 328ab1f9dacSPaul Mackerras if (min_common_depth == -1 || !numa_enabled) 329ab1f9dacSPaul Mackerras map_cpu_to_node(lcpu, 0); 330ab1f9dacSPaul Mackerras else 331ab1f9dacSPaul Mackerras numa_setup_cpu(lcpu); 332ab1f9dacSPaul Mackerras ret = NOTIFY_OK; 333ab1f9dacSPaul Mackerras break; 334ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU 335ab1f9dacSPaul Mackerras case CPU_DEAD: 336ab1f9dacSPaul Mackerras case CPU_UP_CANCELED: 337ab1f9dacSPaul Mackerras unmap_cpu_from_node(lcpu); 338ab1f9dacSPaul Mackerras break; 339ab1f9dacSPaul Mackerras ret = NOTIFY_OK; 340ab1f9dacSPaul Mackerras #endif 341ab1f9dacSPaul Mackerras } 342ab1f9dacSPaul Mackerras return ret; 343ab1f9dacSPaul Mackerras } 344ab1f9dacSPaul Mackerras 345ab1f9dacSPaul Mackerras /* 346ab1f9dacSPaul Mackerras * Check and possibly modify a memory region to enforce the memory limit. 347ab1f9dacSPaul Mackerras * 348ab1f9dacSPaul Mackerras * Returns the size the region should have to enforce the memory limit. 349ab1f9dacSPaul Mackerras * This will either be the original value of size, a truncated value, 350ab1f9dacSPaul Mackerras * or zero. If the returned value of size is 0 the region should be 351ab1f9dacSPaul Mackerras * discarded as it lies wholy above the memory limit. 352ab1f9dacSPaul Mackerras */ 35345fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start, 35445fb6ceaSAnton Blanchard unsigned long size) 355ab1f9dacSPaul Mackerras { 356ab1f9dacSPaul Mackerras /* 357ab1f9dacSPaul Mackerras * We use lmb_end_of_DRAM() in here instead of memory_limit because 358ab1f9dacSPaul Mackerras * we've already adjusted it for the limit and it takes care of 359ab1f9dacSPaul Mackerras * having memory holes below the limit. 360ab1f9dacSPaul Mackerras */ 361ab1f9dacSPaul Mackerras 362ab1f9dacSPaul Mackerras if (! memory_limit) 363ab1f9dacSPaul Mackerras return size; 364ab1f9dacSPaul Mackerras 365ab1f9dacSPaul Mackerras if (start + size <= lmb_end_of_DRAM()) 366ab1f9dacSPaul Mackerras return size; 367ab1f9dacSPaul Mackerras 368ab1f9dacSPaul Mackerras if (start >= lmb_end_of_DRAM()) 369ab1f9dacSPaul Mackerras return 0; 370ab1f9dacSPaul Mackerras 371ab1f9dacSPaul Mackerras return lmb_end_of_DRAM() - start; 372ab1f9dacSPaul Mackerras } 373ab1f9dacSPaul Mackerras 374ab1f9dacSPaul Mackerras static int __init parse_numa_properties(void) 375ab1f9dacSPaul Mackerras { 376ab1f9dacSPaul Mackerras struct device_node *cpu = NULL; 377ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 37845fb6ceaSAnton Blanchard int max_domain; 379ab1f9dacSPaul Mackerras unsigned long i; 380ab1f9dacSPaul Mackerras 381ab1f9dacSPaul Mackerras if (numa_enabled == 0) { 382ab1f9dacSPaul Mackerras printk(KERN_WARNING "NUMA disabled by user\n"); 383ab1f9dacSPaul Mackerras return -1; 384ab1f9dacSPaul Mackerras } 385ab1f9dacSPaul Mackerras 386ab1f9dacSPaul Mackerras min_common_depth = find_min_common_depth(); 387ab1f9dacSPaul Mackerras 388ab1f9dacSPaul Mackerras dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); 389ab1f9dacSPaul Mackerras if (min_common_depth < 0) 390ab1f9dacSPaul Mackerras return min_common_depth; 391ab1f9dacSPaul Mackerras 392ab1f9dacSPaul Mackerras max_domain = numa_setup_cpu(boot_cpuid); 393ab1f9dacSPaul Mackerras 394ab1f9dacSPaul Mackerras /* 395ab1f9dacSPaul Mackerras * Even though we connect cpus to numa domains later in SMP init, 396ab1f9dacSPaul Mackerras * we need to know the maximum node id now. This is because each 397ab1f9dacSPaul Mackerras * node id must have NODE_DATA etc backing it. 398ab1f9dacSPaul Mackerras * As a result of hotplug we could still have cpus appear later on 399ab1f9dacSPaul Mackerras * with larger node ids. In that case we force the cpu into node 0. 400ab1f9dacSPaul Mackerras */ 401ab1f9dacSPaul Mackerras for_each_cpu(i) { 402ab1f9dacSPaul Mackerras int numa_domain; 403ab1f9dacSPaul Mackerras 404ab1f9dacSPaul Mackerras cpu = find_cpu_node(i); 405ab1f9dacSPaul Mackerras 406ab1f9dacSPaul Mackerras if (cpu) { 407ab1f9dacSPaul Mackerras numa_domain = of_node_numa_domain(cpu); 408ab1f9dacSPaul Mackerras of_node_put(cpu); 409ab1f9dacSPaul Mackerras 410ab1f9dacSPaul Mackerras if (numa_domain < MAX_NUMNODES && 411ab1f9dacSPaul Mackerras max_domain < numa_domain) 412ab1f9dacSPaul Mackerras max_domain = numa_domain; 413ab1f9dacSPaul Mackerras } 414ab1f9dacSPaul Mackerras } 415ab1f9dacSPaul Mackerras 416237a0989SMike Kravetz get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); 417ab1f9dacSPaul Mackerras memory = NULL; 418ab1f9dacSPaul Mackerras while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 419ab1f9dacSPaul Mackerras unsigned long start; 420ab1f9dacSPaul Mackerras unsigned long size; 421ab1f9dacSPaul Mackerras int numa_domain; 422ab1f9dacSPaul Mackerras int ranges; 423ab1f9dacSPaul Mackerras unsigned int *memcell_buf; 424ab1f9dacSPaul Mackerras unsigned int len; 425ab1f9dacSPaul Mackerras 426*ba759485SMichael Ellerman memcell_buf = (unsigned int *)get_property(memory, 427*ba759485SMichael Ellerman "linux,usable-memory", &len); 428*ba759485SMichael Ellerman if (!memcell_buf || len <= 0) 429*ba759485SMichael Ellerman memcell_buf = 430*ba759485SMichael Ellerman (unsigned int *)get_property(memory, "reg", 431*ba759485SMichael Ellerman &len); 432ab1f9dacSPaul Mackerras if (!memcell_buf || len <= 0) 433ab1f9dacSPaul Mackerras continue; 434ab1f9dacSPaul Mackerras 435ab1f9dacSPaul Mackerras ranges = memory->n_addrs; 436ab1f9dacSPaul Mackerras new_range: 437ab1f9dacSPaul Mackerras /* these are order-sensitive, and modify the buffer pointer */ 438237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 439237a0989SMike Kravetz size = read_n_cells(n_mem_size_cells, &memcell_buf); 440ab1f9dacSPaul Mackerras 441ab1f9dacSPaul Mackerras numa_domain = of_node_numa_domain(memory); 442ab1f9dacSPaul Mackerras 443ab1f9dacSPaul Mackerras if (numa_domain >= MAX_NUMNODES) { 444ab1f9dacSPaul Mackerras if (numa_domain != 0xffff) 445ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: memory at %lx maps " 446ab1f9dacSPaul Mackerras "to invalid NUMA node %d\n", start, 447ab1f9dacSPaul Mackerras numa_domain); 448ab1f9dacSPaul Mackerras numa_domain = 0; 449ab1f9dacSPaul Mackerras } 450ab1f9dacSPaul Mackerras 451ab1f9dacSPaul Mackerras if (max_domain < numa_domain) 452ab1f9dacSPaul Mackerras max_domain = numa_domain; 453ab1f9dacSPaul Mackerras 454ab1f9dacSPaul Mackerras if (!(size = numa_enforce_memory_limit(start, size))) { 455ab1f9dacSPaul Mackerras if (--ranges) 456ab1f9dacSPaul Mackerras goto new_range; 457ab1f9dacSPaul Mackerras else 458ab1f9dacSPaul Mackerras continue; 459ab1f9dacSPaul Mackerras } 460ab1f9dacSPaul Mackerras 46145fb6ceaSAnton Blanchard add_region(numa_domain, start >> PAGE_SHIFT, 46245fb6ceaSAnton Blanchard size >> PAGE_SHIFT); 463ab1f9dacSPaul Mackerras 464ab1f9dacSPaul Mackerras if (--ranges) 465ab1f9dacSPaul Mackerras goto new_range; 466ab1f9dacSPaul Mackerras } 467ab1f9dacSPaul Mackerras 468ab1f9dacSPaul Mackerras for (i = 0; i <= max_domain; i++) 469ab1f9dacSPaul Mackerras node_set_online(i); 470ab1f9dacSPaul Mackerras 471ab1f9dacSPaul Mackerras return 0; 472ab1f9dacSPaul Mackerras } 473ab1f9dacSPaul Mackerras 474ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void) 475ab1f9dacSPaul Mackerras { 476ab1f9dacSPaul Mackerras unsigned long top_of_ram = lmb_end_of_DRAM(); 477ab1f9dacSPaul Mackerras unsigned long total_ram = lmb_phys_mem_size(); 478fb6d73d3SPaul Mackerras unsigned int i; 479ab1f9dacSPaul Mackerras 480ab1f9dacSPaul Mackerras printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", 481ab1f9dacSPaul Mackerras top_of_ram, total_ram); 482ab1f9dacSPaul Mackerras printk(KERN_INFO "Memory hole size: %ldMB\n", 483ab1f9dacSPaul Mackerras (top_of_ram - total_ram) >> 20); 484ab1f9dacSPaul Mackerras 485ab1f9dacSPaul Mackerras map_cpu_to_node(boot_cpuid, 0); 486fb6d73d3SPaul Mackerras for (i = 0; i < lmb.memory.cnt; ++i) 487fb6d73d3SPaul Mackerras add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT, 488fb6d73d3SPaul Mackerras lmb_size_pages(&lmb.memory, i)); 489ab1f9dacSPaul Mackerras node_set_online(0); 490ab1f9dacSPaul Mackerras } 491ab1f9dacSPaul Mackerras 492ab1f9dacSPaul Mackerras static void __init dump_numa_topology(void) 493ab1f9dacSPaul Mackerras { 494ab1f9dacSPaul Mackerras unsigned int node; 495ab1f9dacSPaul Mackerras unsigned int count; 496ab1f9dacSPaul Mackerras 497ab1f9dacSPaul Mackerras if (min_common_depth == -1 || !numa_enabled) 498ab1f9dacSPaul Mackerras return; 499ab1f9dacSPaul Mackerras 500ab1f9dacSPaul Mackerras for_each_online_node(node) { 501ab1f9dacSPaul Mackerras unsigned long i; 502ab1f9dacSPaul Mackerras 503ab1f9dacSPaul Mackerras printk(KERN_INFO "Node %d Memory:", node); 504ab1f9dacSPaul Mackerras 505ab1f9dacSPaul Mackerras count = 0; 506ab1f9dacSPaul Mackerras 50745fb6ceaSAnton Blanchard for (i = 0; i < lmb_end_of_DRAM(); 50845fb6ceaSAnton Blanchard i += (1 << SECTION_SIZE_BITS)) { 50945fb6ceaSAnton Blanchard if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { 510ab1f9dacSPaul Mackerras if (count == 0) 511ab1f9dacSPaul Mackerras printk(" 0x%lx", i); 512ab1f9dacSPaul Mackerras ++count; 513ab1f9dacSPaul Mackerras } else { 514ab1f9dacSPaul Mackerras if (count > 0) 515ab1f9dacSPaul Mackerras printk("-0x%lx", i); 516ab1f9dacSPaul Mackerras count = 0; 517ab1f9dacSPaul Mackerras } 518ab1f9dacSPaul Mackerras } 519ab1f9dacSPaul Mackerras 520ab1f9dacSPaul Mackerras if (count > 0) 521ab1f9dacSPaul Mackerras printk("-0x%lx", i); 522ab1f9dacSPaul Mackerras printk("\n"); 523ab1f9dacSPaul Mackerras } 524ab1f9dacSPaul Mackerras return; 525ab1f9dacSPaul Mackerras } 526ab1f9dacSPaul Mackerras 527ab1f9dacSPaul Mackerras /* 528ab1f9dacSPaul Mackerras * Allocate some memory, satisfying the lmb or bootmem allocator where 529ab1f9dacSPaul Mackerras * required. nid is the preferred node and end is the physical address of 530ab1f9dacSPaul Mackerras * the highest address in the node. 531ab1f9dacSPaul Mackerras * 532ab1f9dacSPaul Mackerras * Returns the physical address of the memory. 533ab1f9dacSPaul Mackerras */ 53445fb6ceaSAnton Blanchard static void __init *careful_allocation(int nid, unsigned long size, 53545fb6ceaSAnton Blanchard unsigned long align, 53645fb6ceaSAnton Blanchard unsigned long end_pfn) 537ab1f9dacSPaul Mackerras { 53845fb6ceaSAnton Blanchard int new_nid; 53945fb6ceaSAnton Blanchard unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); 540ab1f9dacSPaul Mackerras 541ab1f9dacSPaul Mackerras /* retry over all memory */ 542ab1f9dacSPaul Mackerras if (!ret) 543ab1f9dacSPaul Mackerras ret = lmb_alloc_base(size, align, lmb_end_of_DRAM()); 544ab1f9dacSPaul Mackerras 545ab1f9dacSPaul Mackerras if (!ret) 546ab1f9dacSPaul Mackerras panic("numa.c: cannot allocate %lu bytes on node %d", 547ab1f9dacSPaul Mackerras size, nid); 548ab1f9dacSPaul Mackerras 549ab1f9dacSPaul Mackerras /* 550ab1f9dacSPaul Mackerras * If the memory came from a previously allocated node, we must 551ab1f9dacSPaul Mackerras * retry with the bootmem allocator. 552ab1f9dacSPaul Mackerras */ 55345fb6ceaSAnton Blanchard new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT); 55445fb6ceaSAnton Blanchard if (new_nid < nid) { 55545fb6ceaSAnton Blanchard ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid), 556ab1f9dacSPaul Mackerras size, align, 0); 557ab1f9dacSPaul Mackerras 558ab1f9dacSPaul Mackerras if (!ret) 559ab1f9dacSPaul Mackerras panic("numa.c: cannot allocate %lu bytes on node %d", 56045fb6ceaSAnton Blanchard size, new_nid); 561ab1f9dacSPaul Mackerras 56245fb6ceaSAnton Blanchard ret = __pa(ret); 563ab1f9dacSPaul Mackerras 564ab1f9dacSPaul Mackerras dbg("alloc_bootmem %lx %lx\n", ret, size); 565ab1f9dacSPaul Mackerras } 566ab1f9dacSPaul Mackerras 56745fb6ceaSAnton Blanchard return (void *)ret; 568ab1f9dacSPaul Mackerras } 569ab1f9dacSPaul Mackerras 570ab1f9dacSPaul Mackerras void __init do_init_bootmem(void) 571ab1f9dacSPaul Mackerras { 572ab1f9dacSPaul Mackerras int nid; 57345fb6ceaSAnton Blanchard unsigned int i; 574ab1f9dacSPaul Mackerras static struct notifier_block ppc64_numa_nb = { 575ab1f9dacSPaul Mackerras .notifier_call = cpu_numa_callback, 576ab1f9dacSPaul Mackerras .priority = 1 /* Must run before sched domains notifier. */ 577ab1f9dacSPaul Mackerras }; 578ab1f9dacSPaul Mackerras 579ab1f9dacSPaul Mackerras min_low_pfn = 0; 580ab1f9dacSPaul Mackerras max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; 581ab1f9dacSPaul Mackerras max_pfn = max_low_pfn; 582ab1f9dacSPaul Mackerras 583ab1f9dacSPaul Mackerras if (parse_numa_properties()) 584ab1f9dacSPaul Mackerras setup_nonnuma(); 585ab1f9dacSPaul Mackerras else 586ab1f9dacSPaul Mackerras dump_numa_topology(); 587ab1f9dacSPaul Mackerras 588ab1f9dacSPaul Mackerras register_cpu_notifier(&ppc64_numa_nb); 589ab1f9dacSPaul Mackerras 590ab1f9dacSPaul Mackerras for_each_online_node(nid) { 59145fb6ceaSAnton Blanchard unsigned long start_pfn, end_pfn, pages_present; 592ab1f9dacSPaul Mackerras unsigned long bootmem_paddr; 593ab1f9dacSPaul Mackerras unsigned long bootmap_pages; 594ab1f9dacSPaul Mackerras 59545fb6ceaSAnton Blanchard get_region(nid, &start_pfn, &end_pfn, &pages_present); 596ab1f9dacSPaul Mackerras 597ab1f9dacSPaul Mackerras /* Allocate the node structure node local if possible */ 59845fb6ceaSAnton Blanchard NODE_DATA(nid) = careful_allocation(nid, 599ab1f9dacSPaul Mackerras sizeof(struct pglist_data), 60045fb6ceaSAnton Blanchard SMP_CACHE_BYTES, end_pfn); 60145fb6ceaSAnton Blanchard NODE_DATA(nid) = __va(NODE_DATA(nid)); 602ab1f9dacSPaul Mackerras memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 603ab1f9dacSPaul Mackerras 604ab1f9dacSPaul Mackerras dbg("node %d\n", nid); 605ab1f9dacSPaul Mackerras dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); 606ab1f9dacSPaul Mackerras 607ab1f9dacSPaul Mackerras NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; 60845fb6ceaSAnton Blanchard NODE_DATA(nid)->node_start_pfn = start_pfn; 60945fb6ceaSAnton Blanchard NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; 610ab1f9dacSPaul Mackerras 611ab1f9dacSPaul Mackerras if (NODE_DATA(nid)->node_spanned_pages == 0) 612ab1f9dacSPaul Mackerras continue; 613ab1f9dacSPaul Mackerras 61445fb6ceaSAnton Blanchard dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); 61545fb6ceaSAnton Blanchard dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); 616ab1f9dacSPaul Mackerras 61745fb6ceaSAnton Blanchard bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 61845fb6ceaSAnton Blanchard bootmem_paddr = (unsigned long)careful_allocation(nid, 619ab1f9dacSPaul Mackerras bootmap_pages << PAGE_SHIFT, 62045fb6ceaSAnton Blanchard PAGE_SIZE, end_pfn); 62145fb6ceaSAnton Blanchard memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT); 62245fb6ceaSAnton Blanchard 623ab1f9dacSPaul Mackerras dbg("bootmap_paddr = %lx\n", bootmem_paddr); 624ab1f9dacSPaul Mackerras 625ab1f9dacSPaul Mackerras init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, 62645fb6ceaSAnton Blanchard start_pfn, end_pfn); 627ab1f9dacSPaul Mackerras 62845fb6ceaSAnton Blanchard /* Add free regions on this node */ 62945fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 63045fb6ceaSAnton Blanchard unsigned long start, end; 631ab1f9dacSPaul Mackerras 63245fb6ceaSAnton Blanchard if (init_node_data[i].nid != nid) 633ab1f9dacSPaul Mackerras continue; 634ab1f9dacSPaul Mackerras 63545fb6ceaSAnton Blanchard start = init_node_data[i].start_pfn << PAGE_SHIFT; 63645fb6ceaSAnton Blanchard end = init_node_data[i].end_pfn << PAGE_SHIFT; 637ab1f9dacSPaul Mackerras 63845fb6ceaSAnton Blanchard dbg("free_bootmem %lx %lx\n", start, end - start); 63945fb6ceaSAnton Blanchard free_bootmem_node(NODE_DATA(nid), start, end - start); 640ab1f9dacSPaul Mackerras } 641ab1f9dacSPaul Mackerras 64245fb6ceaSAnton Blanchard /* Mark reserved regions on this node */ 643ab1f9dacSPaul Mackerras for (i = 0; i < lmb.reserved.cnt; i++) { 644ab1f9dacSPaul Mackerras unsigned long physbase = lmb.reserved.region[i].base; 645ab1f9dacSPaul Mackerras unsigned long size = lmb.reserved.region[i].size; 64645fb6ceaSAnton Blanchard unsigned long start_paddr = start_pfn << PAGE_SHIFT; 64745fb6ceaSAnton Blanchard unsigned long end_paddr = end_pfn << PAGE_SHIFT; 648ab1f9dacSPaul Mackerras 64945fb6ceaSAnton Blanchard if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid && 65045fb6ceaSAnton Blanchard early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid) 651ab1f9dacSPaul Mackerras continue; 652ab1f9dacSPaul Mackerras 653ab1f9dacSPaul Mackerras if (physbase < end_paddr && 654ab1f9dacSPaul Mackerras (physbase+size) > start_paddr) { 655ab1f9dacSPaul Mackerras /* overlaps */ 656ab1f9dacSPaul Mackerras if (physbase < start_paddr) { 657ab1f9dacSPaul Mackerras size -= start_paddr - physbase; 658ab1f9dacSPaul Mackerras physbase = start_paddr; 659ab1f9dacSPaul Mackerras } 660ab1f9dacSPaul Mackerras 661ab1f9dacSPaul Mackerras if (size > end_paddr - physbase) 662ab1f9dacSPaul Mackerras size = end_paddr - physbase; 663ab1f9dacSPaul Mackerras 664ab1f9dacSPaul Mackerras dbg("reserve_bootmem %lx %lx\n", physbase, 665ab1f9dacSPaul Mackerras size); 666ab1f9dacSPaul Mackerras reserve_bootmem_node(NODE_DATA(nid), physbase, 667ab1f9dacSPaul Mackerras size); 668ab1f9dacSPaul Mackerras } 669ab1f9dacSPaul Mackerras } 670ab1f9dacSPaul Mackerras 67145fb6ceaSAnton Blanchard /* Add regions into sparsemem */ 67245fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 67345fb6ceaSAnton Blanchard unsigned long start, end; 67445fb6ceaSAnton Blanchard 67545fb6ceaSAnton Blanchard if (init_node_data[i].nid != nid) 676ab1f9dacSPaul Mackerras continue; 677ab1f9dacSPaul Mackerras 67845fb6ceaSAnton Blanchard start = init_node_data[i].start_pfn; 67945fb6ceaSAnton Blanchard end = init_node_data[i].end_pfn; 680ab1f9dacSPaul Mackerras 68145fb6ceaSAnton Blanchard memory_present(nid, start, end); 682ab1f9dacSPaul Mackerras } 683ab1f9dacSPaul Mackerras } 684ab1f9dacSPaul Mackerras } 685ab1f9dacSPaul Mackerras 686ab1f9dacSPaul Mackerras void __init paging_init(void) 687ab1f9dacSPaul Mackerras { 688ab1f9dacSPaul Mackerras unsigned long zones_size[MAX_NR_ZONES]; 689ab1f9dacSPaul Mackerras unsigned long zholes_size[MAX_NR_ZONES]; 690ab1f9dacSPaul Mackerras int nid; 691ab1f9dacSPaul Mackerras 692ab1f9dacSPaul Mackerras memset(zones_size, 0, sizeof(zones_size)); 693ab1f9dacSPaul Mackerras memset(zholes_size, 0, sizeof(zholes_size)); 694ab1f9dacSPaul Mackerras 695ab1f9dacSPaul Mackerras for_each_online_node(nid) { 69645fb6ceaSAnton Blanchard unsigned long start_pfn, end_pfn, pages_present; 697ab1f9dacSPaul Mackerras 69845fb6ceaSAnton Blanchard get_region(nid, &start_pfn, &end_pfn, &pages_present); 699ab1f9dacSPaul Mackerras 700ab1f9dacSPaul Mackerras zones_size[ZONE_DMA] = end_pfn - start_pfn; 70145fb6ceaSAnton Blanchard zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present; 702ab1f9dacSPaul Mackerras 703ab1f9dacSPaul Mackerras dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid, 704ab1f9dacSPaul Mackerras zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]); 705ab1f9dacSPaul Mackerras 70645fb6ceaSAnton Blanchard free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn, 70745fb6ceaSAnton Blanchard zholes_size); 708ab1f9dacSPaul Mackerras } 709ab1f9dacSPaul Mackerras } 710ab1f9dacSPaul Mackerras 711ab1f9dacSPaul Mackerras static int __init early_numa(char *p) 712ab1f9dacSPaul Mackerras { 713ab1f9dacSPaul Mackerras if (!p) 714ab1f9dacSPaul Mackerras return 0; 715ab1f9dacSPaul Mackerras 716ab1f9dacSPaul Mackerras if (strstr(p, "off")) 717ab1f9dacSPaul Mackerras numa_enabled = 0; 718ab1f9dacSPaul Mackerras 719ab1f9dacSPaul Mackerras if (strstr(p, "debug")) 720ab1f9dacSPaul Mackerras numa_debug = 1; 721ab1f9dacSPaul Mackerras 722ab1f9dacSPaul Mackerras return 0; 723ab1f9dacSPaul Mackerras } 724ab1f9dacSPaul Mackerras early_param("numa", early_numa); 725237a0989SMike Kravetz 726237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG 727237a0989SMike Kravetz /* 728237a0989SMike Kravetz * Find the node associated with a hot added memory section. Section 729237a0989SMike Kravetz * corresponds to a SPARSEMEM section, not an LMB. It is assumed that 730237a0989SMike Kravetz * sections are fully contained within a single LMB. 731237a0989SMike Kravetz */ 732237a0989SMike Kravetz int hot_add_scn_to_nid(unsigned long scn_addr) 733237a0989SMike Kravetz { 734237a0989SMike Kravetz struct device_node *memory = NULL; 735237a0989SMike Kravetz 736237a0989SMike Kravetz if (!numa_enabled || (min_common_depth < 0)) 737237a0989SMike Kravetz return 0; 738237a0989SMike Kravetz 739237a0989SMike Kravetz while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 740237a0989SMike Kravetz unsigned long start, size; 741237a0989SMike Kravetz int numa_domain, ranges; 742237a0989SMike Kravetz unsigned int *memcell_buf; 743237a0989SMike Kravetz unsigned int len; 744237a0989SMike Kravetz 745237a0989SMike Kravetz memcell_buf = (unsigned int *)get_property(memory, "reg", &len); 746237a0989SMike Kravetz if (!memcell_buf || len <= 0) 747237a0989SMike Kravetz continue; 748237a0989SMike Kravetz 749237a0989SMike Kravetz ranges = memory->n_addrs; /* ranges in cell */ 750237a0989SMike Kravetz ha_new_range: 751237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 752237a0989SMike Kravetz size = read_n_cells(n_mem_size_cells, &memcell_buf); 753237a0989SMike Kravetz numa_domain = of_node_numa_domain(memory); 754237a0989SMike Kravetz 755237a0989SMike Kravetz /* Domains not present at boot default to 0 */ 756237a0989SMike Kravetz if (!node_online(numa_domain)) 757237a0989SMike Kravetz numa_domain = any_online_node(NODE_MASK_ALL); 758237a0989SMike Kravetz 759237a0989SMike Kravetz if ((scn_addr >= start) && (scn_addr < (start + size))) { 760237a0989SMike Kravetz of_node_put(memory); 761237a0989SMike Kravetz return numa_domain; 762237a0989SMike Kravetz } 763237a0989SMike Kravetz 764237a0989SMike Kravetz if (--ranges) /* process all ranges in cell */ 765237a0989SMike Kravetz goto ha_new_range; 766237a0989SMike Kravetz } 767237a0989SMike Kravetz 768237a0989SMike Kravetz BUG(); /* section address should be found above */ 769237a0989SMike Kravetz return 0; 770237a0989SMike Kravetz } 771237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */ 772