/*
 * pSeries NUMA support
 *
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <asm/sparsemem.h>
#include <asm/lmb.h>
#include <asm/system.h>
#include <asm/smp.h>

/* Cleared on the command line to disable NUMA handling entirely. */
static int numa_enabled = 1;

/* Debug printks are compiled in but gated on this runtime flag. */
static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }

/* cpu -> node and node -> cpumask reverse mappings, plus per-node data. */
int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);

static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
/*
 * Depth in the ibm,associativity property at which domains differ;
 * -1 means no NUMA information is available.
 */
static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;

/*
 * We need somewhere to store start/end/node for each region until we have
 * allocated the real node_data structures.
 */
#define MAX_REGIONS	(MAX_LMB_REGIONS*2)
static struct {
	unsigned long start_pfn;	/* inclusive */
	unsigned long end_pfn;		/* exclusive; 0 terminates the table */
	int nid;
} init_node_data[MAX_REGIONS] __initdata;

/*
 * Look up which node owns @pfn in the early region table.
 * Returns the node id, or -1 if the pfn lies in no recorded region.
 */
int __init early_pfn_to_nid(unsigned long pfn)
{
	unsigned int i;

	for (i = 0; init_node_data[i].end_pfn; i++) {
		unsigned long start_pfn = init_node_data[i].start_pfn;
		unsigned long end_pfn = init_node_data[i].end_pfn;

		if ((start_pfn <= pfn) && (pfn < end_pfn))
			return init_node_data[i].nid;
	}

	return -1;
}

/*
 * Record a memory region [start_pfn, start_pfn + pages) for node @nid,
 * coalescing with an adjacent existing entry of the same node when possible.
 */
void __init add_region(unsigned int nid, unsigned long start_pfn,
		       unsigned long pages)
{
	unsigned int i;

	dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
		nid, start_pfn, pages);

	/* First try to extend an existing region of the same node. */
	for (i = 0; init_node_data[i].end_pfn; i++) {
		if (init_node_data[i].nid != nid)
			continue;
		if (init_node_data[i].end_pfn == start_pfn) {
			init_node_data[i].end_pfn += pages;
			return;
		}
		if (init_node_data[i].start_pfn == (start_pfn + pages)) {
			init_node_data[i].start_pfn -= pages;
			return;
		}
	}

	/*
	 * Leave last entry NULL so we don't iterate off the end (we use
	 * entry.end_pfn to terminate the walk).
	 */
	if (i >= (MAX_REGIONS - 1)) {
		printk(KERN_ERR "WARNING: too many memory regions in "
				"numa code, truncating\n");
		return;
	}

	init_node_data[i].start_pfn = start_pfn;
	init_node_data[i].end_pfn = start_pfn + pages;
	init_node_data[i].nid = nid;
}

/*
 * Report the overall pfn span and number of present pages for node @nid,
 * accumulated over every early region belonging to that node.
 * We assume init_node_data has no overlapping regions.
 */
void __init get_region(unsigned int nid, unsigned long *start_pfn,
		       unsigned long *end_pfn, unsigned long *pages_present)
{
	unsigned int i;

	*start_pfn = -1UL;
	*end_pfn = *pages_present = 0;

	for (i = 0; init_node_data[i].end_pfn; i++) {
		if (init_node_data[i].nid != nid)
			continue;

		*pages_present += init_node_data[i].end_pfn -
			init_node_data[i].start_pfn;

		if (init_node_data[i].start_pfn < *start_pfn)
			*start_pfn = init_node_data[i].start_pfn;

		if (init_node_data[i].end_pfn > *end_pfn)
			*end_pfn = init_node_data[i].end_pfn;
	}

	/* We didn't find a matching region, return start/end as 0 */
	if (*start_pfn == -1UL)
		*start_pfn = 0;
}

/* Associate @cpu with @node in both lookup tables. */
static void __cpuinit map_cpu_to_node(int cpu, int node)
{
	numa_cpu_lookup_table[cpu] = node;

	dbg("adding cpu %d to node %d\n", cpu, node);

	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
		cpu_set(cpu, numa_cpumask_lookup_table[node]);
}

#ifdef CONFIG_HOTPLUG_CPU
/* Remove @cpu from its node's cpumask (cpu hot-unplug path). */
static void unmap_cpu_from_node(unsigned long cpu)
{
	int node = numa_cpu_lookup_table[cpu];

	dbg("removing cpu %lu from node %d\n", cpu, node);

	if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
		cpu_clear(cpu, numa_cpumask_lookup_table[node]);
	} else {
		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
		       cpu, node);
	}
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * Find the device tree node for the given logical cpu by matching its
 * hardware cpu id against "ibm,ppc-interrupt-server#s" (preferred) or
 * "reg". Returns a node with a reference held (caller does of_node_put),
 * or NULL if no cpu node matches.
 */
static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
{
	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
	struct device_node *cpu_node = NULL;
	unsigned int *interrupt_server, *reg;
	int len;

	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
		/* Try interrupt server first */
		interrupt_server = (unsigned int *)get_property(cpu_node,
					"ibm,ppc-interrupt-server#s", &len);

		len = len / sizeof(u32);

		if (interrupt_server && (len > 0)) {
			while (len--) {
				if (interrupt_server[len] == hw_cpuid)
					return cpu_node;
			}
		} else {
			reg = (unsigned int *)get_property(cpu_node,
							   "reg", &len);
			if (reg && (len > 0) && (reg[0] == hw_cpuid))
				return cpu_node;
		}
	}

	return NULL;
}

/* must hold reference to node during call */
static int *of_get_associativity(struct device_node *dev)
{
	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
}

/*
 * Map a device tree node to a NUMA node id via its ibm,associativity
 * property, read at min_common_depth. Falls back to node 0 when no
 * NUMA information is available.
 */
static int of_node_to_nid(struct device_node *device)
{
	int nid;
	unsigned int *tmp;

	if (min_common_depth == -1)
		return 0;

	tmp = of_get_associativity(device);
	if (tmp && (tmp[0] >= min_common_depth)) {
		nid = tmp[min_common_depth];
	} else {
		dbg("WARNING: no NUMA information for %s\n",
		    device->full_name);
		nid = 0;
	}

	/* POWER4 LPAR uses 0xffff as invalid node */
	if (nid == 0xffff)
		nid = 0;

	return nid;
}

/*
 * In theory, the "ibm,associativity" property may contain multiple
 * associativity lists because a resource may be multiply connected
 * into the machine. This resource then has different associativity
 * characteristics relative to its multiple connections. We ignore
 * this for now. We also assume that all cpu and memory sets have
 * their distances represented at a common level. This won't be
 * true for hierarchical NUMA.
 *
 * In any case the ibm,associativity-reference-points should give
 * the correct depth for a normal NUMA system.
 *
 * - Dave Hansen <haveblue@us.ibm.com>
 */
static int __init find_min_common_depth(void)
{
	int depth;
	unsigned int *ref_points;
	struct device_node *rtas_root;
	unsigned int len;

	rtas_root = of_find_node_by_path("/rtas");

	if (!rtas_root)
		return -1;

	/*
	 * this property is 2 32-bit integers, each representing a level of
	 * depth in the associativity nodes. The first is for an SMP
	 * configuration (should be all 0's) and the second is for a normal
	 * NUMA configuration.
	 */
	ref_points = (unsigned int *)get_property(rtas_root,
			"ibm,associativity-reference-points", &len);

	/*
	 * NOTE(review): ref_points[1] is read below while only len >= 1 is
	 * checked; presumably the property is always at least 2 cells on
	 * real firmware -- confirm before relying on this.
	 */
	if ((len >= 1) && ref_points) {
		depth = ref_points[1];
	} else {
		dbg("NUMA: ibm,associativity-reference-points not found.\n");
		depth = -1;
	}
	of_node_put(rtas_root);

	return depth;
}

/*
 * Read #address-cells / #size-cells from the first memory node; panics
 * if no memory node exists since NUMA setup cannot proceed without one.
 */
static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
	struct device_node *memory = NULL;

	memory = of_find_node_by_type(memory, "memory");
	if (!memory)
		panic("numa.c: No memory nodes found!");

	*n_addr_cells = prom_n_addr_cells(memory);
	*n_size_cells = prom_n_size_cells(memory);
	of_node_put(memory);
}

/*
 * Combine @n consecutive 32-bit cells from *@buf into one value,
 * most-significant cell first, advancing the buffer pointer past them.
 */
static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
{
	unsigned long result = 0;

	while (n--) {
		result = (result << 32) | **buf;
		(*buf)++;
	}
	return result;
}
288ab1f9dacSPaul Mackerras /* 289ab1f9dacSPaul Mackerras * Figure out to which domain a cpu belongs and stick it there. 290ab1f9dacSPaul Mackerras * Return the id of the domain used. 291ab1f9dacSPaul Mackerras */ 2922e5ce39dSNathan Lynch static int __cpuinit numa_setup_cpu(unsigned long lcpu) 293ab1f9dacSPaul Mackerras { 294cf950b7aSNathan Lynch int nid = 0; 295ab1f9dacSPaul Mackerras struct device_node *cpu = find_cpu_node(lcpu); 296ab1f9dacSPaul Mackerras 297ab1f9dacSPaul Mackerras if (!cpu) { 298ab1f9dacSPaul Mackerras WARN_ON(1); 299ab1f9dacSPaul Mackerras goto out; 300ab1f9dacSPaul Mackerras } 301ab1f9dacSPaul Mackerras 302cf950b7aSNathan Lynch nid = of_node_to_nid(cpu); 303ab1f9dacSPaul Mackerras 304cf950b7aSNathan Lynch if (nid >= num_online_nodes()) { 305ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: cpu %ld " 306ab1f9dacSPaul Mackerras "maps to invalid NUMA node %d\n", 307cf950b7aSNathan Lynch lcpu, nid); 308cf950b7aSNathan Lynch nid = 0; 309ab1f9dacSPaul Mackerras } 310ab1f9dacSPaul Mackerras out: 311cf950b7aSNathan Lynch node_set_online(nid); 312ab1f9dacSPaul Mackerras 313cf950b7aSNathan Lynch map_cpu_to_node(lcpu, nid); 314ab1f9dacSPaul Mackerras 315ab1f9dacSPaul Mackerras of_node_put(cpu); 316ab1f9dacSPaul Mackerras 317cf950b7aSNathan Lynch return nid; 318ab1f9dacSPaul Mackerras } 319ab1f9dacSPaul Mackerras 320ab1f9dacSPaul Mackerras static int cpu_numa_callback(struct notifier_block *nfb, 321ab1f9dacSPaul Mackerras unsigned long action, 322ab1f9dacSPaul Mackerras void *hcpu) 323ab1f9dacSPaul Mackerras { 324ab1f9dacSPaul Mackerras unsigned long lcpu = (unsigned long)hcpu; 325ab1f9dacSPaul Mackerras int ret = NOTIFY_DONE; 326ab1f9dacSPaul Mackerras 327ab1f9dacSPaul Mackerras switch (action) { 328ab1f9dacSPaul Mackerras case CPU_UP_PREPARE: 329ab1f9dacSPaul Mackerras if (min_common_depth == -1 || !numa_enabled) 330ab1f9dacSPaul Mackerras map_cpu_to_node(lcpu, 0); 331ab1f9dacSPaul Mackerras else 332ab1f9dacSPaul Mackerras numa_setup_cpu(lcpu); 
333ab1f9dacSPaul Mackerras ret = NOTIFY_OK; 334ab1f9dacSPaul Mackerras break; 335ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU 336ab1f9dacSPaul Mackerras case CPU_DEAD: 337ab1f9dacSPaul Mackerras case CPU_UP_CANCELED: 338ab1f9dacSPaul Mackerras unmap_cpu_from_node(lcpu); 339ab1f9dacSPaul Mackerras break; 340ab1f9dacSPaul Mackerras ret = NOTIFY_OK; 341ab1f9dacSPaul Mackerras #endif 342ab1f9dacSPaul Mackerras } 343ab1f9dacSPaul Mackerras return ret; 344ab1f9dacSPaul Mackerras } 345ab1f9dacSPaul Mackerras 346ab1f9dacSPaul Mackerras /* 347ab1f9dacSPaul Mackerras * Check and possibly modify a memory region to enforce the memory limit. 348ab1f9dacSPaul Mackerras * 349ab1f9dacSPaul Mackerras * Returns the size the region should have to enforce the memory limit. 350ab1f9dacSPaul Mackerras * This will either be the original value of size, a truncated value, 351ab1f9dacSPaul Mackerras * or zero. If the returned value of size is 0 the region should be 352ab1f9dacSPaul Mackerras * discarded as it lies wholy above the memory limit. 353ab1f9dacSPaul Mackerras */ 35445fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start, 35545fb6ceaSAnton Blanchard unsigned long size) 356ab1f9dacSPaul Mackerras { 357ab1f9dacSPaul Mackerras /* 358ab1f9dacSPaul Mackerras * We use lmb_end_of_DRAM() in here instead of memory_limit because 359ab1f9dacSPaul Mackerras * we've already adjusted it for the limit and it takes care of 360ab1f9dacSPaul Mackerras * having memory holes below the limit. 361ab1f9dacSPaul Mackerras */ 362ab1f9dacSPaul Mackerras 363ab1f9dacSPaul Mackerras if (! 
memory_limit) 364ab1f9dacSPaul Mackerras return size; 365ab1f9dacSPaul Mackerras 366ab1f9dacSPaul Mackerras if (start + size <= lmb_end_of_DRAM()) 367ab1f9dacSPaul Mackerras return size; 368ab1f9dacSPaul Mackerras 369ab1f9dacSPaul Mackerras if (start >= lmb_end_of_DRAM()) 370ab1f9dacSPaul Mackerras return 0; 371ab1f9dacSPaul Mackerras 372ab1f9dacSPaul Mackerras return lmb_end_of_DRAM() - start; 373ab1f9dacSPaul Mackerras } 374ab1f9dacSPaul Mackerras 375ab1f9dacSPaul Mackerras static int __init parse_numa_properties(void) 376ab1f9dacSPaul Mackerras { 377ab1f9dacSPaul Mackerras struct device_node *cpu = NULL; 378ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 379c08888cfSNathan Lynch int max_domain = 0; 380ab1f9dacSPaul Mackerras unsigned long i; 381ab1f9dacSPaul Mackerras 382ab1f9dacSPaul Mackerras if (numa_enabled == 0) { 383ab1f9dacSPaul Mackerras printk(KERN_WARNING "NUMA disabled by user\n"); 384ab1f9dacSPaul Mackerras return -1; 385ab1f9dacSPaul Mackerras } 386ab1f9dacSPaul Mackerras 387ab1f9dacSPaul Mackerras min_common_depth = find_min_common_depth(); 388ab1f9dacSPaul Mackerras 389ab1f9dacSPaul Mackerras if (min_common_depth < 0) 390ab1f9dacSPaul Mackerras return min_common_depth; 391ab1f9dacSPaul Mackerras 392bf4b85b0SNathan Lynch dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); 393bf4b85b0SNathan Lynch 394ab1f9dacSPaul Mackerras /* 395ab1f9dacSPaul Mackerras * Even though we connect cpus to numa domains later in SMP init, 396ab1f9dacSPaul Mackerras * we need to know the maximum node id now. This is because each 397ab1f9dacSPaul Mackerras * node id must have NODE_DATA etc backing it. 398ab1f9dacSPaul Mackerras * As a result of hotplug we could still have cpus appear later on 399ab1f9dacSPaul Mackerras * with larger node ids. In that case we force the cpu into node 0. 
400ab1f9dacSPaul Mackerras */ 401ab1f9dacSPaul Mackerras for_each_cpu(i) { 402cf950b7aSNathan Lynch int nid; 403ab1f9dacSPaul Mackerras 404ab1f9dacSPaul Mackerras cpu = find_cpu_node(i); 405ab1f9dacSPaul Mackerras 406ab1f9dacSPaul Mackerras if (cpu) { 407cf950b7aSNathan Lynch nid = of_node_to_nid(cpu); 408ab1f9dacSPaul Mackerras of_node_put(cpu); 409ab1f9dacSPaul Mackerras 410cf950b7aSNathan Lynch if (nid < MAX_NUMNODES && 411cf950b7aSNathan Lynch max_domain < nid) 412cf950b7aSNathan Lynch max_domain = nid; 413ab1f9dacSPaul Mackerras } 414ab1f9dacSPaul Mackerras } 415ab1f9dacSPaul Mackerras 416237a0989SMike Kravetz get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); 417ab1f9dacSPaul Mackerras memory = NULL; 418ab1f9dacSPaul Mackerras while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 419ab1f9dacSPaul Mackerras unsigned long start; 420ab1f9dacSPaul Mackerras unsigned long size; 421cf950b7aSNathan Lynch int nid; 422ab1f9dacSPaul Mackerras int ranges; 423ab1f9dacSPaul Mackerras unsigned int *memcell_buf; 424ab1f9dacSPaul Mackerras unsigned int len; 425ab1f9dacSPaul Mackerras 426ba759485SMichael Ellerman memcell_buf = (unsigned int *)get_property(memory, 427ba759485SMichael Ellerman "linux,usable-memory", &len); 428ba759485SMichael Ellerman if (!memcell_buf || len <= 0) 429ba759485SMichael Ellerman memcell_buf = 430ba759485SMichael Ellerman (unsigned int *)get_property(memory, "reg", 431ba759485SMichael Ellerman &len); 432ab1f9dacSPaul Mackerras if (!memcell_buf || len <= 0) 433ab1f9dacSPaul Mackerras continue; 434ab1f9dacSPaul Mackerras 435cc5d0189SBenjamin Herrenschmidt /* ranges in cell */ 436cc5d0189SBenjamin Herrenschmidt ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); 437ab1f9dacSPaul Mackerras new_range: 438ab1f9dacSPaul Mackerras /* these are order-sensitive, and modify the buffer pointer */ 439237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 440237a0989SMike Kravetz size = 
read_n_cells(n_mem_size_cells, &memcell_buf); 441ab1f9dacSPaul Mackerras 442cf950b7aSNathan Lynch nid = of_node_to_nid(memory); 443ab1f9dacSPaul Mackerras 444cf950b7aSNathan Lynch if (nid >= MAX_NUMNODES) { 445ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: memory at %lx maps " 446ab1f9dacSPaul Mackerras "to invalid NUMA node %d\n", start, 447cf950b7aSNathan Lynch nid); 448cf950b7aSNathan Lynch nid = 0; 449ab1f9dacSPaul Mackerras } 450ab1f9dacSPaul Mackerras 451cf950b7aSNathan Lynch if (max_domain < nid) 452cf950b7aSNathan Lynch max_domain = nid; 453ab1f9dacSPaul Mackerras 454ab1f9dacSPaul Mackerras if (!(size = numa_enforce_memory_limit(start, size))) { 455ab1f9dacSPaul Mackerras if (--ranges) 456ab1f9dacSPaul Mackerras goto new_range; 457ab1f9dacSPaul Mackerras else 458ab1f9dacSPaul Mackerras continue; 459ab1f9dacSPaul Mackerras } 460ab1f9dacSPaul Mackerras 461cf950b7aSNathan Lynch add_region(nid, start >> PAGE_SHIFT, 46245fb6ceaSAnton Blanchard size >> PAGE_SHIFT); 463ab1f9dacSPaul Mackerras 464ab1f9dacSPaul Mackerras if (--ranges) 465ab1f9dacSPaul Mackerras goto new_range; 466ab1f9dacSPaul Mackerras } 467ab1f9dacSPaul Mackerras 468ab1f9dacSPaul Mackerras for (i = 0; i <= max_domain; i++) 469ab1f9dacSPaul Mackerras node_set_online(i); 470ab1f9dacSPaul Mackerras 471c08888cfSNathan Lynch max_domain = numa_setup_cpu(boot_cpuid); 472c08888cfSNathan Lynch 473ab1f9dacSPaul Mackerras return 0; 474ab1f9dacSPaul Mackerras } 475ab1f9dacSPaul Mackerras 476ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void) 477ab1f9dacSPaul Mackerras { 478ab1f9dacSPaul Mackerras unsigned long top_of_ram = lmb_end_of_DRAM(); 479ab1f9dacSPaul Mackerras unsigned long total_ram = lmb_phys_mem_size(); 480fb6d73d3SPaul Mackerras unsigned int i; 481ab1f9dacSPaul Mackerras 482ab1f9dacSPaul Mackerras printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", 483ab1f9dacSPaul Mackerras top_of_ram, total_ram); 484ab1f9dacSPaul Mackerras printk(KERN_INFO "Memory hole size: %ldMB\n", 
485ab1f9dacSPaul Mackerras (top_of_ram - total_ram) >> 20); 486ab1f9dacSPaul Mackerras 487ab1f9dacSPaul Mackerras map_cpu_to_node(boot_cpuid, 0); 488fb6d73d3SPaul Mackerras for (i = 0; i < lmb.memory.cnt; ++i) 489fb6d73d3SPaul Mackerras add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT, 490fb6d73d3SPaul Mackerras lmb_size_pages(&lmb.memory, i)); 491ab1f9dacSPaul Mackerras node_set_online(0); 492ab1f9dacSPaul Mackerras } 493ab1f9dacSPaul Mackerras 4944b703a23SAnton Blanchard void __init dump_numa_cpu_topology(void) 4954b703a23SAnton Blanchard { 4964b703a23SAnton Blanchard unsigned int node; 4974b703a23SAnton Blanchard unsigned int cpu, count; 4984b703a23SAnton Blanchard 4994b703a23SAnton Blanchard if (min_common_depth == -1 || !numa_enabled) 5004b703a23SAnton Blanchard return; 5014b703a23SAnton Blanchard 5024b703a23SAnton Blanchard for_each_online_node(node) { 5034b703a23SAnton Blanchard printk(KERN_INFO "Node %d CPUs:", node); 5044b703a23SAnton Blanchard 5054b703a23SAnton Blanchard count = 0; 5064b703a23SAnton Blanchard /* 5074b703a23SAnton Blanchard * If we used a CPU iterator here we would miss printing 5084b703a23SAnton Blanchard * the holes in the cpumap. 
5094b703a23SAnton Blanchard */ 5104b703a23SAnton Blanchard for (cpu = 0; cpu < NR_CPUS; cpu++) { 5114b703a23SAnton Blanchard if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { 5124b703a23SAnton Blanchard if (count == 0) 5134b703a23SAnton Blanchard printk(" %u", cpu); 5144b703a23SAnton Blanchard ++count; 5154b703a23SAnton Blanchard } else { 5164b703a23SAnton Blanchard if (count > 1) 5174b703a23SAnton Blanchard printk("-%u", cpu - 1); 5184b703a23SAnton Blanchard count = 0; 5194b703a23SAnton Blanchard } 5204b703a23SAnton Blanchard } 5214b703a23SAnton Blanchard 5224b703a23SAnton Blanchard if (count > 1) 5234b703a23SAnton Blanchard printk("-%u", NR_CPUS - 1); 5244b703a23SAnton Blanchard printk("\n"); 5254b703a23SAnton Blanchard } 5264b703a23SAnton Blanchard } 5274b703a23SAnton Blanchard 5284b703a23SAnton Blanchard static void __init dump_numa_memory_topology(void) 529ab1f9dacSPaul Mackerras { 530ab1f9dacSPaul Mackerras unsigned int node; 531ab1f9dacSPaul Mackerras unsigned int count; 532ab1f9dacSPaul Mackerras 533ab1f9dacSPaul Mackerras if (min_common_depth == -1 || !numa_enabled) 534ab1f9dacSPaul Mackerras return; 535ab1f9dacSPaul Mackerras 536ab1f9dacSPaul Mackerras for_each_online_node(node) { 537ab1f9dacSPaul Mackerras unsigned long i; 538ab1f9dacSPaul Mackerras 539ab1f9dacSPaul Mackerras printk(KERN_INFO "Node %d Memory:", node); 540ab1f9dacSPaul Mackerras 541ab1f9dacSPaul Mackerras count = 0; 542ab1f9dacSPaul Mackerras 54345fb6ceaSAnton Blanchard for (i = 0; i < lmb_end_of_DRAM(); 54445fb6ceaSAnton Blanchard i += (1 << SECTION_SIZE_BITS)) { 54545fb6ceaSAnton Blanchard if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { 546ab1f9dacSPaul Mackerras if (count == 0) 547ab1f9dacSPaul Mackerras printk(" 0x%lx", i); 548ab1f9dacSPaul Mackerras ++count; 549ab1f9dacSPaul Mackerras } else { 550ab1f9dacSPaul Mackerras if (count > 0) 551ab1f9dacSPaul Mackerras printk("-0x%lx", i); 552ab1f9dacSPaul Mackerras count = 0; 553ab1f9dacSPaul Mackerras } 554ab1f9dacSPaul Mackerras } 
555ab1f9dacSPaul Mackerras 556ab1f9dacSPaul Mackerras if (count > 0) 557ab1f9dacSPaul Mackerras printk("-0x%lx", i); 558ab1f9dacSPaul Mackerras printk("\n"); 559ab1f9dacSPaul Mackerras } 560ab1f9dacSPaul Mackerras } 561ab1f9dacSPaul Mackerras 562ab1f9dacSPaul Mackerras /* 563ab1f9dacSPaul Mackerras * Allocate some memory, satisfying the lmb or bootmem allocator where 564ab1f9dacSPaul Mackerras * required. nid is the preferred node and end is the physical address of 565ab1f9dacSPaul Mackerras * the highest address in the node. 566ab1f9dacSPaul Mackerras * 567ab1f9dacSPaul Mackerras * Returns the physical address of the memory. 568ab1f9dacSPaul Mackerras */ 56945fb6ceaSAnton Blanchard static void __init *careful_allocation(int nid, unsigned long size, 57045fb6ceaSAnton Blanchard unsigned long align, 57145fb6ceaSAnton Blanchard unsigned long end_pfn) 572ab1f9dacSPaul Mackerras { 57345fb6ceaSAnton Blanchard int new_nid; 574d7a5b2ffSMichael Ellerman unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); 575ab1f9dacSPaul Mackerras 576ab1f9dacSPaul Mackerras /* retry over all memory */ 577ab1f9dacSPaul Mackerras if (!ret) 578d7a5b2ffSMichael Ellerman ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM()); 579ab1f9dacSPaul Mackerras 580ab1f9dacSPaul Mackerras if (!ret) 581ab1f9dacSPaul Mackerras panic("numa.c: cannot allocate %lu bytes on node %d", 582ab1f9dacSPaul Mackerras size, nid); 583ab1f9dacSPaul Mackerras 584ab1f9dacSPaul Mackerras /* 585ab1f9dacSPaul Mackerras * If the memory came from a previously allocated node, we must 586ab1f9dacSPaul Mackerras * retry with the bootmem allocator. 
587ab1f9dacSPaul Mackerras */ 58845fb6ceaSAnton Blanchard new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT); 58945fb6ceaSAnton Blanchard if (new_nid < nid) { 59045fb6ceaSAnton Blanchard ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid), 591ab1f9dacSPaul Mackerras size, align, 0); 592ab1f9dacSPaul Mackerras 593ab1f9dacSPaul Mackerras if (!ret) 594ab1f9dacSPaul Mackerras panic("numa.c: cannot allocate %lu bytes on node %d", 59545fb6ceaSAnton Blanchard size, new_nid); 596ab1f9dacSPaul Mackerras 59745fb6ceaSAnton Blanchard ret = __pa(ret); 598ab1f9dacSPaul Mackerras 599ab1f9dacSPaul Mackerras dbg("alloc_bootmem %lx %lx\n", ret, size); 600ab1f9dacSPaul Mackerras } 601ab1f9dacSPaul Mackerras 60245fb6ceaSAnton Blanchard return (void *)ret; 603ab1f9dacSPaul Mackerras } 604ab1f9dacSPaul Mackerras 605ab1f9dacSPaul Mackerras void __init do_init_bootmem(void) 606ab1f9dacSPaul Mackerras { 607ab1f9dacSPaul Mackerras int nid; 60845fb6ceaSAnton Blanchard unsigned int i; 609ab1f9dacSPaul Mackerras static struct notifier_block ppc64_numa_nb = { 610ab1f9dacSPaul Mackerras .notifier_call = cpu_numa_callback, 611ab1f9dacSPaul Mackerras .priority = 1 /* Must run before sched domains notifier. 
*/ 612ab1f9dacSPaul Mackerras }; 613ab1f9dacSPaul Mackerras 614ab1f9dacSPaul Mackerras min_low_pfn = 0; 615ab1f9dacSPaul Mackerras max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; 616ab1f9dacSPaul Mackerras max_pfn = max_low_pfn; 617ab1f9dacSPaul Mackerras 618ab1f9dacSPaul Mackerras if (parse_numa_properties()) 619ab1f9dacSPaul Mackerras setup_nonnuma(); 620ab1f9dacSPaul Mackerras else 6214b703a23SAnton Blanchard dump_numa_memory_topology(); 622ab1f9dacSPaul Mackerras 623ab1f9dacSPaul Mackerras register_cpu_notifier(&ppc64_numa_nb); 624ab1f9dacSPaul Mackerras 625ab1f9dacSPaul Mackerras for_each_online_node(nid) { 62645fb6ceaSAnton Blanchard unsigned long start_pfn, end_pfn, pages_present; 627ab1f9dacSPaul Mackerras unsigned long bootmem_paddr; 628ab1f9dacSPaul Mackerras unsigned long bootmap_pages; 629ab1f9dacSPaul Mackerras 63045fb6ceaSAnton Blanchard get_region(nid, &start_pfn, &end_pfn, &pages_present); 631ab1f9dacSPaul Mackerras 632ab1f9dacSPaul Mackerras /* Allocate the node structure node local if possible */ 63345fb6ceaSAnton Blanchard NODE_DATA(nid) = careful_allocation(nid, 634ab1f9dacSPaul Mackerras sizeof(struct pglist_data), 63545fb6ceaSAnton Blanchard SMP_CACHE_BYTES, end_pfn); 63645fb6ceaSAnton Blanchard NODE_DATA(nid) = __va(NODE_DATA(nid)); 637ab1f9dacSPaul Mackerras memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 638ab1f9dacSPaul Mackerras 639ab1f9dacSPaul Mackerras dbg("node %d\n", nid); 640ab1f9dacSPaul Mackerras dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); 641ab1f9dacSPaul Mackerras 642ab1f9dacSPaul Mackerras NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; 64345fb6ceaSAnton Blanchard NODE_DATA(nid)->node_start_pfn = start_pfn; 64445fb6ceaSAnton Blanchard NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; 645ab1f9dacSPaul Mackerras 646ab1f9dacSPaul Mackerras if (NODE_DATA(nid)->node_spanned_pages == 0) 647ab1f9dacSPaul Mackerras continue; 648ab1f9dacSPaul Mackerras 64945fb6ceaSAnton Blanchard dbg("start_paddr = %lx\n", start_pfn << 
PAGE_SHIFT); 65045fb6ceaSAnton Blanchard dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); 651ab1f9dacSPaul Mackerras 65245fb6ceaSAnton Blanchard bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 65345fb6ceaSAnton Blanchard bootmem_paddr = (unsigned long)careful_allocation(nid, 654ab1f9dacSPaul Mackerras bootmap_pages << PAGE_SHIFT, 65545fb6ceaSAnton Blanchard PAGE_SIZE, end_pfn); 65645fb6ceaSAnton Blanchard memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT); 65745fb6ceaSAnton Blanchard 658ab1f9dacSPaul Mackerras dbg("bootmap_paddr = %lx\n", bootmem_paddr); 659ab1f9dacSPaul Mackerras 660ab1f9dacSPaul Mackerras init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, 66145fb6ceaSAnton Blanchard start_pfn, end_pfn); 662ab1f9dacSPaul Mackerras 66345fb6ceaSAnton Blanchard /* Add free regions on this node */ 66445fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 66545fb6ceaSAnton Blanchard unsigned long start, end; 666ab1f9dacSPaul Mackerras 66745fb6ceaSAnton Blanchard if (init_node_data[i].nid != nid) 668ab1f9dacSPaul Mackerras continue; 669ab1f9dacSPaul Mackerras 67045fb6ceaSAnton Blanchard start = init_node_data[i].start_pfn << PAGE_SHIFT; 67145fb6ceaSAnton Blanchard end = init_node_data[i].end_pfn << PAGE_SHIFT; 672ab1f9dacSPaul Mackerras 67345fb6ceaSAnton Blanchard dbg("free_bootmem %lx %lx\n", start, end - start); 67445fb6ceaSAnton Blanchard free_bootmem_node(NODE_DATA(nid), start, end - start); 675ab1f9dacSPaul Mackerras } 676ab1f9dacSPaul Mackerras 67745fb6ceaSAnton Blanchard /* Mark reserved regions on this node */ 678ab1f9dacSPaul Mackerras for (i = 0; i < lmb.reserved.cnt; i++) { 679ab1f9dacSPaul Mackerras unsigned long physbase = lmb.reserved.region[i].base; 680ab1f9dacSPaul Mackerras unsigned long size = lmb.reserved.region[i].size; 68145fb6ceaSAnton Blanchard unsigned long start_paddr = start_pfn << PAGE_SHIFT; 68245fb6ceaSAnton Blanchard unsigned long end_paddr = end_pfn << PAGE_SHIFT; 683ab1f9dacSPaul Mackerras 
68445fb6ceaSAnton Blanchard if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid && 68545fb6ceaSAnton Blanchard early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid) 686ab1f9dacSPaul Mackerras continue; 687ab1f9dacSPaul Mackerras 688ab1f9dacSPaul Mackerras if (physbase < end_paddr && 689ab1f9dacSPaul Mackerras (physbase+size) > start_paddr) { 690ab1f9dacSPaul Mackerras /* overlaps */ 691ab1f9dacSPaul Mackerras if (physbase < start_paddr) { 692ab1f9dacSPaul Mackerras size -= start_paddr - physbase; 693ab1f9dacSPaul Mackerras physbase = start_paddr; 694ab1f9dacSPaul Mackerras } 695ab1f9dacSPaul Mackerras 696ab1f9dacSPaul Mackerras if (size > end_paddr - physbase) 697ab1f9dacSPaul Mackerras size = end_paddr - physbase; 698ab1f9dacSPaul Mackerras 699ab1f9dacSPaul Mackerras dbg("reserve_bootmem %lx %lx\n", physbase, 700ab1f9dacSPaul Mackerras size); 701ab1f9dacSPaul Mackerras reserve_bootmem_node(NODE_DATA(nid), physbase, 702ab1f9dacSPaul Mackerras size); 703ab1f9dacSPaul Mackerras } 704ab1f9dacSPaul Mackerras } 705ab1f9dacSPaul Mackerras 70645fb6ceaSAnton Blanchard /* Add regions into sparsemem */ 70745fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 70845fb6ceaSAnton Blanchard unsigned long start, end; 70945fb6ceaSAnton Blanchard 71045fb6ceaSAnton Blanchard if (init_node_data[i].nid != nid) 711ab1f9dacSPaul Mackerras continue; 712ab1f9dacSPaul Mackerras 71345fb6ceaSAnton Blanchard start = init_node_data[i].start_pfn; 71445fb6ceaSAnton Blanchard end = init_node_data[i].end_pfn; 715ab1f9dacSPaul Mackerras 71645fb6ceaSAnton Blanchard memory_present(nid, start, end); 717ab1f9dacSPaul Mackerras } 718ab1f9dacSPaul Mackerras } 719ab1f9dacSPaul Mackerras } 720ab1f9dacSPaul Mackerras 721ab1f9dacSPaul Mackerras void __init paging_init(void) 722ab1f9dacSPaul Mackerras { 723ab1f9dacSPaul Mackerras unsigned long zones_size[MAX_NR_ZONES]; 724ab1f9dacSPaul Mackerras unsigned long zholes_size[MAX_NR_ZONES]; 725ab1f9dacSPaul Mackerras int nid; 726ab1f9dacSPaul 
Mackerras 727ab1f9dacSPaul Mackerras memset(zones_size, 0, sizeof(zones_size)); 728ab1f9dacSPaul Mackerras memset(zholes_size, 0, sizeof(zholes_size)); 729ab1f9dacSPaul Mackerras 730ab1f9dacSPaul Mackerras for_each_online_node(nid) { 73145fb6ceaSAnton Blanchard unsigned long start_pfn, end_pfn, pages_present; 732ab1f9dacSPaul Mackerras 73345fb6ceaSAnton Blanchard get_region(nid, &start_pfn, &end_pfn, &pages_present); 734ab1f9dacSPaul Mackerras 735ab1f9dacSPaul Mackerras zones_size[ZONE_DMA] = end_pfn - start_pfn; 73645fb6ceaSAnton Blanchard zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present; 737ab1f9dacSPaul Mackerras 738ab1f9dacSPaul Mackerras dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid, 739ab1f9dacSPaul Mackerras zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]); 740ab1f9dacSPaul Mackerras 74145fb6ceaSAnton Blanchard free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn, 74245fb6ceaSAnton Blanchard zholes_size); 743ab1f9dacSPaul Mackerras } 744ab1f9dacSPaul Mackerras } 745ab1f9dacSPaul Mackerras 746ab1f9dacSPaul Mackerras static int __init early_numa(char *p) 747ab1f9dacSPaul Mackerras { 748ab1f9dacSPaul Mackerras if (!p) 749ab1f9dacSPaul Mackerras return 0; 750ab1f9dacSPaul Mackerras 751ab1f9dacSPaul Mackerras if (strstr(p, "off")) 752ab1f9dacSPaul Mackerras numa_enabled = 0; 753ab1f9dacSPaul Mackerras 754ab1f9dacSPaul Mackerras if (strstr(p, "debug")) 755ab1f9dacSPaul Mackerras numa_debug = 1; 756ab1f9dacSPaul Mackerras 757ab1f9dacSPaul Mackerras return 0; 758ab1f9dacSPaul Mackerras } 759ab1f9dacSPaul Mackerras early_param("numa", early_numa); 760237a0989SMike Kravetz 761237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG 762237a0989SMike Kravetz /* 763237a0989SMike Kravetz * Find the node associated with a hot added memory section. Section 764237a0989SMike Kravetz * corresponds to a SPARSEMEM section, not an LMB. It is assumed that 765237a0989SMike Kravetz * sections are fully contained within a single LMB. 
766237a0989SMike Kravetz */ 767237a0989SMike Kravetz int hot_add_scn_to_nid(unsigned long scn_addr) 768237a0989SMike Kravetz { 769237a0989SMike Kravetz struct device_node *memory = NULL; 770b226e462SMike Kravetz nodemask_t nodes; 771cf950b7aSNathan Lynch int nid = 0; 772237a0989SMike Kravetz 773237a0989SMike Kravetz if (!numa_enabled || (min_common_depth < 0)) 774cf950b7aSNathan Lynch return nid; 775237a0989SMike Kravetz 776237a0989SMike Kravetz while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 777237a0989SMike Kravetz unsigned long start, size; 778b226e462SMike Kravetz int ranges; 779237a0989SMike Kravetz unsigned int *memcell_buf; 780237a0989SMike Kravetz unsigned int len; 781237a0989SMike Kravetz 782237a0989SMike Kravetz memcell_buf = (unsigned int *)get_property(memory, "reg", &len); 783237a0989SMike Kravetz if (!memcell_buf || len <= 0) 784237a0989SMike Kravetz continue; 785237a0989SMike Kravetz 786cc5d0189SBenjamin Herrenschmidt /* ranges in cell */ 787cc5d0189SBenjamin Herrenschmidt ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); 788237a0989SMike Kravetz ha_new_range: 789237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 790237a0989SMike Kravetz size = read_n_cells(n_mem_size_cells, &memcell_buf); 791cf950b7aSNathan Lynch nid = of_node_to_nid(memory); 792237a0989SMike Kravetz 793237a0989SMike Kravetz /* Domains not present at boot default to 0 */ 794cf950b7aSNathan Lynch if (!node_online(nid)) 795cf950b7aSNathan Lynch nid = any_online_node(NODE_MASK_ALL); 796237a0989SMike Kravetz 797237a0989SMike Kravetz if ((scn_addr >= start) && (scn_addr < (start + size))) { 798237a0989SMike Kravetz of_node_put(memory); 799cf950b7aSNathan Lynch goto got_nid; 800237a0989SMike Kravetz } 801237a0989SMike Kravetz 802237a0989SMike Kravetz if (--ranges) /* process all ranges in cell */ 803237a0989SMike Kravetz goto ha_new_range; 804237a0989SMike Kravetz } 805237a0989SMike Kravetz BUG(); /* section address should be found 
above */ 806b226e462SMike Kravetz 807b226e462SMike Kravetz /* Temporary code to ensure that returned node is not empty */ 808cf950b7aSNathan Lynch got_nid: 809b226e462SMike Kravetz nodes_setall(nodes); 810cf950b7aSNathan Lynch while (NODE_DATA(nid)->node_spanned_pages == 0) { 811cf950b7aSNathan Lynch node_clear(nid, nodes); 812cf950b7aSNathan Lynch nid = any_online_node(nodes); 813b226e462SMike Kravetz } 814cf950b7aSNathan Lynch return nid; 815237a0989SMike Kravetz } 816237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */ 817