1ab1f9dacSPaul Mackerras /* 2ab1f9dacSPaul Mackerras * pSeries NUMA support 3ab1f9dacSPaul Mackerras * 4ab1f9dacSPaul Mackerras * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM 5ab1f9dacSPaul Mackerras * 6ab1f9dacSPaul Mackerras * This program is free software; you can redistribute it and/or 7ab1f9dacSPaul Mackerras * modify it under the terms of the GNU General Public License 8ab1f9dacSPaul Mackerras * as published by the Free Software Foundation; either version 9ab1f9dacSPaul Mackerras * 2 of the License, or (at your option) any later version. 10ab1f9dacSPaul Mackerras */ 11ab1f9dacSPaul Mackerras #include <linux/threads.h> 12ab1f9dacSPaul Mackerras #include <linux/bootmem.h> 13ab1f9dacSPaul Mackerras #include <linux/init.h> 14ab1f9dacSPaul Mackerras #include <linux/mm.h> 15ab1f9dacSPaul Mackerras #include <linux/mmzone.h> 16ab1f9dacSPaul Mackerras #include <linux/module.h> 17ab1f9dacSPaul Mackerras #include <linux/nodemask.h> 18ab1f9dacSPaul Mackerras #include <linux/cpu.h> 19ab1f9dacSPaul Mackerras #include <linux/notifier.h> 2045fb6ceaSAnton Blanchard #include <asm/sparsemem.h> 21ab1f9dacSPaul Mackerras #include <asm/lmb.h> 22cf00a8d1SPaul Mackerras #include <asm/system.h> 232249ca9dSPaul Mackerras #include <asm/smp.h> 24ab1f9dacSPaul Mackerras 25ab1f9dacSPaul Mackerras static int numa_enabled = 1; 26ab1f9dacSPaul Mackerras 27ab1f9dacSPaul Mackerras static int numa_debug; 28ab1f9dacSPaul Mackerras #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } 29ab1f9dacSPaul Mackerras 3045fb6ceaSAnton Blanchard int numa_cpu_lookup_table[NR_CPUS]; 31ab1f9dacSPaul Mackerras cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; 32ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES]; 3345fb6ceaSAnton Blanchard 3445fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpu_lookup_table); 3545fb6ceaSAnton Blanchard EXPORT_SYMBOL(numa_cpumask_lookup_table); 3645fb6ceaSAnton Blanchard EXPORT_SYMBOL(node_data); 3745fb6ceaSAnton Blanchard 3845fb6ceaSAnton Blanchard static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; 39ab1f9dacSPaul Mackerras static int min_common_depth; 40237a0989SMike Kravetz static int n_mem_addr_cells, n_mem_size_cells; 41ab1f9dacSPaul Mackerras 42ab1f9dacSPaul Mackerras /* 4345fb6ceaSAnton Blanchard * We need somewhere to store start/end/node for each region until we have 44ab1f9dacSPaul Mackerras * allocated the real node_data structures. 45ab1f9dacSPaul Mackerras */ 4645fb6ceaSAnton Blanchard #define MAX_REGIONS (MAX_LMB_REGIONS*2) 47ab1f9dacSPaul Mackerras static struct { 4845fb6ceaSAnton Blanchard unsigned long start_pfn; 4945fb6ceaSAnton Blanchard unsigned long end_pfn; 5045fb6ceaSAnton Blanchard int nid; 5145fb6ceaSAnton Blanchard } init_node_data[MAX_REGIONS] __initdata; 52ab1f9dacSPaul Mackerras 5345fb6ceaSAnton Blanchard int __init early_pfn_to_nid(unsigned long pfn) 5445fb6ceaSAnton Blanchard { 5545fb6ceaSAnton Blanchard unsigned int i; 5645fb6ceaSAnton Blanchard 5745fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 5845fb6ceaSAnton Blanchard unsigned long start_pfn = init_node_data[i].start_pfn; 5945fb6ceaSAnton Blanchard unsigned long end_pfn = init_node_data[i].end_pfn; 6045fb6ceaSAnton Blanchard 6145fb6ceaSAnton Blanchard if ((start_pfn <= pfn) && (pfn < end_pfn)) 6245fb6ceaSAnton Blanchard return init_node_data[i].nid; 6345fb6ceaSAnton Blanchard } 6445fb6ceaSAnton Blanchard 6545fb6ceaSAnton Blanchard return -1; 6645fb6ceaSAnton Blanchard } 6745fb6ceaSAnton Blanchard 6845fb6ceaSAnton Blanchard void __init add_region(unsigned int nid, unsigned long start_pfn, 6945fb6ceaSAnton Blanchard unsigned long pages) 7045fb6ceaSAnton Blanchard { 7145fb6ceaSAnton Blanchard unsigned int i; 7245fb6ceaSAnton Blanchard 7345fb6ceaSAnton Blanchard dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n", 7445fb6ceaSAnton Blanchard nid, start_pfn, pages); 7545fb6ceaSAnton Blanchard 7645fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 7745fb6ceaSAnton Blanchard if (init_node_data[i].nid != nid) 7845fb6ceaSAnton Blanchard continue; 7945fb6ceaSAnton Blanchard if (init_node_data[i].end_pfn == start_pfn) { 8045fb6ceaSAnton Blanchard init_node_data[i].end_pfn += pages; 8145fb6ceaSAnton Blanchard return; 8245fb6ceaSAnton Blanchard } 8345fb6ceaSAnton Blanchard if (init_node_data[i].start_pfn == (start_pfn + pages)) { 8445fb6ceaSAnton Blanchard init_node_data[i].start_pfn -= pages; 8545fb6ceaSAnton Blanchard return; 8645fb6ceaSAnton Blanchard } 8745fb6ceaSAnton Blanchard } 8845fb6ceaSAnton Blanchard 8945fb6ceaSAnton Blanchard /* 9045fb6ceaSAnton Blanchard * Leave last entry NULL so we dont iterate off the end (we use 9145fb6ceaSAnton Blanchard * entry.end_pfn to terminate the walk). 9245fb6ceaSAnton Blanchard */ 9345fb6ceaSAnton Blanchard if (i >= (MAX_REGIONS - 1)) { 9445fb6ceaSAnton Blanchard printk(KERN_ERR "WARNING: too many memory regions in " 9545fb6ceaSAnton Blanchard "numa code, truncating\n"); 9645fb6ceaSAnton Blanchard return; 9745fb6ceaSAnton Blanchard } 9845fb6ceaSAnton Blanchard 9945fb6ceaSAnton Blanchard init_node_data[i].start_pfn = start_pfn; 10045fb6ceaSAnton Blanchard init_node_data[i].end_pfn = start_pfn + pages; 10145fb6ceaSAnton Blanchard init_node_data[i].nid = nid; 10245fb6ceaSAnton Blanchard } 10345fb6ceaSAnton Blanchard 10445fb6ceaSAnton Blanchard /* We assume init_node_data has no overlapping regions */ 10545fb6ceaSAnton Blanchard void __init get_region(unsigned int nid, unsigned long *start_pfn, 10645fb6ceaSAnton Blanchard unsigned long *end_pfn, unsigned long *pages_present) 10745fb6ceaSAnton Blanchard { 10845fb6ceaSAnton Blanchard unsigned int i; 10945fb6ceaSAnton Blanchard 11045fb6ceaSAnton Blanchard *start_pfn = -1UL; 11145fb6ceaSAnton Blanchard *end_pfn = *pages_present = 0; 11245fb6ceaSAnton Blanchard 11345fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 11445fb6ceaSAnton Blanchard if (init_node_data[i].nid != nid) 11545fb6ceaSAnton Blanchard continue; 11645fb6ceaSAnton Blanchard 11745fb6ceaSAnton Blanchard *pages_present += init_node_data[i].end_pfn - 11845fb6ceaSAnton Blanchard init_node_data[i].start_pfn; 11945fb6ceaSAnton Blanchard 12045fb6ceaSAnton Blanchard if (init_node_data[i].start_pfn < *start_pfn) 12145fb6ceaSAnton Blanchard *start_pfn = init_node_data[i].start_pfn; 12245fb6ceaSAnton Blanchard 12345fb6ceaSAnton Blanchard if (init_node_data[i].end_pfn > *end_pfn) 12445fb6ceaSAnton Blanchard *end_pfn = init_node_data[i].end_pfn; 12545fb6ceaSAnton Blanchard } 12645fb6ceaSAnton Blanchard 12745fb6ceaSAnton Blanchard /* We didnt find a matching region, return start/end as 0 */ 12845fb6ceaSAnton Blanchard if (*start_pfn == -1UL) 1296d91bb93SMike Kravetz *start_pfn = 0; 13045fb6ceaSAnton Blanchard } 131ab1f9dacSPaul Mackerras 132ab1f9dacSPaul Mackerras static inline void map_cpu_to_node(int cpu, int node) 133ab1f9dacSPaul Mackerras { 134ab1f9dacSPaul Mackerras numa_cpu_lookup_table[cpu] = node; 13545fb6ceaSAnton Blanchard 13645fb6ceaSAnton Blanchard if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) 137ab1f9dacSPaul Mackerras cpu_set(cpu, numa_cpumask_lookup_table[node]); 138ab1f9dacSPaul Mackerras } 139ab1f9dacSPaul Mackerras 140ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU 141ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu) 142ab1f9dacSPaul Mackerras { 143ab1f9dacSPaul Mackerras int node = numa_cpu_lookup_table[cpu]; 144ab1f9dacSPaul Mackerras 145ab1f9dacSPaul Mackerras dbg("removing cpu %lu from node %d\n", cpu, node); 146ab1f9dacSPaul Mackerras 147ab1f9dacSPaul Mackerras if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { 148ab1f9dacSPaul Mackerras cpu_clear(cpu, numa_cpumask_lookup_table[node]); 149ab1f9dacSPaul Mackerras } else { 150ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", 151ab1f9dacSPaul Mackerras cpu, node); 152ab1f9dacSPaul Mackerras } 153ab1f9dacSPaul Mackerras } 154ab1f9dacSPaul Mackerras #endif /* CONFIG_HOTPLUG_CPU */ 155ab1f9dacSPaul Mackerras 15645fb6ceaSAnton Blanchard static struct device_node *find_cpu_node(unsigned int cpu) 157ab1f9dacSPaul Mackerras { 158ab1f9dacSPaul Mackerras unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); 159ab1f9dacSPaul Mackerras struct device_node *cpu_node = NULL; 160ab1f9dacSPaul Mackerras unsigned int *interrupt_server, *reg; 161ab1f9dacSPaul Mackerras int len; 162ab1f9dacSPaul Mackerras 163ab1f9dacSPaul Mackerras while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) { 164ab1f9dacSPaul Mackerras /* Try interrupt server first */ 165ab1f9dacSPaul Mackerras interrupt_server = (unsigned int *)get_property(cpu_node, 166ab1f9dacSPaul Mackerras "ibm,ppc-interrupt-server#s", &len); 167ab1f9dacSPaul Mackerras 168ab1f9dacSPaul Mackerras len = len / sizeof(u32); 169ab1f9dacSPaul Mackerras 170ab1f9dacSPaul Mackerras if (interrupt_server && (len > 0)) { 171ab1f9dacSPaul Mackerras while (len--) { 172ab1f9dacSPaul Mackerras if (interrupt_server[len] == hw_cpuid) 173ab1f9dacSPaul Mackerras return cpu_node; 174ab1f9dacSPaul Mackerras } 175ab1f9dacSPaul Mackerras } else { 176ab1f9dacSPaul Mackerras reg = (unsigned int *)get_property(cpu_node, 177ab1f9dacSPaul Mackerras "reg", &len); 178ab1f9dacSPaul Mackerras if (reg && (len > 0) && (reg[0] == hw_cpuid)) 179ab1f9dacSPaul Mackerras return cpu_node; 180ab1f9dacSPaul Mackerras } 181ab1f9dacSPaul Mackerras } 182ab1f9dacSPaul Mackerras 183ab1f9dacSPaul Mackerras return NULL; 184ab1f9dacSPaul Mackerras } 185ab1f9dacSPaul Mackerras 186ab1f9dacSPaul Mackerras /* must hold reference to node during call */ 187ab1f9dacSPaul Mackerras static int *of_get_associativity(struct device_node *dev) 188ab1f9dacSPaul Mackerras { 189ab1f9dacSPaul Mackerras return (unsigned int *)get_property(dev, "ibm,associativity", NULL); 190ab1f9dacSPaul Mackerras } 191ab1f9dacSPaul Mackerras 192ab1f9dacSPaul Mackerras static int of_node_numa_domain(struct device_node *device) 193ab1f9dacSPaul Mackerras { 194ab1f9dacSPaul Mackerras int numa_domain; 195ab1f9dacSPaul Mackerras unsigned int *tmp; 196ab1f9dacSPaul Mackerras 197ab1f9dacSPaul Mackerras if (min_common_depth == -1) 198ab1f9dacSPaul Mackerras return 0; 199ab1f9dacSPaul Mackerras 200ab1f9dacSPaul Mackerras tmp = of_get_associativity(device); 201ab1f9dacSPaul Mackerras if (tmp && (tmp[0] >= min_common_depth)) { 202ab1f9dacSPaul Mackerras numa_domain = tmp[min_common_depth]; 203ab1f9dacSPaul Mackerras } else { 204ab1f9dacSPaul Mackerras dbg("WARNING: no NUMA information for %s\n", 205ab1f9dacSPaul Mackerras device->full_name); 206ab1f9dacSPaul Mackerras numa_domain = 0; 207ab1f9dacSPaul Mackerras } 208ab1f9dacSPaul Mackerras return numa_domain; 209ab1f9dacSPaul Mackerras } 210ab1f9dacSPaul Mackerras 211ab1f9dacSPaul Mackerras /* 212ab1f9dacSPaul Mackerras * In theory, the "ibm,associativity" property may contain multiple 213ab1f9dacSPaul Mackerras * associativity lists because a resource may be multiply connected 214ab1f9dacSPaul Mackerras * into the machine. This resource then has different associativity 215ab1f9dacSPaul Mackerras * characteristics relative to its multiple connections. We ignore 216ab1f9dacSPaul Mackerras * this for now. We also assume that all cpu and memory sets have 217ab1f9dacSPaul Mackerras * their distances represented at a common level. This won't be 218ab1f9dacSPaul Mackerras * true for heirarchical NUMA. 219ab1f9dacSPaul Mackerras * 220ab1f9dacSPaul Mackerras * In any case the ibm,associativity-reference-points should give 221ab1f9dacSPaul Mackerras * the correct depth for a normal NUMA system. 222ab1f9dacSPaul Mackerras * 223ab1f9dacSPaul Mackerras * - Dave Hansen <haveblue@us.ibm.com> 224ab1f9dacSPaul Mackerras */ 225ab1f9dacSPaul Mackerras static int __init find_min_common_depth(void) 226ab1f9dacSPaul Mackerras { 227ab1f9dacSPaul Mackerras int depth; 228ab1f9dacSPaul Mackerras unsigned int *ref_points; 229ab1f9dacSPaul Mackerras struct device_node *rtas_root; 230ab1f9dacSPaul Mackerras unsigned int len; 231ab1f9dacSPaul Mackerras 232ab1f9dacSPaul Mackerras rtas_root = of_find_node_by_path("/rtas"); 233ab1f9dacSPaul Mackerras 234ab1f9dacSPaul Mackerras if (!rtas_root) 235ab1f9dacSPaul Mackerras return -1; 236ab1f9dacSPaul Mackerras 237ab1f9dacSPaul Mackerras /* 238ab1f9dacSPaul Mackerras * this property is 2 32-bit integers, each representing a level of 239ab1f9dacSPaul Mackerras * depth in the associativity nodes. The first is for an SMP 240ab1f9dacSPaul Mackerras * configuration (should be all 0's) and the second is for a normal 241ab1f9dacSPaul Mackerras * NUMA configuration. 242ab1f9dacSPaul Mackerras */ 243ab1f9dacSPaul Mackerras ref_points = (unsigned int *)get_property(rtas_root, 244ab1f9dacSPaul Mackerras "ibm,associativity-reference-points", &len); 245ab1f9dacSPaul Mackerras 246ab1f9dacSPaul Mackerras if ((len >= 1) && ref_points) { 247ab1f9dacSPaul Mackerras depth = ref_points[1]; 248ab1f9dacSPaul Mackerras } else { 249ab1f9dacSPaul Mackerras dbg("WARNING: could not find NUMA " 250ab1f9dacSPaul Mackerras "associativity reference point\n"); 251ab1f9dacSPaul Mackerras depth = -1; 252ab1f9dacSPaul Mackerras } 253ab1f9dacSPaul Mackerras of_node_put(rtas_root); 254ab1f9dacSPaul Mackerras 255ab1f9dacSPaul Mackerras return depth; 256ab1f9dacSPaul Mackerras } 257ab1f9dacSPaul Mackerras 25884c9fdd1SMike Kravetz static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells) 259ab1f9dacSPaul Mackerras { 260ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 261ab1f9dacSPaul Mackerras 262ab1f9dacSPaul Mackerras memory = of_find_node_by_type(memory, "memory"); 26354c23310SPaul Mackerras if (!memory) 26484c9fdd1SMike Kravetz panic("numa.c: No memory nodes found!"); 26554c23310SPaul Mackerras 26684c9fdd1SMike Kravetz *n_addr_cells = prom_n_addr_cells(memory); 26784c9fdd1SMike Kravetz *n_size_cells = prom_n_size_cells(memory); 26884c9fdd1SMike Kravetz of_node_put(memory); 269ab1f9dacSPaul Mackerras } 270ab1f9dacSPaul Mackerras 271237a0989SMike Kravetz static unsigned long __devinit read_n_cells(int n, unsigned int **buf) 272ab1f9dacSPaul Mackerras { 273ab1f9dacSPaul Mackerras unsigned long result = 0; 274ab1f9dacSPaul Mackerras 275ab1f9dacSPaul Mackerras while (n--) { 276ab1f9dacSPaul Mackerras result = (result << 32) | **buf; 277ab1f9dacSPaul Mackerras (*buf)++; 278ab1f9dacSPaul Mackerras } 279ab1f9dacSPaul Mackerras return result; 280ab1f9dacSPaul Mackerras } 281ab1f9dacSPaul Mackerras 282ab1f9dacSPaul Mackerras /* 283ab1f9dacSPaul Mackerras * Figure out to which domain a cpu belongs and stick it there. 284ab1f9dacSPaul Mackerras * Return the id of the domain used. 285ab1f9dacSPaul Mackerras */ 286ab1f9dacSPaul Mackerras static int numa_setup_cpu(unsigned long lcpu) 287ab1f9dacSPaul Mackerras { 288ab1f9dacSPaul Mackerras int numa_domain = 0; 289ab1f9dacSPaul Mackerras struct device_node *cpu = find_cpu_node(lcpu); 290ab1f9dacSPaul Mackerras 291ab1f9dacSPaul Mackerras if (!cpu) { 292ab1f9dacSPaul Mackerras WARN_ON(1); 293ab1f9dacSPaul Mackerras goto out; 294ab1f9dacSPaul Mackerras } 295ab1f9dacSPaul Mackerras 296ab1f9dacSPaul Mackerras numa_domain = of_node_numa_domain(cpu); 297ab1f9dacSPaul Mackerras 298ab1f9dacSPaul Mackerras if (numa_domain >= num_online_nodes()) { 299ab1f9dacSPaul Mackerras /* 300ab1f9dacSPaul Mackerras * POWER4 LPAR uses 0xffff as invalid node, 301ab1f9dacSPaul Mackerras * dont warn in this case. 302ab1f9dacSPaul Mackerras */ 303ab1f9dacSPaul Mackerras if (numa_domain != 0xffff) 304ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: cpu %ld " 305ab1f9dacSPaul Mackerras "maps to invalid NUMA node %d\n", 306ab1f9dacSPaul Mackerras lcpu, numa_domain); 307ab1f9dacSPaul Mackerras numa_domain = 0; 308ab1f9dacSPaul Mackerras } 309ab1f9dacSPaul Mackerras out: 310ab1f9dacSPaul Mackerras node_set_online(numa_domain); 311ab1f9dacSPaul Mackerras 312ab1f9dacSPaul Mackerras map_cpu_to_node(lcpu, numa_domain); 313ab1f9dacSPaul Mackerras 314ab1f9dacSPaul Mackerras of_node_put(cpu); 315ab1f9dacSPaul Mackerras 316ab1f9dacSPaul Mackerras return numa_domain; 317ab1f9dacSPaul Mackerras } 318ab1f9dacSPaul Mackerras 319ab1f9dacSPaul Mackerras static int cpu_numa_callback(struct notifier_block *nfb, 320ab1f9dacSPaul Mackerras unsigned long action, 321ab1f9dacSPaul Mackerras void *hcpu) 322ab1f9dacSPaul Mackerras { 323ab1f9dacSPaul Mackerras unsigned long lcpu = (unsigned long)hcpu; 324ab1f9dacSPaul Mackerras int ret = NOTIFY_DONE; 325ab1f9dacSPaul Mackerras 326ab1f9dacSPaul Mackerras switch (action) { 327ab1f9dacSPaul Mackerras case CPU_UP_PREPARE: 328ab1f9dacSPaul Mackerras if (min_common_depth == -1 || !numa_enabled) 329ab1f9dacSPaul Mackerras map_cpu_to_node(lcpu, 0); 330ab1f9dacSPaul Mackerras else 331ab1f9dacSPaul Mackerras numa_setup_cpu(lcpu); 332ab1f9dacSPaul Mackerras ret = NOTIFY_OK; 333ab1f9dacSPaul Mackerras break; 334ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU 335ab1f9dacSPaul Mackerras case CPU_DEAD: 336ab1f9dacSPaul Mackerras case CPU_UP_CANCELED: 337ab1f9dacSPaul Mackerras unmap_cpu_from_node(lcpu); 338ab1f9dacSPaul Mackerras break; 339ab1f9dacSPaul Mackerras ret = NOTIFY_OK; 340ab1f9dacSPaul Mackerras #endif 341ab1f9dacSPaul Mackerras } 342ab1f9dacSPaul Mackerras return ret; 343ab1f9dacSPaul Mackerras } 344ab1f9dacSPaul Mackerras 345ab1f9dacSPaul Mackerras /* 346ab1f9dacSPaul Mackerras * Check and possibly modify a memory region to enforce the memory limit. 347ab1f9dacSPaul Mackerras * 348ab1f9dacSPaul Mackerras * Returns the size the region should have to enforce the memory limit. 349ab1f9dacSPaul Mackerras * This will either be the original value of size, a truncated value, 350ab1f9dacSPaul Mackerras * or zero. If the returned value of size is 0 the region should be 351ab1f9dacSPaul Mackerras * discarded as it lies wholy above the memory limit. 352ab1f9dacSPaul Mackerras */ 35345fb6ceaSAnton Blanchard static unsigned long __init numa_enforce_memory_limit(unsigned long start, 35445fb6ceaSAnton Blanchard unsigned long size) 355ab1f9dacSPaul Mackerras { 356ab1f9dacSPaul Mackerras /* 357ab1f9dacSPaul Mackerras * We use lmb_end_of_DRAM() in here instead of memory_limit because 358ab1f9dacSPaul Mackerras * we've already adjusted it for the limit and it takes care of 359ab1f9dacSPaul Mackerras * having memory holes below the limit. 360ab1f9dacSPaul Mackerras */ 361ab1f9dacSPaul Mackerras 362ab1f9dacSPaul Mackerras if (! memory_limit) 363ab1f9dacSPaul Mackerras return size; 364ab1f9dacSPaul Mackerras 365ab1f9dacSPaul Mackerras if (start + size <= lmb_end_of_DRAM()) 366ab1f9dacSPaul Mackerras return size; 367ab1f9dacSPaul Mackerras 368ab1f9dacSPaul Mackerras if (start >= lmb_end_of_DRAM()) 369ab1f9dacSPaul Mackerras return 0; 370ab1f9dacSPaul Mackerras 371ab1f9dacSPaul Mackerras return lmb_end_of_DRAM() - start; 372ab1f9dacSPaul Mackerras } 373ab1f9dacSPaul Mackerras 374ab1f9dacSPaul Mackerras static int __init parse_numa_properties(void) 375ab1f9dacSPaul Mackerras { 376ab1f9dacSPaul Mackerras struct device_node *cpu = NULL; 377ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 378*c08888cfSNathan Lynch int max_domain = 0; 379ab1f9dacSPaul Mackerras unsigned long i; 380ab1f9dacSPaul Mackerras 381ab1f9dacSPaul Mackerras if (numa_enabled == 0) { 382ab1f9dacSPaul Mackerras printk(KERN_WARNING "NUMA disabled by user\n"); 383ab1f9dacSPaul Mackerras return -1; 384ab1f9dacSPaul Mackerras } 385ab1f9dacSPaul Mackerras 386ab1f9dacSPaul Mackerras min_common_depth = find_min_common_depth(); 387ab1f9dacSPaul Mackerras 388ab1f9dacSPaul Mackerras dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); 389ab1f9dacSPaul Mackerras if (min_common_depth < 0) 390ab1f9dacSPaul Mackerras return min_common_depth; 391ab1f9dacSPaul Mackerras 392ab1f9dacSPaul Mackerras /* 393ab1f9dacSPaul Mackerras * Even though we connect cpus to numa domains later in SMP init, 394ab1f9dacSPaul Mackerras * we need to know the maximum node id now. This is because each 395ab1f9dacSPaul Mackerras * node id must have NODE_DATA etc backing it. 396ab1f9dacSPaul Mackerras * As a result of hotplug we could still have cpus appear later on 397ab1f9dacSPaul Mackerras * with larger node ids. In that case we force the cpu into node 0. 398ab1f9dacSPaul Mackerras */ 399ab1f9dacSPaul Mackerras for_each_cpu(i) { 400ab1f9dacSPaul Mackerras int numa_domain; 401ab1f9dacSPaul Mackerras 402ab1f9dacSPaul Mackerras cpu = find_cpu_node(i); 403ab1f9dacSPaul Mackerras 404ab1f9dacSPaul Mackerras if (cpu) { 405ab1f9dacSPaul Mackerras numa_domain = of_node_numa_domain(cpu); 406ab1f9dacSPaul Mackerras of_node_put(cpu); 407ab1f9dacSPaul Mackerras 408ab1f9dacSPaul Mackerras if (numa_domain < MAX_NUMNODES && 409ab1f9dacSPaul Mackerras max_domain < numa_domain) 410ab1f9dacSPaul Mackerras max_domain = numa_domain; 411ab1f9dacSPaul Mackerras } 412ab1f9dacSPaul Mackerras } 413ab1f9dacSPaul Mackerras 414237a0989SMike Kravetz get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); 415ab1f9dacSPaul Mackerras memory = NULL; 416ab1f9dacSPaul Mackerras while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 417ab1f9dacSPaul Mackerras unsigned long start; 418ab1f9dacSPaul Mackerras unsigned long size; 419ab1f9dacSPaul Mackerras int numa_domain; 420ab1f9dacSPaul Mackerras int ranges; 421ab1f9dacSPaul Mackerras unsigned int *memcell_buf; 422ab1f9dacSPaul Mackerras unsigned int len; 423ab1f9dacSPaul Mackerras 424ba759485SMichael Ellerman memcell_buf = (unsigned int *)get_property(memory, 425ba759485SMichael Ellerman "linux,usable-memory", &len); 426ba759485SMichael Ellerman if (!memcell_buf || len <= 0) 427ba759485SMichael Ellerman memcell_buf = 428ba759485SMichael Ellerman (unsigned int *)get_property(memory, "reg", 429ba759485SMichael Ellerman &len); 430ab1f9dacSPaul Mackerras if (!memcell_buf || len <= 0) 431ab1f9dacSPaul Mackerras continue; 432ab1f9dacSPaul Mackerras 433cc5d0189SBenjamin Herrenschmidt /* ranges in cell */ 434cc5d0189SBenjamin Herrenschmidt ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); 435ab1f9dacSPaul Mackerras new_range: 436ab1f9dacSPaul Mackerras /* these are order-sensitive, and modify the buffer pointer */ 437237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 438237a0989SMike Kravetz size = read_n_cells(n_mem_size_cells, &memcell_buf); 439ab1f9dacSPaul Mackerras 440ab1f9dacSPaul Mackerras numa_domain = of_node_numa_domain(memory); 441ab1f9dacSPaul Mackerras 442ab1f9dacSPaul Mackerras if (numa_domain >= MAX_NUMNODES) { 443ab1f9dacSPaul Mackerras if (numa_domain != 0xffff) 444ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: memory at %lx maps " 445ab1f9dacSPaul Mackerras "to invalid NUMA node %d\n", start, 446ab1f9dacSPaul Mackerras numa_domain); 447ab1f9dacSPaul Mackerras numa_domain = 0; 448ab1f9dacSPaul Mackerras } 449ab1f9dacSPaul Mackerras 450ab1f9dacSPaul Mackerras if (max_domain < numa_domain) 451ab1f9dacSPaul Mackerras max_domain = numa_domain; 452ab1f9dacSPaul Mackerras 453ab1f9dacSPaul Mackerras if (!(size = numa_enforce_memory_limit(start, size))) { 454ab1f9dacSPaul Mackerras if (--ranges) 455ab1f9dacSPaul Mackerras goto new_range; 456ab1f9dacSPaul Mackerras else 457ab1f9dacSPaul Mackerras continue; 458ab1f9dacSPaul Mackerras } 459ab1f9dacSPaul Mackerras 46045fb6ceaSAnton Blanchard add_region(numa_domain, start >> PAGE_SHIFT, 46145fb6ceaSAnton Blanchard size >> PAGE_SHIFT); 462ab1f9dacSPaul Mackerras 463ab1f9dacSPaul Mackerras if (--ranges) 464ab1f9dacSPaul Mackerras goto new_range; 465ab1f9dacSPaul Mackerras } 466ab1f9dacSPaul Mackerras 467ab1f9dacSPaul Mackerras for (i = 0; i <= max_domain; i++) 468ab1f9dacSPaul Mackerras node_set_online(i); 469ab1f9dacSPaul Mackerras 470*c08888cfSNathan Lynch max_domain = numa_setup_cpu(boot_cpuid); 471*c08888cfSNathan Lynch 472ab1f9dacSPaul Mackerras return 0; 473ab1f9dacSPaul Mackerras } 474ab1f9dacSPaul Mackerras 475ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void) 476ab1f9dacSPaul Mackerras { 477ab1f9dacSPaul Mackerras unsigned long top_of_ram = lmb_end_of_DRAM(); 478ab1f9dacSPaul Mackerras unsigned long total_ram = lmb_phys_mem_size(); 479fb6d73d3SPaul Mackerras unsigned int i; 480ab1f9dacSPaul Mackerras 481ab1f9dacSPaul Mackerras printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", 482ab1f9dacSPaul Mackerras top_of_ram, total_ram); 483ab1f9dacSPaul Mackerras printk(KERN_INFO "Memory hole size: %ldMB\n", 484ab1f9dacSPaul Mackerras (top_of_ram - total_ram) >> 20); 485ab1f9dacSPaul Mackerras 486ab1f9dacSPaul Mackerras map_cpu_to_node(boot_cpuid, 0); 487fb6d73d3SPaul Mackerras for (i = 0; i < lmb.memory.cnt; ++i) 488fb6d73d3SPaul Mackerras add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT, 489fb6d73d3SPaul Mackerras lmb_size_pages(&lmb.memory, i)); 490ab1f9dacSPaul Mackerras node_set_online(0); 491ab1f9dacSPaul Mackerras } 492ab1f9dacSPaul Mackerras 4934b703a23SAnton Blanchard void __init dump_numa_cpu_topology(void) 4944b703a23SAnton Blanchard { 4954b703a23SAnton Blanchard unsigned int node; 4964b703a23SAnton Blanchard unsigned int cpu, count; 4974b703a23SAnton Blanchard 4984b703a23SAnton Blanchard if (min_common_depth == -1 || !numa_enabled) 4994b703a23SAnton Blanchard return; 5004b703a23SAnton Blanchard 5014b703a23SAnton Blanchard for_each_online_node(node) { 5024b703a23SAnton Blanchard printk(KERN_INFO "Node %d CPUs:", node); 5034b703a23SAnton Blanchard 5044b703a23SAnton Blanchard count = 0; 5054b703a23SAnton Blanchard /* 5064b703a23SAnton Blanchard * If we used a CPU iterator here we would miss printing 5074b703a23SAnton Blanchard * the holes in the cpumap. 5084b703a23SAnton Blanchard */ 5094b703a23SAnton Blanchard for (cpu = 0; cpu < NR_CPUS; cpu++) { 5104b703a23SAnton Blanchard if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { 5114b703a23SAnton Blanchard if (count == 0) 5124b703a23SAnton Blanchard printk(" %u", cpu); 5134b703a23SAnton Blanchard ++count; 5144b703a23SAnton Blanchard } else { 5154b703a23SAnton Blanchard if (count > 1) 5164b703a23SAnton Blanchard printk("-%u", cpu - 1); 5174b703a23SAnton Blanchard count = 0; 5184b703a23SAnton Blanchard } 5194b703a23SAnton Blanchard } 5204b703a23SAnton Blanchard 5214b703a23SAnton Blanchard if (count > 1) 5224b703a23SAnton Blanchard printk("-%u", NR_CPUS - 1); 5234b703a23SAnton Blanchard printk("\n"); 5244b703a23SAnton Blanchard } 5254b703a23SAnton Blanchard } 5264b703a23SAnton Blanchard 5274b703a23SAnton Blanchard static void __init dump_numa_memory_topology(void) 528ab1f9dacSPaul Mackerras { 529ab1f9dacSPaul Mackerras unsigned int node; 530ab1f9dacSPaul Mackerras unsigned int count; 531ab1f9dacSPaul Mackerras 532ab1f9dacSPaul Mackerras if (min_common_depth == -1 || !numa_enabled) 533ab1f9dacSPaul Mackerras return; 534ab1f9dacSPaul Mackerras 535ab1f9dacSPaul Mackerras for_each_online_node(node) { 536ab1f9dacSPaul Mackerras unsigned long i; 537ab1f9dacSPaul Mackerras 538ab1f9dacSPaul Mackerras printk(KERN_INFO "Node %d Memory:", node); 539ab1f9dacSPaul Mackerras 540ab1f9dacSPaul Mackerras count = 0; 541ab1f9dacSPaul Mackerras 54245fb6ceaSAnton Blanchard for (i = 0; i < lmb_end_of_DRAM(); 54345fb6ceaSAnton Blanchard i += (1 << SECTION_SIZE_BITS)) { 54445fb6ceaSAnton Blanchard if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { 545ab1f9dacSPaul Mackerras if (count == 0) 546ab1f9dacSPaul Mackerras printk(" 0x%lx", i); 547ab1f9dacSPaul Mackerras ++count; 548ab1f9dacSPaul Mackerras } else { 549ab1f9dacSPaul Mackerras if (count > 0) 550ab1f9dacSPaul Mackerras printk("-0x%lx", i); 551ab1f9dacSPaul Mackerras count = 0; 552ab1f9dacSPaul Mackerras } 553ab1f9dacSPaul Mackerras } 554ab1f9dacSPaul Mackerras 555ab1f9dacSPaul Mackerras if (count > 0) 556ab1f9dacSPaul Mackerras printk("-0x%lx", i); 557ab1f9dacSPaul Mackerras printk("\n"); 558ab1f9dacSPaul Mackerras } 559ab1f9dacSPaul Mackerras } 560ab1f9dacSPaul Mackerras 561ab1f9dacSPaul Mackerras /* 562ab1f9dacSPaul Mackerras * Allocate some memory, satisfying the lmb or bootmem allocator where 563ab1f9dacSPaul Mackerras * required. nid is the preferred node and end is the physical address of 564ab1f9dacSPaul Mackerras * the highest address in the node. 565ab1f9dacSPaul Mackerras * 566ab1f9dacSPaul Mackerras * Returns the physical address of the memory. 567ab1f9dacSPaul Mackerras */ 56845fb6ceaSAnton Blanchard static void __init *careful_allocation(int nid, unsigned long size, 56945fb6ceaSAnton Blanchard unsigned long align, 57045fb6ceaSAnton Blanchard unsigned long end_pfn) 571ab1f9dacSPaul Mackerras { 57245fb6ceaSAnton Blanchard int new_nid; 573d7a5b2ffSMichael Ellerman unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); 574ab1f9dacSPaul Mackerras 575ab1f9dacSPaul Mackerras /* retry over all memory */ 576ab1f9dacSPaul Mackerras if (!ret) 577d7a5b2ffSMichael Ellerman ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM()); 578ab1f9dacSPaul Mackerras 579ab1f9dacSPaul Mackerras if (!ret) 580ab1f9dacSPaul Mackerras panic("numa.c: cannot allocate %lu bytes on node %d", 581ab1f9dacSPaul Mackerras size, nid); 582ab1f9dacSPaul Mackerras 583ab1f9dacSPaul Mackerras /* 584ab1f9dacSPaul Mackerras * If the memory came from a previously allocated node, we must 585ab1f9dacSPaul Mackerras * retry with the bootmem allocator. 586ab1f9dacSPaul Mackerras */ 58745fb6ceaSAnton Blanchard new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT); 58845fb6ceaSAnton Blanchard if (new_nid < nid) { 58945fb6ceaSAnton Blanchard ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid), 590ab1f9dacSPaul Mackerras size, align, 0); 591ab1f9dacSPaul Mackerras 592ab1f9dacSPaul Mackerras if (!ret) 593ab1f9dacSPaul Mackerras panic("numa.c: cannot allocate %lu bytes on node %d", 59445fb6ceaSAnton Blanchard size, new_nid); 595ab1f9dacSPaul Mackerras 59645fb6ceaSAnton Blanchard ret = __pa(ret); 597ab1f9dacSPaul Mackerras 598ab1f9dacSPaul Mackerras dbg("alloc_bootmem %lx %lx\n", ret, size); 599ab1f9dacSPaul Mackerras } 600ab1f9dacSPaul Mackerras 60145fb6ceaSAnton Blanchard return (void *)ret; 602ab1f9dacSPaul Mackerras } 603ab1f9dacSPaul Mackerras 604ab1f9dacSPaul Mackerras void __init do_init_bootmem(void) 605ab1f9dacSPaul Mackerras { 606ab1f9dacSPaul Mackerras int nid; 60745fb6ceaSAnton Blanchard unsigned int i; 608ab1f9dacSPaul Mackerras static struct notifier_block ppc64_numa_nb = { 609ab1f9dacSPaul Mackerras .notifier_call = cpu_numa_callback, 610ab1f9dacSPaul Mackerras .priority = 1 /* Must run before sched domains notifier. */ 611ab1f9dacSPaul Mackerras }; 612ab1f9dacSPaul Mackerras 613ab1f9dacSPaul Mackerras min_low_pfn = 0; 614ab1f9dacSPaul Mackerras max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; 615ab1f9dacSPaul Mackerras max_pfn = max_low_pfn; 616ab1f9dacSPaul Mackerras 617ab1f9dacSPaul Mackerras if (parse_numa_properties()) 618ab1f9dacSPaul Mackerras setup_nonnuma(); 619ab1f9dacSPaul Mackerras else 6204b703a23SAnton Blanchard dump_numa_memory_topology(); 621ab1f9dacSPaul Mackerras 622ab1f9dacSPaul Mackerras register_cpu_notifier(&ppc64_numa_nb); 623ab1f9dacSPaul Mackerras 624ab1f9dacSPaul Mackerras for_each_online_node(nid) { 62545fb6ceaSAnton Blanchard unsigned long start_pfn, end_pfn, pages_present; 626ab1f9dacSPaul Mackerras unsigned long bootmem_paddr; 627ab1f9dacSPaul Mackerras unsigned long bootmap_pages; 628ab1f9dacSPaul Mackerras 62945fb6ceaSAnton Blanchard get_region(nid, &start_pfn, &end_pfn, &pages_present); 630ab1f9dacSPaul Mackerras 631ab1f9dacSPaul Mackerras /* Allocate the node structure node local if possible */ 63245fb6ceaSAnton Blanchard NODE_DATA(nid) = careful_allocation(nid, 633ab1f9dacSPaul Mackerras sizeof(struct pglist_data), 63445fb6ceaSAnton Blanchard SMP_CACHE_BYTES, end_pfn); 63545fb6ceaSAnton Blanchard NODE_DATA(nid) = __va(NODE_DATA(nid)); 636ab1f9dacSPaul Mackerras memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 637ab1f9dacSPaul Mackerras 638ab1f9dacSPaul Mackerras dbg("node %d\n", nid); 639ab1f9dacSPaul Mackerras dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); 640ab1f9dacSPaul Mackerras 641ab1f9dacSPaul Mackerras NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; 64245fb6ceaSAnton Blanchard NODE_DATA(nid)->node_start_pfn = start_pfn; 64345fb6ceaSAnton Blanchard NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; 644ab1f9dacSPaul Mackerras 645ab1f9dacSPaul Mackerras if (NODE_DATA(nid)->node_spanned_pages == 0) 646ab1f9dacSPaul Mackerras continue; 647ab1f9dacSPaul Mackerras 64845fb6ceaSAnton Blanchard dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); 64945fb6ceaSAnton Blanchard dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); 650ab1f9dacSPaul Mackerras 65145fb6ceaSAnton Blanchard bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 65245fb6ceaSAnton Blanchard bootmem_paddr = (unsigned long)careful_allocation(nid, 653ab1f9dacSPaul Mackerras bootmap_pages << PAGE_SHIFT, 65445fb6ceaSAnton Blanchard PAGE_SIZE, end_pfn); 65545fb6ceaSAnton Blanchard memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT); 65645fb6ceaSAnton Blanchard 657ab1f9dacSPaul Mackerras dbg("bootmap_paddr = %lx\n", bootmem_paddr); 658ab1f9dacSPaul Mackerras 659ab1f9dacSPaul Mackerras init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, 66045fb6ceaSAnton Blanchard start_pfn, end_pfn); 661ab1f9dacSPaul Mackerras 66245fb6ceaSAnton Blanchard /* Add free regions on this node */ 66345fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 66445fb6ceaSAnton Blanchard unsigned long start, end; 665ab1f9dacSPaul Mackerras 66645fb6ceaSAnton Blanchard if (init_node_data[i].nid != nid) 667ab1f9dacSPaul Mackerras continue; 668ab1f9dacSPaul Mackerras 66945fb6ceaSAnton Blanchard start = init_node_data[i].start_pfn << PAGE_SHIFT; 67045fb6ceaSAnton Blanchard end = init_node_data[i].end_pfn << PAGE_SHIFT; 671ab1f9dacSPaul Mackerras 67245fb6ceaSAnton Blanchard dbg("free_bootmem %lx %lx\n", start, end - start); 67345fb6ceaSAnton Blanchard free_bootmem_node(NODE_DATA(nid), start, end - start); 674ab1f9dacSPaul Mackerras } 675ab1f9dacSPaul Mackerras 67645fb6ceaSAnton Blanchard /* Mark reserved regions on this node */ 677ab1f9dacSPaul Mackerras for (i = 0; i < lmb.reserved.cnt; i++) { 678ab1f9dacSPaul Mackerras unsigned long physbase = lmb.reserved.region[i].base; 679ab1f9dacSPaul Mackerras unsigned long size = lmb.reserved.region[i].size; 68045fb6ceaSAnton Blanchard unsigned long start_paddr = start_pfn << PAGE_SHIFT; 68145fb6ceaSAnton Blanchard unsigned long end_paddr = end_pfn << PAGE_SHIFT; 682ab1f9dacSPaul Mackerras 68345fb6ceaSAnton Blanchard if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid && 68445fb6ceaSAnton Blanchard early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid) 685ab1f9dacSPaul Mackerras continue; 686ab1f9dacSPaul Mackerras 687ab1f9dacSPaul Mackerras if (physbase < end_paddr && 688ab1f9dacSPaul Mackerras (physbase+size) > start_paddr) { 689ab1f9dacSPaul Mackerras /* overlaps */ 690ab1f9dacSPaul Mackerras if (physbase < start_paddr) { 691ab1f9dacSPaul Mackerras size -= start_paddr - physbase; 692ab1f9dacSPaul Mackerras physbase = start_paddr; 693ab1f9dacSPaul Mackerras } 694ab1f9dacSPaul Mackerras 695ab1f9dacSPaul Mackerras if (size > end_paddr - physbase) 696ab1f9dacSPaul Mackerras size = end_paddr - physbase; 697ab1f9dacSPaul Mackerras 698ab1f9dacSPaul Mackerras dbg("reserve_bootmem %lx %lx\n", physbase, 699ab1f9dacSPaul Mackerras size); 700ab1f9dacSPaul Mackerras reserve_bootmem_node(NODE_DATA(nid), physbase, 701ab1f9dacSPaul Mackerras size); 702ab1f9dacSPaul Mackerras } 703ab1f9dacSPaul Mackerras } 704ab1f9dacSPaul Mackerras 70545fb6ceaSAnton Blanchard /* Add regions into sparsemem */ 70645fb6ceaSAnton Blanchard for (i = 0; init_node_data[i].end_pfn; i++) { 70745fb6ceaSAnton Blanchard unsigned long start, end; 70845fb6ceaSAnton Blanchard 70945fb6ceaSAnton Blanchard if (init_node_data[i].nid != nid) 710ab1f9dacSPaul Mackerras continue; 711ab1f9dacSPaul Mackerras 71245fb6ceaSAnton Blanchard start = init_node_data[i].start_pfn; 71345fb6ceaSAnton Blanchard end = init_node_data[i].end_pfn; 714ab1f9dacSPaul Mackerras 71545fb6ceaSAnton Blanchard memory_present(nid, start, end); 716ab1f9dacSPaul Mackerras } 717ab1f9dacSPaul Mackerras } 718ab1f9dacSPaul Mackerras } 719ab1f9dacSPaul Mackerras 720ab1f9dacSPaul Mackerras void __init paging_init(void) 721ab1f9dacSPaul Mackerras { 722ab1f9dacSPaul Mackerras unsigned long zones_size[MAX_NR_ZONES]; 723ab1f9dacSPaul Mackerras unsigned long zholes_size[MAX_NR_ZONES]; 724ab1f9dacSPaul Mackerras int nid; 725ab1f9dacSPaul Mackerras 726ab1f9dacSPaul Mackerras memset(zones_size, 0, sizeof(zones_size)); 727ab1f9dacSPaul Mackerras memset(zholes_size, 0, sizeof(zholes_size)); 728ab1f9dacSPaul Mackerras 729ab1f9dacSPaul Mackerras for_each_online_node(nid) { 73045fb6ceaSAnton Blanchard unsigned long start_pfn, end_pfn, pages_present; 731ab1f9dacSPaul Mackerras 73245fb6ceaSAnton Blanchard get_region(nid, &start_pfn, &end_pfn, &pages_present); 733ab1f9dacSPaul Mackerras 734ab1f9dacSPaul Mackerras zones_size[ZONE_DMA] = end_pfn - start_pfn; 73545fb6ceaSAnton Blanchard zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present; 736ab1f9dacSPaul Mackerras 737ab1f9dacSPaul Mackerras dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid, 738ab1f9dacSPaul Mackerras zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]); 739ab1f9dacSPaul Mackerras 74045fb6ceaSAnton Blanchard free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn, 74145fb6ceaSAnton Blanchard zholes_size); 742ab1f9dacSPaul Mackerras } 743ab1f9dacSPaul Mackerras } 744ab1f9dacSPaul Mackerras 745ab1f9dacSPaul Mackerras static int __init early_numa(char *p) 746ab1f9dacSPaul Mackerras { 747ab1f9dacSPaul Mackerras if (!p) 748ab1f9dacSPaul Mackerras return 0; 749ab1f9dacSPaul Mackerras 750ab1f9dacSPaul Mackerras if (strstr(p, "off")) 751ab1f9dacSPaul Mackerras numa_enabled = 0; 752ab1f9dacSPaul Mackerras 753ab1f9dacSPaul Mackerras if (strstr(p, "debug")) 754ab1f9dacSPaul Mackerras numa_debug = 1; 755ab1f9dacSPaul Mackerras 756ab1f9dacSPaul Mackerras return 0; 757ab1f9dacSPaul Mackerras } 758ab1f9dacSPaul Mackerras early_param("numa", early_numa); 759237a0989SMike Kravetz 760237a0989SMike Kravetz #ifdef CONFIG_MEMORY_HOTPLUG 761237a0989SMike Kravetz /* 762237a0989SMike Kravetz * Find the node associated with a hot added memory section. Section 763237a0989SMike Kravetz * corresponds to a SPARSEMEM section, not an LMB. It is assumed that 764237a0989SMike Kravetz * sections are fully contained within a single LMB. 765237a0989SMike Kravetz */ 766237a0989SMike Kravetz int hot_add_scn_to_nid(unsigned long scn_addr) 767237a0989SMike Kravetz { 768237a0989SMike Kravetz struct device_node *memory = NULL; 769b226e462SMike Kravetz nodemask_t nodes; 770b226e462SMike Kravetz int numa_domain = 0; 771237a0989SMike Kravetz 772237a0989SMike Kravetz if (!numa_enabled || (min_common_depth < 0)) 773b226e462SMike Kravetz return numa_domain; 774237a0989SMike Kravetz 775237a0989SMike Kravetz while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 776237a0989SMike Kravetz unsigned long start, size; 777b226e462SMike Kravetz int ranges; 778237a0989SMike Kravetz unsigned int *memcell_buf; 779237a0989SMike Kravetz unsigned int len; 780237a0989SMike Kravetz 781237a0989SMike Kravetz memcell_buf = (unsigned int *)get_property(memory, "reg", &len); 782237a0989SMike Kravetz if (!memcell_buf || len <= 0) 783237a0989SMike Kravetz continue; 784237a0989SMike Kravetz 785cc5d0189SBenjamin Herrenschmidt /* ranges in cell */ 786cc5d0189SBenjamin Herrenschmidt ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); 787237a0989SMike Kravetz ha_new_range: 788237a0989SMike Kravetz start = read_n_cells(n_mem_addr_cells, &memcell_buf); 789237a0989SMike Kravetz size = read_n_cells(n_mem_size_cells, &memcell_buf); 790237a0989SMike Kravetz numa_domain = of_node_numa_domain(memory); 791237a0989SMike Kravetz 792237a0989SMike Kravetz /* Domains not present at boot default to 0 */ 793237a0989SMike Kravetz if (!node_online(numa_domain)) 794237a0989SMike Kravetz numa_domain = any_online_node(NODE_MASK_ALL); 795237a0989SMike Kravetz 796237a0989SMike Kravetz if ((scn_addr >= start) && (scn_addr < (start + size))) { 797237a0989SMike Kravetz of_node_put(memory); 798b226e462SMike Kravetz goto got_numa_domain; 799237a0989SMike Kravetz } 800237a0989SMike Kravetz 801237a0989SMike Kravetz if (--ranges) /* process all ranges in cell */ 802237a0989SMike Kravetz goto ha_new_range; 803237a0989SMike Kravetz } 804237a0989SMike Kravetz BUG(); /* section address should be found above */ 805b226e462SMike Kravetz 806b226e462SMike Kravetz /* Temporary code to ensure that returned node is not empty */ 807b226e462SMike Kravetz got_numa_domain: 808b226e462SMike Kravetz nodes_setall(nodes); 809b226e462SMike Kravetz while (NODE_DATA(numa_domain)->node_spanned_pages == 0) { 810b226e462SMike Kravetz node_clear(numa_domain, nodes); 811b226e462SMike Kravetz numa_domain = any_online_node(nodes); 812b226e462SMike Kravetz } 813b226e462SMike Kravetz return numa_domain; 814237a0989SMike Kravetz } 815237a0989SMike Kravetz #endif /* CONFIG_MEMORY_HOTPLUG */ 816