1*ab1f9dacSPaul Mackerras /* 2*ab1f9dacSPaul Mackerras * pSeries NUMA support 3*ab1f9dacSPaul Mackerras * 4*ab1f9dacSPaul Mackerras * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM 5*ab1f9dacSPaul Mackerras * 6*ab1f9dacSPaul Mackerras * This program is free software; you can redistribute it and/or 7*ab1f9dacSPaul Mackerras * modify it under the terms of the GNU General Public License 8*ab1f9dacSPaul Mackerras * as published by the Free Software Foundation; either version 9*ab1f9dacSPaul Mackerras * 2 of the License, or (at your option) any later version. 10*ab1f9dacSPaul Mackerras */ 11*ab1f9dacSPaul Mackerras #include <linux/threads.h> 12*ab1f9dacSPaul Mackerras #include <linux/bootmem.h> 13*ab1f9dacSPaul Mackerras #include <linux/init.h> 14*ab1f9dacSPaul Mackerras #include <linux/mm.h> 15*ab1f9dacSPaul Mackerras #include <linux/mmzone.h> 16*ab1f9dacSPaul Mackerras #include <linux/module.h> 17*ab1f9dacSPaul Mackerras #include <linux/nodemask.h> 18*ab1f9dacSPaul Mackerras #include <linux/cpu.h> 19*ab1f9dacSPaul Mackerras #include <linux/notifier.h> 20*ab1f9dacSPaul Mackerras #include <asm/lmb.h> 21*ab1f9dacSPaul Mackerras #include <asm/machdep.h> 22*ab1f9dacSPaul Mackerras #include <asm/abs_addr.h> 23*ab1f9dacSPaul Mackerras 24*ab1f9dacSPaul Mackerras static int numa_enabled = 1; 25*ab1f9dacSPaul Mackerras 26*ab1f9dacSPaul Mackerras static int numa_debug; 27*ab1f9dacSPaul Mackerras #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } 28*ab1f9dacSPaul Mackerras 29*ab1f9dacSPaul Mackerras #ifdef DEBUG_NUMA 30*ab1f9dacSPaul Mackerras #define ARRAY_INITIALISER -1 31*ab1f9dacSPaul Mackerras #else 32*ab1f9dacSPaul Mackerras #define ARRAY_INITIALISER 0 33*ab1f9dacSPaul Mackerras #endif 34*ab1f9dacSPaul Mackerras 35*ab1f9dacSPaul Mackerras int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] = 36*ab1f9dacSPaul Mackerras ARRAY_INITIALISER}; 37*ab1f9dacSPaul Mackerras char *numa_memory_lookup_table; 38*ab1f9dacSPaul Mackerras cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; 39*ab1f9dacSPaul Mackerras int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0}; 40*ab1f9dacSPaul Mackerras 41*ab1f9dacSPaul Mackerras struct pglist_data *node_data[MAX_NUMNODES]; 42*ab1f9dacSPaul Mackerras bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; 43*ab1f9dacSPaul Mackerras static int min_common_depth; 44*ab1f9dacSPaul Mackerras 45*ab1f9dacSPaul Mackerras /* 46*ab1f9dacSPaul Mackerras * We need somewhere to store start/span for each node until we have 47*ab1f9dacSPaul Mackerras * allocated the real node_data structures. 48*ab1f9dacSPaul Mackerras */ 49*ab1f9dacSPaul Mackerras static struct { 50*ab1f9dacSPaul Mackerras unsigned long node_start_pfn; 51*ab1f9dacSPaul Mackerras unsigned long node_end_pfn; 52*ab1f9dacSPaul Mackerras unsigned long node_present_pages; 53*ab1f9dacSPaul Mackerras } init_node_data[MAX_NUMNODES] __initdata; 54*ab1f9dacSPaul Mackerras 55*ab1f9dacSPaul Mackerras EXPORT_SYMBOL(node_data); 56*ab1f9dacSPaul Mackerras EXPORT_SYMBOL(numa_cpu_lookup_table); 57*ab1f9dacSPaul Mackerras EXPORT_SYMBOL(numa_memory_lookup_table); 58*ab1f9dacSPaul Mackerras EXPORT_SYMBOL(numa_cpumask_lookup_table); 59*ab1f9dacSPaul Mackerras EXPORT_SYMBOL(nr_cpus_in_node); 60*ab1f9dacSPaul Mackerras 61*ab1f9dacSPaul Mackerras static inline void map_cpu_to_node(int cpu, int node) 62*ab1f9dacSPaul Mackerras { 63*ab1f9dacSPaul Mackerras numa_cpu_lookup_table[cpu] = node; 64*ab1f9dacSPaul Mackerras if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) { 65*ab1f9dacSPaul Mackerras cpu_set(cpu, numa_cpumask_lookup_table[node]); 66*ab1f9dacSPaul Mackerras nr_cpus_in_node[node]++; 67*ab1f9dacSPaul Mackerras } 68*ab1f9dacSPaul Mackerras } 69*ab1f9dacSPaul Mackerras 70*ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU 71*ab1f9dacSPaul Mackerras static void unmap_cpu_from_node(unsigned long cpu) 72*ab1f9dacSPaul Mackerras { 73*ab1f9dacSPaul Mackerras int node = numa_cpu_lookup_table[cpu]; 74*ab1f9dacSPaul Mackerras 75*ab1f9dacSPaul Mackerras dbg("removing cpu %lu from node %d\n", cpu, node); 76*ab1f9dacSPaul Mackerras 77*ab1f9dacSPaul Mackerras if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { 78*ab1f9dacSPaul Mackerras cpu_clear(cpu, numa_cpumask_lookup_table[node]); 79*ab1f9dacSPaul Mackerras nr_cpus_in_node[node]--; 80*ab1f9dacSPaul Mackerras } else { 81*ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", 82*ab1f9dacSPaul Mackerras cpu, node); 83*ab1f9dacSPaul Mackerras } 84*ab1f9dacSPaul Mackerras } 85*ab1f9dacSPaul Mackerras #endif /* CONFIG_HOTPLUG_CPU */ 86*ab1f9dacSPaul Mackerras 87*ab1f9dacSPaul Mackerras static struct device_node * __devinit find_cpu_node(unsigned int cpu) 88*ab1f9dacSPaul Mackerras { 89*ab1f9dacSPaul Mackerras unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); 90*ab1f9dacSPaul Mackerras struct device_node *cpu_node = NULL; 91*ab1f9dacSPaul Mackerras unsigned int *interrupt_server, *reg; 92*ab1f9dacSPaul Mackerras int len; 93*ab1f9dacSPaul Mackerras 94*ab1f9dacSPaul Mackerras while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) { 95*ab1f9dacSPaul Mackerras /* Try interrupt server first */ 96*ab1f9dacSPaul Mackerras interrupt_server = (unsigned int *)get_property(cpu_node, 97*ab1f9dacSPaul Mackerras "ibm,ppc-interrupt-server#s", &len); 98*ab1f9dacSPaul Mackerras 99*ab1f9dacSPaul Mackerras len = len / sizeof(u32); 100*ab1f9dacSPaul Mackerras 101*ab1f9dacSPaul Mackerras if (interrupt_server && (len > 0)) { 102*ab1f9dacSPaul Mackerras while (len--) { 103*ab1f9dacSPaul Mackerras if (interrupt_server[len] == hw_cpuid) 104*ab1f9dacSPaul Mackerras return cpu_node; 105*ab1f9dacSPaul Mackerras } 106*ab1f9dacSPaul Mackerras } else { 107*ab1f9dacSPaul Mackerras reg = (unsigned int *)get_property(cpu_node, 108*ab1f9dacSPaul Mackerras "reg", &len); 109*ab1f9dacSPaul Mackerras if (reg && (len > 0) && (reg[0] == hw_cpuid)) 110*ab1f9dacSPaul Mackerras return cpu_node; 111*ab1f9dacSPaul Mackerras } 112*ab1f9dacSPaul Mackerras } 113*ab1f9dacSPaul Mackerras 114*ab1f9dacSPaul Mackerras return NULL; 115*ab1f9dacSPaul Mackerras } 116*ab1f9dacSPaul Mackerras 117*ab1f9dacSPaul Mackerras /* must hold reference to node during call */ 118*ab1f9dacSPaul Mackerras static int *of_get_associativity(struct device_node *dev) 119*ab1f9dacSPaul Mackerras { 120*ab1f9dacSPaul Mackerras return (unsigned int *)get_property(dev, "ibm,associativity", NULL); 121*ab1f9dacSPaul Mackerras } 122*ab1f9dacSPaul Mackerras 123*ab1f9dacSPaul Mackerras static int of_node_numa_domain(struct device_node *device) 124*ab1f9dacSPaul Mackerras { 125*ab1f9dacSPaul Mackerras int numa_domain; 126*ab1f9dacSPaul Mackerras unsigned int *tmp; 127*ab1f9dacSPaul Mackerras 128*ab1f9dacSPaul Mackerras if (min_common_depth == -1) 129*ab1f9dacSPaul Mackerras return 0; 130*ab1f9dacSPaul Mackerras 131*ab1f9dacSPaul Mackerras tmp = of_get_associativity(device); 132*ab1f9dacSPaul Mackerras if (tmp && (tmp[0] >= min_common_depth)) { 133*ab1f9dacSPaul Mackerras numa_domain = tmp[min_common_depth]; 134*ab1f9dacSPaul Mackerras } else { 135*ab1f9dacSPaul Mackerras dbg("WARNING: no NUMA information for %s\n", 136*ab1f9dacSPaul Mackerras device->full_name); 137*ab1f9dacSPaul Mackerras numa_domain = 0; 138*ab1f9dacSPaul Mackerras } 139*ab1f9dacSPaul Mackerras return numa_domain; 140*ab1f9dacSPaul Mackerras } 141*ab1f9dacSPaul Mackerras 142*ab1f9dacSPaul Mackerras /* 143*ab1f9dacSPaul Mackerras * In theory, the "ibm,associativity" property may contain multiple 144*ab1f9dacSPaul Mackerras * associativity lists because a resource may be multiply connected 145*ab1f9dacSPaul Mackerras * into the machine. This resource then has different associativity 146*ab1f9dacSPaul Mackerras * characteristics relative to its multiple connections. We ignore 147*ab1f9dacSPaul Mackerras * this for now. We also assume that all cpu and memory sets have 148*ab1f9dacSPaul Mackerras * their distances represented at a common level. This won't be 149*ab1f9dacSPaul Mackerras * true for heirarchical NUMA. 150*ab1f9dacSPaul Mackerras * 151*ab1f9dacSPaul Mackerras * In any case the ibm,associativity-reference-points should give 152*ab1f9dacSPaul Mackerras * the correct depth for a normal NUMA system. 153*ab1f9dacSPaul Mackerras * 154*ab1f9dacSPaul Mackerras * - Dave Hansen <haveblue@us.ibm.com> 155*ab1f9dacSPaul Mackerras */ 156*ab1f9dacSPaul Mackerras static int __init find_min_common_depth(void) 157*ab1f9dacSPaul Mackerras { 158*ab1f9dacSPaul Mackerras int depth; 159*ab1f9dacSPaul Mackerras unsigned int *ref_points; 160*ab1f9dacSPaul Mackerras struct device_node *rtas_root; 161*ab1f9dacSPaul Mackerras unsigned int len; 162*ab1f9dacSPaul Mackerras 163*ab1f9dacSPaul Mackerras rtas_root = of_find_node_by_path("/rtas"); 164*ab1f9dacSPaul Mackerras 165*ab1f9dacSPaul Mackerras if (!rtas_root) 166*ab1f9dacSPaul Mackerras return -1; 167*ab1f9dacSPaul Mackerras 168*ab1f9dacSPaul Mackerras /* 169*ab1f9dacSPaul Mackerras * this property is 2 32-bit integers, each representing a level of 170*ab1f9dacSPaul Mackerras * depth in the associativity nodes. The first is for an SMP 171*ab1f9dacSPaul Mackerras * configuration (should be all 0's) and the second is for a normal 172*ab1f9dacSPaul Mackerras * NUMA configuration. 173*ab1f9dacSPaul Mackerras */ 174*ab1f9dacSPaul Mackerras ref_points = (unsigned int *)get_property(rtas_root, 175*ab1f9dacSPaul Mackerras "ibm,associativity-reference-points", &len); 176*ab1f9dacSPaul Mackerras 177*ab1f9dacSPaul Mackerras if ((len >= 1) && ref_points) { 178*ab1f9dacSPaul Mackerras depth = ref_points[1]; 179*ab1f9dacSPaul Mackerras } else { 180*ab1f9dacSPaul Mackerras dbg("WARNING: could not find NUMA " 181*ab1f9dacSPaul Mackerras "associativity reference point\n"); 182*ab1f9dacSPaul Mackerras depth = -1; 183*ab1f9dacSPaul Mackerras } 184*ab1f9dacSPaul Mackerras of_node_put(rtas_root); 185*ab1f9dacSPaul Mackerras 186*ab1f9dacSPaul Mackerras return depth; 187*ab1f9dacSPaul Mackerras } 188*ab1f9dacSPaul Mackerras 189*ab1f9dacSPaul Mackerras static int __init get_mem_addr_cells(void) 190*ab1f9dacSPaul Mackerras { 191*ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 192*ab1f9dacSPaul Mackerras int rc; 193*ab1f9dacSPaul Mackerras 194*ab1f9dacSPaul Mackerras memory = of_find_node_by_type(memory, "memory"); 195*ab1f9dacSPaul Mackerras if (!memory) 196*ab1f9dacSPaul Mackerras return 0; /* it won't matter */ 197*ab1f9dacSPaul Mackerras 198*ab1f9dacSPaul Mackerras rc = prom_n_addr_cells(memory); 199*ab1f9dacSPaul Mackerras return rc; 200*ab1f9dacSPaul Mackerras } 201*ab1f9dacSPaul Mackerras 202*ab1f9dacSPaul Mackerras static int __init get_mem_size_cells(void) 203*ab1f9dacSPaul Mackerras { 204*ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 205*ab1f9dacSPaul Mackerras int rc; 206*ab1f9dacSPaul Mackerras 207*ab1f9dacSPaul Mackerras memory = of_find_node_by_type(memory, "memory"); 208*ab1f9dacSPaul Mackerras if (!memory) 209*ab1f9dacSPaul Mackerras return 0; /* it won't matter */ 210*ab1f9dacSPaul Mackerras rc = prom_n_size_cells(memory); 211*ab1f9dacSPaul Mackerras return rc; 212*ab1f9dacSPaul Mackerras } 213*ab1f9dacSPaul Mackerras 214*ab1f9dacSPaul Mackerras static unsigned long read_n_cells(int n, unsigned int **buf) 215*ab1f9dacSPaul Mackerras { 216*ab1f9dacSPaul Mackerras unsigned long result = 0; 217*ab1f9dacSPaul Mackerras 218*ab1f9dacSPaul Mackerras while (n--) { 219*ab1f9dacSPaul Mackerras result = (result << 32) | **buf; 220*ab1f9dacSPaul Mackerras (*buf)++; 221*ab1f9dacSPaul Mackerras } 222*ab1f9dacSPaul Mackerras return result; 223*ab1f9dacSPaul Mackerras } 224*ab1f9dacSPaul Mackerras 225*ab1f9dacSPaul Mackerras /* 226*ab1f9dacSPaul Mackerras * Figure out to which domain a cpu belongs and stick it there. 227*ab1f9dacSPaul Mackerras * Return the id of the domain used. 228*ab1f9dacSPaul Mackerras */ 229*ab1f9dacSPaul Mackerras static int numa_setup_cpu(unsigned long lcpu) 230*ab1f9dacSPaul Mackerras { 231*ab1f9dacSPaul Mackerras int numa_domain = 0; 232*ab1f9dacSPaul Mackerras struct device_node *cpu = find_cpu_node(lcpu); 233*ab1f9dacSPaul Mackerras 234*ab1f9dacSPaul Mackerras if (!cpu) { 235*ab1f9dacSPaul Mackerras WARN_ON(1); 236*ab1f9dacSPaul Mackerras goto out; 237*ab1f9dacSPaul Mackerras } 238*ab1f9dacSPaul Mackerras 239*ab1f9dacSPaul Mackerras numa_domain = of_node_numa_domain(cpu); 240*ab1f9dacSPaul Mackerras 241*ab1f9dacSPaul Mackerras if (numa_domain >= num_online_nodes()) { 242*ab1f9dacSPaul Mackerras /* 243*ab1f9dacSPaul Mackerras * POWER4 LPAR uses 0xffff as invalid node, 244*ab1f9dacSPaul Mackerras * dont warn in this case. 245*ab1f9dacSPaul Mackerras */ 246*ab1f9dacSPaul Mackerras if (numa_domain != 0xffff) 247*ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: cpu %ld " 248*ab1f9dacSPaul Mackerras "maps to invalid NUMA node %d\n", 249*ab1f9dacSPaul Mackerras lcpu, numa_domain); 250*ab1f9dacSPaul Mackerras numa_domain = 0; 251*ab1f9dacSPaul Mackerras } 252*ab1f9dacSPaul Mackerras out: 253*ab1f9dacSPaul Mackerras node_set_online(numa_domain); 254*ab1f9dacSPaul Mackerras 255*ab1f9dacSPaul Mackerras map_cpu_to_node(lcpu, numa_domain); 256*ab1f9dacSPaul Mackerras 257*ab1f9dacSPaul Mackerras of_node_put(cpu); 258*ab1f9dacSPaul Mackerras 259*ab1f9dacSPaul Mackerras return numa_domain; 260*ab1f9dacSPaul Mackerras } 261*ab1f9dacSPaul Mackerras 262*ab1f9dacSPaul Mackerras static int cpu_numa_callback(struct notifier_block *nfb, 263*ab1f9dacSPaul Mackerras unsigned long action, 264*ab1f9dacSPaul Mackerras void *hcpu) 265*ab1f9dacSPaul Mackerras { 266*ab1f9dacSPaul Mackerras unsigned long lcpu = (unsigned long)hcpu; 267*ab1f9dacSPaul Mackerras int ret = NOTIFY_DONE; 268*ab1f9dacSPaul Mackerras 269*ab1f9dacSPaul Mackerras switch (action) { 270*ab1f9dacSPaul Mackerras case CPU_UP_PREPARE: 271*ab1f9dacSPaul Mackerras if (min_common_depth == -1 || !numa_enabled) 272*ab1f9dacSPaul Mackerras map_cpu_to_node(lcpu, 0); 273*ab1f9dacSPaul Mackerras else 274*ab1f9dacSPaul Mackerras numa_setup_cpu(lcpu); 275*ab1f9dacSPaul Mackerras ret = NOTIFY_OK; 276*ab1f9dacSPaul Mackerras break; 277*ab1f9dacSPaul Mackerras #ifdef CONFIG_HOTPLUG_CPU 278*ab1f9dacSPaul Mackerras case CPU_DEAD: 279*ab1f9dacSPaul Mackerras case CPU_UP_CANCELED: 280*ab1f9dacSPaul Mackerras unmap_cpu_from_node(lcpu); 281*ab1f9dacSPaul Mackerras break; 282*ab1f9dacSPaul Mackerras ret = NOTIFY_OK; 283*ab1f9dacSPaul Mackerras #endif 284*ab1f9dacSPaul Mackerras } 285*ab1f9dacSPaul Mackerras return ret; 286*ab1f9dacSPaul Mackerras } 287*ab1f9dacSPaul Mackerras 288*ab1f9dacSPaul Mackerras /* 289*ab1f9dacSPaul Mackerras * Check and possibly modify a memory region to enforce the memory limit. 290*ab1f9dacSPaul Mackerras * 291*ab1f9dacSPaul Mackerras * Returns the size the region should have to enforce the memory limit. 292*ab1f9dacSPaul Mackerras * This will either be the original value of size, a truncated value, 293*ab1f9dacSPaul Mackerras * or zero. If the returned value of size is 0 the region should be 294*ab1f9dacSPaul Mackerras * discarded as it lies wholy above the memory limit. 295*ab1f9dacSPaul Mackerras */ 296*ab1f9dacSPaul Mackerras static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size) 297*ab1f9dacSPaul Mackerras { 298*ab1f9dacSPaul Mackerras /* 299*ab1f9dacSPaul Mackerras * We use lmb_end_of_DRAM() in here instead of memory_limit because 300*ab1f9dacSPaul Mackerras * we've already adjusted it for the limit and it takes care of 301*ab1f9dacSPaul Mackerras * having memory holes below the limit. 302*ab1f9dacSPaul Mackerras */ 303*ab1f9dacSPaul Mackerras extern unsigned long memory_limit; 304*ab1f9dacSPaul Mackerras 305*ab1f9dacSPaul Mackerras if (! memory_limit) 306*ab1f9dacSPaul Mackerras return size; 307*ab1f9dacSPaul Mackerras 308*ab1f9dacSPaul Mackerras if (start + size <= lmb_end_of_DRAM()) 309*ab1f9dacSPaul Mackerras return size; 310*ab1f9dacSPaul Mackerras 311*ab1f9dacSPaul Mackerras if (start >= lmb_end_of_DRAM()) 312*ab1f9dacSPaul Mackerras return 0; 313*ab1f9dacSPaul Mackerras 314*ab1f9dacSPaul Mackerras return lmb_end_of_DRAM() - start; 315*ab1f9dacSPaul Mackerras } 316*ab1f9dacSPaul Mackerras 317*ab1f9dacSPaul Mackerras static int __init parse_numa_properties(void) 318*ab1f9dacSPaul Mackerras { 319*ab1f9dacSPaul Mackerras struct device_node *cpu = NULL; 320*ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 321*ab1f9dacSPaul Mackerras int addr_cells, size_cells; 322*ab1f9dacSPaul Mackerras int max_domain = 0; 323*ab1f9dacSPaul Mackerras long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT; 324*ab1f9dacSPaul Mackerras unsigned long i; 325*ab1f9dacSPaul Mackerras 326*ab1f9dacSPaul Mackerras if (numa_enabled == 0) { 327*ab1f9dacSPaul Mackerras printk(KERN_WARNING "NUMA disabled by user\n"); 328*ab1f9dacSPaul Mackerras return -1; 329*ab1f9dacSPaul Mackerras } 330*ab1f9dacSPaul Mackerras 331*ab1f9dacSPaul Mackerras numa_memory_lookup_table = 332*ab1f9dacSPaul Mackerras (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); 333*ab1f9dacSPaul Mackerras memset(numa_memory_lookup_table, 0, entries * sizeof(char)); 334*ab1f9dacSPaul Mackerras 335*ab1f9dacSPaul Mackerras for (i = 0; i < entries ; i++) 336*ab1f9dacSPaul Mackerras numa_memory_lookup_table[i] = ARRAY_INITIALISER; 337*ab1f9dacSPaul Mackerras 338*ab1f9dacSPaul Mackerras min_common_depth = find_min_common_depth(); 339*ab1f9dacSPaul Mackerras 340*ab1f9dacSPaul Mackerras dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); 341*ab1f9dacSPaul Mackerras if (min_common_depth < 0) 342*ab1f9dacSPaul Mackerras return min_common_depth; 343*ab1f9dacSPaul Mackerras 344*ab1f9dacSPaul Mackerras max_domain = numa_setup_cpu(boot_cpuid); 345*ab1f9dacSPaul Mackerras 346*ab1f9dacSPaul Mackerras /* 347*ab1f9dacSPaul Mackerras * Even though we connect cpus to numa domains later in SMP init, 348*ab1f9dacSPaul Mackerras * we need to know the maximum node id now. This is because each 349*ab1f9dacSPaul Mackerras * node id must have NODE_DATA etc backing it. 350*ab1f9dacSPaul Mackerras * As a result of hotplug we could still have cpus appear later on 351*ab1f9dacSPaul Mackerras * with larger node ids. In that case we force the cpu into node 0. 352*ab1f9dacSPaul Mackerras */ 353*ab1f9dacSPaul Mackerras for_each_cpu(i) { 354*ab1f9dacSPaul Mackerras int numa_domain; 355*ab1f9dacSPaul Mackerras 356*ab1f9dacSPaul Mackerras cpu = find_cpu_node(i); 357*ab1f9dacSPaul Mackerras 358*ab1f9dacSPaul Mackerras if (cpu) { 359*ab1f9dacSPaul Mackerras numa_domain = of_node_numa_domain(cpu); 360*ab1f9dacSPaul Mackerras of_node_put(cpu); 361*ab1f9dacSPaul Mackerras 362*ab1f9dacSPaul Mackerras if (numa_domain < MAX_NUMNODES && 363*ab1f9dacSPaul Mackerras max_domain < numa_domain) 364*ab1f9dacSPaul Mackerras max_domain = numa_domain; 365*ab1f9dacSPaul Mackerras } 366*ab1f9dacSPaul Mackerras } 367*ab1f9dacSPaul Mackerras 368*ab1f9dacSPaul Mackerras addr_cells = get_mem_addr_cells(); 369*ab1f9dacSPaul Mackerras size_cells = get_mem_size_cells(); 370*ab1f9dacSPaul Mackerras memory = NULL; 371*ab1f9dacSPaul Mackerras while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 372*ab1f9dacSPaul Mackerras unsigned long start; 373*ab1f9dacSPaul Mackerras unsigned long size; 374*ab1f9dacSPaul Mackerras int numa_domain; 375*ab1f9dacSPaul Mackerras int ranges; 376*ab1f9dacSPaul Mackerras unsigned int *memcell_buf; 377*ab1f9dacSPaul Mackerras unsigned int len; 378*ab1f9dacSPaul Mackerras 379*ab1f9dacSPaul Mackerras memcell_buf = (unsigned int *)get_property(memory, "reg", &len); 380*ab1f9dacSPaul Mackerras if (!memcell_buf || len <= 0) 381*ab1f9dacSPaul Mackerras continue; 382*ab1f9dacSPaul Mackerras 383*ab1f9dacSPaul Mackerras ranges = memory->n_addrs; 384*ab1f9dacSPaul Mackerras new_range: 385*ab1f9dacSPaul Mackerras /* these are order-sensitive, and modify the buffer pointer */ 386*ab1f9dacSPaul Mackerras start = read_n_cells(addr_cells, &memcell_buf); 387*ab1f9dacSPaul Mackerras size = read_n_cells(size_cells, &memcell_buf); 388*ab1f9dacSPaul Mackerras 389*ab1f9dacSPaul Mackerras start = _ALIGN_DOWN(start, MEMORY_INCREMENT); 390*ab1f9dacSPaul Mackerras size = _ALIGN_UP(size, MEMORY_INCREMENT); 391*ab1f9dacSPaul Mackerras 392*ab1f9dacSPaul Mackerras numa_domain = of_node_numa_domain(memory); 393*ab1f9dacSPaul Mackerras 394*ab1f9dacSPaul Mackerras if (numa_domain >= MAX_NUMNODES) { 395*ab1f9dacSPaul Mackerras if (numa_domain != 0xffff) 396*ab1f9dacSPaul Mackerras printk(KERN_ERR "WARNING: memory at %lx maps " 397*ab1f9dacSPaul Mackerras "to invalid NUMA node %d\n", start, 398*ab1f9dacSPaul Mackerras numa_domain); 399*ab1f9dacSPaul Mackerras numa_domain = 0; 400*ab1f9dacSPaul Mackerras } 401*ab1f9dacSPaul Mackerras 402*ab1f9dacSPaul Mackerras if (max_domain < numa_domain) 403*ab1f9dacSPaul Mackerras max_domain = numa_domain; 404*ab1f9dacSPaul Mackerras 405*ab1f9dacSPaul Mackerras if (! (size = numa_enforce_memory_limit(start, size))) { 406*ab1f9dacSPaul Mackerras if (--ranges) 407*ab1f9dacSPaul Mackerras goto new_range; 408*ab1f9dacSPaul Mackerras else 409*ab1f9dacSPaul Mackerras continue; 410*ab1f9dacSPaul Mackerras } 411*ab1f9dacSPaul Mackerras 412*ab1f9dacSPaul Mackerras /* 413*ab1f9dacSPaul Mackerras * Initialize new node struct, or add to an existing one. 414*ab1f9dacSPaul Mackerras */ 415*ab1f9dacSPaul Mackerras if (init_node_data[numa_domain].node_end_pfn) { 416*ab1f9dacSPaul Mackerras if ((start / PAGE_SIZE) < 417*ab1f9dacSPaul Mackerras init_node_data[numa_domain].node_start_pfn) 418*ab1f9dacSPaul Mackerras init_node_data[numa_domain].node_start_pfn = 419*ab1f9dacSPaul Mackerras start / PAGE_SIZE; 420*ab1f9dacSPaul Mackerras if (((start / PAGE_SIZE) + (size / PAGE_SIZE)) > 421*ab1f9dacSPaul Mackerras init_node_data[numa_domain].node_end_pfn) 422*ab1f9dacSPaul Mackerras init_node_data[numa_domain].node_end_pfn = 423*ab1f9dacSPaul Mackerras (start / PAGE_SIZE) + 424*ab1f9dacSPaul Mackerras (size / PAGE_SIZE); 425*ab1f9dacSPaul Mackerras 426*ab1f9dacSPaul Mackerras init_node_data[numa_domain].node_present_pages += 427*ab1f9dacSPaul Mackerras size / PAGE_SIZE; 428*ab1f9dacSPaul Mackerras } else { 429*ab1f9dacSPaul Mackerras node_set_online(numa_domain); 430*ab1f9dacSPaul Mackerras 431*ab1f9dacSPaul Mackerras init_node_data[numa_domain].node_start_pfn = 432*ab1f9dacSPaul Mackerras start / PAGE_SIZE; 433*ab1f9dacSPaul Mackerras init_node_data[numa_domain].node_end_pfn = 434*ab1f9dacSPaul Mackerras init_node_data[numa_domain].node_start_pfn + 435*ab1f9dacSPaul Mackerras size / PAGE_SIZE; 436*ab1f9dacSPaul Mackerras init_node_data[numa_domain].node_present_pages = 437*ab1f9dacSPaul Mackerras size / PAGE_SIZE; 438*ab1f9dacSPaul Mackerras } 439*ab1f9dacSPaul Mackerras 440*ab1f9dacSPaul Mackerras for (i = start ; i < (start+size); i += MEMORY_INCREMENT) 441*ab1f9dacSPaul Mackerras numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 442*ab1f9dacSPaul Mackerras numa_domain; 443*ab1f9dacSPaul Mackerras 444*ab1f9dacSPaul Mackerras if (--ranges) 445*ab1f9dacSPaul Mackerras goto new_range; 446*ab1f9dacSPaul Mackerras } 447*ab1f9dacSPaul Mackerras 448*ab1f9dacSPaul Mackerras for (i = 0; i <= max_domain; i++) 449*ab1f9dacSPaul Mackerras node_set_online(i); 450*ab1f9dacSPaul Mackerras 451*ab1f9dacSPaul Mackerras return 0; 452*ab1f9dacSPaul Mackerras } 453*ab1f9dacSPaul Mackerras 454*ab1f9dacSPaul Mackerras static void __init setup_nonnuma(void) 455*ab1f9dacSPaul Mackerras { 456*ab1f9dacSPaul Mackerras unsigned long top_of_ram = lmb_end_of_DRAM(); 457*ab1f9dacSPaul Mackerras unsigned long total_ram = lmb_phys_mem_size(); 458*ab1f9dacSPaul Mackerras unsigned long i; 459*ab1f9dacSPaul Mackerras 460*ab1f9dacSPaul Mackerras printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", 461*ab1f9dacSPaul Mackerras top_of_ram, total_ram); 462*ab1f9dacSPaul Mackerras printk(KERN_INFO "Memory hole size: %ldMB\n", 463*ab1f9dacSPaul Mackerras (top_of_ram - total_ram) >> 20); 464*ab1f9dacSPaul Mackerras 465*ab1f9dacSPaul Mackerras if (!numa_memory_lookup_table) { 466*ab1f9dacSPaul Mackerras long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT; 467*ab1f9dacSPaul Mackerras numa_memory_lookup_table = 468*ab1f9dacSPaul Mackerras (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); 469*ab1f9dacSPaul Mackerras memset(numa_memory_lookup_table, 0, entries * sizeof(char)); 470*ab1f9dacSPaul Mackerras for (i = 0; i < entries ; i++) 471*ab1f9dacSPaul Mackerras numa_memory_lookup_table[i] = ARRAY_INITIALISER; 472*ab1f9dacSPaul Mackerras } 473*ab1f9dacSPaul Mackerras 474*ab1f9dacSPaul Mackerras map_cpu_to_node(boot_cpuid, 0); 475*ab1f9dacSPaul Mackerras 476*ab1f9dacSPaul Mackerras node_set_online(0); 477*ab1f9dacSPaul Mackerras 478*ab1f9dacSPaul Mackerras init_node_data[0].node_start_pfn = 0; 479*ab1f9dacSPaul Mackerras init_node_data[0].node_end_pfn = lmb_end_of_DRAM() / PAGE_SIZE; 480*ab1f9dacSPaul Mackerras init_node_data[0].node_present_pages = total_ram / PAGE_SIZE; 481*ab1f9dacSPaul Mackerras 482*ab1f9dacSPaul Mackerras for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) 483*ab1f9dacSPaul Mackerras numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; 484*ab1f9dacSPaul Mackerras } 485*ab1f9dacSPaul Mackerras 486*ab1f9dacSPaul Mackerras static void __init dump_numa_topology(void) 487*ab1f9dacSPaul Mackerras { 488*ab1f9dacSPaul Mackerras unsigned int node; 489*ab1f9dacSPaul Mackerras unsigned int count; 490*ab1f9dacSPaul Mackerras 491*ab1f9dacSPaul Mackerras if (min_common_depth == -1 || !numa_enabled) 492*ab1f9dacSPaul Mackerras return; 493*ab1f9dacSPaul Mackerras 494*ab1f9dacSPaul Mackerras for_each_online_node(node) { 495*ab1f9dacSPaul Mackerras unsigned long i; 496*ab1f9dacSPaul Mackerras 497*ab1f9dacSPaul Mackerras printk(KERN_INFO "Node %d Memory:", node); 498*ab1f9dacSPaul Mackerras 499*ab1f9dacSPaul Mackerras count = 0; 500*ab1f9dacSPaul Mackerras 501*ab1f9dacSPaul Mackerras for (i = 0; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) { 502*ab1f9dacSPaul Mackerras if (numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] == node) { 503*ab1f9dacSPaul Mackerras if (count == 0) 504*ab1f9dacSPaul Mackerras printk(" 0x%lx", i); 505*ab1f9dacSPaul Mackerras ++count; 506*ab1f9dacSPaul Mackerras } else { 507*ab1f9dacSPaul Mackerras if (count > 0) 508*ab1f9dacSPaul Mackerras printk("-0x%lx", i); 509*ab1f9dacSPaul Mackerras count = 0; 510*ab1f9dacSPaul Mackerras } 511*ab1f9dacSPaul Mackerras } 512*ab1f9dacSPaul Mackerras 513*ab1f9dacSPaul Mackerras if (count > 0) 514*ab1f9dacSPaul Mackerras printk("-0x%lx", i); 515*ab1f9dacSPaul Mackerras printk("\n"); 516*ab1f9dacSPaul Mackerras } 517*ab1f9dacSPaul Mackerras return; 518*ab1f9dacSPaul Mackerras } 519*ab1f9dacSPaul Mackerras 520*ab1f9dacSPaul Mackerras /* 521*ab1f9dacSPaul Mackerras * Allocate some memory, satisfying the lmb or bootmem allocator where 522*ab1f9dacSPaul Mackerras * required. nid is the preferred node and end is the physical address of 523*ab1f9dacSPaul Mackerras * the highest address in the node. 524*ab1f9dacSPaul Mackerras * 525*ab1f9dacSPaul Mackerras * Returns the physical address of the memory. 526*ab1f9dacSPaul Mackerras */ 527*ab1f9dacSPaul Mackerras static unsigned long careful_allocation(int nid, unsigned long size, 528*ab1f9dacSPaul Mackerras unsigned long align, unsigned long end) 529*ab1f9dacSPaul Mackerras { 530*ab1f9dacSPaul Mackerras unsigned long ret = lmb_alloc_base(size, align, end); 531*ab1f9dacSPaul Mackerras 532*ab1f9dacSPaul Mackerras /* retry over all memory */ 533*ab1f9dacSPaul Mackerras if (!ret) 534*ab1f9dacSPaul Mackerras ret = lmb_alloc_base(size, align, lmb_end_of_DRAM()); 535*ab1f9dacSPaul Mackerras 536*ab1f9dacSPaul Mackerras if (!ret) 537*ab1f9dacSPaul Mackerras panic("numa.c: cannot allocate %lu bytes on node %d", 538*ab1f9dacSPaul Mackerras size, nid); 539*ab1f9dacSPaul Mackerras 540*ab1f9dacSPaul Mackerras /* 541*ab1f9dacSPaul Mackerras * If the memory came from a previously allocated node, we must 542*ab1f9dacSPaul Mackerras * retry with the bootmem allocator. 543*ab1f9dacSPaul Mackerras */ 544*ab1f9dacSPaul Mackerras if (pa_to_nid(ret) < nid) { 545*ab1f9dacSPaul Mackerras nid = pa_to_nid(ret); 546*ab1f9dacSPaul Mackerras ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(nid), 547*ab1f9dacSPaul Mackerras size, align, 0); 548*ab1f9dacSPaul Mackerras 549*ab1f9dacSPaul Mackerras if (!ret) 550*ab1f9dacSPaul Mackerras panic("numa.c: cannot allocate %lu bytes on node %d", 551*ab1f9dacSPaul Mackerras size, nid); 552*ab1f9dacSPaul Mackerras 553*ab1f9dacSPaul Mackerras ret = virt_to_abs(ret); 554*ab1f9dacSPaul Mackerras 555*ab1f9dacSPaul Mackerras dbg("alloc_bootmem %lx %lx\n", ret, size); 556*ab1f9dacSPaul Mackerras } 557*ab1f9dacSPaul Mackerras 558*ab1f9dacSPaul Mackerras return ret; 559*ab1f9dacSPaul Mackerras } 560*ab1f9dacSPaul Mackerras 561*ab1f9dacSPaul Mackerras void __init do_init_bootmem(void) 562*ab1f9dacSPaul Mackerras { 563*ab1f9dacSPaul Mackerras int nid; 564*ab1f9dacSPaul Mackerras int addr_cells, size_cells; 565*ab1f9dacSPaul Mackerras struct device_node *memory = NULL; 566*ab1f9dacSPaul Mackerras static struct notifier_block ppc64_numa_nb = { 567*ab1f9dacSPaul Mackerras .notifier_call = cpu_numa_callback, 568*ab1f9dacSPaul Mackerras .priority = 1 /* Must run before sched domains notifier. */ 569*ab1f9dacSPaul Mackerras }; 570*ab1f9dacSPaul Mackerras 571*ab1f9dacSPaul Mackerras min_low_pfn = 0; 572*ab1f9dacSPaul Mackerras max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; 573*ab1f9dacSPaul Mackerras max_pfn = max_low_pfn; 574*ab1f9dacSPaul Mackerras 575*ab1f9dacSPaul Mackerras if (parse_numa_properties()) 576*ab1f9dacSPaul Mackerras setup_nonnuma(); 577*ab1f9dacSPaul Mackerras else 578*ab1f9dacSPaul Mackerras dump_numa_topology(); 579*ab1f9dacSPaul Mackerras 580*ab1f9dacSPaul Mackerras register_cpu_notifier(&ppc64_numa_nb); 581*ab1f9dacSPaul Mackerras 582*ab1f9dacSPaul Mackerras for_each_online_node(nid) { 583*ab1f9dacSPaul Mackerras unsigned long start_paddr, end_paddr; 584*ab1f9dacSPaul Mackerras int i; 585*ab1f9dacSPaul Mackerras unsigned long bootmem_paddr; 586*ab1f9dacSPaul Mackerras unsigned long bootmap_pages; 587*ab1f9dacSPaul Mackerras 588*ab1f9dacSPaul Mackerras start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE; 589*ab1f9dacSPaul Mackerras end_paddr = init_node_data[nid].node_end_pfn * PAGE_SIZE; 590*ab1f9dacSPaul Mackerras 591*ab1f9dacSPaul Mackerras /* Allocate the node structure node local if possible */ 592*ab1f9dacSPaul Mackerras NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid, 593*ab1f9dacSPaul Mackerras sizeof(struct pglist_data), 594*ab1f9dacSPaul Mackerras SMP_CACHE_BYTES, end_paddr); 595*ab1f9dacSPaul Mackerras NODE_DATA(nid) = abs_to_virt(NODE_DATA(nid)); 596*ab1f9dacSPaul Mackerras memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 597*ab1f9dacSPaul Mackerras 598*ab1f9dacSPaul Mackerras dbg("node %d\n", nid); 599*ab1f9dacSPaul Mackerras dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); 600*ab1f9dacSPaul Mackerras 601*ab1f9dacSPaul Mackerras NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; 602*ab1f9dacSPaul Mackerras NODE_DATA(nid)->node_start_pfn = 603*ab1f9dacSPaul Mackerras init_node_data[nid].node_start_pfn; 604*ab1f9dacSPaul Mackerras NODE_DATA(nid)->node_spanned_pages = 605*ab1f9dacSPaul Mackerras end_paddr - start_paddr; 606*ab1f9dacSPaul Mackerras 607*ab1f9dacSPaul Mackerras if (NODE_DATA(nid)->node_spanned_pages == 0) 608*ab1f9dacSPaul Mackerras continue; 609*ab1f9dacSPaul Mackerras 610*ab1f9dacSPaul Mackerras dbg("start_paddr = %lx\n", start_paddr); 611*ab1f9dacSPaul Mackerras dbg("end_paddr = %lx\n", end_paddr); 612*ab1f9dacSPaul Mackerras 613*ab1f9dacSPaul Mackerras bootmap_pages = bootmem_bootmap_pages((end_paddr - start_paddr) >> PAGE_SHIFT); 614*ab1f9dacSPaul Mackerras 615*ab1f9dacSPaul Mackerras bootmem_paddr = careful_allocation(nid, 616*ab1f9dacSPaul Mackerras bootmap_pages << PAGE_SHIFT, 617*ab1f9dacSPaul Mackerras PAGE_SIZE, end_paddr); 618*ab1f9dacSPaul Mackerras memset(abs_to_virt(bootmem_paddr), 0, 619*ab1f9dacSPaul Mackerras bootmap_pages << PAGE_SHIFT); 620*ab1f9dacSPaul Mackerras dbg("bootmap_paddr = %lx\n", bootmem_paddr); 621*ab1f9dacSPaul Mackerras 622*ab1f9dacSPaul Mackerras init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, 623*ab1f9dacSPaul Mackerras start_paddr >> PAGE_SHIFT, 624*ab1f9dacSPaul Mackerras end_paddr >> PAGE_SHIFT); 625*ab1f9dacSPaul Mackerras 626*ab1f9dacSPaul Mackerras /* 627*ab1f9dacSPaul Mackerras * We need to do another scan of all memory sections to 628*ab1f9dacSPaul Mackerras * associate memory with the correct node. 629*ab1f9dacSPaul Mackerras */ 630*ab1f9dacSPaul Mackerras addr_cells = get_mem_addr_cells(); 631*ab1f9dacSPaul Mackerras size_cells = get_mem_size_cells(); 632*ab1f9dacSPaul Mackerras memory = NULL; 633*ab1f9dacSPaul Mackerras while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 634*ab1f9dacSPaul Mackerras unsigned long mem_start, mem_size; 635*ab1f9dacSPaul Mackerras int numa_domain, ranges; 636*ab1f9dacSPaul Mackerras unsigned int *memcell_buf; 637*ab1f9dacSPaul Mackerras unsigned int len; 638*ab1f9dacSPaul Mackerras 639*ab1f9dacSPaul Mackerras memcell_buf = (unsigned int *)get_property(memory, "reg", &len); 640*ab1f9dacSPaul Mackerras if (!memcell_buf || len <= 0) 641*ab1f9dacSPaul Mackerras continue; 642*ab1f9dacSPaul Mackerras 643*ab1f9dacSPaul Mackerras ranges = memory->n_addrs; /* ranges in cell */ 644*ab1f9dacSPaul Mackerras new_range: 645*ab1f9dacSPaul Mackerras mem_start = read_n_cells(addr_cells, &memcell_buf); 646*ab1f9dacSPaul Mackerras mem_size = read_n_cells(size_cells, &memcell_buf); 647*ab1f9dacSPaul Mackerras if (numa_enabled) { 648*ab1f9dacSPaul Mackerras numa_domain = of_node_numa_domain(memory); 649*ab1f9dacSPaul Mackerras if (numa_domain >= MAX_NUMNODES) 650*ab1f9dacSPaul Mackerras numa_domain = 0; 651*ab1f9dacSPaul Mackerras } else 652*ab1f9dacSPaul Mackerras numa_domain = 0; 653*ab1f9dacSPaul Mackerras 654*ab1f9dacSPaul Mackerras if (numa_domain != nid) 655*ab1f9dacSPaul Mackerras continue; 656*ab1f9dacSPaul Mackerras 657*ab1f9dacSPaul Mackerras mem_size = numa_enforce_memory_limit(mem_start, mem_size); 658*ab1f9dacSPaul Mackerras if (mem_size) { 659*ab1f9dacSPaul Mackerras dbg("free_bootmem %lx %lx\n", mem_start, mem_size); 660*ab1f9dacSPaul Mackerras free_bootmem_node(NODE_DATA(nid), mem_start, mem_size); 661*ab1f9dacSPaul Mackerras } 662*ab1f9dacSPaul Mackerras 663*ab1f9dacSPaul Mackerras if (--ranges) /* process all ranges in cell */ 664*ab1f9dacSPaul Mackerras goto new_range; 665*ab1f9dacSPaul Mackerras } 666*ab1f9dacSPaul Mackerras 667*ab1f9dacSPaul Mackerras /* 668*ab1f9dacSPaul Mackerras * Mark reserved regions on this node 669*ab1f9dacSPaul Mackerras */ 670*ab1f9dacSPaul Mackerras for (i = 0; i < lmb.reserved.cnt; i++) { 671*ab1f9dacSPaul Mackerras unsigned long physbase = lmb.reserved.region[i].base; 672*ab1f9dacSPaul Mackerras unsigned long size = lmb.reserved.region[i].size; 673*ab1f9dacSPaul Mackerras 674*ab1f9dacSPaul Mackerras if (pa_to_nid(physbase) != nid && 675*ab1f9dacSPaul Mackerras pa_to_nid(physbase+size-1) != nid) 676*ab1f9dacSPaul Mackerras continue; 677*ab1f9dacSPaul Mackerras 678*ab1f9dacSPaul Mackerras if (physbase < end_paddr && 679*ab1f9dacSPaul Mackerras (physbase+size) > start_paddr) { 680*ab1f9dacSPaul Mackerras /* overlaps */ 681*ab1f9dacSPaul Mackerras if (physbase < start_paddr) { 682*ab1f9dacSPaul Mackerras size -= start_paddr - physbase; 683*ab1f9dacSPaul Mackerras physbase = start_paddr; 684*ab1f9dacSPaul Mackerras } 685*ab1f9dacSPaul Mackerras 686*ab1f9dacSPaul Mackerras if (size > end_paddr - physbase) 687*ab1f9dacSPaul Mackerras size = end_paddr - physbase; 688*ab1f9dacSPaul Mackerras 689*ab1f9dacSPaul Mackerras dbg("reserve_bootmem %lx %lx\n", physbase, 690*ab1f9dacSPaul Mackerras size); 691*ab1f9dacSPaul Mackerras reserve_bootmem_node(NODE_DATA(nid), physbase, 692*ab1f9dacSPaul Mackerras size); 693*ab1f9dacSPaul Mackerras } 694*ab1f9dacSPaul Mackerras } 695*ab1f9dacSPaul Mackerras /* 696*ab1f9dacSPaul Mackerras * This loop may look famaliar, but we have to do it again 697*ab1f9dacSPaul Mackerras * after marking our reserved memory to mark memory present 698*ab1f9dacSPaul Mackerras * for sparsemem. 699*ab1f9dacSPaul Mackerras */ 700*ab1f9dacSPaul Mackerras addr_cells = get_mem_addr_cells(); 701*ab1f9dacSPaul Mackerras size_cells = get_mem_size_cells(); 702*ab1f9dacSPaul Mackerras memory = NULL; 703*ab1f9dacSPaul Mackerras while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { 704*ab1f9dacSPaul Mackerras unsigned long mem_start, mem_size; 705*ab1f9dacSPaul Mackerras int numa_domain, ranges; 706*ab1f9dacSPaul Mackerras unsigned int *memcell_buf; 707*ab1f9dacSPaul Mackerras unsigned int len; 708*ab1f9dacSPaul Mackerras 709*ab1f9dacSPaul Mackerras memcell_buf = (unsigned int *)get_property(memory, "reg", &len); 710*ab1f9dacSPaul Mackerras if (!memcell_buf || len <= 0) 711*ab1f9dacSPaul Mackerras continue; 712*ab1f9dacSPaul Mackerras 713*ab1f9dacSPaul Mackerras ranges = memory->n_addrs; /* ranges in cell */ 714*ab1f9dacSPaul Mackerras new_range2: 715*ab1f9dacSPaul Mackerras mem_start = read_n_cells(addr_cells, &memcell_buf); 716*ab1f9dacSPaul Mackerras mem_size = read_n_cells(size_cells, &memcell_buf); 717*ab1f9dacSPaul Mackerras if (numa_enabled) { 718*ab1f9dacSPaul Mackerras numa_domain = of_node_numa_domain(memory); 719*ab1f9dacSPaul Mackerras if (numa_domain >= MAX_NUMNODES) 720*ab1f9dacSPaul Mackerras numa_domain = 0; 721*ab1f9dacSPaul Mackerras } else 722*ab1f9dacSPaul Mackerras numa_domain = 0; 723*ab1f9dacSPaul Mackerras 724*ab1f9dacSPaul Mackerras if (numa_domain != nid) 725*ab1f9dacSPaul Mackerras continue; 726*ab1f9dacSPaul Mackerras 727*ab1f9dacSPaul Mackerras mem_size = numa_enforce_memory_limit(mem_start, mem_size); 728*ab1f9dacSPaul Mackerras memory_present(numa_domain, mem_start >> PAGE_SHIFT, 729*ab1f9dacSPaul Mackerras (mem_start + mem_size) >> PAGE_SHIFT); 730*ab1f9dacSPaul Mackerras 731*ab1f9dacSPaul Mackerras if (--ranges) /* process all ranges in cell */ 732*ab1f9dacSPaul Mackerras goto new_range2; 733*ab1f9dacSPaul Mackerras } 734*ab1f9dacSPaul Mackerras 735*ab1f9dacSPaul Mackerras } 736*ab1f9dacSPaul Mackerras } 737*ab1f9dacSPaul Mackerras 738*ab1f9dacSPaul Mackerras void __init paging_init(void) 739*ab1f9dacSPaul Mackerras { 740*ab1f9dacSPaul Mackerras unsigned long zones_size[MAX_NR_ZONES]; 741*ab1f9dacSPaul Mackerras unsigned long zholes_size[MAX_NR_ZONES]; 742*ab1f9dacSPaul Mackerras int nid; 743*ab1f9dacSPaul Mackerras 744*ab1f9dacSPaul Mackerras memset(zones_size, 0, sizeof(zones_size)); 745*ab1f9dacSPaul Mackerras memset(zholes_size, 0, sizeof(zholes_size)); 746*ab1f9dacSPaul Mackerras 747*ab1f9dacSPaul Mackerras for_each_online_node(nid) { 748*ab1f9dacSPaul Mackerras unsigned long start_pfn; 749*ab1f9dacSPaul Mackerras unsigned long end_pfn; 750*ab1f9dacSPaul Mackerras 751*ab1f9dacSPaul Mackerras start_pfn = init_node_data[nid].node_start_pfn; 752*ab1f9dacSPaul Mackerras end_pfn = init_node_data[nid].node_end_pfn; 753*ab1f9dacSPaul Mackerras 754*ab1f9dacSPaul Mackerras zones_size[ZONE_DMA] = end_pfn - start_pfn; 755*ab1f9dacSPaul Mackerras zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - 756*ab1f9dacSPaul Mackerras init_node_data[nid].node_present_pages; 757*ab1f9dacSPaul Mackerras 758*ab1f9dacSPaul Mackerras dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid, 759*ab1f9dacSPaul Mackerras zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]); 760*ab1f9dacSPaul Mackerras 761*ab1f9dacSPaul Mackerras free_area_init_node(nid, NODE_DATA(nid), zones_size, 762*ab1f9dacSPaul Mackerras start_pfn, zholes_size); 763*ab1f9dacSPaul Mackerras } 764*ab1f9dacSPaul Mackerras } 765*ab1f9dacSPaul Mackerras 766*ab1f9dacSPaul Mackerras static int __init early_numa(char *p) 767*ab1f9dacSPaul Mackerras { 768*ab1f9dacSPaul Mackerras if (!p) 769*ab1f9dacSPaul Mackerras return 0; 770*ab1f9dacSPaul Mackerras 771*ab1f9dacSPaul Mackerras if (strstr(p, "off")) 772*ab1f9dacSPaul Mackerras numa_enabled = 0; 773*ab1f9dacSPaul Mackerras 774*ab1f9dacSPaul Mackerras if (strstr(p, "debug")) 775*ab1f9dacSPaul Mackerras numa_debug = 1; 776*ab1f9dacSPaul Mackerras 777*ab1f9dacSPaul Mackerras return 0; 778*ab1f9dacSPaul Mackerras } 779*ab1f9dacSPaul Mackerras early_param("numa", early_numa); 780