/*
 * pSeries NUMA support
 *
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <asm/sparsemem.h>
#include <asm/lmb.h>
#include <asm/system.h>
#include <asm/smp.h>

static int numa_enabled = 1;

static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }

int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);

static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;

/*
 * We need somewhere to store start/end/node for each region until we have
 * allocated the real node_data structures.
 */
#define MAX_REGIONS	(MAX_LMB_REGIONS*2)
static struct {
	unsigned long start_pfn;
	unsigned long end_pfn;
	int nid;
} init_node_data[MAX_REGIONS] __initdata;

int __init early_pfn_to_nid(unsigned long pfn)
{
	unsigned int i;

	for (i = 0; init_node_data[i].end_pfn; i++) {
		unsigned long start_pfn = init_node_data[i].start_pfn;
		unsigned long end_pfn = init_node_data[i].end_pfn;

		if ((start_pfn <= pfn) && (pfn < end_pfn))
			return init_node_data[i].nid;
	}

	return -1;
}

void __init add_region(unsigned int nid, unsigned long start_pfn,
		       unsigned long pages)
{
	unsigned int i;

	dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
		nid, start_pfn, pages);

	for (i = 0; init_node_data[i].end_pfn; i++) {
		if (init_node_data[i].nid != nid)
			continue;
		if (init_node_data[i].end_pfn == start_pfn) {
			init_node_data[i].end_pfn += pages;
			return;
		}
		if (init_node_data[i].start_pfn == (start_pfn + pages)) {
			init_node_data[i].start_pfn -= pages;
			return;
		}
	}

	/*
	 * Leave last entry NULL so we don't iterate off the end (we use
	 * entry.end_pfn to terminate the walk).
	 */
	if (i >= (MAX_REGIONS - 1)) {
		printk(KERN_ERR "WARNING: too many memory regions in "
				"numa code, truncating\n");
		return;
	}

	init_node_data[i].start_pfn = start_pfn;
	init_node_data[i].end_pfn = start_pfn + pages;
	init_node_data[i].nid = nid;
}

/* We assume init_node_data has no overlapping regions */
void __init get_region(unsigned int nid, unsigned long *start_pfn,
		       unsigned long *end_pfn, unsigned long *pages_present)
{
	unsigned int i;

	*start_pfn = -1UL;
	*end_pfn = *pages_present = 0;

	for (i = 0; init_node_data[i].end_pfn; i++) {
		if (init_node_data[i].nid != nid)
			continue;

		*pages_present += init_node_data[i].end_pfn -
			init_node_data[i].start_pfn;

		if (init_node_data[i].start_pfn < *start_pfn)
			*start_pfn = init_node_data[i].start_pfn;

		if (init_node_data[i].end_pfn > *end_pfn)
			*end_pfn = init_node_data[i].end_pfn;
	}

	/* We didn't find a matching region, return start/end as 0 */
	if (*start_pfn == -1UL)
		*start_pfn = 0;
}

static void __cpuinit map_cpu_to_node(int cpu, int node)
{
	numa_cpu_lookup_table[cpu] = node;

	dbg("adding cpu %d to node %d\n", cpu, node);

	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
		cpu_set(cpu, numa_cpumask_lookup_table[node]);
}

#ifdef CONFIG_HOTPLUG_CPU
static void unmap_cpu_from_node(unsigned long cpu)
{
	int node = numa_cpu_lookup_table[cpu];

	dbg("removing cpu %lu from node %d\n", cpu, node);

	if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
		cpu_clear(cpu, numa_cpumask_lookup_table[node]);
	} else {
		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
		       cpu, node);
	}
}
#endif /* CONFIG_HOTPLUG_CPU */

static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
{
	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
	struct device_node *cpu_node = NULL;
	unsigned int *interrupt_server, *reg;
	int len;

	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
		/* Try interrupt server first */
		interrupt_server = (unsigned int *)get_property(cpu_node,
					"ibm,ppc-interrupt-server#s", &len);

		len = len / sizeof(u32);

		if (interrupt_server && (len > 0)) {
			while (len--) {
				if (interrupt_server[len] == hw_cpuid)
					return cpu_node;
			}
		} else {
			reg = (unsigned int *)get_property(cpu_node,
							   "reg", &len);
			if (reg && (len > 0) && (reg[0] == hw_cpuid))
				return cpu_node;
		}
	}

	return NULL;
}

/* must hold reference to node during call */
static int *of_get_associativity(struct device_node *dev)
{
	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
}

/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
 * info is found.
 */
static int of_node_to_nid(struct device_node *device)
{
	int nid = -1;
	unsigned int *tmp;

	if (min_common_depth == -1)
		goto out;

	tmp = of_get_associativity(device);
	if (!tmp)
		goto out;

	if (tmp[0] >= min_common_depth)
		nid = tmp[min_common_depth];

	/* POWER4 LPAR uses 0xffff as invalid node */
	if (nid == 0xffff || nid >= MAX_NUMNODES)
		nid = -1;
out:
	return nid;
}

/*
 * In theory, the "ibm,associativity" property may contain multiple
 * associativity lists because a resource may be multiply connected
 * into the machine. This resource then has different associativity
 * characteristics relative to its multiple connections. We ignore
 * this for now. We also assume that all cpu and memory sets have
 * their distances represented at a common level. This won't be
 * true for hierarchical NUMA.
 *
 * In any case the ibm,associativity-reference-points should give
 * the correct depth for a normal NUMA system.
 *
 * - Dave Hansen <haveblue@us.ibm.com>
 */
static int __init find_min_common_depth(void)
{
	int depth;
	unsigned int *ref_points;
	struct device_node *rtas_root;
	unsigned int len;

	rtas_root = of_find_node_by_path("/rtas");

	if (!rtas_root)
		return -1;

	/*
	 * this property is 2 32-bit integers, each representing a level of
	 * depth in the associativity nodes.  The first is for an SMP
	 * configuration (should be all 0's) and the second is for a normal
	 * NUMA configuration.
	 */
	ref_points = (unsigned int *)get_property(rtas_root,
			"ibm,associativity-reference-points", &len);

	if ((len >= 1) && ref_points) {
		depth = ref_points[1];
	} else {
		dbg("NUMA: ibm,associativity-reference-points not found.\n");
		depth = -1;
	}
	of_node_put(rtas_root);

	return depth;
}

static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
	struct device_node *memory = NULL;

	memory = of_find_node_by_type(memory, "memory");
	if (!memory)
		panic("numa.c: No memory nodes found!");

	*n_addr_cells = prom_n_addr_cells(memory);
	*n_size_cells = prom_n_size_cells(memory);
	of_node_put(memory);
}

static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
{
	unsigned long result = 0;

	while (n--) {
		result = (result << 32) | **buf;
		(*buf)++;
	}
	return result;
}

/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int __cpuinit numa_setup_cpu(unsigned long lcpu)
{
	int nid = 0;
	struct device_node *cpu = find_cpu_node(lcpu);

	if (!cpu) {
		WARN_ON(1);
		goto out;
	}

	nid = of_node_to_nid(cpu);

	if (nid < 0 || !node_online(nid))
		nid = any_online_node(NODE_MASK_ALL);
out:
	map_cpu_to_node(lcpu, nid);

	of_node_put(cpu);

	return nid;
}

static int cpu_numa_callback(struct notifier_block *nfb,
			     unsigned long action,
			     void *hcpu)
{
	unsigned long lcpu = (unsigned long)hcpu;
	int ret = NOTIFY_DONE;

	switch (action) {
	case CPU_UP_PREPARE:
		numa_setup_cpu(lcpu);
		ret = NOTIFY_OK;
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		unmap_cpu_from_node(lcpu);
		ret = NOTIFY_OK;
		break;
#endif
	}
	return ret;
}

/*
 * Check and possibly modify a memory region to enforce the memory limit.
 *
 * Returns the size the region should have to enforce the memory limit.
 * This will either be the original value of size, a truncated value,
 * or zero. If the returned value of size is 0 the region should be
 * discarded as it lies wholly above the memory limit.
 */
static unsigned long __init numa_enforce_memory_limit(unsigned long start,
						      unsigned long size)
{
	/*
	 * We use lmb_end_of_DRAM() in here instead of memory_limit because
	 * we've already adjusted it for the limit and it takes care of
	 * having memory holes below the limit.
	 */

	if (!memory_limit)
		return size;

	if (start + size <= lmb_end_of_DRAM())
		return size;

	if (start >= lmb_end_of_DRAM())
		return 0;

	return lmb_end_of_DRAM() - start;
}

static int __init parse_numa_properties(void)
{
	struct device_node *cpu = NULL;
	struct device_node *memory = NULL;
	int default_nid = 0;
	unsigned long i;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

	min_common_depth = find_min_common_depth();

	if (min_common_depth < 0)
		return min_common_depth;

	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);

	/*
	 * Even though we connect cpus to numa domains later in SMP
	 * init, we need to know the node ids now. This is because
	 * each node to be onlined must have NODE_DATA etc backing it.
	 */
	for_each_present_cpu(i) {
		int nid;

		cpu = find_cpu_node(i);
		BUG_ON(!cpu);
		nid = of_node_to_nid(cpu);
		of_node_put(cpu);

		/*
		 * Don't fall back to default_nid yet -- we will plug
		 * cpus into nodes once the memory scan has discovered
		 * the topology.
		 */
		if (nid < 0)
			continue;
		node_set_online(nid);
	}

	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
	memory = NULL;
	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start;
		unsigned long size;
		int nid;
		int ranges;
		unsigned int *memcell_buf;
		unsigned int len;

		memcell_buf = (unsigned int *)get_property(memory,
			"linux,usable-memory", &len);
		if (!memcell_buf || len <= 0)
			memcell_buf =
				(unsigned int *)get_property(memory, "reg",
					&len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
new_range:
		/* these are order-sensitive, and modify the buffer pointer */
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);

		/*
		 * Assumption: either all memory nodes or none will
		 * have associativity properties.  If none, then
		 * everything goes to default_nid.
		 */
		nid = of_node_to_nid(memory);
		if (nid < 0)
			nid = default_nid;
		node_set_online(nid);

		if (!(size = numa_enforce_memory_limit(start, size))) {
			if (--ranges)
				goto new_range;
			else
				continue;
		}

		add_region(nid, start >> PAGE_SHIFT,
			   size >> PAGE_SHIFT);

		if (--ranges)
			goto new_range;
	}

	return 0;
}

static void __init setup_nonnuma(void)
{
	unsigned long top_of_ram = lmb_end_of_DRAM();
	unsigned long total_ram = lmb_phys_mem_size();
	unsigned int i;

	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
	       top_of_ram, total_ram);
	printk(KERN_INFO "Memory hole size: %ldMB\n",
	       (top_of_ram - total_ram) >> 20);

	for (i = 0; i < lmb.memory.cnt; ++i)
		add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
			   lmb_size_pages(&lmb.memory, i));
	node_set_online(0);
}

void __init dump_numa_cpu_topology(void)
{
	unsigned int node;
	unsigned int cpu, count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		printk(KERN_INFO "Node %d CPUs:", node);

		count = 0;
		/*
		 * If we used a CPU iterator here we would miss printing
		 * the holes in the cpumap.
		 */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
				if (count == 0)
					printk(" %u", cpu);
				++count;
			} else {
				if (count > 1)
					printk("-%u", cpu - 1);
				count = 0;
			}
		}

		if (count > 1)
			printk("-%u", NR_CPUS - 1);
		printk("\n");
	}
}

static void __init dump_numa_memory_topology(void)
{
	unsigned int node;
	unsigned int count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		unsigned long i;

		printk(KERN_INFO "Node %d Memory:", node);

		count = 0;

		for (i = 0; i < lmb_end_of_DRAM();
		     i += (1 << SECTION_SIZE_BITS)) {
			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
				if (count == 0)
					printk(" 0x%lx", i);
				++count;
			} else {
				if (count > 0)
					printk("-0x%lx", i);
				count = 0;
			}
		}

		if (count > 0)
			printk("-0x%lx", i);
		printk("\n");
	}
}

/*
 * Allocate some memory, satisfying the lmb or bootmem allocator where
 * required. nid is the preferred node and end is the physical address of
 * the highest address in the node.
 *
 * Returns the physical address of the memory.
 */
static void __init *careful_allocation(int nid, unsigned long size,
				       unsigned long align,
				       unsigned long end_pfn)
{
	int new_nid;
	unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);

	/* retry over all memory */
	if (!ret)
		ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());

	if (!ret)
		panic("numa.c: cannot allocate %lu bytes on node %d",
		      size, nid);

	/*
	 * If the memory came from a previously allocated node, we must
	 * retry with the bootmem allocator.
	 */
	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
	if (new_nid < nid) {
		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
				size, align, 0);

		if (!ret)
			panic("numa.c: cannot allocate %lu bytes on node %d",
			      size, new_nid);

		ret = __pa(ret);

		dbg("alloc_bootmem %lx %lx\n", ret, size);
	}

	return (void *)ret;
}

void __init do_init_bootmem(void)
{
	int nid;
	unsigned int i;
	static struct notifier_block ppc64_numa_nb = {
		.notifier_call = cpu_numa_callback,
		.priority = 1 /* Must run before sched domains notifier. */
	};

	min_low_pfn = 0;
	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
	max_pfn = max_low_pfn;

	if (parse_numa_properties())
		setup_nonnuma();
	else
		dump_numa_memory_topology();

	register_cpu_notifier(&ppc64_numa_nb);
	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
			  (void *)(unsigned long)boot_cpuid);

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn, pages_present;
		unsigned long bootmem_paddr;
		unsigned long bootmap_pages;

		get_region(nid, &start_pfn, &end_pfn, &pages_present);

		/* Allocate the node structure node local if possible */
		NODE_DATA(nid) = careful_allocation(nid,
					sizeof(struct pglist_data),
					SMP_CACHE_BYTES, end_pfn);
		NODE_DATA(nid) = __va(NODE_DATA(nid));
		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));

		dbg("node %d\n", nid);
		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));

		NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
		NODE_DATA(nid)->node_start_pfn = start_pfn;
		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;

		if (NODE_DATA(nid)->node_spanned_pages == 0)
			continue;

		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);

		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
		bootmem_paddr = (unsigned long)careful_allocation(nid,
					bootmap_pages << PAGE_SHIFT,
					PAGE_SIZE, end_pfn);
		memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);

		dbg("bootmap_paddr = %lx\n", bootmem_paddr);

		init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
				  start_pfn, end_pfn);

		/* Add free regions on this node */
		for (i = 0; init_node_data[i].end_pfn; i++) {
			unsigned long start, end;

			if (init_node_data[i].nid != nid)
				continue;

			start = init_node_data[i].start_pfn << PAGE_SHIFT;
			end = init_node_data[i].end_pfn << PAGE_SHIFT;

			dbg("free_bootmem %lx %lx\n", start, end - start);
			free_bootmem_node(NODE_DATA(nid), start, end - start);
		}

		/* Mark reserved regions on this node */
		for (i = 0; i < lmb.reserved.cnt; i++) {
			unsigned long physbase = lmb.reserved.region[i].base;
			unsigned long size = lmb.reserved.region[i].size;
			unsigned long start_paddr = start_pfn << PAGE_SHIFT;
			unsigned long end_paddr = end_pfn << PAGE_SHIFT;

			if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid &&
			    early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid)
				continue;

			if (physbase < end_paddr &&
			    (physbase+size) > start_paddr) {
				/* overlaps */
				if (physbase < start_paddr) {
					size -= start_paddr - physbase;
					physbase = start_paddr;
				}

				if (size > end_paddr - physbase)
					size = end_paddr - physbase;

				dbg("reserve_bootmem %lx %lx\n", physbase,
				    size);
				reserve_bootmem_node(NODE_DATA(nid), physbase,
						     size);
			}
		}

		/* Add regions into sparsemem */
		for (i = 0; init_node_data[i].end_pfn; i++) {
			unsigned long start, end;

			if (init_node_data[i].nid != nid)
				continue;

			start = init_node_data[i].start_pfn;
			end = init_node_data[i].end_pfn;

			memory_present(nid, start, end);
		}
	}
}

void __init paging_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES];
	unsigned long zholes_size[MAX_NR_ZONES];
	int nid;

	memset(zones_size, 0, sizeof(zones_size));
	memset(zholes_size, 0, sizeof(zholes_size));

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn, pages_present;

		get_region(nid, &start_pfn, &end_pfn, &pages_present);

		zones_size[ZONE_DMA] = end_pfn - start_pfn;
		zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;

		dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
		    zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);

		free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
				    zholes_size);
	}
}

static int __init early_numa(char *p)
{
	if (!p)
		return 0;

	if (strstr(p, "off"))
		numa_enabled = 0;

	if (strstr(p, "debug"))
		numa_debug = 1;

	return 0;
}
early_param("numa", early_numa);

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Find the node associated with a hot added memory section.  Section
 * corresponds to a SPARSEMEM section, not an LMB.  It is assumed that
 * sections are fully contained within a single LMB.
 */
int hot_add_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory = NULL;
	nodemask_t nodes;
	int default_nid = any_online_node(NODE_MASK_ALL);
	int nid;

	if (!numa_enabled || (min_common_depth < 0))
		return default_nid;

	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start, size;
		int ranges;
		unsigned int *memcell_buf;
		unsigned int len;

		memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
ha_new_range:
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);
		nid = of_node_to_nid(memory);

		/* Domains not present at boot default to 0 */
		if (nid < 0 || !node_online(nid))
			nid = default_nid;

		if ((scn_addr >= start) && (scn_addr < (start + size))) {
			of_node_put(memory);
			goto got_nid;
		}

		if (--ranges)		/* process all ranges in cell */
			goto ha_new_range;
	}
	BUG();	/* section address should be found above */
	return 0;

	/* Temporary code to ensure that returned node is not empty */
got_nid:
	nodes_setall(nodes);
	while (NODE_DATA(nid)->node_spanned_pages == 0) {
		node_clear(nid, nodes);
		nid = any_online_node(nodes);
	}
	return nid;
}
#endif /* CONFIG_MEMORY_HOTPLUG */