/*
 * pSeries NUMA support
 *
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <asm/sparsemem.h>
#include <asm/lmb.h>
#include <asm/system.h>
#include <asm/smp.h>

static int numa_enabled = 1;

static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }

int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);

static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;

/*
 * We need somewhere to store start/end/node for each region until we have
 * allocated the real node_data structures.
 */
#define MAX_REGIONS	(MAX_LMB_REGIONS*2)
static struct {
	unsigned long start_pfn;
	unsigned long end_pfn;
	int nid;
} init_node_data[MAX_REGIONS] __initdata;

int __init early_pfn_to_nid(unsigned long pfn)
{
	unsigned int i;

	for (i = 0; init_node_data[i].end_pfn; i++) {
		unsigned long start_pfn = init_node_data[i].start_pfn;
		unsigned long end_pfn = init_node_data[i].end_pfn;

		if ((start_pfn <= pfn) && (pfn < end_pfn))
			return init_node_data[i].nid;
	}

	return -1;
}
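
/*
 * Record a region of memory for node nid in init_node_data[].  Where
 * possible the new range is merged with an existing entry for the same
 * node: if it starts exactly where an entry ends, that entry is extended
 * upwards; if it ends exactly where an entry starts, the entry is extended
 * downwards.  Otherwise a fresh entry is appended, always leaving the
 * final all-zero entry as the terminator that the end_pfn-based walks
 * rely on.  For example (illustrative pfn values only),
 * add_region(0, 0x1000, 0x100) followed by add_region(0, 0x1100, 0x100)
 * leaves a single entry spanning pfns 0x1000-0x1200 for node 0.
 */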
void __init add_region(unsigned int nid, unsigned long start_pfn,
		       unsigned long pages)
{
	unsigned int i;

	dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
		nid, start_pfn, pages);

	for (i = 0; init_node_data[i].end_pfn; i++) {
		if (init_node_data[i].nid != nid)
			continue;
		if (init_node_data[i].end_pfn == start_pfn) {
			init_node_data[i].end_pfn += pages;
			return;
		}
		if (init_node_data[i].start_pfn == (start_pfn + pages)) {
			init_node_data[i].start_pfn -= pages;
			return;
		}
	}

	/*
	 * Leave the last entry NULL so we don't iterate off the end (we use
	 * entry.end_pfn to terminate the walk).
	 */
	if (i >= (MAX_REGIONS - 1)) {
		printk(KERN_ERR "WARNING: too many memory regions in "
				"numa code, truncating\n");
		return;
	}

	init_node_data[i].start_pfn = start_pfn;
	init_node_data[i].end_pfn = start_pfn + pages;
	init_node_data[i].nid = nid;
}

/* We assume init_node_data has no overlapping regions */
void __init get_region(unsigned int nid, unsigned long *start_pfn,
		       unsigned long *end_pfn, unsigned long *pages_present)
{
	unsigned int i;

	*start_pfn = -1UL;
	*end_pfn = *pages_present = 0;

	for (i = 0; init_node_data[i].end_pfn; i++) {
		if (init_node_data[i].nid != nid)
			continue;

		*pages_present += init_node_data[i].end_pfn -
			init_node_data[i].start_pfn;

		if (init_node_data[i].start_pfn < *start_pfn)
			*start_pfn = init_node_data[i].start_pfn;

		if (init_node_data[i].end_pfn > *end_pfn)
			*end_pfn = init_node_data[i].end_pfn;
	}

	/* We didn't find a matching region, return start/end as 0 */
	if (*start_pfn == -1UL)
		*start_pfn = 0;
}

static void __cpuinit map_cpu_to_node(int cpu, int node)
{
	numa_cpu_lookup_table[cpu] = node;

	dbg("adding cpu %d to node %d\n", cpu, node);

	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
		cpu_set(cpu, numa_cpumask_lookup_table[node]);
}

#ifdef CONFIG_HOTPLUG_CPU
static void unmap_cpu_from_node(unsigned long cpu)
{
	int node = numa_cpu_lookup_table[cpu];

	dbg("removing cpu %lu from node %d\n", cpu, node);

	if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
		cpu_clear(cpu, numa_cpumask_lookup_table[node]);
	} else {
		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
		       cpu, node);
	}
}
#endif /* CONFIG_HOTPLUG_CPU */

static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
{
	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
	struct device_node *cpu_node = NULL;
	const unsigned int *interrupt_server, *reg;
	int len;

	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
		/* Try interrupt server first */
		interrupt_server = get_property(cpu_node,
					"ibm,ppc-interrupt-server#s", &len);

		len = len / sizeof(u32);

		if (interrupt_server && (len > 0)) {
			while (len--) {
				if (interrupt_server[len] == hw_cpuid)
					return cpu_node;
			}
		} else {
			reg = get_property(cpu_node, "reg", &len);
			if (reg && (len > 0) && (reg[0] == hw_cpuid))
				return cpu_node;
		}
	}

	return NULL;
}

/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
{
	return get_property(dev, "ibm,associativity", NULL);
}
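
/*
 * The "ibm,associativity" property is an array of cells: the first cell
 * holds the number of entries that follow, and the remaining cells
 * identify the associativity domains the device belongs to.
 * of_node_to_nid_single() below simply picks the entry at index
 * min_common_depth as the node id.  As a purely illustrative example,
 * with min_common_depth == 3 a device whose ibm,associativity is
 * <4 0 0 1 1> would be placed on node 1.
 */
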
/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
 * info is found.
 */
static int of_node_to_nid_single(struct device_node *device)
{
	int nid = -1;
	const unsigned int *tmp;

	if (min_common_depth == -1)
		goto out;

	tmp = of_get_associativity(device);
	if (!tmp)
		goto out;

	if (tmp[0] >= min_common_depth)
		nid = tmp[min_common_depth];

	/* POWER4 LPAR uses 0xffff as invalid node */
	if (nid == 0xffff || nid >= MAX_NUMNODES)
		nid = -1;
out:
	return nid;
}

/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
	struct device_node *tmp;
	int nid = -1;

	of_node_get(device);
	while (device) {
		nid = of_node_to_nid_single(device);
		if (nid != -1)
			break;

		tmp = device;
		device = of_get_parent(tmp);
		of_node_put(tmp);
	}
	of_node_put(device);

	return nid;
}
EXPORT_SYMBOL_GPL(of_node_to_nid);

/*
 * In theory, the "ibm,associativity" property may contain multiple
 * associativity lists because a resource may be multiply connected
 * into the machine.  This resource then has different associativity
 * characteristics relative to its multiple connections.  We ignore
 * this for now.  We also assume that all cpu and memory sets have
 * their distances represented at a common level.  This won't be
 * true for hierarchical NUMA.
 *
 * In any case the ibm,associativity-reference-points should give
 * the correct depth for a normal NUMA system.
 *
 * - Dave Hansen <haveblue@us.ibm.com>
 */
static int __init find_min_common_depth(void)
{
	int depth;
	const unsigned int *ref_points;
	struct device_node *rtas_root;
	unsigned int len;

	rtas_root = of_find_node_by_path("/rtas");

	if (!rtas_root)
		return -1;

	/*
	 * This property is 2 32-bit integers, each representing a level of
	 * depth in the associativity nodes.  The first is for an SMP
	 * configuration (should be all 0's) and the second is for a normal
	 * NUMA configuration.
	 */
	ref_points = get_property(rtas_root,
			"ibm,associativity-reference-points", &len);

	if ((len >= 1) && ref_points) {
		depth = ref_points[1];
	} else {
		dbg("NUMA: ibm,associativity-reference-points not found.\n");
		depth = -1;
	}
	of_node_put(rtas_root);

	return depth;
}

static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
	struct device_node *memory = NULL;

	memory = of_find_node_by_type(memory, "memory");
	if (!memory)
		panic("numa.c: No memory nodes found!");

	*n_addr_cells = prom_n_addr_cells(memory);
	*n_size_cells = prom_n_size_cells(memory);
	of_node_put(memory);
}
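
/*
 * Assemble one value from n consecutive 32-bit cells, most significant
 * cell first, and advance *buf past them.  For example, reading n == 2
 * cells containing <0x00000001 0x00000000> yields 0x100000000 (4GB) and
 * leaves *buf pointing at the next cell.
 */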
static unsigned long __devinit read_n_cells(int n, const unsigned int **buf)
{
	unsigned long result = 0;

	while (n--) {
		result = (result << 32) | **buf;
		(*buf)++;
	}
	return result;
}

/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int __cpuinit numa_setup_cpu(unsigned long lcpu)
{
	int nid = 0;
	struct device_node *cpu = find_cpu_node(lcpu);

	if (!cpu) {
		WARN_ON(1);
		goto out;
	}

	nid = of_node_to_nid_single(cpu);

	if (nid < 0 || !node_online(nid))
		nid = any_online_node(NODE_MASK_ALL);
out:
	map_cpu_to_node(lcpu, nid);

	of_node_put(cpu);

	return nid;
}

static int __cpuinit cpu_numa_callback(struct notifier_block *nfb,
			     unsigned long action,
			     void *hcpu)
{
	unsigned long lcpu = (unsigned long)hcpu;
	int ret = NOTIFY_DONE;

	switch (action) {
	case CPU_UP_PREPARE:
		numa_setup_cpu(lcpu);
		ret = NOTIFY_OK;
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		unmap_cpu_from_node(lcpu);
		ret = NOTIFY_OK;
		break;
#endif
	}
	return ret;
}

/*
 * Check and possibly modify a memory region to enforce the memory limit.
 *
 * Returns the size the region should have to enforce the memory limit.
 * This will either be the original value of size, a truncated value,
 * or zero.  If the returned value of size is 0 the region should be
 * discarded as it lies wholly above the memory limit.
 */
static unsigned long __init numa_enforce_memory_limit(unsigned long start,
						      unsigned long size)
{
	/*
	 * We use lmb_end_of_DRAM() in here instead of memory_limit because
	 * we've already adjusted it for the limit and it takes care of
	 * having memory holes below the limit.
	 */

	if (!memory_limit)
		return size;

	if (start + size <= lmb_end_of_DRAM())
		return size;

	if (start >= lmb_end_of_DRAM())
		return 0;

	return lmb_end_of_DRAM() - start;
}

static int __init parse_numa_properties(void)
{
	struct device_node *cpu = NULL;
	struct device_node *memory = NULL;
	int default_nid = 0;
	unsigned long i;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

	min_common_depth = find_min_common_depth();

	if (min_common_depth < 0)
		return min_common_depth;

	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);

	/*
	 * Even though we connect cpus to numa domains later in SMP
	 * init, we need to know the node ids now.  This is because
	 * each node to be onlined must have NODE_DATA etc backing it.
	 */
	for_each_present_cpu(i) {
		int nid;

		cpu = find_cpu_node(i);
		BUG_ON(!cpu);
		nid = of_node_to_nid_single(cpu);
		of_node_put(cpu);

		/*
		 * Don't fall back to default_nid yet -- we will plug
		 * cpus into nodes once the memory scan has discovered
		 * the topology.
		 */
		if (nid < 0)
			continue;
		node_set_online(nid);
	}

	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
	memory = NULL;
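	/*
	 * Each memory node describes its ranges in "linux,usable-memory"
	 * when present, otherwise in "reg".  Every entry is an
	 * (address, size) pair taking n_mem_addr_cells + n_mem_size_cells
	 * 32-bit cells, so len >> 2 converts the property length from bytes
	 * to cells before dividing by the cells consumed per range.
	 */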
	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start;
		unsigned long size;
		int nid;
		int ranges;
		const unsigned int *memcell_buf;
		unsigned int len;

		memcell_buf = get_property(memory,
			"linux,usable-memory", &len);
		if (!memcell_buf || len <= 0)
			memcell_buf = get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
new_range:
		/* these are order-sensitive, and modify the buffer pointer */
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);

		/*
		 * Assumption: either all memory nodes or none will
		 * have associativity properties.  If none, then
		 * everything goes to default_nid.
		 */
		nid = of_node_to_nid_single(memory);
		if (nid < 0)
			nid = default_nid;
		node_set_online(nid);

		if (!(size = numa_enforce_memory_limit(start, size))) {
			if (--ranges)
				goto new_range;
			else
				continue;
		}

		add_region(nid, start >> PAGE_SHIFT,
			   size >> PAGE_SHIFT);

		if (--ranges)
			goto new_range;
	}

	return 0;
}

static void __init setup_nonnuma(void)
{
	unsigned long top_of_ram = lmb_end_of_DRAM();
	unsigned long total_ram = lmb_phys_mem_size();
	unsigned int i;

	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
	       top_of_ram, total_ram);
	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
	       (top_of_ram - total_ram) >> 20);

	for (i = 0; i < lmb.memory.cnt; ++i)
		add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
			   lmb_size_pages(&lmb.memory, i));
	node_set_online(0);
}

void __init dump_numa_cpu_topology(void)
{
	unsigned int node;
	unsigned int cpu, count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		printk(KERN_DEBUG "Node %d CPUs:", node);

		count = 0;
		/*
		 * If we used a CPU iterator here we would miss printing
		 * the holes in the cpumap.
		 */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
				if (count == 0)
					printk(" %u", cpu);
				++count;
			} else {
				if (count > 1)
					printk("-%u", cpu - 1);
				count = 0;
			}
		}

		if (count > 1)
			printk("-%u", NR_CPUS - 1);
		printk("\n");
	}
}

static void __init dump_numa_memory_topology(void)
{
	unsigned int node;
	unsigned int count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		unsigned long i;

		printk(KERN_DEBUG "Node %d Memory:", node);

		count = 0;

		for (i = 0; i < lmb_end_of_DRAM();
		     i += (1 << SECTION_SIZE_BITS)) {
			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
				if (count == 0)
					printk(" 0x%lx", i);
				++count;
			} else {
				if (count > 0)
					printk("-0x%lx", i);
				count = 0;
			}
		}

		if (count > 0)
			printk("-0x%lx", i);
		printk("\n");
	}
}

/*
 * Allocate some memory, using the lmb allocator first and falling back to
 * the bootmem allocator where required.  nid is the preferred node and
 * end_pfn is the first page frame above the node's memory.
 *
 * Returns the physical address of the memory.
 */
static void __init *careful_allocation(int nid, unsigned long size,
				       unsigned long align,
				       unsigned long end_pfn)
{
	int new_nid;
	unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);

	/* retry over all memory */
	if (!ret)
		ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());

	if (!ret)
		panic("numa.c: cannot allocate %lu bytes on node %d",
		      size, nid);

	/*
	 * If the memory came from a previously allocated node, we must
	 * retry with the bootmem allocator.
	 */
	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
	if (new_nid < nid) {
		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
				size, align, 0);

		if (!ret)
			panic("numa.c: cannot allocate %lu bytes on node %d",
			      size, new_nid);

		ret = __pa(ret);

		dbg("alloc_bootmem %lx %lx\n", ret, size);
	}

	return (void *)ret;
}

static struct notifier_block __cpuinitdata ppc64_numa_nb = {
	.notifier_call = cpu_numa_callback,
	.priority = 1 /* Must run before sched domains notifier. */
};
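
/*
 * Set up bootmem for each online node: allocate the pglist_data and the
 * bootmem bitmap as close to node-local as careful_allocation() can
 * manage, free the node's memory regions into bootmem, re-reserve any
 * LMB-reserved ranges that fall within the node, and register the
 * regions with sparsemem.
 */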
void __init do_init_bootmem(void)
{
	int nid;
	unsigned int i;

	min_low_pfn = 0;
	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
	max_pfn = max_low_pfn;

	if (parse_numa_properties())
		setup_nonnuma();
	else
		dump_numa_memory_topology();

	register_cpu_notifier(&ppc64_numa_nb);
	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
			  (void *)(unsigned long)boot_cpuid);

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn, pages_present;
		unsigned long bootmem_paddr;
		unsigned long bootmap_pages;

		get_region(nid, &start_pfn, &end_pfn, &pages_present);

		/* Allocate the node structure node local if possible */
		NODE_DATA(nid) = careful_allocation(nid,
					sizeof(struct pglist_data),
					SMP_CACHE_BYTES, end_pfn);
		NODE_DATA(nid) = __va(NODE_DATA(nid));
		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));

		dbg("node %d\n", nid);
		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));

		NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
		NODE_DATA(nid)->node_start_pfn = start_pfn;
		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;

		if (NODE_DATA(nid)->node_spanned_pages == 0)
			continue;

		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);

		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
		bootmem_paddr = (unsigned long)careful_allocation(nid,
					bootmap_pages << PAGE_SHIFT,
					PAGE_SIZE, end_pfn);
		memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);

		dbg("bootmap_paddr = %lx\n", bootmem_paddr);

		init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
				  start_pfn, end_pfn);

		/* Add free regions on this node */
		for (i = 0; init_node_data[i].end_pfn; i++) {
			unsigned long start, end;

			if (init_node_data[i].nid != nid)
				continue;

			start = init_node_data[i].start_pfn << PAGE_SHIFT;
			end = init_node_data[i].end_pfn << PAGE_SHIFT;

			dbg("free_bootmem %lx %lx\n", start, end - start);
			free_bootmem_node(NODE_DATA(nid), start, end - start);
		}
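
		/*
		 * The LMB reserved list is not node-aware, so each reserved
		 * range is first tested against this node and then clamped
		 * to the node's [start_paddr, end_paddr) span before being
		 * handed to reserve_bootmem_node() below.
		 */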
		/* Mark reserved regions on this node */
		for (i = 0; i < lmb.reserved.cnt; i++) {
			unsigned long physbase = lmb.reserved.region[i].base;
			unsigned long size = lmb.reserved.region[i].size;
			unsigned long start_paddr = start_pfn << PAGE_SHIFT;
			unsigned long end_paddr = end_pfn << PAGE_SHIFT;

			if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid &&
			    early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid)
				continue;

			if (physbase < end_paddr &&
			    (physbase+size) > start_paddr) {
				/* overlaps */
				if (physbase < start_paddr) {
					size -= start_paddr - physbase;
					physbase = start_paddr;
				}

				if (size > end_paddr - physbase)
					size = end_paddr - physbase;

				dbg("reserve_bootmem %lx %lx\n", physbase,
				    size);
				reserve_bootmem_node(NODE_DATA(nid), physbase,
						     size);
			}
		}

		/* Add regions into sparsemem */
		for (i = 0; init_node_data[i].end_pfn; i++) {
			unsigned long start, end;

			if (init_node_data[i].nid != nid)
				continue;

			start = init_node_data[i].start_pfn;
			end = init_node_data[i].end_pfn;

			memory_present(nid, start, end);
		}
	}
}

void __init paging_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES];
	unsigned long zholes_size[MAX_NR_ZONES];
	int nid;

	memset(zones_size, 0, sizeof(zones_size));
	memset(zholes_size, 0, sizeof(zholes_size));

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn, pages_present;

		get_region(nid, &start_pfn, &end_pfn, &pages_present);

		zones_size[ZONE_DMA] = end_pfn - start_pfn;
		zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;

		dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
		    zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);

		free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
				    zholes_size);
	}
}
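
/*
 * Handle the "numa=" boot option.  A value containing "off" disables NUMA
 * entirely and a value containing "debug" enables the dbg() output above;
 * e.g. booting with "numa=debug" leaves NUMA enabled but makes the
 * node/region setup verbose.
 */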
static int __init early_numa(char *p)
{
	if (!p)
		return 0;

	if (strstr(p, "off"))
		numa_enabled = 0;

	if (strstr(p, "debug"))
		numa_debug = 1;

	return 0;
}
early_param("numa", early_numa);

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Find the node associated with a hot added memory section.  Section
 * corresponds to a SPARSEMEM section, not an LMB.  It is assumed that
 * sections are fully contained within a single LMB.
 */
int hot_add_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory = NULL;
	nodemask_t nodes;
	int default_nid = any_online_node(NODE_MASK_ALL);
	int nid;

	if (!numa_enabled || (min_common_depth < 0))
		return default_nid;

	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start, size;
		int ranges;
		const unsigned int *memcell_buf;
		unsigned int len;

		memcell_buf = get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
ha_new_range:
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);
		nid = of_node_to_nid_single(memory);

		/* Domains not present at boot default to 0 */
		if (nid < 0 || !node_online(nid))
			nid = default_nid;

		if ((scn_addr >= start) && (scn_addr < (start + size))) {
			of_node_put(memory);
			goto got_nid;
		}

		if (--ranges)		/* process all ranges in cell */
			goto ha_new_range;
	}
	BUG();	/* section address should be found above */
	return 0;

	/* Temporary code to ensure that the returned node is not empty */
got_nid:
	nodes_setall(nodes);
	while (NODE_DATA(nid)->node_spanned_pages == 0) {
		node_clear(nid, nodes);
		nid = any_online_node(nodes);
	}
	return nid;
}
#endif /* CONFIG_MEMORY_HOTPLUG */