/*
 * pSeries NUMA support
 *
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <asm/sparsemem.h>
#include <asm/lmb.h>
#include <asm/system.h>
#include <asm/smp.h>

static int numa_enabled = 1;

static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }

int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);

static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;

/*
 * We need somewhere to store start/end/node for each region until we have
 * allocated the real node_data structures.
 */
#define MAX_REGIONS	(MAX_LMB_REGIONS*2)
static struct {
	unsigned long start_pfn;
	unsigned long end_pfn;
	int nid;
} init_node_data[MAX_REGIONS] __initdata;

int __init early_pfn_to_nid(unsigned long pfn)
{
	unsigned int i;

	for (i = 0; init_node_data[i].end_pfn; i++) {
		unsigned long start_pfn = init_node_data[i].start_pfn;
		unsigned long end_pfn = init_node_data[i].end_pfn;

		if ((start_pfn <= pfn) && (pfn < end_pfn))
			return init_node_data[i].nid;
	}

	return -1;
}

void __init add_region(unsigned int nid, unsigned long start_pfn,
		       unsigned long pages)
{
	unsigned int i;

	dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
		nid, start_pfn, pages);

	for (i = 0; init_node_data[i].end_pfn; i++) {
		if (init_node_data[i].nid != nid)
			continue;
		if (init_node_data[i].end_pfn == start_pfn) {
			init_node_data[i].end_pfn += pages;
			return;
		}
		if (init_node_data[i].start_pfn == (start_pfn + pages)) {
			init_node_data[i].start_pfn -= pages;
			return;
		}
	}

	/*
	 * Leave last entry NULL so we don't iterate off the end (we use
	 * entry.end_pfn to terminate the walk).
	 */
	if (i >= (MAX_REGIONS - 1)) {
		printk(KERN_ERR "WARNING: too many memory regions in "
				"numa code, truncating\n");
		return;
	}

	init_node_data[i].start_pfn = start_pfn;
	init_node_data[i].end_pfn = start_pfn + pages;
	init_node_data[i].nid = nid;
}
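
/*
 * Illustrative example (not part of the original file, values made up):
 * two calls that describe physically adjacent memory on the same node
 * collapse into a single init_node_data[] entry:
 *
 *	add_region(0, 0x0,    0x1000);	creates entry 0, pfns 0x0-0x1000
 *	add_region(0, 0x1000, 0x1000);	extends entry 0 to pfns 0x0-0x2000
 *
 * A call for a different nid, or for a non-adjacent pfn range, starts a
 * new entry instead.
 */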

/* We assume init_node_data has no overlapping regions */
void __init get_region(unsigned int nid, unsigned long *start_pfn,
		       unsigned long *end_pfn, unsigned long *pages_present)
{
	unsigned int i;

	*start_pfn = -1UL;
	*end_pfn = *pages_present = 0;

	for (i = 0; init_node_data[i].end_pfn; i++) {
		if (init_node_data[i].nid != nid)
			continue;

		*pages_present += init_node_data[i].end_pfn -
			init_node_data[i].start_pfn;

		if (init_node_data[i].start_pfn < *start_pfn)
			*start_pfn = init_node_data[i].start_pfn;

		if (init_node_data[i].end_pfn > *end_pfn)
			*end_pfn = init_node_data[i].end_pfn;
	}

	/* We didn't find a matching region, return start/end as 0 */
	if (*start_pfn == -1UL)
		*start_pfn = 0;
}

static void __cpuinit map_cpu_to_node(int cpu, int node)
{
	numa_cpu_lookup_table[cpu] = node;

	dbg("adding cpu %d to node %d\n", cpu, node);

	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
		cpu_set(cpu, numa_cpumask_lookup_table[node]);
}

#ifdef CONFIG_HOTPLUG_CPU
static void unmap_cpu_from_node(unsigned long cpu)
{
	int node = numa_cpu_lookup_table[cpu];

	dbg("removing cpu %lu from node %d\n", cpu, node);

	if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
		cpu_clear(cpu, numa_cpumask_lookup_table[node]);
	} else {
		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
		       cpu, node);
	}
}
#endif /* CONFIG_HOTPLUG_CPU */

static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
{
	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
	struct device_node *cpu_node = NULL;
	unsigned int *interrupt_server, *reg;
	int len;

	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
		/* Try interrupt server first */
		interrupt_server = (unsigned int *)get_property(cpu_node,
					"ibm,ppc-interrupt-server#s", &len);

		if (interrupt_server && (len > 0)) {
			/* One entry per hardware thread of this cpu node */
			len = len / sizeof(u32);
			while (len--) {
				if (interrupt_server[len] == hw_cpuid)
					return cpu_node;
			}
		} else {
			reg = (unsigned int *)get_property(cpu_node,
							   "reg", &len);
			if (reg && (len > 0) && (reg[0] == hw_cpuid))
				return cpu_node;
		}
	}

	return NULL;
}

/* must hold reference to node during call */
static unsigned int *of_get_associativity(struct device_node *dev)
{
	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
}
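
/*
 * Illustrative example (not part of the original file, values made up):
 * given a device node with
 *
 *	ibm,associativity = <4 0 0 5 2>
 *
 * tmp[0] == 4 says four domain ids follow.  If the firmware reference
 * points give min_common_depth == 3, the node id picked below is
 * tmp[3] == 5.  Real values come from the device tree.
 */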

/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
 * info is found.
 */
static int of_node_to_nid_single(struct device_node *device)
{
	int nid = -1;
	unsigned int *tmp;

	if (min_common_depth == -1)
		goto out;

	tmp = of_get_associativity(device);
	if (!tmp)
		goto out;

	if (tmp[0] >= min_common_depth)
		nid = tmp[min_common_depth];

	/* POWER4 LPAR uses 0xffff as invalid node */
	if (nid == 0xffff || nid >= MAX_NUMNODES)
		nid = -1;
out:
	return nid;
}

/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
	struct device_node *tmp;
	int nid = -1;

	of_node_get(device);
	while (device) {
		nid = of_node_to_nid_single(device);
		if (nid != -1)
			break;

		tmp = device;
		device = of_get_parent(tmp);
		of_node_put(tmp);
	}
	of_node_put(device);

	return nid;
}
EXPORT_SYMBOL_GPL(of_node_to_nid);

/*
 * In theory, the "ibm,associativity" property may contain multiple
 * associativity lists because a resource may be multiply connected
 * into the machine. This resource then has different associativity
 * characteristics relative to its multiple connections. We ignore
 * this for now. We also assume that all cpu and memory sets have
 * their distances represented at a common level. This won't be
 * true for hierarchical NUMA.
 *
 * In any case the ibm,associativity-reference-points should give
 * the correct depth for a normal NUMA system.
 *
 * - Dave Hansen <haveblue@us.ibm.com>
 */
static int __init find_min_common_depth(void)
{
	int depth;
	unsigned int *ref_points;
	struct device_node *rtas_root;
	unsigned int len;

	rtas_root = of_find_node_by_path("/rtas");

	if (!rtas_root)
		return -1;

	/*
	 * This property is 2 32-bit integers, each representing a level of
	 * depth in the associativity nodes. The first is for an SMP
	 * configuration (should be all 0's) and the second is for a normal
	 * NUMA configuration.
	 */
	ref_points = (unsigned int *)get_property(rtas_root,
			"ibm,associativity-reference-points", &len);

	if (ref_points && (len >= 2 * sizeof(unsigned int))) {
		depth = ref_points[1];
	} else {
		dbg("NUMA: ibm,associativity-reference-points not found.\n");
		depth = -1;
	}
	of_node_put(rtas_root);

	return depth;
}

static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
	struct device_node *memory = NULL;

	memory = of_find_node_by_type(memory, "memory");
	if (!memory)
		panic("numa.c: No memory nodes found!");

	*n_addr_cells = prom_n_addr_cells(memory);
	*n_size_cells = prom_n_size_cells(memory);
	of_node_put(memory);
}

static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
{
	unsigned long result = 0;

	while (n--) {
		result = (result << 32) | **buf;
		(*buf)++;
	}
	return result;
}
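
/*
 * Illustrative example (not part of the original file, values made up):
 * decoding one (address, size) pair from a 2-cell/2-cell "reg" property:
 *
 *	unsigned int cells[] = { 0x00000001, 0x00000000,
 *				 0x00000000, 0x10000000 };
 *	unsigned int *p = cells;
 *	unsigned long addr = read_n_cells(2, &p);	yields 0x100000000
 *	unsigned long size = read_n_cells(2, &p);	yields 0x10000000
 *
 * Each call advances *buf past the cells it consumed, which is why the
 * memory scan below can call it back to back for address then size.
 */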

/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int __cpuinit numa_setup_cpu(unsigned long lcpu)
{
	int nid = 0;
	struct device_node *cpu = find_cpu_node(lcpu);

	if (!cpu) {
		WARN_ON(1);
		goto out;
	}

	nid = of_node_to_nid_single(cpu);

	if (nid < 0 || !node_online(nid))
		nid = any_online_node(NODE_MASK_ALL);
out:
	map_cpu_to_node(lcpu, nid);

	of_node_put(cpu);

	return nid;
}

static int cpu_numa_callback(struct notifier_block *nfb,
			     unsigned long action,
			     void *hcpu)
{
	unsigned long lcpu = (unsigned long)hcpu;
	int ret = NOTIFY_DONE;

	switch (action) {
	case CPU_UP_PREPARE:
		numa_setup_cpu(lcpu);
		ret = NOTIFY_OK;
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		unmap_cpu_from_node(lcpu);
		ret = NOTIFY_OK;
		break;
#endif
	}
	return ret;
}

/*
 * Check and possibly modify a memory region to enforce the memory limit.
 *
 * Returns the size the region should have to enforce the memory limit.
 * This will either be the original value of size, a truncated value,
 * or zero. If the returned value of size is 0 the region should be
 * discarded as it lies wholly above the memory limit.
 */
static unsigned long __init numa_enforce_memory_limit(unsigned long start,
						      unsigned long size)
{
	/*
	 * We use lmb_end_of_DRAM() in here instead of memory_limit because
	 * we've already adjusted it for the limit and it takes care of
	 * having memory holes below the limit.
	 */

	if (!memory_limit)
		return size;

	if (start + size <= lmb_end_of_DRAM())
		return size;

	if (start >= lmb_end_of_DRAM())
		return 0;

	return lmb_end_of_DRAM() - start;
}
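
/*
 * Illustrative example (not part of the original file, values made up):
 * if mem= clamps lmb_end_of_DRAM() to 0x40000000, a region starting at
 * 0x30000000 with size 0x20000000 is trimmed to 0x10000000, while a
 * region starting at or above 0x40000000 is dropped entirely (size 0 is
 * returned).  A region that fits below the limit is returned unchanged.
 */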

static int __init parse_numa_properties(void)
{
	struct device_node *cpu = NULL;
	struct device_node *memory = NULL;
	int default_nid = 0;
	unsigned long i;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

	min_common_depth = find_min_common_depth();

	if (min_common_depth < 0)
		return min_common_depth;

	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);

	/*
	 * Even though we connect cpus to numa domains later in SMP
	 * init, we need to know the node ids now. This is because
	 * each node to be onlined must have NODE_DATA etc backing it.
	 */
	for_each_present_cpu(i) {
		int nid;

		cpu = find_cpu_node(i);
		BUG_ON(!cpu);
		nid = of_node_to_nid_single(cpu);
		of_node_put(cpu);

		/*
		 * Don't fall back to default_nid yet -- we will plug
		 * cpus into nodes once the memory scan has discovered
		 * the topology.
		 */
		if (nid < 0)
			continue;
		node_set_online(nid);
	}

	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
	memory = NULL;
	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start;
		unsigned long size;
		int nid;
		int ranges;
		unsigned int *memcell_buf;
		unsigned int len;

		memcell_buf = (unsigned int *)get_property(memory,
			"linux,usable-memory", &len);
		if (!memcell_buf || len <= 0)
			memcell_buf = (unsigned int *)get_property(memory,
								   "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
new_range:
		/* these are order-sensitive, and modify the buffer pointer */
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);

		/*
		 * Assumption: either all memory nodes or none will
		 * have associativity properties. If none, then
		 * everything goes to default_nid.
		 */
		nid = of_node_to_nid_single(memory);
		if (nid < 0)
			nid = default_nid;
		node_set_online(nid);

		if (!(size = numa_enforce_memory_limit(start, size))) {
			if (--ranges)
				goto new_range;
			else
				continue;
		}

		add_region(nid, start >> PAGE_SHIFT,
			   size >> PAGE_SHIFT);

		if (--ranges)
			goto new_range;
	}

	return 0;
}

static void __init setup_nonnuma(void)
{
	unsigned long top_of_ram = lmb_end_of_DRAM();
	unsigned long total_ram = lmb_phys_mem_size();
	unsigned int i;

	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
	       top_of_ram, total_ram);
	printk(KERN_INFO "Memory hole size: %ldMB\n",
	       (top_of_ram - total_ram) >> 20);

	for (i = 0; i < lmb.memory.cnt; ++i)
		add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
			   lmb_size_pages(&lmb.memory, i));
	node_set_online(0);
}

void __init dump_numa_cpu_topology(void)
{
	unsigned int node;
	unsigned int cpu, count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		printk(KERN_INFO "Node %d CPUs:", node);

		count = 0;
		/*
		 * If we used a CPU iterator here we would miss printing
		 * the holes in the cpumap.
		 */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
				if (count == 0)
					printk(" %u", cpu);
				++count;
			} else {
				if (count > 1)
					printk("-%u", cpu - 1);
				count = 0;
			}
		}

		if (count > 1)
			printk("-%u", NR_CPUS - 1);
		printk("\n");
	}
}

static void __init dump_numa_memory_topology(void)
{
	unsigned int node;
	unsigned int count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		unsigned long i;

		printk(KERN_INFO "Node %d Memory:", node);

		count = 0;

		for (i = 0; i < lmb_end_of_DRAM();
		     i += (1 << SECTION_SIZE_BITS)) {
			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
				if (count == 0)
					printk(" 0x%lx", i);
				++count;
			} else {
				if (count > 0)
					printk("-0x%lx", i);
				count = 0;
			}
		}

		if (count > 0)
			printk("-0x%lx", i);
		printk("\n");
	}
}
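
/*
 * Illustrative example (not part of the original file): on a two-node
 * partition the dump routines above produce dmesg lines along the lines
 * of
 *
 *	Node 0 CPUs: 0-3
 *	Node 1 CPUs: 4-7
 *	Node 0 Memory: 0x0-0x20000000
 *	Node 1 Memory: 0x20000000-0x40000000
 *
 * with memory walked in SECTION_SIZE_BITS-sized steps and holes in the
 * cpumap printed as separate ranges.  Exact values depend on the machine.
 */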

/*
 * Allocate some memory, satisfying the lmb or bootmem allocator where
 * required.  nid is the preferred node and end_pfn is the (exclusive)
 * last pfn of that node; the allocation is constrained to lie below
 * end_pfn << PAGE_SHIFT where possible.
 *
 * Returns the physical address of the memory.
 */
static void __init *careful_allocation(int nid, unsigned long size,
				       unsigned long align,
				       unsigned long end_pfn)
{
	int new_nid;
	unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);

	/* retry over all memory */
	if (!ret)
		ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());

	if (!ret)
		panic("numa.c: cannot allocate %lu bytes on node %d",
		      size, nid);

	/*
	 * If the memory came from a previously allocated node, we must
	 * retry with the bootmem allocator.
	 */
	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
	if (new_nid < nid) {
		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
				size, align, 0);

		if (!ret)
			panic("numa.c: cannot allocate %lu bytes on node %d",
			      size, new_nid);

		ret = __pa(ret);

		dbg("alloc_bootmem %lx %lx\n", ret, size);
	}

	return (void *)ret;
}
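
/*
 * Illustrative example (not part of the original file): while setting up
 * node 1, do_init_bootmem() below asks careful_allocation() for node 1's
 * pglist_data.  If the lmb allocator can only satisfy the request with
 * memory that early_pfn_to_nid() attributes to node 0, then new_nid (0)
 * is less than nid (1).  Node 0 has already been initialised at that
 * point, so its bootmem allocator owns that memory, and the allocation
 * is redone through __alloc_bootmem_node() on node 0 instead.
 */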

void __init do_init_bootmem(void)
{
	int nid;
	unsigned int i;
	static struct notifier_block ppc64_numa_nb = {
		.notifier_call = cpu_numa_callback,
		.priority = 1 /* Must run before sched domains notifier. */
	};

	min_low_pfn = 0;
	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
	max_pfn = max_low_pfn;

	if (parse_numa_properties())
		setup_nonnuma();
	else
		dump_numa_memory_topology();

	register_cpu_notifier(&ppc64_numa_nb);
	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
			  (void *)(unsigned long)boot_cpuid);

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn, pages_present;
		unsigned long bootmem_paddr;
		unsigned long bootmap_pages;

		get_region(nid, &start_pfn, &end_pfn, &pages_present);

		/* Allocate the node structure node local if possible */
		NODE_DATA(nid) = careful_allocation(nid,
					sizeof(struct pglist_data),
					SMP_CACHE_BYTES, end_pfn);
		NODE_DATA(nid) = __va(NODE_DATA(nid));
		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));

		dbg("node %d\n", nid);
		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));

		NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
		NODE_DATA(nid)->node_start_pfn = start_pfn;
		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;

		if (NODE_DATA(nid)->node_spanned_pages == 0)
			continue;

		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);

		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
		bootmem_paddr = (unsigned long)careful_allocation(nid,
					bootmap_pages << PAGE_SHIFT,
					PAGE_SIZE, end_pfn);
		memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);

		dbg("bootmap_paddr = %lx\n", bootmem_paddr);

		init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
				  start_pfn, end_pfn);

		/* Add free regions on this node */
		for (i = 0; init_node_data[i].end_pfn; i++) {
			unsigned long start, end;

			if (init_node_data[i].nid != nid)
				continue;

			start = init_node_data[i].start_pfn << PAGE_SHIFT;
			end = init_node_data[i].end_pfn << PAGE_SHIFT;

			dbg("free_bootmem %lx %lx\n", start, end - start);
			free_bootmem_node(NODE_DATA(nid), start, end - start);
		}

		/* Mark reserved regions on this node */
		for (i = 0; i < lmb.reserved.cnt; i++) {
			unsigned long physbase = lmb.reserved.region[i].base;
			unsigned long size = lmb.reserved.region[i].size;
			unsigned long start_paddr = start_pfn << PAGE_SHIFT;
			unsigned long end_paddr = end_pfn << PAGE_SHIFT;

			if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid &&
			    early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid)
				continue;

			if (physbase < end_paddr &&
			    (physbase+size) > start_paddr) {
				/* overlaps */
				if (physbase < start_paddr) {
					size -= start_paddr - physbase;
					physbase = start_paddr;
				}

				if (size > end_paddr - physbase)
					size = end_paddr - physbase;

				dbg("reserve_bootmem %lx %lx\n", physbase,
					size);
				reserve_bootmem_node(NODE_DATA(nid), physbase,
						     size);
			}
		}

		/* Add regions into sparsemem */
		for (i = 0; init_node_data[i].end_pfn; i++) {
			unsigned long start, end;

			if (init_node_data[i].nid != nid)
				continue;

			start = init_node_data[i].start_pfn;
			end = init_node_data[i].end_pfn;

			memory_present(nid, start, end);
		}
	}
}

void __init paging_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES];
	unsigned long zholes_size[MAX_NR_ZONES];
	int nid;

	memset(zones_size, 0, sizeof(zones_size));
	memset(zholes_size, 0, sizeof(zholes_size));

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn, pages_present;

		get_region(nid, &start_pfn, &end_pfn, &pages_present);

		zones_size[ZONE_DMA] = end_pfn - start_pfn;
		zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;

		dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
		    zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);

		free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
				    zholes_size);
	}
}

static int __init early_numa(char *p)
{
	if (!p)
		return 0;

	if (strstr(p, "off"))
		numa_enabled = 0;

	if (strstr(p, "debug"))
		numa_debug = 1;

	return 0;
}
early_param("numa", early_numa);
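
/*
 * Illustrative usage (not part of the original file): booting with
 * "numa=off" makes parse_numa_properties() bail out so everything is
 * placed on node 0 by setup_nonnuma(), while "numa=debug" turns the
 * dbg() calls throughout this file into KERN_INFO printks.  The options
 * are matched with strstr(), so both can appear in one "numa=" string.
 */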

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Find the node associated with a hot added memory section. Section
 * corresponds to a SPARSEMEM section, not an LMB. It is assumed that
 * sections are fully contained within a single LMB.
 */
int hot_add_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory = NULL;
	nodemask_t nodes;
	int default_nid = any_online_node(NODE_MASK_ALL);
	int nid;

	if (!numa_enabled || (min_common_depth < 0))
		return default_nid;

	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start, size;
		int ranges;
		unsigned int *memcell_buf;
		unsigned int len;

		memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
ha_new_range:
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);
		nid = of_node_to_nid_single(memory);

		/* Domains not present at boot default to 0 */
		if (nid < 0 || !node_online(nid))
			nid = default_nid;

		if ((scn_addr >= start) && (scn_addr < (start + size))) {
			of_node_put(memory);
			goto got_nid;
		}

		if (--ranges)		/* process all ranges in cell */
			goto ha_new_range;
	}
	BUG();	/* section address should be found above */
	return 0;

	/* Temporary code to ensure that returned node is not empty */
got_nid:
	nodes_setall(nodes);
	while (NODE_DATA(nid)->node_spanned_pages == 0) {
		node_clear(nid, nodes);
		nid = any_online_node(nodes);
	}
	return nid;
}
#endif /* CONFIG_MEMORY_HOTPLUG */