/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/poison.h>
#include <linux/pfn.h>
#include <linux/debugfs.h>
#include <linux/kmemleak.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>

#include <asm/sections.h>
#include <linux/io.h>

#include "internal.h"

/**
 * DOC: memblock overview
 *
 * Memblock is a method of managing memory regions during the early
 * boot period when the usual kernel memory allocators are not up and
 * running.
 *
 * Memblock views the system memory as collections of contiguous
 * regions. There are several types of these collections:
 *
 * * ``memory`` - describes the physical memory available to the
 *   kernel; this may differ from the actual physical memory installed
 *   in the system, for instance when the memory is restricted with
 *   the ``mem=`` command line parameter
 * * ``reserved`` - describes the regions that were allocated
 * * ``physmem`` - describes the actual physical memory regardless of
 *   the possible restrictions; the ``physmem`` type is only available
 *   on some architectures.
 *
 * Each region is represented by :c:type:`struct memblock_region` that
 * defines the region extents, its attributes and NUMA node id on NUMA
 * systems. Every memory type is described by the :c:type:`struct
 * memblock_type` which contains an array of memory regions along with
 * the allocator metadata. The memory types are nicely wrapped with
 * :c:type:`struct memblock`. This structure is statically initialized
 * at build time. The region arrays for the "memory" and "reserved"
 * types are initially sized to %INIT_MEMBLOCK_REGIONS and for the
 * "physmem" type to %INIT_PHYSMEM_REGIONS.
 * The :c:func:`memblock_allow_resize` enables automatic resizing of
 * the region arrays during addition of new regions. This feature
 * should be used with care so that memory allocated for the region
 * array will not overlap with areas that should be reserved, for
 * example initrd.
 *
 * The early architecture setup should tell memblock what the physical
 * memory layout is by using :c:func:`memblock_add` or
 * :c:func:`memblock_add_node` functions. The first function does not
 * assign the region to a NUMA node and it is appropriate for UMA
 * systems. Yet, it is possible to use it on NUMA systems as well and
 * assign the region to a NUMA node later in the setup process using
 * :c:func:`memblock_set_node`. The :c:func:`memblock_add_node`
 * performs such an assignment directly.
 *
 * Once memblock is set up, the memory can be allocated using either
 * the memblock or the bootmem APIs.
 *
 * As the system boot progresses, the architecture specific
 * :c:func:`mem_init` function frees all the memory to the buddy page
 * allocator.
 *
 * If an architecture enables %CONFIG_ARCH_DISCARD_MEMBLOCK, the
 * memblock data structures will be discarded after the system
 * initialization completes.
 */
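/*
 * Illustrative sketch only (not part of the kernel build): a hypothetical
 * architecture early-setup helper that wires memblock up in the order the
 * DOC above describes.  The function name, base addresses and sizes below
 * are invented for the example.
 */
static void __init __maybe_unused memblock_doc_example_setup(void)
{
	/* 1) register the RAM banks reported by firmware */
	memblock_add(0x80000000, 0x20000000);			/* 512M, no node */
	memblock_add_node(0xc0000000, 0x20000000, 1);		/* 512M on node 1 */

	/* 2) carve out ranges that must never be handed out, e.g. an initrd */
	memblock_reserve(0x88000000, 0x00800000);

	/* 3) only now is it safe to let the region arrays grow on demand */
	memblock_allow_resize();

	/* 4) early, physically addressed allocations come from memblock */
	(void)memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}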
#ifndef CONFIG_NEED_MULTIPLE_NODES
struct pglist_data __refdata contig_page_data;
EXPORT_SYMBOL(contig_page_data);
#endif

unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
unsigned long long max_possible_pfn;

static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
#endif

struct memblock memblock __initdata_memblock = {
	.memory.regions		= memblock_memory_init_regions,
	.memory.cnt		= 1,	/* empty dummy entry */
	.memory.max		= INIT_MEMBLOCK_REGIONS,
	.memory.name		= "memory",

	.reserved.regions	= memblock_reserved_init_regions,
	.reserved.cnt		= 1,	/* empty dummy entry */
	.reserved.max		= INIT_MEMBLOCK_REGIONS,
	.reserved.name		= "reserved",

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
	.physmem.regions	= memblock_physmem_init_regions,
	.physmem.cnt		= 1,	/* empty dummy entry */
	.physmem.max		= INIT_PHYSMEM_REGIONS,
	.physmem.name		= "physmem",
#endif

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};

int memblock_debug __initdata_memblock;
static bool system_has_some_mirror __initdata_memblock = false;
static int memblock_can_resize __initdata_memblock;
static int memblock_memory_in_slab __initdata_memblock = 0;
static int memblock_reserved_in_slab __initdata_memblock = 0;

enum memblock_flags __init_memblock choose_memblock_flags(void)
{
	return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
}

/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
{
	return *size = min(*size, PHYS_ADDR_MAX - base);
}

/*
 * Address comparison utilities
 */
static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
				       phys_addr_t base2, phys_addr_t size2)
{
	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size)
{
	unsigned long i;

	for (i = 0; i < type->cnt; i++)
		if (memblock_addrs_overlap(base, size, type->regions[i].base,
					   type->regions[i].size))
			break;
	return i < type->cnt;
}

/**
 * __memblock_find_range_bottom_up - find free area utility in bottom-up
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Utility called from memblock_find_in_range_node(), find free area bottom-up.
 *
 * Return:
 * Found address on success, 0 on failure.
174 */ 175 static phys_addr_t __init_memblock 176 __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end, 177 phys_addr_t size, phys_addr_t align, int nid, 178 enum memblock_flags flags) 179 { 180 phys_addr_t this_start, this_end, cand; 181 u64 i; 182 183 for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) { 184 this_start = clamp(this_start, start, end); 185 this_end = clamp(this_end, start, end); 186 187 cand = round_up(this_start, align); 188 if (cand < this_end && this_end - cand >= size) 189 return cand; 190 } 191 192 return 0; 193 } 194 195 /** 196 * __memblock_find_range_top_down - find free area utility, in top-down 197 * @start: start of candidate range 198 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or 199 * %MEMBLOCK_ALLOC_ACCESSIBLE 200 * @size: size of free area to find 201 * @align: alignment of free area to find 202 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 203 * @flags: pick from blocks based on memory attributes 204 * 205 * Utility called from memblock_find_in_range_node(), find free area top-down. 206 * 207 * Return: 208 * Found address on success, 0 on failure. 209 */ 210 static phys_addr_t __init_memblock 211 __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, 212 phys_addr_t size, phys_addr_t align, int nid, 213 enum memblock_flags flags) 214 { 215 phys_addr_t this_start, this_end, cand; 216 u64 i; 217 218 for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end, 219 NULL) { 220 this_start = clamp(this_start, start, end); 221 this_end = clamp(this_end, start, end); 222 223 if (this_end < size) 224 continue; 225 226 cand = round_down(this_end - size, align); 227 if (cand >= this_start) 228 return cand; 229 } 230 231 return 0; 232 } 233 234 /** 235 * memblock_find_in_range_node - find free area in given range and node 236 * @size: size of free area to find 237 * @align: alignment of free area to find 238 * @start: start of candidate range 239 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or 240 * %MEMBLOCK_ALLOC_ACCESSIBLE 241 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 242 * @flags: pick from blocks based on memory attributes 243 * 244 * Find @size free area aligned to @align in the specified range and node. 245 * 246 * When allocation direction is bottom-up, the @start should be greater 247 * than the end of the kernel image. Otherwise, it will be trimmed. The 248 * reason is that we want the bottom-up allocation just near the kernel 249 * image so it is highly likely that the allocated memory and the kernel 250 * will reside in the same node. 251 * 252 * If bottom-up allocation failed, will try to allocate memory top-down. 253 * 254 * Return: 255 * Found address on success, 0 on failure. 256 */ 257 phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, 258 phys_addr_t align, phys_addr_t start, 259 phys_addr_t end, int nid, 260 enum memblock_flags flags) 261 { 262 phys_addr_t kernel_end, ret; 263 264 /* pump up @end */ 265 if (end == MEMBLOCK_ALLOC_ACCESSIBLE) 266 end = memblock.current_limit; 267 268 /* avoid allocating the first page */ 269 start = max_t(phys_addr_t, start, PAGE_SIZE); 270 end = max(start, end); 271 kernel_end = __pa_symbol(_end); 272 273 /* 274 * try bottom-up allocation only when bottom-up mode 275 * is set and @end is above the kernel image. 
	 */
	if (memblock_bottom_up() && end > kernel_end) {
		phys_addr_t bottom_up_start;

		/* make sure we will allocate above the kernel */
		bottom_up_start = max(start, kernel_end);

		/* ok, try bottom-up allocation first */
		ret = __memblock_find_range_bottom_up(bottom_up_start, end,
						      size, align, nid, flags);
		if (ret)
			return ret;

		/*
		 * we always limit bottom-up allocation above the kernel,
		 * but top-down allocation doesn't have the limit, so
		 * retrying top-down allocation may succeed when bottom-up
		 * allocation failed.
		 *
		 * bottom-up allocation is expected to fail very rarely,
		 * so we use WARN_ONCE() here to see the stack trace if
		 * a failure happens.
		 */
		WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE),
			  "memblock: bottom-up allocation failed, memory hotremove may be affected\n");
	}

	return __memblock_find_range_top_down(start, end, size, align, nid,
					      flags);
}

/**
 * memblock_find_in_range - find free area in given range
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 *
 * Find @size free area aligned to @align in the specified range.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
					phys_addr_t end, phys_addr_t size,
					phys_addr_t align)
{
	phys_addr_t ret;
	enum memblock_flags flags = choose_memblock_flags();

again:
	ret = memblock_find_in_range_node(size, align, start, end,
					  NUMA_NO_NODE, flags);

	if (!ret && (flags & MEMBLOCK_MIRROR)) {
		pr_warn("Could not allocate %pap bytes of mirrored memory\n",
			&size);
		flags &= ~MEMBLOCK_MIRROR;
		goto again;
	}

	return ret;
}

static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
{
	type->total_size -= type->regions[r].size;
	memmove(&type->regions[r], &type->regions[r + 1],
		(type->cnt - (r + 1)) * sizeof(type->regions[r]));
	type->cnt--;

	/* Special case for empty arrays */
	if (type->cnt == 0) {
		WARN_ON(type->total_size != 0);
		type->cnt = 1;
		type->regions[0].base = 0;
		type->regions[0].size = 0;
		type->regions[0].flags = 0;
		memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
	}
}

#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
/**
 * memblock_discard - discard memory and reserved arrays if they were allocated
 */
void __init memblock_discard(void)
{
	phys_addr_t addr, size;

	if (memblock.reserved.regions != memblock_reserved_init_regions) {
		addr = __pa(memblock.reserved.regions);
		size = PAGE_ALIGN(sizeof(struct memblock_region) *
				  memblock.reserved.max);
		__memblock_free_late(addr, size);
	}

	if (memblock.memory.regions != memblock_memory_init_regions) {
		addr = __pa(memblock.memory.regions);
		size = PAGE_ALIGN(sizeof(struct memblock_region) *
				  memblock.memory.max);
		__memblock_free_late(addr, size);
	}
}
#endif
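/*
 * Illustrative sketch (not built): the usual "find, then reserve" pattern
 * on top of memblock_find_in_range(), similar to what memblock_double_array()
 * below does for its new region array.  The function name and the choice of
 * limits are assumptions made up for the example.
 */
static phys_addr_t __init __maybe_unused memblock_example_carve_out(phys_addr_t size)
{
	phys_addr_t addr;

	/* look for a page-aligned hole below the current limit */
	addr = memblock_find_in_range(0, memblock.current_limit,
				      size, PAGE_SIZE);
	if (!addr)
		return 0;

	/* the range is only protected once it is also reserved */
	if (memblock_reserve(addr, size))
		return 0;

	return addr;
}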
/**
 * memblock_double_array - double the size of the memblock regions array
 * @type: memblock type of the regions array being doubled
 * @new_area_start: starting address of memory range to avoid overlap with
 * @new_area_size: size of memory range to avoid overlap with
 *
 * Double the size of the @type regions array. If memblock is being used to
 * allocate memory for a new reserved regions array and there is a previously
 * allocated memory range [@new_area_start, @new_area_start + @new_area_size]
 * waiting to be reserved, ensure the memory used by the new array does
 * not overlap.
 *
 * Return:
 * 0 on success, -1 on failure.
 */
static int __init_memblock memblock_double_array(struct memblock_type *type,
						phys_addr_t new_area_start,
						phys_addr_t new_area_size)
{
	struct memblock_region *new_array, *old_array;
	phys_addr_t old_alloc_size, new_alloc_size;
	phys_addr_t old_size, new_size, addr, new_end;
	int use_slab = slab_is_available();
	int *in_slab;

	/* We don't allow resizing until we know about the reserved regions
	 * of memory that aren't suitable for allocation
	 */
	if (!memblock_can_resize)
		return -1;

	/* Calculate new doubled size */
	old_size = type->max * sizeof(struct memblock_region);
	new_size = old_size << 1;
	/*
	 * We need to allocate the new one aligned to PAGE_SIZE,
	 * so we can free it completely later.
	 */
	old_alloc_size = PAGE_ALIGN(old_size);
	new_alloc_size = PAGE_ALIGN(new_size);

	/* Retrieve the slab flag */
	if (type == &memblock.memory)
		in_slab = &memblock_memory_in_slab;
	else
		in_slab = &memblock_reserved_in_slab;

	/* Try to find some space for it.
	 *
	 * WARNING: We assume that either slab_is_available() and we use it or
	 * we use MEMBLOCK for allocations. That means that this is unsafe to
	 * use when bootmem is currently active (unless bootmem itself is
	 * implemented on top of MEMBLOCK which isn't the case yet)
	 *
	 * This should however not be an issue for now, as we currently only
	 * call into MEMBLOCK while it's still active, or much later when slab
	 * is active for memory hotplug operations
	 */
	if (use_slab) {
		new_array = kmalloc(new_size, GFP_KERNEL);
		addr = new_array ? __pa(new_array) : 0;
	} else {
		/* only exclude range when trying to double reserved.regions */
		if (type != &memblock.reserved)
			new_area_start = new_area_size = 0;

		addr = memblock_find_in_range(new_area_start + new_area_size,
						memblock.current_limit,
						new_alloc_size, PAGE_SIZE);
		if (!addr && new_area_size)
			addr = memblock_find_in_range(0,
				min(new_area_start, memblock.current_limit),
				new_alloc_size, PAGE_SIZE);

		new_array = addr ? __va(addr) : NULL;
	}
	if (!addr) {
		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
		       type->name, type->max, type->max * 2);
		return -1;
	}

	new_end = addr + new_size - 1;
	memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]",
			type->name, type->max * 2, &addr, &new_end);

	/*
	 * Found space, we now need to move the array over before we add the
	 * reserved region since it may be our reserved array itself that is
	 * full.
	 */
	memcpy(new_array, type->regions, old_size);
	memset(new_array + type->max, 0, old_size);
	old_array = type->regions;
	type->regions = new_array;
	type->max <<= 1;

	/* Free old array.
We needn't free it if the array is the static one */ 481 if (*in_slab) 482 kfree(old_array); 483 else if (old_array != memblock_memory_init_regions && 484 old_array != memblock_reserved_init_regions) 485 memblock_free(__pa(old_array), old_alloc_size); 486 487 /* 488 * Reserve the new array if that comes from the memblock. Otherwise, we 489 * needn't do it 490 */ 491 if (!use_slab) 492 BUG_ON(memblock_reserve(addr, new_alloc_size)); 493 494 /* Update slab flag */ 495 *in_slab = use_slab; 496 497 return 0; 498 } 499 500 /** 501 * memblock_merge_regions - merge neighboring compatible regions 502 * @type: memblock type to scan 503 * 504 * Scan @type and merge neighboring compatible regions. 505 */ 506 static void __init_memblock memblock_merge_regions(struct memblock_type *type) 507 { 508 int i = 0; 509 510 /* cnt never goes below 1 */ 511 while (i < type->cnt - 1) { 512 struct memblock_region *this = &type->regions[i]; 513 struct memblock_region *next = &type->regions[i + 1]; 514 515 if (this->base + this->size != next->base || 516 memblock_get_region_node(this) != 517 memblock_get_region_node(next) || 518 this->flags != next->flags) { 519 BUG_ON(this->base + this->size > next->base); 520 i++; 521 continue; 522 } 523 524 this->size += next->size; 525 /* move forward from next + 1, index of which is i + 2 */ 526 memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); 527 type->cnt--; 528 } 529 } 530 531 /** 532 * memblock_insert_region - insert new memblock region 533 * @type: memblock type to insert into 534 * @idx: index for the insertion point 535 * @base: base address of the new region 536 * @size: size of the new region 537 * @nid: node id of the new region 538 * @flags: flags of the new region 539 * 540 * Insert new memblock region [@base, @base + @size) into @type at @idx. 541 * @type must already have extra room to accommodate the new region. 542 */ 543 static void __init_memblock memblock_insert_region(struct memblock_type *type, 544 int idx, phys_addr_t base, 545 phys_addr_t size, 546 int nid, 547 enum memblock_flags flags) 548 { 549 struct memblock_region *rgn = &type->regions[idx]; 550 551 BUG_ON(type->cnt >= type->max); 552 memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); 553 rgn->base = base; 554 rgn->size = size; 555 rgn->flags = flags; 556 memblock_set_region_node(rgn, nid); 557 type->cnt++; 558 type->total_size += size; 559 } 560 561 /** 562 * memblock_add_range - add new memblock region 563 * @type: memblock type to add new region into 564 * @base: base address of the new region 565 * @size: size of the new region 566 * @nid: nid of the new region 567 * @flags: flags of the new region 568 * 569 * Add new memblock region [@base, @base + @size) into @type. The new region 570 * is allowed to overlap with existing ones - overlaps don't affect already 571 * existing regions. @type is guaranteed to be minimal (all neighbouring 572 * compatible regions are merged) after the addition. 573 * 574 * Return: 575 * 0 on success, -errno on failure. 
 */
int __init_memblock memblock_add_range(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size,
				int nid, enum memblock_flags flags)
{
	bool insert = false;
	phys_addr_t obase = base;
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx, nr_new;
	struct memblock_region *rgn;

	if (!size)
		return 0;

	/* special case for empty array */
	if (type->regions[0].size == 0) {
		WARN_ON(type->cnt != 1 || type->total_size);
		type->regions[0].base = base;
		type->regions[0].size = size;
		type->regions[0].flags = flags;
		memblock_set_region_node(&type->regions[0], nid);
		type->total_size = size;
		return 0;
	}
repeat:
	/*
	 * The following is executed twice. Once with %false @insert and
	 * then with %true. The first counts the number of regions needed
	 * to accommodate the new area. The second actually inserts them.
	 */
	base = obase;
	nr_new = 0;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps. If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
			WARN_ON(nid != memblock_get_region_node(rgn));
#endif
			WARN_ON(flags != rgn->flags);
			nr_new++;
			if (insert)
				memblock_insert_region(type, idx++, base,
						       rbase - base, nid,
						       flags);
		}
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
	}

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert)
			memblock_insert_region(type, idx, base, end - base,
					       nid, flags);
	}

	if (!nr_new)
		return 0;

	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
	 */
	if (!insert) {
		while (type->cnt + nr_new > type->max)
			if (memblock_double_array(type, obase, size) < 0)
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type);
		return 0;
	}
}

/**
 * memblock_add_node - add new memblock region within a NUMA node
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 *
 * Add new memblock region [@base, @base + @size) to the "memory"
 * type. See memblock_add_range() description for more details.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
				      int nid)
{
	return memblock_add_range(&memblock.memory, base, size, nid, 0);
}
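/*
 * Illustrative sketch (not built): two equivalent ways for early arch code
 * to register a bank of RAM that belongs to NUMA node 1.  The addresses and
 * the function name are invented; memblock_set_node() is only available with
 * CONFIG_HAVE_MEMBLOCK_NODE_MAP.
 */
static void __init __maybe_unused memblock_example_add_numa(void)
{
	/* either attach the node id right away ... */
	memblock_add_node(0x40000000, 0x10000000, 1);

#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
	/* ... or add the range first and assign the node later */
	memblock_add(0x60000000, 0x10000000);
	memblock_set_node(0x60000000, 0x10000000, &memblock.memory, 1);
#endif
}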
/**
 * memblock_add - add new memblock region
 * @base: base address of the new region
 * @size: size of the new region
 *
 * Add new memblock region [@base, @base + @size) to the "memory"
 * type. See memblock_add_range() description for more details.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("memblock_add: [%pa-%pa] %pF\n",
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
}
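/*
 * Worked example (all values invented for illustration): because
 * memblock_add_range() merges neighbouring compatible regions, the calls
 *
 *	memblock_add(0x80000000, 0x20000000);	covers 0x80000000 - 0x9fffffff
 *	memblock_add(0x90000000, 0x20000000);	covers 0x90000000 - 0xafffffff
 *
 * leave a single "memory" region spanning 0x80000000 - 0xafffffff rather
 * than two overlapping entries.
 */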
/**
 * memblock_isolate_range - isolate given range into disjoint memblocks
 * @type: memblock type to isolate range for
 * @base: base of range to isolate
 * @size: size of range to isolate
 * @start_rgn: out parameter for the start of isolated region
 * @end_rgn: out parameter for the end of isolated region
 *
 * Walk @type and ensure that regions don't cross the boundaries defined by
 * [@base, @base + @size). Crossing regions are split at the boundaries,
 * which may create at most two more regions. The index of the first
 * region inside the range is returned in *@start_rgn and end in *@end_rgn.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_isolate_range(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size,
					int *start_rgn, int *end_rgn)
{
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx;
	struct memblock_region *rgn;

	*start_rgn = *end_rgn = 0;

	if (!size)
		return 0;

	/* we'll create at most two more regions */
	while (type->cnt + 2 > type->max)
		if (memblock_double_array(type, base, size) < 0)
			return -ENOMEM;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;

		if (rbase < base) {
			/*
			 * @rgn intersects from below. Split and continue
			 * to process the next region - the new top half.
			 */
			rgn->base = base;
			rgn->size -= base - rbase;
			type->total_size -= base - rbase;
			memblock_insert_region(type, idx, rbase, base - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else if (rend > end) {
			/*
			 * @rgn intersects from above. Split and redo the
			 * current region - the new bottom half.
			 */
			rgn->base = end;
			rgn->size -= end - rbase;
			type->total_size -= end - rbase;
			memblock_insert_region(type, idx--, rbase, end - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else {
			/* @rgn is fully contained, record it */
			if (!*end_rgn)
				*start_rgn = idx;
			*end_rgn = idx + 1;
		}
	}

	return 0;
}

static int __init_memblock memblock_remove_range(struct memblock_type *type,
					  phys_addr_t base, phys_addr_t size)
{
	int start_rgn, end_rgn;
	int i, ret;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = end_rgn - 1; i >= start_rgn; i--)
		memblock_remove_region(type, i);
	return 0;
}

int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("memblock_remove: [%pa-%pa] %pS\n",
		     &base, &end, (void *)_RET_IP_);

	return memblock_remove_range(&memblock.memory, base, size);
}


int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg(" memblock_free: [%pa-%pa] %pF\n",
		     &base, &end, (void *)_RET_IP_);

	kmemleak_free_part_phys(base, size);
	return memblock_remove_range(&memblock.reserved, base, size);
}

int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("memblock_reserve: [%pa-%pa] %pF\n",
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0);
}

/**
 * memblock_setclr_flag - set or clear flag for a memory region
 * @base: base address of the region
 * @size: size of the region
 * @set: set or clear the flag
 * @flag: the flag to update
 *
 * This function isolates region [@base, @base + @size), and sets/clears @flag.
 *
 * Return: 0 on success, -errno on failure.
 */
static int __init_memblock memblock_setclr_flag(phys_addr_t base,
				phys_addr_t size, int set, int flag)
{
	struct memblock_type *type = &memblock.memory;
	int i, ret, start_rgn, end_rgn;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++)
		if (set)
			memblock_set_region_flags(&type->regions[i], flag);
		else
			memblock_clear_region_flags(&type->regions[i], flag);

	memblock_merge_regions(type);
	return 0;
}

/**
 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(base, size, 1, MEMBLOCK_HOTPLUG);
}

/**
 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(base, size, 0, MEMBLOCK_HOTPLUG);
}
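/*
 * Illustrative sketch (not built): how early platform code might tag ranges
 * with memblock attributes once firmware information has been parsed.  The
 * function name and all addresses are hypothetical.
 */
static void __init __maybe_unused memblock_example_mark_attributes(void)
{
	/* allow this bank to be offlined/removed later */
	memblock_mark_hotplug(0xc0000000, 0x10000000);

	/* prefer this mirrored bank for kernel-critical allocations */
	memblock_mark_mirror(0x80000000, 0x08000000);

	/* firmware-owned range: keep it out of the linear mapping */
	memblock_mark_nomap(0xfe000000, 0x01000000);
}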
/**
 * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
{
	system_has_some_mirror = true;

	return memblock_setclr_flag(base, size, 1, MEMBLOCK_MIRROR);
}

/**
 * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP);
}

/**
 * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
}

/**
 * __next_reserved_mem_region - next function for for_each_reserved_region()
 * @idx: pointer to u64 loop variable
 * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the region, can be %NULL
 *
 * Iterate over all reserved memory regions.
 */
void __init_memblock __next_reserved_mem_region(u64 *idx,
					   phys_addr_t *out_start,
					   phys_addr_t *out_end)
{
	struct memblock_type *type = &memblock.reserved;

	if (*idx < type->cnt) {
		struct memblock_region *r = &type->regions[*idx];
		phys_addr_t base = r->base;
		phys_addr_t size = r->size;

		if (out_start)
			*out_start = base;
		if (out_end)
			*out_end = base + size - 1;

		*idx += 1;
		return;
	}

	/* signal end of iteration */
	*idx = ULLONG_MAX;
}
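/*
 * Illustrative sketch (not built): typical use of the iterators that the
 * __next_*() helpers below implement.  free_low_memory_core_early() at the
 * end of this file does essentially the same walk; the function name and
 * the pr_debug() output are only for the example.
 */
static void __init __maybe_unused memblock_example_walk_ranges(void)
{
	phys_addr_t start, end;
	phys_addr_t free_bytes = 0;
	u64 i;

	/* every [start, end] range that is reserved (end is inclusive here) */
	for_each_reserved_mem_region(i, &start, &end)
		pr_debug("reserved: [%pa-%pa]\n", &start, &end);

	/* every [start, end) range that is memory but not reserved */
	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
				&start, &end, NULL)
		free_bytes += end - start;

	pr_debug("free: %pa bytes\n", &free_bytes);
}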
/**
 * __next_mem_range - next function for for_each_free_mem_range() etc.
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Find the first area from *@idx which matches @nid, fill the out
 * parameters, and update *@idx for the next iteration. The lower 32bit of
 * *@idx contains index into type_a and the upper 32bit indexes the
 * areas before each region in type_b. For example, if type_b regions
 * look like the following,
 *
 *	0:[0-16), 1:[32-48), 2:[128-130)
 *
 * The upper 32bit indexes the following regions.
 *
 *	0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
 *
 * As both region arrays are sorted, the function advances the two indices
 * in lockstep and returns each intersection.
 */
void __init_memblock __next_mem_range(u64 *idx, int nid,
				      enum memblock_flags flags,
				      struct memblock_type *type_a,
				      struct memblock_type *type_b,
				      phys_addr_t *out_start,
				      phys_addr_t *out_end, int *out_nid)
{
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;

	if (WARN_ONCE(nid == MAX_NUMNODES,
	"Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
		nid = NUMA_NO_NODE;

	for (; idx_a < type_a->cnt; idx_a++) {
		struct memblock_region *m = &type_a->regions[idx_a];

		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
		int	    m_nid = memblock_get_region_node(m);

		/* only memory regions are associated with nodes, check it */
		if (nid != NUMA_NO_NODE && nid != m_nid)
			continue;

		/* skip hotpluggable memory regions if needed */
		if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
			continue;

		/* if we want mirror memory skip non-mirror memory regions */
		if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
			continue;

		/* skip nomap memory unless we were asked for it explicitly */
		if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
			continue;

		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a++;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b < type_b->cnt + 1; idx_b++) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
				r->base : PHYS_ADDR_MAX;

			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */
			if (r_start >= m_end)
				break;
			/* if the two regions intersect, we're done */
			if (m_start < r_end) {
				if (out_start)
					*out_start =
						max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
					*out_nid = m_nid;
				/*
				 * The region which ends first is
				 * advanced for the next iteration.
				 */
				if (m_end <= r_end)
					idx_a++;
				else
					idx_b++;
				*idx = (u32)idx_a | (u64)idx_b << 32;
				return;
			}
		}
	}

	/* signal end of iteration */
	*idx = ULLONG_MAX;
}

/**
 * __next_mem_range_rev - generic next function for for_each_*_range_rev()
 *
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Finds the next range from type_a which is not marked as unsuitable
 * in type_b.
 *
 * Reverse of __next_mem_range().
1085 */ 1086 void __init_memblock __next_mem_range_rev(u64 *idx, int nid, 1087 enum memblock_flags flags, 1088 struct memblock_type *type_a, 1089 struct memblock_type *type_b, 1090 phys_addr_t *out_start, 1091 phys_addr_t *out_end, int *out_nid) 1092 { 1093 int idx_a = *idx & 0xffffffff; 1094 int idx_b = *idx >> 32; 1095 1096 if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) 1097 nid = NUMA_NO_NODE; 1098 1099 if (*idx == (u64)ULLONG_MAX) { 1100 idx_a = type_a->cnt - 1; 1101 if (type_b != NULL) 1102 idx_b = type_b->cnt; 1103 else 1104 idx_b = 0; 1105 } 1106 1107 for (; idx_a >= 0; idx_a--) { 1108 struct memblock_region *m = &type_a->regions[idx_a]; 1109 1110 phys_addr_t m_start = m->base; 1111 phys_addr_t m_end = m->base + m->size; 1112 int m_nid = memblock_get_region_node(m); 1113 1114 /* only memory regions are associated with nodes, check it */ 1115 if (nid != NUMA_NO_NODE && nid != m_nid) 1116 continue; 1117 1118 /* skip hotpluggable memory regions if needed */ 1119 if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) 1120 continue; 1121 1122 /* if we want mirror memory skip non-mirror memory regions */ 1123 if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m)) 1124 continue; 1125 1126 /* skip nomap memory unless we were asked for it explicitly */ 1127 if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m)) 1128 continue; 1129 1130 if (!type_b) { 1131 if (out_start) 1132 *out_start = m_start; 1133 if (out_end) 1134 *out_end = m_end; 1135 if (out_nid) 1136 *out_nid = m_nid; 1137 idx_a--; 1138 *idx = (u32)idx_a | (u64)idx_b << 32; 1139 return; 1140 } 1141 1142 /* scan areas before each reservation */ 1143 for (; idx_b >= 0; idx_b--) { 1144 struct memblock_region *r; 1145 phys_addr_t r_start; 1146 phys_addr_t r_end; 1147 1148 r = &type_b->regions[idx_b]; 1149 r_start = idx_b ? r[-1].base + r[-1].size : 0; 1150 r_end = idx_b < type_b->cnt ? 1151 r->base : PHYS_ADDR_MAX; 1152 /* 1153 * if idx_b advanced past idx_a, 1154 * break out to advance idx_a 1155 */ 1156 1157 if (r_end <= m_start) 1158 break; 1159 /* if the two regions intersect, we're done */ 1160 if (m_end > r_start) { 1161 if (out_start) 1162 *out_start = max(m_start, r_start); 1163 if (out_end) 1164 *out_end = min(m_end, r_end); 1165 if (out_nid) 1166 *out_nid = m_nid; 1167 if (m_start >= r_start) 1168 idx_a--; 1169 else 1170 idx_b--; 1171 *idx = (u32)idx_a | (u64)idx_b << 32; 1172 return; 1173 } 1174 } 1175 } 1176 /* signal end of iteration */ 1177 *idx = ULLONG_MAX; 1178 } 1179 1180 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP 1181 /* 1182 * Common iterator interface used to define for_each_mem_pfn_range(). 
1183 */ 1184 void __init_memblock __next_mem_pfn_range(int *idx, int nid, 1185 unsigned long *out_start_pfn, 1186 unsigned long *out_end_pfn, int *out_nid) 1187 { 1188 struct memblock_type *type = &memblock.memory; 1189 struct memblock_region *r; 1190 1191 while (++*idx < type->cnt) { 1192 r = &type->regions[*idx]; 1193 1194 if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) 1195 continue; 1196 if (nid == MAX_NUMNODES || nid == r->nid) 1197 break; 1198 } 1199 if (*idx >= type->cnt) { 1200 *idx = -1; 1201 return; 1202 } 1203 1204 if (out_start_pfn) 1205 *out_start_pfn = PFN_UP(r->base); 1206 if (out_end_pfn) 1207 *out_end_pfn = PFN_DOWN(r->base + r->size); 1208 if (out_nid) 1209 *out_nid = r->nid; 1210 } 1211 1212 /** 1213 * memblock_set_node - set node ID on memblock regions 1214 * @base: base of area to set node ID for 1215 * @size: size of area to set node ID for 1216 * @type: memblock type to set node ID for 1217 * @nid: node ID to set 1218 * 1219 * Set the nid of memblock @type regions in [@base, @base + @size) to @nid. 1220 * Regions which cross the area boundaries are split as necessary. 1221 * 1222 * Return: 1223 * 0 on success, -errno on failure. 1224 */ 1225 int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, 1226 struct memblock_type *type, int nid) 1227 { 1228 int start_rgn, end_rgn; 1229 int i, ret; 1230 1231 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); 1232 if (ret) 1233 return ret; 1234 1235 for (i = start_rgn; i < end_rgn; i++) 1236 memblock_set_region_node(&type->regions[i], nid); 1237 1238 memblock_merge_regions(type); 1239 return 0; 1240 } 1241 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ 1242 1243 static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, 1244 phys_addr_t align, phys_addr_t start, 1245 phys_addr_t end, int nid, 1246 enum memblock_flags flags) 1247 { 1248 phys_addr_t found; 1249 1250 if (!align) { 1251 /* Can't use WARNs this early in boot on powerpc */ 1252 dump_stack(); 1253 align = SMP_CACHE_BYTES; 1254 } 1255 1256 found = memblock_find_in_range_node(size, align, start, end, nid, 1257 flags); 1258 if (found && !memblock_reserve(found, size)) { 1259 /* 1260 * The min_count is set to 0 so that memblock allocations are 1261 * never reported as leaks. 
1262 */ 1263 kmemleak_alloc_phys(found, size, 0, 0); 1264 return found; 1265 } 1266 return 0; 1267 } 1268 1269 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, 1270 phys_addr_t start, phys_addr_t end, 1271 enum memblock_flags flags) 1272 { 1273 return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE, 1274 flags); 1275 } 1276 1277 phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, 1278 phys_addr_t align, phys_addr_t max_addr, 1279 int nid, enum memblock_flags flags) 1280 { 1281 return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags); 1282 } 1283 1284 phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) 1285 { 1286 enum memblock_flags flags = choose_memblock_flags(); 1287 phys_addr_t ret; 1288 1289 again: 1290 ret = memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, 1291 nid, flags); 1292 1293 if (!ret && (flags & MEMBLOCK_MIRROR)) { 1294 flags &= ~MEMBLOCK_MIRROR; 1295 goto again; 1296 } 1297 return ret; 1298 } 1299 1300 phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) 1301 { 1302 return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE, 1303 MEMBLOCK_NONE); 1304 } 1305 1306 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) 1307 { 1308 phys_addr_t alloc; 1309 1310 alloc = __memblock_alloc_base(size, align, max_addr); 1311 1312 if (alloc == 0) 1313 panic("ERROR: Failed to allocate %pa bytes below %pa.\n", 1314 &size, &max_addr); 1315 1316 return alloc; 1317 } 1318 1319 phys_addr_t __init memblock_phys_alloc(phys_addr_t size, phys_addr_t align) 1320 { 1321 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); 1322 } 1323 1324 phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) 1325 { 1326 phys_addr_t res = memblock_phys_alloc_nid(size, align, nid); 1327 1328 if (res) 1329 return res; 1330 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); 1331 } 1332 1333 /** 1334 * memblock_alloc_internal - allocate boot memory block 1335 * @size: size of memory block to be allocated in bytes 1336 * @align: alignment of the region and block's size 1337 * @min_addr: the lower bound of the memory region to allocate (phys address) 1338 * @max_addr: the upper bound of the memory region to allocate (phys address) 1339 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1340 * 1341 * The @min_addr limit is dropped if it can not be satisfied and the allocation 1342 * will fall back to memory below @min_addr. Also, allocation may fall back 1343 * to any node in the system if the specified node can not 1344 * hold the requested memory. 1345 * 1346 * The allocation is performed from memory region limited by 1347 * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE. 1348 * 1349 * The phys address of allocated boot memory block is converted to virtual and 1350 * allocated memory is reset to 0. 1351 * 1352 * In addition, function sets the min_count to 0 using kmemleak_alloc for 1353 * allocated boot memory block, so that it is never reported as leaks. 1354 * 1355 * Return: 1356 * Virtual address of allocated memory block on success, NULL on failure. 
1357 */ 1358 static void * __init memblock_alloc_internal( 1359 phys_addr_t size, phys_addr_t align, 1360 phys_addr_t min_addr, phys_addr_t max_addr, 1361 int nid) 1362 { 1363 phys_addr_t alloc; 1364 void *ptr; 1365 enum memblock_flags flags = choose_memblock_flags(); 1366 1367 if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) 1368 nid = NUMA_NO_NODE; 1369 1370 /* 1371 * Detect any accidental use of these APIs after slab is ready, as at 1372 * this moment memblock may be deinitialized already and its 1373 * internal data may be destroyed (after execution of memblock_free_all) 1374 */ 1375 if (WARN_ON_ONCE(slab_is_available())) 1376 return kzalloc_node(size, GFP_NOWAIT, nid); 1377 1378 if (!align) { 1379 dump_stack(); 1380 align = SMP_CACHE_BYTES; 1381 } 1382 1383 if (max_addr > memblock.current_limit) 1384 max_addr = memblock.current_limit; 1385 again: 1386 alloc = memblock_find_in_range_node(size, align, min_addr, max_addr, 1387 nid, flags); 1388 if (alloc && !memblock_reserve(alloc, size)) 1389 goto done; 1390 1391 if (nid != NUMA_NO_NODE) { 1392 alloc = memblock_find_in_range_node(size, align, min_addr, 1393 max_addr, NUMA_NO_NODE, 1394 flags); 1395 if (alloc && !memblock_reserve(alloc, size)) 1396 goto done; 1397 } 1398 1399 if (min_addr) { 1400 min_addr = 0; 1401 goto again; 1402 } 1403 1404 if (flags & MEMBLOCK_MIRROR) { 1405 flags &= ~MEMBLOCK_MIRROR; 1406 pr_warn("Could not allocate %pap bytes of mirrored memory\n", 1407 &size); 1408 goto again; 1409 } 1410 1411 return NULL; 1412 done: 1413 ptr = phys_to_virt(alloc); 1414 1415 /* 1416 * The min_count is set to 0 so that bootmem allocated blocks 1417 * are never reported as leaks. This is because many of these blocks 1418 * are only referred via the physical address which is not 1419 * looked up by kmemleak. 1420 */ 1421 kmemleak_alloc(ptr, size, 0, 0); 1422 1423 return ptr; 1424 } 1425 1426 /** 1427 * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing 1428 * memory and without panicking 1429 * @size: size of memory block to be allocated in bytes 1430 * @align: alignment of the region and block's size 1431 * @min_addr: the lower bound of the memory region from where the allocation 1432 * is preferred (phys address) 1433 * @max_addr: the upper bound of the memory region from where the allocation 1434 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to 1435 * allocate only from memory limited by memblock.current_limit value 1436 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1437 * 1438 * Public function, provides additional debug information (including caller 1439 * info), if enabled. Does not zero allocated memory, does not panic if request 1440 * cannot be satisfied. 1441 * 1442 * Return: 1443 * Virtual address of allocated memory block on success, NULL on failure. 
1444 */ 1445 void * __init memblock_alloc_try_nid_raw( 1446 phys_addr_t size, phys_addr_t align, 1447 phys_addr_t min_addr, phys_addr_t max_addr, 1448 int nid) 1449 { 1450 void *ptr; 1451 1452 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n", 1453 __func__, (u64)size, (u64)align, nid, &min_addr, 1454 &max_addr, (void *)_RET_IP_); 1455 1456 ptr = memblock_alloc_internal(size, align, 1457 min_addr, max_addr, nid); 1458 if (ptr && size > 0) 1459 page_init_poison(ptr, size); 1460 1461 return ptr; 1462 } 1463 1464 /** 1465 * memblock_alloc_try_nid_nopanic - allocate boot memory block 1466 * @size: size of memory block to be allocated in bytes 1467 * @align: alignment of the region and block's size 1468 * @min_addr: the lower bound of the memory region from where the allocation 1469 * is preferred (phys address) 1470 * @max_addr: the upper bound of the memory region from where the allocation 1471 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to 1472 * allocate only from memory limited by memblock.current_limit value 1473 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1474 * 1475 * Public function, provides additional debug information (including caller 1476 * info), if enabled. This function zeroes the allocated memory. 1477 * 1478 * Return: 1479 * Virtual address of allocated memory block on success, NULL on failure. 1480 */ 1481 void * __init memblock_alloc_try_nid_nopanic( 1482 phys_addr_t size, phys_addr_t align, 1483 phys_addr_t min_addr, phys_addr_t max_addr, 1484 int nid) 1485 { 1486 void *ptr; 1487 1488 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n", 1489 __func__, (u64)size, (u64)align, nid, &min_addr, 1490 &max_addr, (void *)_RET_IP_); 1491 1492 ptr = memblock_alloc_internal(size, align, 1493 min_addr, max_addr, nid); 1494 if (ptr) 1495 memset(ptr, 0, size); 1496 return ptr; 1497 } 1498 1499 /** 1500 * memblock_alloc_try_nid - allocate boot memory block with panicking 1501 * @size: size of memory block to be allocated in bytes 1502 * @align: alignment of the region and block's size 1503 * @min_addr: the lower bound of the memory region from where the allocation 1504 * is preferred (phys address) 1505 * @max_addr: the upper bound of the memory region from where the allocation 1506 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to 1507 * allocate only from memory limited by memblock.current_limit value 1508 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1509 * 1510 * Public panicking version of memblock_alloc_try_nid_nopanic() 1511 * which provides debug information (including caller info), if enabled, 1512 * and panics if the request can not be satisfied. 1513 * 1514 * Return: 1515 * Virtual address of allocated memory block on success, NULL on failure. 
 */
void * __init memblock_alloc_try_nid(
			phys_addr_t size, phys_addr_t align,
			phys_addr_t min_addr, phys_addr_t max_addr,
			int nid)
{
	void *ptr;

	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n",
		     __func__, (u64)size, (u64)align, nid, &min_addr,
		     &max_addr, (void *)_RET_IP_);
	ptr = memblock_alloc_internal(size, align,
				      min_addr, max_addr, nid);
	if (ptr) {
		memset(ptr, 0, size);
		return ptr;
	}

	panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa\n",
	      __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr);
	return NULL;
}

/**
 * __memblock_free_early - free boot memory block
 * @base: phys starting address of the boot memory block
 * @size: size of the boot memory block in bytes
 *
 * Free boot memory block previously allocated by memblock_alloc_xx() API.
 * The freed memory will not be released to the buddy allocator.
 */
void __init __memblock_free_early(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pF\n",
		     __func__, &base, &end, (void *)_RET_IP_);
	kmemleak_free_part_phys(base, size);
	memblock_remove_range(&memblock.reserved, base, size);
}

/**
 * __memblock_free_late - free bootmem block pages directly to buddy allocator
 * @base: phys starting address of the boot memory block
 * @size: size of the boot memory block in bytes
 *
 * This is only useful when the bootmem allocator has already been torn
 * down, but we are still initializing the system. Pages are released directly
 * to the buddy allocator, no bootmem metadata is updated because it is gone.
1565 */ 1566 void __init __memblock_free_late(phys_addr_t base, phys_addr_t size) 1567 { 1568 phys_addr_t cursor, end; 1569 1570 end = base + size - 1; 1571 memblock_dbg("%s: [%pa-%pa] %pF\n", 1572 __func__, &base, &end, (void *)_RET_IP_); 1573 kmemleak_free_part_phys(base, size); 1574 cursor = PFN_UP(base); 1575 end = PFN_DOWN(base + size); 1576 1577 for (; cursor < end; cursor++) { 1578 memblock_free_pages(pfn_to_page(cursor), cursor, 0); 1579 totalram_pages++; 1580 } 1581 } 1582 1583 /* 1584 * Remaining API functions 1585 */ 1586 1587 phys_addr_t __init_memblock memblock_phys_mem_size(void) 1588 { 1589 return memblock.memory.total_size; 1590 } 1591 1592 phys_addr_t __init_memblock memblock_reserved_size(void) 1593 { 1594 return memblock.reserved.total_size; 1595 } 1596 1597 phys_addr_t __init memblock_mem_size(unsigned long limit_pfn) 1598 { 1599 unsigned long pages = 0; 1600 struct memblock_region *r; 1601 unsigned long start_pfn, end_pfn; 1602 1603 for_each_memblock(memory, r) { 1604 start_pfn = memblock_region_memory_base_pfn(r); 1605 end_pfn = memblock_region_memory_end_pfn(r); 1606 start_pfn = min_t(unsigned long, start_pfn, limit_pfn); 1607 end_pfn = min_t(unsigned long, end_pfn, limit_pfn); 1608 pages += end_pfn - start_pfn; 1609 } 1610 1611 return PFN_PHYS(pages); 1612 } 1613 1614 /* lowest address */ 1615 phys_addr_t __init_memblock memblock_start_of_DRAM(void) 1616 { 1617 return memblock.memory.regions[0].base; 1618 } 1619 1620 phys_addr_t __init_memblock memblock_end_of_DRAM(void) 1621 { 1622 int idx = memblock.memory.cnt - 1; 1623 1624 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); 1625 } 1626 1627 static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) 1628 { 1629 phys_addr_t max_addr = PHYS_ADDR_MAX; 1630 struct memblock_region *r; 1631 1632 /* 1633 * translate the memory @limit size into the max address within one of 1634 * the memory memblock regions, if the @limit exceeds the total size 1635 * of those regions, max_addr will keep original value PHYS_ADDR_MAX 1636 */ 1637 for_each_memblock(memory, r) { 1638 if (limit <= r->size) { 1639 max_addr = r->base + limit; 1640 break; 1641 } 1642 limit -= r->size; 1643 } 1644 1645 return max_addr; 1646 } 1647 1648 void __init memblock_enforce_memory_limit(phys_addr_t limit) 1649 { 1650 phys_addr_t max_addr = PHYS_ADDR_MAX; 1651 1652 if (!limit) 1653 return; 1654 1655 max_addr = __find_max_addr(limit); 1656 1657 /* @limit exceeds the total size of the memory, do nothing */ 1658 if (max_addr == PHYS_ADDR_MAX) 1659 return; 1660 1661 /* truncate both memory and reserved regions */ 1662 memblock_remove_range(&memblock.memory, max_addr, 1663 PHYS_ADDR_MAX); 1664 memblock_remove_range(&memblock.reserved, max_addr, 1665 PHYS_ADDR_MAX); 1666 } 1667 1668 void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) 1669 { 1670 int start_rgn, end_rgn; 1671 int i, ret; 1672 1673 if (!size) 1674 return; 1675 1676 ret = memblock_isolate_range(&memblock.memory, base, size, 1677 &start_rgn, &end_rgn); 1678 if (ret) 1679 return; 1680 1681 /* remove all the MAP regions */ 1682 for (i = memblock.memory.cnt - 1; i >= end_rgn; i--) 1683 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1684 memblock_remove_region(&memblock.memory, i); 1685 1686 for (i = start_rgn - 1; i >= 0; i--) 1687 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1688 memblock_remove_region(&memblock.memory, i); 1689 1690 /* truncate the reserved regions */ 1691 memblock_remove_range(&memblock.reserved, 0, base); 
1692 memblock_remove_range(&memblock.reserved, 1693 base + size, PHYS_ADDR_MAX); 1694 } 1695 1696 void __init memblock_mem_limit_remove_map(phys_addr_t limit) 1697 { 1698 phys_addr_t max_addr; 1699 1700 if (!limit) 1701 return; 1702 1703 max_addr = __find_max_addr(limit); 1704 1705 /* @limit exceeds the total size of the memory, do nothing */ 1706 if (max_addr == PHYS_ADDR_MAX) 1707 return; 1708 1709 memblock_cap_memory_range(0, max_addr); 1710 } 1711 1712 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) 1713 { 1714 unsigned int left = 0, right = type->cnt; 1715 1716 do { 1717 unsigned int mid = (right + left) / 2; 1718 1719 if (addr < type->regions[mid].base) 1720 right = mid; 1721 else if (addr >= (type->regions[mid].base + 1722 type->regions[mid].size)) 1723 left = mid + 1; 1724 else 1725 return mid; 1726 } while (left < right); 1727 return -1; 1728 } 1729 1730 bool __init_memblock memblock_is_reserved(phys_addr_t addr) 1731 { 1732 return memblock_search(&memblock.reserved, addr) != -1; 1733 } 1734 1735 bool __init_memblock memblock_is_memory(phys_addr_t addr) 1736 { 1737 return memblock_search(&memblock.memory, addr) != -1; 1738 } 1739 1740 bool __init_memblock memblock_is_map_memory(phys_addr_t addr) 1741 { 1742 int i = memblock_search(&memblock.memory, addr); 1743 1744 if (i == -1) 1745 return false; 1746 return !memblock_is_nomap(&memblock.memory.regions[i]); 1747 } 1748 1749 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP 1750 int __init_memblock memblock_search_pfn_nid(unsigned long pfn, 1751 unsigned long *start_pfn, unsigned long *end_pfn) 1752 { 1753 struct memblock_type *type = &memblock.memory; 1754 int mid = memblock_search(type, PFN_PHYS(pfn)); 1755 1756 if (mid == -1) 1757 return -1; 1758 1759 *start_pfn = PFN_DOWN(type->regions[mid].base); 1760 *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size); 1761 1762 return type->regions[mid].nid; 1763 } 1764 #endif 1765 1766 /** 1767 * memblock_is_region_memory - check if a region is a subset of memory 1768 * @base: base of region to check 1769 * @size: size of region to check 1770 * 1771 * Check if the region [@base, @base + @size) is a subset of a memory block. 1772 * 1773 * Return: 1774 * 0 if false, non-zero if true 1775 */ 1776 bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) 1777 { 1778 int idx = memblock_search(&memblock.memory, base); 1779 phys_addr_t end = base + memblock_cap_size(base, &size); 1780 1781 if (idx == -1) 1782 return false; 1783 return (memblock.memory.regions[idx].base + 1784 memblock.memory.regions[idx].size) >= end; 1785 } 1786 1787 /** 1788 * memblock_is_region_reserved - check if a region intersects reserved memory 1789 * @base: base of region to check 1790 * @size: size of region to check 1791 * 1792 * Check if the region [@base, @base + @size) intersects a reserved 1793 * memory block. 1794 * 1795 * Return: 1796 * True if they intersect, false if not. 
1797 */ 1798 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) 1799 { 1800 memblock_cap_size(base, &size); 1801 return memblock_overlaps_region(&memblock.reserved, base, size); 1802 } 1803 1804 void __init_memblock memblock_trim_memory(phys_addr_t align) 1805 { 1806 phys_addr_t start, end, orig_start, orig_end; 1807 struct memblock_region *r; 1808 1809 for_each_memblock(memory, r) { 1810 orig_start = r->base; 1811 orig_end = r->base + r->size; 1812 start = round_up(orig_start, align); 1813 end = round_down(orig_end, align); 1814 1815 if (start == orig_start && end == orig_end) 1816 continue; 1817 1818 if (start < end) { 1819 r->base = start; 1820 r->size = end - start; 1821 } else { 1822 memblock_remove_region(&memblock.memory, 1823 r - memblock.memory.regions); 1824 r--; 1825 } 1826 } 1827 } 1828 1829 void __init_memblock memblock_set_current_limit(phys_addr_t limit) 1830 { 1831 memblock.current_limit = limit; 1832 } 1833 1834 phys_addr_t __init_memblock memblock_get_current_limit(void) 1835 { 1836 return memblock.current_limit; 1837 } 1838 1839 static void __init_memblock memblock_dump(struct memblock_type *type) 1840 { 1841 phys_addr_t base, end, size; 1842 enum memblock_flags flags; 1843 int idx; 1844 struct memblock_region *rgn; 1845 1846 pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); 1847 1848 for_each_memblock_type(idx, type, rgn) { 1849 char nid_buf[32] = ""; 1850 1851 base = rgn->base; 1852 size = rgn->size; 1853 end = base + size - 1; 1854 flags = rgn->flags; 1855 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP 1856 if (memblock_get_region_node(rgn) != MAX_NUMNODES) 1857 snprintf(nid_buf, sizeof(nid_buf), " on node %d", 1858 memblock_get_region_node(rgn)); 1859 #endif 1860 pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n", 1861 type->name, idx, &base, &end, &size, nid_buf, flags); 1862 } 1863 } 1864 1865 void __init_memblock __memblock_dump_all(void) 1866 { 1867 pr_info("MEMBLOCK configuration:\n"); 1868 pr_info(" memory size = %pa reserved size = %pa\n", 1869 &memblock.memory.total_size, 1870 &memblock.reserved.total_size); 1871 1872 memblock_dump(&memblock.memory); 1873 memblock_dump(&memblock.reserved); 1874 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 1875 memblock_dump(&memblock.physmem); 1876 #endif 1877 } 1878 1879 void __init memblock_allow_resize(void) 1880 { 1881 memblock_can_resize = 1; 1882 } 1883 1884 static int __init early_memblock(char *p) 1885 { 1886 if (p && strstr(p, "debug")) 1887 memblock_debug = 1; 1888 return 0; 1889 } 1890 early_param("memblock", early_memblock); 1891 1892 static void __init __free_pages_memory(unsigned long start, unsigned long end) 1893 { 1894 int order; 1895 1896 while (start < end) { 1897 order = min(MAX_ORDER - 1UL, __ffs(start)); 1898 1899 while (start + (1UL << order) > end) 1900 order--; 1901 1902 memblock_free_pages(pfn_to_page(start), start, order); 1903 1904 start += (1UL << order); 1905 } 1906 } 1907 1908 static unsigned long __init __free_memory_core(phys_addr_t start, 1909 phys_addr_t end) 1910 { 1911 unsigned long start_pfn = PFN_UP(start); 1912 unsigned long end_pfn = min_t(unsigned long, 1913 PFN_DOWN(end), max_low_pfn); 1914 1915 if (start_pfn >= end_pfn) 1916 return 0; 1917 1918 __free_pages_memory(start_pfn, end_pfn); 1919 1920 return end_pfn - start_pfn; 1921 } 1922 1923 static unsigned long __init free_low_memory_core_early(void) 1924 { 1925 unsigned long count = 0; 1926 phys_addr_t start, end; 1927 u64 i; 1928 1929 memblock_clear_hotplug(0, -1); 1930 1931 for_each_reserved_mem_region(i, 
						&start, &end)
		reserve_bootmem_region(start, end);

	/*
	 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
	 * because in some cases, e.g. when Node0 doesn't have RAM installed,
	 * the low memory will be on Node1.
	 */
	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
				NULL)
		count += __free_memory_core(start, end);

	return count;
}

static int reset_managed_pages_done __initdata;

void reset_node_managed_pages(pg_data_t *pgdat)
{
	struct zone *z;

	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
		z->managed_pages = 0;
}

void __init reset_all_zones_managed_pages(void)
{
	struct pglist_data *pgdat;

	if (reset_managed_pages_done)
		return;

	for_each_online_pgdat(pgdat)
		reset_node_managed_pages(pgdat);

	reset_managed_pages_done = 1;
}

/**
 * memblock_free_all - release free pages to the buddy allocator
 *
 * Return: the number of pages actually released.
 */
unsigned long __init memblock_free_all(void)
{
	unsigned long pages;

	reset_all_zones_managed_pages();

	pages = free_low_memory_core_early();
	totalram_pages += pages;

	return pages;
}

#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK)

static int memblock_debug_show(struct seq_file *m, void *private)
{
	struct memblock_type *type = m->private;
	struct memblock_region *reg;
	int i;
	phys_addr_t end;

	for (i = 0; i < type->cnt; i++) {
		reg = &type->regions[i];
		end = reg->base + reg->size - 1;

		seq_printf(m, "%4d: ", i);
		seq_printf(m, "%pa..%pa\n", &reg->base, &end);
	}
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(memblock_debug);

static int __init memblock_init_debugfs(void)
{
	struct dentry *root = debugfs_create_dir("memblock", NULL);
	if (!root)
		return -ENXIO;
	debugfs_create_file("memory", 0444, root,
			    &memblock.memory, &memblock_debug_fops);
	debugfs_create_file("reserved", 0444, root,
			    &memblock.reserved, &memblock_debug_fops);
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
	debugfs_create_file("physmem", 0444, root,
			    &memblock.physmem, &memblock_debug_fops);
#endif

	return 0;
}
__initcall(memblock_init_debugfs);

#endif /* CONFIG_DEBUG_FS */
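/*
 * Debugging note (paths shown assume debugfs is mounted in its usual place):
 * when this file is built with CONFIG_DEBUG_FS and without
 * CONFIG_ARCH_DISCARD_MEMBLOCK, the region lists can be inspected at runtime
 * via /sys/kernel/debug/memblock/memory and /sys/kernel/debug/memblock/reserved,
 * one region per line as printed by memblock_debug_show() ("%4d: %pa..%pa").
 * Booting with "memblock=debug" (see early_memblock() above) additionally
 * turns every memblock_dbg() in this file into a console message.
 */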