// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/poison.h>
#include <linux/pfn.h>
#include <linux/debugfs.h>
#include <linux/kmemleak.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>
#include <linux/mutex.h>

#include <asm/sections.h>
#include <linux/io.h>

#include "internal.h"

#define INIT_MEMBLOCK_REGIONS			128
#define INIT_PHYSMEM_REGIONS			4

#ifndef INIT_MEMBLOCK_RESERVED_REGIONS
# define INIT_MEMBLOCK_RESERVED_REGIONS		INIT_MEMBLOCK_REGIONS
#endif

#ifndef INIT_MEMBLOCK_MEMORY_REGIONS
#define INIT_MEMBLOCK_MEMORY_REGIONS		INIT_MEMBLOCK_REGIONS
#endif

/**
 * DOC: memblock overview
 *
 * Memblock is a method of managing memory regions during the early
 * boot period when the usual kernel memory allocators are not up and
 * running.
 *
 * Memblock views the system memory as collections of contiguous
 * regions. There are several types of these collections:
 *
 * * ``memory`` - describes the physical memory available to the
 *   kernel; this may differ from the actual physical memory installed
 *   in the system, for instance when the memory is restricted with
 *   the ``mem=`` command line parameter
 * * ``reserved`` - describes the regions that were allocated
 * * ``physmem`` - describes the actual physical memory available during
 *   boot regardless of the possible restrictions and memory hot(un)plug;
 *   the ``physmem`` type is only available on some architectures.
 *
 * Each region is represented by struct memblock_region that
 * defines the region extents, its attributes and NUMA node id on NUMA
 * systems. Every memory type is described by the struct memblock_type
 * which contains an array of memory regions along with
 * the allocator metadata. The "memory" and "reserved" types are nicely
 * wrapped with struct memblock. This structure is statically
 * initialized at build time. The region arrays are initially sized to
 * %INIT_MEMBLOCK_MEMORY_REGIONS for "memory" and
 * %INIT_MEMBLOCK_RESERVED_REGIONS for "reserved". The region array
 * for "physmem" is initially sized to %INIT_PHYSMEM_REGIONS.
 * memblock_allow_resize() enables automatic resizing of the region
 * arrays during addition of new regions. This feature should be used
 * with care so that memory allocated for the region array will not
 * overlap with areas that should be reserved, for example initrd.
 *
 * The early architecture setup should tell memblock what the physical
 * memory layout is by using memblock_add() or memblock_add_node()
 * functions. The first function does not assign the region to a NUMA
 * node and it is appropriate for UMA systems. Yet, it is possible to
 * use it on NUMA systems as well and assign the region to a NUMA node
 * later in the setup process using memblock_set_node().
 * memblock_add_node() performs such an assignment directly.
 *
 * Once memblock is set up, memory can be allocated using one of the
 * API variants:
 *
 * * memblock_phys_alloc*() - these functions return the **physical**
 *   address of the allocated memory
 * * memblock_alloc*() - these functions return the **virtual** address
 *   of the allocated memory.
 *
 * Note that both API variants use implicit assumptions about allowed
 * memory ranges and the fallback methods. Consult the documentation
 * of memblock_alloc_internal() and memblock_alloc_range_nid()
 * functions for more elaborate description.
 *
 * As the system boot progresses, the architecture specific mem_init()
 * function frees all the memory to the buddy page allocator.
 *
 * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the
 * memblock data structures (except "physmem") will be discarded after the
 * system initialization completes.
 */
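
/*
 * A minimal usage sketch of the API described above, assuming a platform
 * with RAM at 0x80000000 and an initrd described by initrd_start and
 * initrd_size (all of these are placeholder values):
 *
 *	memblock_add(0x80000000, 0x20000000);		register 512M of RAM
 *	memblock_reserve(initrd_start, initrd_size);	keep the initrd safe
 *	buf = memblock_alloc(PAGE_SIZE, SMP_CACHE_BYTES);  zeroed, virtual address
 *	addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);  physical address
 */
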
#ifndef CONFIG_NUMA
struct pglist_data __refdata contig_page_data;
EXPORT_SYMBOL(contig_page_data);
#endif

unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
unsigned long long max_possible_pfn;

static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
#endif

struct memblock memblock __initdata_memblock = {
	.memory.regions		= memblock_memory_init_regions,
	.memory.max		= INIT_MEMBLOCK_MEMORY_REGIONS,
	.memory.name		= "memory",

	.reserved.regions	= memblock_reserved_init_regions,
	.reserved.max		= INIT_MEMBLOCK_RESERVED_REGIONS,
	.reserved.name		= "reserved",

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem = {
	.regions		= memblock_physmem_init_regions,
	.max			= INIT_PHYSMEM_REGIONS,
	.name			= "physmem",
};
#endif

/*
 * keep a pointer to &memblock.memory in the text section to use it in
 * __next_mem_range() and its helpers.
 * For architectures that do not keep memblock data after init, this
 * pointer will be reset to NULL at memblock_discard()
 */
static __refdata struct memblock_type *memblock_memory = &memblock.memory;

#define for_each_memblock_type(i, memblock_type, rgn)			\
	for (i = 0, rgn = &memblock_type->regions[0];			\
	     i < memblock_type->cnt;					\
	     i++, rgn = &memblock_type->regions[i])

#define memblock_dbg(fmt, ...)						\
	do {								\
		if (memblock_debug)					\
			pr_info(fmt, ##__VA_ARGS__);			\
	} while (0)

static int memblock_debug __initdata_memblock;
static bool system_has_some_mirror __initdata_memblock;
static int memblock_can_resize __initdata_memblock;
static int memblock_memory_in_slab __initdata_memblock;
static int memblock_reserved_in_slab __initdata_memblock;

bool __init_memblock memblock_has_mirror(void)
{
	return system_has_some_mirror;
}

static enum memblock_flags __init_memblock choose_memblock_flags(void)
{
	return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
}

/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
{
	return *size = min(*size, PHYS_ADDR_MAX - base);
}

/*
 * Address comparison utilities
 */
unsigned long __init_memblock
memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2,
		       phys_addr_t size2)
{
	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size)
{
	unsigned long i;

	memblock_cap_size(base, &size);

	for (i = 0; i < type->cnt; i++)
		if (memblock_addrs_overlap(base, size, type->regions[i].base,
					   type->regions[i].size))
			return true;
	return false;
}
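
/*
 * Both helpers above treat a region as the half-open interval
 * [base, base + size), so two regions that merely touch do not overlap.
 * A small sketch with made-up addresses:
 *
 *	memblock_addrs_overlap(0x1000, 0x1000, 0x2000, 0x1000);   returns 0, adjacent
 *	memblock_addrs_overlap(0x1000, 0x1001, 0x2000, 0x1000);   returns 1, one byte shared
 */
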
/**
 * __memblock_find_range_bottom_up - find free area utility in bottom-up
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Utility called from memblock_find_in_range_node(), find free area bottom-up.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
				phys_addr_t size, phys_addr_t align, int nid,
				enum memblock_flags flags)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) {
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		cand = round_up(this_start, align);
		if (cand < this_end && this_end - cand >= size)
			return cand;
	}

	return 0;
}

/**
 * __memblock_find_range_top_down - find free area utility, in top-down
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Utility called from memblock_find_in_range_node(), find free area top-down.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
			       phys_addr_t size, phys_addr_t align, int nid,
			       enum memblock_flags flags)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end,
					NULL) {
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		if (this_end < size)
			continue;

		cand = round_down(this_end - size, align);
		if (cand >= this_start)
			return cand;
	}

	return 0;
}

/**
 * memblock_find_in_range_node - find free area in given range and node
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Find @size free area aligned to @align in the specified range and node.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid,
					enum memblock_flags flags)
{
	/* pump up @end */
	if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
	    end == MEMBLOCK_ALLOC_NOLEAKTRACE)
		end = memblock.current_limit;

	/* avoid allocating the first page */
	start = max_t(phys_addr_t, start, PAGE_SIZE);
	end = max(start, end);

	if (memblock_bottom_up())
		return __memblock_find_range_bottom_up(start, end, size, align,
						       nid, flags);
	else
		return __memblock_find_range_top_down(start, end, size, align,
						      nid, flags);
}

/**
 * memblock_find_in_range - find free area in given range
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 *
 * Find @size free area aligned to @align in the specified range.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
					phys_addr_t end, phys_addr_t size,
					phys_addr_t align)
{
	phys_addr_t ret;
	enum memblock_flags flags = choose_memblock_flags();

again:
	ret = memblock_find_in_range_node(size, align, start, end,
					  NUMA_NO_NODE, flags);

	if (!ret && (flags & MEMBLOCK_MIRROR)) {
		pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
				    &size);
		flags &= ~MEMBLOCK_MIRROR;
		goto again;
	}

	return ret;
}

static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
{
	type->total_size -= type->regions[r].size;
	memmove(&type->regions[r], &type->regions[r + 1],
		(type->cnt - (r + 1)) * sizeof(type->regions[r]));
	type->cnt--;

	/* Special case for empty arrays */
	if (type->cnt == 0) {
		WARN_ON(type->total_size != 0);
		type->regions[0].base = 0;
		type->regions[0].size = 0;
		type->regions[0].flags = 0;
		memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
	}
}

#ifndef CONFIG_ARCH_KEEP_MEMBLOCK
/**
 * memblock_discard - discard memory and reserved arrays if they were allocated
 */
void __init memblock_discard(void)
{
	phys_addr_t addr, size;

	if (memblock.reserved.regions != memblock_reserved_init_regions) {
		addr = __pa(memblock.reserved.regions);
		size = PAGE_ALIGN(sizeof(struct memblock_region) *
				  memblock.reserved.max);
		if (memblock_reserved_in_slab)
			kfree(memblock.reserved.regions);
		else
			memblock_free_late(addr, size);
	}

	if (memblock.memory.regions != memblock_memory_init_regions) {
		addr = __pa(memblock.memory.regions);
		size = PAGE_ALIGN(sizeof(struct memblock_region) *
				  memblock.memory.max);
		if (memblock_memory_in_slab)
			kfree(memblock.memory.regions);
		else
			memblock_free_late(addr, size);
	}

	memblock_memory = NULL;
}
#endif

/**
 * memblock_double_array - double the size of the memblock regions array
 * @type: memblock type of the regions array being doubled
 * @new_area_start: starting address of memory range to avoid overlap with
 * @new_area_size: size of memory range to avoid overlap with
 *
 * Double the size of the @type regions array. If memblock is being used to
 * allocate memory for a new reserved regions array and there is a previously
 * allocated memory range [@new_area_start, @new_area_start + @new_area_size]
 * waiting to be reserved, ensure the memory used by the new array does
 * not overlap.
 *
 * Return:
 * 0 on success, -1 on failure.
 */
static int __init_memblock memblock_double_array(struct memblock_type *type,
						phys_addr_t new_area_start,
						phys_addr_t new_area_size)
{
	struct memblock_region *new_array, *old_array;
	phys_addr_t old_alloc_size, new_alloc_size;
	phys_addr_t old_size, new_size, addr, new_end;
	int use_slab = slab_is_available();
	int *in_slab;

	/* We don't allow resizing until we know about the reserved regions
	 * of memory that aren't suitable for allocation
	 */
	if (!memblock_can_resize)
		panic("memblock: cannot resize %s array\n", type->name);

	/* Calculate new doubled size */
	old_size = type->max * sizeof(struct memblock_region);
	new_size = old_size << 1;
	/*
	 * We need to allocate a new array aligned to PAGE_SIZE,
	 * so we can free it completely later.
	 */
	old_alloc_size = PAGE_ALIGN(old_size);
	new_alloc_size = PAGE_ALIGN(new_size);

	/* Retrieve the slab flag */
	if (type == &memblock.memory)
		in_slab = &memblock_memory_in_slab;
	else
		in_slab = &memblock_reserved_in_slab;

	/* Try to find some space for it */
	if (use_slab) {
		new_array = kmalloc(new_size, GFP_KERNEL);
		addr = new_array ? __pa(new_array) : 0;
	} else {
		/* only exclude range when trying to double reserved.regions */
		if (type != &memblock.reserved)
			new_area_start = new_area_size = 0;

		addr = memblock_find_in_range(new_area_start + new_area_size,
						memblock.current_limit,
						new_alloc_size, PAGE_SIZE);
		if (!addr && new_area_size)
			addr = memblock_find_in_range(0,
				min(new_area_start, memblock.current_limit),
				new_alloc_size, PAGE_SIZE);

		if (addr) {
			/* The memory may not have been accepted, yet. */
			accept_memory(addr, new_alloc_size);

			new_array = __va(addr);
		} else {
			new_array = NULL;
		}
	}
	if (!addr) {
		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
		       type->name, type->max, type->max * 2);
		return -1;
	}

	new_end = addr + new_size - 1;
	memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]",
			type->name, type->max * 2, &addr, &new_end);

	/*
	 * Found space, we now need to move the array over before we add the
	 * reserved region since it may be our reserved array itself that is
	 * full.
	 */
	memcpy(new_array, type->regions, old_size);
	memset(new_array + type->max, 0, old_size);
	old_array = type->regions;
	type->regions = new_array;
	type->max <<= 1;

	/* Free old array. We needn't free it if the array is the static one */
	if (*in_slab)
		kfree(old_array);
	else if (old_array != memblock_memory_init_regions &&
		 old_array != memblock_reserved_init_regions)
		memblock_free(old_array, old_alloc_size);

	/*
	 * Reserve the new array if that comes from the memblock.  Otherwise, we
	 * needn't do it
	 */
	if (!use_slab)
		BUG_ON(memblock_reserve(addr, new_alloc_size));

	/* Update slab flag */
	*in_slab = use_slab;

	return 0;
}
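
/*
 * Resizing is opt-in: the array is only doubled once the architecture has
 * called memblock_allow_resize() (mentioned in the overview above), which
 * is normally done after the ranges that must stay reserved, such as the
 * initrd or the flattened device tree, are known. A hedged sketch of that
 * ordering, with placeholder symbols for the reserved ranges:
 *
 *	memblock_reserve(initrd_start, initrd_size);
 *	memblock_reserve(fdt_base, fdt_size);
 *	memblock_allow_resize();	from here on, the arrays may be doubled
 */
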
/**
 * memblock_merge_regions - merge neighboring compatible regions
 * @type: memblock type to scan
 * @start_rgn: start scanning from (@start_rgn - 1)
 * @end_rgn: end scanning at (@end_rgn - 1)
 * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn)
 */
static void __init_memblock memblock_merge_regions(struct memblock_type *type,
						   unsigned long start_rgn,
						   unsigned long end_rgn)
{
	int i = 0;
	if (start_rgn)
		i = start_rgn - 1;
	end_rgn = min(end_rgn, type->cnt - 1);
	while (i < end_rgn) {
		struct memblock_region *this = &type->regions[i];
		struct memblock_region *next = &type->regions[i + 1];

		if (this->base + this->size != next->base ||
		    memblock_get_region_node(this) !=
		    memblock_get_region_node(next) ||
		    this->flags != next->flags) {
			BUG_ON(this->base + this->size > next->base);
			i++;
			continue;
		}

		this->size += next->size;
		/* move forward from next + 1, index of which is i + 2 */
		memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next));
		type->cnt--;
		end_rgn--;
	}
}

/**
 * memblock_insert_region - insert new memblock region
 * @type: memblock type to insert into
 * @idx: index for the insertion point
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: node id of the new region
 * @flags: flags of the new region
 *
 * Insert new memblock region [@base, @base + @size) into @type at @idx.
 * @type must already have extra room to accommodate the new region.
 */
static void __init_memblock memblock_insert_region(struct memblock_type *type,
						   int idx, phys_addr_t base,
						   phys_addr_t size,
						   int nid,
						   enum memblock_flags flags)
{
	struct memblock_region *rgn = &type->regions[idx];

	BUG_ON(type->cnt >= type->max);
	memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
	rgn->base = base;
	rgn->size = size;
	rgn->flags = flags;
	memblock_set_region_node(rgn, nid);
	type->cnt++;
	type->total_size += size;
}

/**
 * memblock_add_range - add new memblock region
 * @type: memblock type to add new region into
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base, @base + @size) into @type.  The new region
 * is allowed to overlap with existing ones - overlaps don't affect already
 * existing regions.  @type is guaranteed to be minimal (all neighbouring
 * compatible regions are merged) after the addition.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_add_range(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size,
				int nid, enum memblock_flags flags)
{
	bool insert = false;
	phys_addr_t obase = base;
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx, nr_new, start_rgn = -1, end_rgn;
	struct memblock_region *rgn;

	if (!size)
		return 0;

	/* special case for empty array */
	if (type->regions[0].size == 0) {
		WARN_ON(type->cnt != 0 || type->total_size);
		type->regions[0].base = base;
		type->regions[0].size = size;
		type->regions[0].flags = flags;
		memblock_set_region_node(&type->regions[0], nid);
		type->total_size = size;
		type->cnt = 1;
		return 0;
	}

	/*
	 * The worst case is when the new range overlaps all existing regions,
	 * then we'll need type->cnt + 1 empty regions in @type. So if
	 * type->cnt * 2 + 1 is less than or equal to type->max, we know
	 * that there are enough empty regions in @type, and we can insert
	 * regions directly.
	 */
	if (type->cnt * 2 + 1 <= type->max)
		insert = true;

repeat:
	/*
	 * The following is executed twice.  Once with %false @insert and
	 * then with %true.  The first counts the number of regions needed
	 * to accommodate the new area.  The second actually inserts them.
	 */
	base = obase;
	nr_new = 0;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps.  If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
#ifdef CONFIG_NUMA
			WARN_ON(nid != memblock_get_region_node(rgn));
#endif
			WARN_ON(flags != rgn->flags);
			nr_new++;
			if (insert) {
				if (start_rgn == -1)
					start_rgn = idx;
				end_rgn = idx + 1;
				memblock_insert_region(type, idx++, base,
						       rbase - base, nid,
						       flags);
			}
		}
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
	}

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert) {
			if (start_rgn == -1)
				start_rgn = idx;
			end_rgn = idx + 1;
			memblock_insert_region(type, idx, base, end - base,
					       nid, flags);
		}
	}

	if (!nr_new)
		return 0;

	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
	 */
	if (!insert) {
		while (type->cnt + nr_new > type->max)
			if (memblock_double_array(type, obase, size) < 0)
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type, start_rgn, end_rgn);
		return 0;
	}
}
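
/*
 * A worked example of the two-pass scheme above, with made-up addresses:
 * if "memory" already holds a single region [0x0, 0x4000000) and
 *
 *	memblock_add_range(&memblock.memory, 0x2000000, 0x4000000,
 *			   MAX_NUMNODES, 0);
 *
 * is called (i.e. the new range is [0x2000000, 0x6000000)), the first pass
 * counts one new region for the non-overlapping tail [0x4000000, 0x6000000),
 * the second pass inserts it, and the final memblock_merge_regions() call
 * leaves a single region [0x0, 0x6000000).
 */
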
/**
 * memblock_add_node - add new memblock region within a NUMA node
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base, @base + @size) to the "memory"
 * type. See memblock_add_range() description for more details.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
				      int nid, enum memblock_flags flags)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
		     &base, &end, nid, flags, (void *)_RET_IP_);

	return memblock_add_range(&memblock.memory, base, size, nid, flags);
}

/**
 * memblock_add - add new memblock region
 * @base: base address of the new region
 * @size: size of the new region
 *
 * Add new memblock region [@base, @base + @size) to the "memory"
 * type. See memblock_add_range() description for more details.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
}

/**
 * memblock_validate_numa_coverage - check if amount of memory with
 * no node ID assigned is less than a threshold
 * @threshold_bytes: maximal memory size that can have unassigned node
 * ID (in bytes).
 *
 * A buggy firmware may report memory that does not belong to any node.
 * Check if the amount of such memory is below @threshold_bytes.
 *
 * Return: true on success, false on failure.
 */
bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_bytes)
{
	unsigned long nr_pages = 0;
	unsigned long start_pfn, end_pfn, mem_size_mb;
	int nid, i;

	/* calculate lost pages */
	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
		if (!numa_valid_node(nid))
			nr_pages += end_pfn - start_pfn;
	}

	if ((nr_pages << PAGE_SHIFT) > threshold_bytes) {
		mem_size_mb = memblock_phys_mem_size() >> 20;
		pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n",
		       (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb);
		return false;
	}

	return true;
}
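
/*
 * A short sketch of how NUMA-aware setup code might combine the calls
 * above: register each node's range with its node ID, then sanity-check
 * the coverage. The ranges and the 1MB threshold are illustrative only.
 *
 *	memblock_add_node(0x00000000, 0x40000000, 0, MEMBLOCK_NONE);
 *	memblock_add_node(0x40000000, 0x40000000, 1, MEMBLOCK_NONE);
 *	if (!memblock_validate_numa_coverage(SZ_1M))
 *		pr_warn("suspicious amount of memory without a node ID\n");
 */
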
/**
 * memblock_isolate_range - isolate given range into disjoint memblocks
 * @type: memblock type to isolate range for
 * @base: base of range to isolate
 * @size: size of range to isolate
 * @start_rgn: out parameter for the start of isolated region
 * @end_rgn: out parameter for the end of isolated region
 *
 * Walk @type and ensure that regions don't cross the boundaries defined by
 * [@base, @base + @size).  Crossing regions are split at the boundaries,
 * which may create at most two more regions.  The index of the first
 * region inside the range is returned in *@start_rgn and the index of the
 * first region after the range is returned in *@end_rgn.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_isolate_range(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size,
					int *start_rgn, int *end_rgn)
{
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx;
	struct memblock_region *rgn;

	*start_rgn = *end_rgn = 0;

	if (!size)
		return 0;

	/* we'll create at most two more regions */
	while (type->cnt + 2 > type->max)
		if (memblock_double_array(type, base, size) < 0)
			return -ENOMEM;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;

		if (rbase < base) {
			/*
			 * @rgn intersects from below.  Split and continue
			 * to process the next region - the new top half.
			 */
			rgn->base = base;
			rgn->size -= base - rbase;
			type->total_size -= base - rbase;
			memblock_insert_region(type, idx, rbase, base - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else if (rend > end) {
			/*
			 * @rgn intersects from above.  Split and redo the
			 * current region - the new bottom half.
			 */
			rgn->base = end;
			rgn->size -= end - rbase;
			type->total_size -= end - rbase;
			memblock_insert_region(type, idx--, rbase, end - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else {
			/* @rgn is fully contained, record it */
			if (!*end_rgn)
				*start_rgn = idx;
			*end_rgn = idx + 1;
		}
	}

	return 0;
}
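
/*
 * A worked example of the splitting above, with made-up addresses: if
 * @type holds a single region [0x0, 0x8000000) and the range
 * [0x2000000, 0x6000000) is isolated, the region is split into
 * [0x0, 0x2000000), [0x2000000, 0x6000000) and [0x6000000, 0x8000000),
 * and *@start_rgn/*@end_rgn come back as 1 and 2, so that
 *
 *	for (i = start_rgn; i < end_rgn; i++)
 *
 * visits exactly the middle region.
 */
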
static int __init_memblock memblock_remove_range(struct memblock_type *type,
					  phys_addr_t base, phys_addr_t size)
{
	int start_rgn, end_rgn;
	int i, ret;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = end_rgn - 1; i >= start_rgn; i--)
		memblock_remove_region(type, i);
	return 0;
}

int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_remove_range(&memblock.memory, base, size);
}

/**
 * memblock_free - free boot memory allocation
 * @ptr: starting address of the boot memory allocation
 * @size: size of the boot memory block in bytes
 *
 * Free a boot memory block previously allocated by the memblock_alloc_xx()
 * API. The freed memory will not be released to the buddy allocator.
 */
void __init_memblock memblock_free(void *ptr, size_t size)
{
	if (ptr)
		memblock_phys_free(__pa(ptr), size);
}

/**
 * memblock_phys_free - free boot memory block
 * @base: phys starting address of the boot memory block
 * @size: size of the boot memory block in bytes
 *
 * Free a boot memory block previously allocated by the
 * memblock_phys_alloc_xx() API. The freed memory will not be released
 * to the buddy allocator.
 */
int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	kmemleak_free_part_phys(base, size);
	return memblock_remove_range(&memblock.reserved, base, size);
}

int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0);
}

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0);
}
#endif

/**
 * memblock_setclr_flag - set or clear flag for a memory region
 * @type: memblock type to set/clear flag for
 * @base: base address of the region
 * @size: size of the region
 * @set: set or clear the flag
 * @flag: the flag to update
 *
 * This function isolates region [@base, @base + @size), and sets/clears flag
 *
 * Return: 0 on success, -errno on failure.
 */
static int __init_memblock memblock_setclr_flag(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size, int set, int flag)
{
	int i, ret, start_rgn, end_rgn;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++) {
		struct memblock_region *r = &type->regions[i];

		if (set)
			r->flags |= flag;
		else
			r->flags &= ~flag;
	}

	memblock_merge_regions(type, start_rgn, end_rgn);
	return 0;
}

/**
 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_HOTPLUG);
}

/**
 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_HOTPLUG);
}

/**
 * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
{
	if (!mirrored_kernelcore)
		return 0;

	system_has_some_mirror = true;

	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_MIRROR);
}
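
/*
 * Because memblock_setclr_flag() isolates the range first, a flag can be
 * applied to part of an existing region; the region is split and only the
 * matching piece carries the flag. A hedged sketch with made-up addresses:
 *
 *	memblock_add(0x80000000, 0x40000000);
 *	memblock_mark_hotplug(0xa0000000, 0x10000000);
 *
 * leaves "memory" with three regions, of which only
 * [0xa0000000, 0xb0000000) has MEMBLOCK_HOTPLUG set.
 */
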
/**
 * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * The memory regions marked with %MEMBLOCK_NOMAP will not be added to the
 * direct mapping of the physical memory. These regions will still be
 * covered by the memory map. The struct page representing NOMAP memory
 * frames in the memory map will be PageReserved()
 *
 * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from
 * memblock, the caller must inform kmemleak to ignore that memory
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_NOMAP);
}

/**
 * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP);
}

/**
 * memblock_reserved_mark_noinit - Mark a reserved memory region with flag
 * MEMBLOCK_RSRV_NOINIT which results in the struct pages not being initialized
 * for this region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * struct pages will not be initialized for reserved memory regions marked with
 * %MEMBLOCK_RSRV_NOINIT.
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.reserved, base, size, 1,
				    MEMBLOCK_RSRV_NOINIT);
}

static bool should_skip_region(struct memblock_type *type,
			       struct memblock_region *m,
			       int nid, int flags)
{
	int m_nid = memblock_get_region_node(m);

	/* we never skip regions when iterating memblock.reserved or physmem */
	if (type != memblock_memory)
		return false;

	/* only memory regions are associated with nodes, check it */
	if (numa_valid_node(nid) && nid != m_nid)
		return true;

	/* skip hotpluggable memory regions if needed */
	if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
	    !(flags & MEMBLOCK_HOTPLUG))
		return true;

	/* if we want mirror memory skip non-mirror memory regions */
	if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
		return true;

	/* skip nomap memory unless we were asked for it explicitly */
	if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
		return true;

	/* skip driver-managed memory unless we were asked for it explicitly */
	if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m))
		return true;

	return false;
}

/**
 * __next_mem_range - next function for for_each_free_mem_range() etc.
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Find the first area from *@idx which matches @nid, fill the out
 * parameters, and update *@idx for the next iteration.  The lower 32bit of
 * *@idx contains index into type_a and the upper 32bit indexes the
 * areas before each region in type_b.  For example, if type_b regions
 * look like the following,
 *
 *	0:[0-16), 1:[32-48), 2:[128-130)
 *
 * The upper 32bit indexes the following regions.
 *
 *	0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
 *
 * As both region arrays are sorted, the function advances the two indices
 * in lockstep and returns each intersection.
 */
void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
		      struct memblock_type *type_a,
		      struct memblock_type *type_b, phys_addr_t *out_start,
		      phys_addr_t *out_end, int *out_nid)
{
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;

	for (; idx_a < type_a->cnt; idx_a++) {
		struct memblock_region *m = &type_a->regions[idx_a];

		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
		int m_nid = memblock_get_region_node(m);

		if (should_skip_region(type_a, m, nid, flags))
			continue;

		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a++;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b < type_b->cnt + 1; idx_b++) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
				r->base : PHYS_ADDR_MAX;

			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */
			if (r_start >= m_end)
				break;
			/* if the two regions intersect, we're done */
			if (m_start < r_end) {
				if (out_start)
					*out_start =
						max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
					*out_nid = m_nid;
				/*
				 * The region which ends first is
				 * advanced for the next iteration.
				 */
				if (m_end <= r_end)
					idx_a++;
				else
					idx_b++;
				*idx = (u32)idx_a | (u64)idx_b << 32;
				return;
			}
		}
	}

	/* signal end of iteration */
	*idx = ULLONG_MAX;
}
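
/*
 * This helper normally runs behind the for_each_free_mem_range() macro
 * from <linux/memblock.h>, which walks memory minus reserved. A hedged
 * sketch of a caller that just prints the free ranges it sees:
 *
 *	phys_addr_t start, end;
 *	u64 i;
 *
 *	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
 *				&start, &end, NULL)
 *		pr_info("free: [%pa-%pa)\n", &start, &end);
 */
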
/**
 * __next_mem_range_rev - generic next function for for_each_*_range_rev()
 *
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Finds the next range from type_a which is not marked as unsuitable
 * in type_b.
 *
 * Reverse of __next_mem_range().
 */
void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
					  enum memblock_flags flags,
					  struct memblock_type *type_a,
					  struct memblock_type *type_b,
					  phys_addr_t *out_start,
					  phys_addr_t *out_end, int *out_nid)
{
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;

	if (*idx == (u64)ULLONG_MAX) {
		idx_a = type_a->cnt - 1;
		if (type_b != NULL)
			idx_b = type_b->cnt;
		else
			idx_b = 0;
	}

	for (; idx_a >= 0; idx_a--) {
		struct memblock_region *m = &type_a->regions[idx_a];

		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
		int m_nid = memblock_get_region_node(m);

		if (should_skip_region(type_a, m, nid, flags))
			continue;

		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a--;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b >= 0; idx_b--) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
				r->base : PHYS_ADDR_MAX;
			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */

			if (r_end <= m_start)
				break;
			/* if the two regions intersect, we're done */
			if (m_end > r_start) {
				if (out_start)
					*out_start = max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
					*out_nid = m_nid;
				if (m_start >= r_start)
					idx_a--;
				else
					idx_b--;
				*idx = (u32)idx_a | (u64)idx_b << 32;
				return;
			}
		}
	}
	/* signal end of iteration */
	*idx = ULLONG_MAX;
}

/*
 * Common iterator interface used to define for_each_mem_pfn_range().
 */
void __init_memblock __next_mem_pfn_range(int *idx, int nid,
				unsigned long *out_start_pfn,
				unsigned long *out_end_pfn, int *out_nid)
{
	struct memblock_type *type = &memblock.memory;
	struct memblock_region *r;
	int r_nid;

	while (++*idx < type->cnt) {
		r = &type->regions[*idx];
		r_nid = memblock_get_region_node(r);

		if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
			continue;
		if (!numa_valid_node(nid) || nid == r_nid)
			break;
	}
	if (*idx >= type->cnt) {
		*idx = -1;
		return;
	}

	if (out_start_pfn)
		*out_start_pfn = PFN_UP(r->base);
	if (out_end_pfn)
		*out_end_pfn = PFN_DOWN(r->base + r->size);
	if (out_nid)
		*out_nid = r_nid;
}

/**
 * memblock_set_node - set node ID on memblock regions
 * @base: base of area to set node ID for
 * @size: size of area to set node ID for
 * @type: memblock type to set node ID for
 * @nid: node ID to set
 *
 * Set the nid of memblock @type regions in [@base, @base + @size) to @nid.
 * Regions which cross the area boundaries are split as necessary.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
				      struct memblock_type *type, int nid)
{
#ifdef CONFIG_NUMA
	int start_rgn, end_rgn;
	int i, ret;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++)
		memblock_set_region_node(&type->regions[i], nid);

	memblock_merge_regions(type, start_rgn, end_rgn);
#endif
	return 0;
}

#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/**
 * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
 *
 * @idx: pointer to u64 loop variable
 * @zone: zone in which all of the memory blocks reside
 * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL
 * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL
 *
 * This function is meant to be a zone/pfn specific wrapper for the
 * for_each_mem_range type iterators. It is used by the deferred memory
 * init routines; rather than duplicating the same logic in several
 * places, it is centralized in this one iterator that does everything
 * those callers need.
 */
void __init_memblock
__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
			     unsigned long *out_spfn, unsigned long *out_epfn)
{
	int zone_nid = zone_to_nid(zone);
	phys_addr_t spa, epa;

	__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
			 &memblock.memory, &memblock.reserved,
			 &spa, &epa, NULL);

	while (*idx != U64_MAX) {
		unsigned long epfn = PFN_DOWN(epa);
		unsigned long spfn = PFN_UP(spa);

		/*
		 * Verify the end is at least past the start of the zone and
		 * that we have at least one PFN to initialize.
		 */
		if (zone->zone_start_pfn < epfn && spfn < epfn) {
			/* if we went too far just stop searching */
			if (zone_end_pfn(zone) <= spfn) {
				*idx = U64_MAX;
				break;
			}

			if (out_spfn)
				*out_spfn = max(zone->zone_start_pfn, spfn);
			if (out_epfn)
				*out_epfn = min(zone_end_pfn(zone), epfn);

			return;
		}

		__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
				 &memblock.memory, &memblock.reserved,
				 &spa, &epa, NULL);
	}

	/* signal end of iteration */
	if (out_spfn)
		*out_spfn = ULONG_MAX;
	if (out_epfn)
		*out_epfn = 0;
}

#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */

/**
 * memblock_alloc_range_nid - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @start: the lower bound of the memory region to allocate (phys address)
 * @end: the upper bound of the memory region to allocate (phys address)
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @exact_nid: control the allocation fall back to other nodes
 *
 * The allocation is performed from memory region limited by
 * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE.
 *
 * If the specified node can not hold the requested memory and @exact_nid
 * is false, the allocation falls back to any node in the system.
 *
 * For systems with memory mirroring, the allocation is attempted first
 * from the regions with mirroring enabled and then retried from any
 * memory region.
 *
 * In addition, the allocated boot memory block is registered with
 * kmemleak_alloc_phys(), so it is never reported as a leak.
 *
 * Return:
 * Physical address of allocated memory block on success, %0 on failure.
 */
phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid,
					bool exact_nid)
{
	enum memblock_flags flags = choose_memblock_flags();
	phys_addr_t found;

	/*
	 * Detect any accidental use of these APIs after slab is ready, as at
	 * this moment memblock may be deinitialized already and its
	 * internal data may be destroyed (after execution of memblock_free_all)
	 */
	if (WARN_ON_ONCE(slab_is_available())) {
		void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);

		return vaddr ? virt_to_phys(vaddr) : 0;
	}

	if (!align) {
		/* Can't use WARNs this early in boot on powerpc */
		dump_stack();
		align = SMP_CACHE_BYTES;
	}

again:
	found = memblock_find_in_range_node(size, align, start, end, nid,
					    flags);
	if (found && !memblock_reserve(found, size))
		goto done;

	if (numa_valid_node(nid) && !exact_nid) {
		found = memblock_find_in_range_node(size, align, start,
						    end, NUMA_NO_NODE,
						    flags);
		if (found && !memblock_reserve(found, size))
			goto done;
	}

	if (flags & MEMBLOCK_MIRROR) {
		flags &= ~MEMBLOCK_MIRROR;
		pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
				    &size);
		goto again;
	}

	return 0;

done:
	/*
	 * Skip kmemleak for those places like kasan_init() and
	 * early_pgtable_alloc() due to high volume.
	 */
	if (end != MEMBLOCK_ALLOC_NOLEAKTRACE)
		/*
		 * Memblock allocated blocks are never reported as
		 * leaks. This is because many of these blocks are
		 * only referred to via the physical address, which is
		 * not looked up by kmemleak.
		 */
		kmemleak_alloc_phys(found, size, 0);

	/*
	 * Some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP,
	 * require memory to be accepted before it can be used by the
	 * guest.
	 *
	 * Accept the memory of the allocated buffer.
	 */
	accept_memory(found, size);

	return found;
}
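
/*
 * The fallback order above is: the requested node first, then any node
 * (unless @exact_nid), and only then with the MEMBLOCK_MIRROR requirement
 * dropped. A hedged sketch of a caller that prefers node 1 but accepts
 * any node below 4GB:
 *
 *	phys_addr_t pa;
 *
 *	pa = memblock_alloc_range_nid(SZ_2M, SZ_2M, 0, SZ_4G, 1, false);
 *	if (!pa)
 *		pr_warn("early 2M allocation failed\n");
 */
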
/**
 * memblock_phys_alloc_range - allocate a memory block inside specified range
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @start: the lower bound of the memory region to allocate (physical address)
 * @end: the upper bound of the memory region to allocate (physical address)
 *
 * Allocate @size bytes in the range between @start and @end.
 *
 * Return: physical address of the allocated memory block on success,
 * %0 on failure.
 */
phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
					     phys_addr_t align,
					     phys_addr_t start,
					     phys_addr_t end)
{
	memblock_dbg("%s: %llu bytes align=0x%llx from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, &start, &end,
		     (void *)_RET_IP_);
	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
					false);
}

/**
 * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Allocates memory block from the specified NUMA node. If the node
 * has no available memory, attempts to allocate from any node in the
 * system.
 *
 * Return: physical address of the allocated memory block on success,
 * %0 on failure.
 */
phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	return memblock_alloc_range_nid(size, align, 0,
					MEMBLOCK_ALLOC_ACCESSIBLE, nid, false);
}

/**
 * memblock_alloc_internal - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region to allocate (phys address)
 * @max_addr: the upper bound of the memory region to allocate (phys address)
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @exact_nid: control the allocation fall back to other nodes
 *
 * Allocates memory block using memblock_alloc_range_nid() and
 * converts the returned physical address to virtual.
 *
 * The @min_addr limit is dropped if it cannot be satisfied and the allocation
 * will fall back to memory below @min_addr. Other constraints, such
 * as node and mirrored memory, will be handled again in
 * memblock_alloc_range_nid().
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
static void * __init memblock_alloc_internal(
				phys_addr_t size, phys_addr_t align,
				phys_addr_t min_addr, phys_addr_t max_addr,
				int nid, bool exact_nid)
{
	phys_addr_t alloc;

	if (max_addr > memblock.current_limit)
		max_addr = memblock.current_limit;

	alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
					 exact_nid);

	/* retry allocation without lower limit */
	if (!alloc && min_addr)
		alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
						 exact_nid);

	if (!alloc)
		return NULL;

	return phys_to_virt(alloc);
}

/**
 * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node
 * without zeroing memory
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	  is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public function, provides additional debug information (including caller
 * info), if enabled. Does not zero allocated memory.
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
void * __init memblock_alloc_exact_nid_raw(
			phys_addr_t size, phys_addr_t align,
			phys_addr_t min_addr, phys_addr_t max_addr,
			int nid)
{
	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, nid, &min_addr,
		     &max_addr, (void *)_RET_IP_);

	return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
				       true);
}
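
/*
 * The _raw variants hand back memory with unspecified contents, so the
 * caller must initialize every byte it will read; memblock_alloc() and
 * memblock_alloc_try_nid() zero the block instead. A hedged sketch, with
 * "struct foo" as a placeholder type:
 *
 *	struct foo *tbl;
 *
 *	tbl = memblock_alloc_try_nid_raw(sizeof(*tbl), SMP_CACHE_BYTES,
 *					 0, MEMBLOCK_ALLOC_ACCESSIBLE,
 *					 NUMA_NO_NODE);
 *	if (tbl)
 *		memset(tbl, 0, sizeof(*tbl));	caller-side initialization
 */
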
/**
 * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
 * memory and without panicking
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	  is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public function, provides additional debug information (including caller
 * info), if enabled. Does not zero allocated memory, does not panic if request
 * cannot be satisfied.
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
void * __init memblock_alloc_try_nid_raw(
			phys_addr_t size, phys_addr_t align,
			phys_addr_t min_addr, phys_addr_t max_addr,
			int nid)
{
	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, nid, &min_addr,
		     &max_addr, (void *)_RET_IP_);

	return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
				       false);
}

/**
 * memblock_alloc_try_nid - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	  is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public function, provides additional debug information (including caller
 * info), if enabled. This function zeroes the allocated memory.
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
void * __init memblock_alloc_try_nid(
			phys_addr_t size, phys_addr_t align,
			phys_addr_t min_addr, phys_addr_t max_addr,
			int nid)
{
	void *ptr;

	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, nid, &min_addr,
		     &max_addr, (void *)_RET_IP_);
	ptr = memblock_alloc_internal(size, align,
				      min_addr, max_addr, nid, false);
	if (ptr)
		memset(ptr, 0, size);

	return ptr;
}

/**
 * __memblock_alloc_or_panic - Try to allocate memory and panic on failure
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @func: caller func name
 *
 * This function attempts to allocate memory using memblock_alloc() and, in
 * case of failure, calls panic() with a formatted message. It should not be
 * used directly; please use the macro memblock_alloc_or_panic() instead.
 */
void *__init __memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align,
				       const char *func)
{
	void *addr = memblock_alloc(size, align);

	if (unlikely(!addr))
		panic("%s: Failed to allocate %pap bytes\n", func, &size);
	return addr;
}
1730 */ 1731 void __init memblock_free_late(phys_addr_t base, phys_addr_t size) 1732 { 1733 phys_addr_t cursor, end; 1734 1735 end = base + size - 1; 1736 memblock_dbg("%s: [%pa-%pa] %pS\n", 1737 __func__, &base, &end, (void *)_RET_IP_); 1738 kmemleak_free_part_phys(base, size); 1739 cursor = PFN_UP(base); 1740 end = PFN_DOWN(base + size); 1741 1742 for (; cursor < end; cursor++) { 1743 memblock_free_pages(pfn_to_page(cursor), cursor, 0); 1744 totalram_pages_inc(); 1745 } 1746 } 1747 1748 /* 1749 * Remaining API functions 1750 */ 1751 1752 phys_addr_t __init_memblock memblock_phys_mem_size(void) 1753 { 1754 return memblock.memory.total_size; 1755 } 1756 1757 phys_addr_t __init_memblock memblock_reserved_size(void) 1758 { 1759 return memblock.reserved.total_size; 1760 } 1761 1762 /** 1763 * memblock_estimated_nr_free_pages - return estimated number of free pages 1764 * from memblock point of view 1765 * 1766 * During bootup, subsystems might need a rough estimate of the number of free 1767 * pages in the whole system, before precise numbers are available from the 1768 * buddy. Especially with CONFIG_DEFERRED_STRUCT_PAGE_INIT, the numbers 1769 * obtained from the buddy might be very imprecise during bootup. 1770 * 1771 * Return: 1772 * An estimated number of free pages from memblock point of view. 1773 */ 1774 unsigned long __init memblock_estimated_nr_free_pages(void) 1775 { 1776 return PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size()); 1777 } 1778 1779 /* lowest address */ 1780 phys_addr_t __init_memblock memblock_start_of_DRAM(void) 1781 { 1782 return memblock.memory.regions[0].base; 1783 } 1784 1785 phys_addr_t __init_memblock memblock_end_of_DRAM(void) 1786 { 1787 int idx = memblock.memory.cnt - 1; 1788 1789 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); 1790 } 1791 1792 static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) 1793 { 1794 phys_addr_t max_addr = PHYS_ADDR_MAX; 1795 struct memblock_region *r; 1796 1797 /* 1798 * translate the memory @limit size into the max address within one of 1799 * the memory memblock regions, if the @limit exceeds the total size 1800 * of those regions, max_addr will keep original value PHYS_ADDR_MAX 1801 */ 1802 for_each_mem_region(r) { 1803 if (limit <= r->size) { 1804 max_addr = r->base + limit; 1805 break; 1806 } 1807 limit -= r->size; 1808 } 1809 1810 return max_addr; 1811 } 1812 1813 void __init memblock_enforce_memory_limit(phys_addr_t limit) 1814 { 1815 phys_addr_t max_addr; 1816 1817 if (!limit) 1818 return; 1819 1820 max_addr = __find_max_addr(limit); 1821 1822 /* @limit exceeds the total size of the memory, do nothing */ 1823 if (max_addr == PHYS_ADDR_MAX) 1824 return; 1825 1826 /* truncate both memory and reserved regions */ 1827 memblock_remove_range(&memblock.memory, max_addr, 1828 PHYS_ADDR_MAX); 1829 memblock_remove_range(&memblock.reserved, max_addr, 1830 PHYS_ADDR_MAX); 1831 } 1832 1833 void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) 1834 { 1835 int start_rgn, end_rgn; 1836 int i, ret; 1837 1838 if (!size) 1839 return; 1840 1841 if (!memblock_memory->total_size) { 1842 pr_warn("%s: No memory registered yet\n", __func__); 1843 return; 1844 } 1845 1846 ret = memblock_isolate_range(&memblock.memory, base, size, 1847 &start_rgn, &end_rgn); 1848 if (ret) 1849 return; 1850 1851 /* remove all the MAP regions */ 1852 for (i = memblock.memory.cnt - 1; i >= end_rgn; i--) 1853 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1854 
memblock_remove_region(&memblock.memory, i); 1855 1856 for (i = start_rgn - 1; i >= 0; i--) 1857 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1858 memblock_remove_region(&memblock.memory, i); 1859 1860 /* truncate the reserved regions */ 1861 memblock_remove_range(&memblock.reserved, 0, base); 1862 memblock_remove_range(&memblock.reserved, 1863 base + size, PHYS_ADDR_MAX); 1864 } 1865 1866 void __init memblock_mem_limit_remove_map(phys_addr_t limit) 1867 { 1868 phys_addr_t max_addr; 1869 1870 if (!limit) 1871 return; 1872 1873 max_addr = __find_max_addr(limit); 1874 1875 /* @limit exceeds the total size of the memory, do nothing */ 1876 if (max_addr == PHYS_ADDR_MAX) 1877 return; 1878 1879 memblock_cap_memory_range(0, max_addr); 1880 } 1881 1882 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) 1883 { 1884 unsigned int left = 0, right = type->cnt; 1885 1886 do { 1887 unsigned int mid = (right + left) / 2; 1888 1889 if (addr < type->regions[mid].base) 1890 right = mid; 1891 else if (addr >= (type->regions[mid].base + 1892 type->regions[mid].size)) 1893 left = mid + 1; 1894 else 1895 return mid; 1896 } while (left < right); 1897 return -1; 1898 } 1899 1900 bool __init_memblock memblock_is_reserved(phys_addr_t addr) 1901 { 1902 return memblock_search(&memblock.reserved, addr) != -1; 1903 } 1904 1905 bool __init_memblock memblock_is_memory(phys_addr_t addr) 1906 { 1907 return memblock_search(&memblock.memory, addr) != -1; 1908 } 1909 1910 bool __init_memblock memblock_is_map_memory(phys_addr_t addr) 1911 { 1912 int i = memblock_search(&memblock.memory, addr); 1913 1914 if (i == -1) 1915 return false; 1916 return !memblock_is_nomap(&memblock.memory.regions[i]); 1917 } 1918 1919 int __init_memblock memblock_search_pfn_nid(unsigned long pfn, 1920 unsigned long *start_pfn, unsigned long *end_pfn) 1921 { 1922 struct memblock_type *type = &memblock.memory; 1923 int mid = memblock_search(type, PFN_PHYS(pfn)); 1924 1925 if (mid == -1) 1926 return NUMA_NO_NODE; 1927 1928 *start_pfn = PFN_DOWN(type->regions[mid].base); 1929 *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size); 1930 1931 return memblock_get_region_node(&type->regions[mid]); 1932 } 1933 1934 /** 1935 * memblock_is_region_memory - check if a region is a subset of memory 1936 * @base: base of region to check 1937 * @size: size of region to check 1938 * 1939 * Check if the region [@base, @base + @size) is a subset of a memory block. 1940 * 1941 * Return: 1942 * 0 if false, non-zero if true 1943 */ 1944 bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) 1945 { 1946 int idx = memblock_search(&memblock.memory, base); 1947 phys_addr_t end = base + memblock_cap_size(base, &size); 1948 1949 if (idx == -1) 1950 return false; 1951 return (memblock.memory.regions[idx].base + 1952 memblock.memory.regions[idx].size) >= end; 1953 } 1954 1955 /** 1956 * memblock_is_region_reserved - check if a region intersects reserved memory 1957 * @base: base of region to check 1958 * @size: size of region to check 1959 * 1960 * Check if the region [@base, @base + @size) intersects a reserved 1961 * memory block. 1962 * 1963 * Return: 1964 * True if they intersect, false if not. 
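 *
 * Example (illustrative sketch): platform code that wants to claim a fixed
 * physical window, say for a firmware mailbox, might first check for a clash
 * with existing reservations:
 *
 *	if (memblock_is_region_reserved(mbox_base, mbox_size))
 *		pr_warn("mailbox window overlaps a reserved region\n");
 *	else
 *		memblock_reserve(mbox_base, mbox_size);
 *
 * "mbox_base" and "mbox_size" are hypothetical. Note that this checks for any
 * intersection with the "reserved" type; it does not imply full containment.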
1965 */ 1966 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) 1967 { 1968 return memblock_overlaps_region(&memblock.reserved, base, size); 1969 } 1970 1971 void __init_memblock memblock_trim_memory(phys_addr_t align) 1972 { 1973 phys_addr_t start, end, orig_start, orig_end; 1974 struct memblock_region *r; 1975 1976 for_each_mem_region(r) { 1977 orig_start = r->base; 1978 orig_end = r->base + r->size; 1979 start = round_up(orig_start, align); 1980 end = round_down(orig_end, align); 1981 1982 if (start == orig_start && end == orig_end) 1983 continue; 1984 1985 if (start < end) { 1986 r->base = start; 1987 r->size = end - start; 1988 } else { 1989 memblock_remove_region(&memblock.memory, 1990 r - memblock.memory.regions); 1991 r--; 1992 } 1993 } 1994 } 1995 1996 void __init_memblock memblock_set_current_limit(phys_addr_t limit) 1997 { 1998 memblock.current_limit = limit; 1999 } 2000 2001 phys_addr_t __init_memblock memblock_get_current_limit(void) 2002 { 2003 return memblock.current_limit; 2004 } 2005 2006 static void __init_memblock memblock_dump(struct memblock_type *type) 2007 { 2008 phys_addr_t base, end, size; 2009 enum memblock_flags flags; 2010 int idx; 2011 struct memblock_region *rgn; 2012 2013 pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); 2014 2015 for_each_memblock_type(idx, type, rgn) { 2016 char nid_buf[32] = ""; 2017 2018 base = rgn->base; 2019 size = rgn->size; 2020 end = base + size - 1; 2021 flags = rgn->flags; 2022 #ifdef CONFIG_NUMA 2023 if (numa_valid_node(memblock_get_region_node(rgn))) 2024 snprintf(nid_buf, sizeof(nid_buf), " on node %d", 2025 memblock_get_region_node(rgn)); 2026 #endif 2027 pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n", 2028 type->name, idx, &base, &end, &size, nid_buf, flags); 2029 } 2030 } 2031 2032 static void __init_memblock __memblock_dump_all(void) 2033 { 2034 pr_info("MEMBLOCK configuration:\n"); 2035 pr_info(" memory size = %pa reserved size = %pa\n", 2036 &memblock.memory.total_size, 2037 &memblock.reserved.total_size); 2038 2039 memblock_dump(&memblock.memory); 2040 memblock_dump(&memblock.reserved); 2041 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 2042 memblock_dump(&physmem); 2043 #endif 2044 } 2045 2046 void __init_memblock memblock_dump_all(void) 2047 { 2048 if (memblock_debug) 2049 __memblock_dump_all(); 2050 } 2051 2052 void __init memblock_allow_resize(void) 2053 { 2054 memblock_can_resize = 1; 2055 } 2056 2057 static int __init early_memblock(char *p) 2058 { 2059 if (p && strstr(p, "debug")) 2060 memblock_debug = 1; 2061 return 0; 2062 } 2063 early_param("memblock", early_memblock); 2064 2065 static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) 2066 { 2067 struct page *start_pg, *end_pg; 2068 phys_addr_t pg, pgend; 2069 2070 /* 2071 * Convert start_pfn/end_pfn to a struct page pointer. 2072 */ 2073 start_pg = pfn_to_page(start_pfn - 1) + 1; 2074 end_pg = pfn_to_page(end_pfn - 1) + 1; 2075 2076 /* 2077 * Convert to physical addresses, and round start upwards and end 2078 * downwards. 2079 */ 2080 pg = PAGE_ALIGN(__pa(start_pg)); 2081 pgend = PAGE_ALIGN_DOWN(__pa(end_pg)); 2082 2083 /* 2084 * If there are free pages between these, free the section of the 2085 * memmap array. 2086 */ 2087 if (pg < pgend) 2088 memblock_phys_free(pg, pgend - pg); 2089 } 2090 2091 /* 2092 * The mem_map array can get very big. Free the unused area of the memory map. 
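 *
 * As a rough illustration (hypothetical layout, not from a real platform):
 * with two banks covering PFNs [0, 0x8000) and [0x10000, 0x18000), the
 * struct pages backing the hole [0x8000, 0x10000) are never used. On
 * configurations with HAVE_ARCH_PFN_VALID and without SPARSEMEM_VMEMMAP,
 * the memory backing that part of the memory map is released below, trimmed
 * to pageblock (and, with SPARSEMEM, section) boundaries.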
2093 */ 2094 static void __init free_unused_memmap(void) 2095 { 2096 unsigned long start, end, prev_end = 0; 2097 int i; 2098 2099 if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) || 2100 IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) 2101 return; 2102 2103 /* 2104 * This relies on each bank being in address order. 2105 * The banks are sorted previously in bootmem_init(). 2106 */ 2107 for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { 2108 #ifdef CONFIG_SPARSEMEM 2109 /* 2110 * Take care not to free memmap entries that don't exist 2111 * due to SPARSEMEM sections which aren't present. 2112 */ 2113 start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); 2114 #endif 2115 /* 2116 * Align down here since many operations in VM subsystem 2117 * presume that there are no holes in the memory map inside 2118 * a pageblock 2119 */ 2120 start = pageblock_start_pfn(start); 2121 2122 /* 2123 * If we had a previous bank, and there is a space 2124 * between the current bank and the previous, free it. 2125 */ 2126 if (prev_end && prev_end < start) 2127 free_memmap(prev_end, start); 2128 2129 /* 2130 * Align up here since many operations in VM subsystem 2131 * presume that there are no holes in the memory map inside 2132 * a pageblock 2133 */ 2134 prev_end = pageblock_align(end); 2135 } 2136 2137 #ifdef CONFIG_SPARSEMEM 2138 if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) { 2139 prev_end = pageblock_align(end); 2140 free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); 2141 } 2142 #endif 2143 } 2144 2145 static void __init __free_pages_memory(unsigned long start, unsigned long end) 2146 { 2147 int order; 2148 2149 while (start < end) { 2150 /* 2151 * Free the pages in the largest chunks alignment allows. 2152 * 2153 * __ffs() behaviour is undefined for 0. start == 0 is 2154 * MAX_PAGE_ORDER-aligned, set order to MAX_PAGE_ORDER for 2155 * the case. 2156 */ 2157 if (start) 2158 order = min_t(int, MAX_PAGE_ORDER, __ffs(start)); 2159 else 2160 order = MAX_PAGE_ORDER; 2161 2162 while (start + (1UL << order) > end) 2163 order--; 2164 2165 memblock_free_pages(pfn_to_page(start), start, order); 2166 2167 start += (1UL << order); 2168 } 2169 } 2170 2171 static unsigned long __init __free_memory_core(phys_addr_t start, 2172 phys_addr_t end) 2173 { 2174 unsigned long start_pfn = PFN_UP(start); 2175 unsigned long end_pfn = PFN_DOWN(end); 2176 2177 if (!IS_ENABLED(CONFIG_HIGHMEM) && end_pfn > max_low_pfn) 2178 end_pfn = max_low_pfn; 2179 2180 if (start_pfn >= end_pfn) 2181 return 0; 2182 2183 __free_pages_memory(start_pfn, end_pfn); 2184 2185 return end_pfn - start_pfn; 2186 } 2187 2188 static void __init memmap_init_reserved_pages(void) 2189 { 2190 struct memblock_region *region; 2191 phys_addr_t start, end; 2192 int nid; 2193 unsigned long max_reserved; 2194 2195 /* 2196 * set nid on all reserved pages and also treat struct 2197 * pages for the NOMAP regions as PageReserved 2198 */ 2199 repeat: 2200 max_reserved = memblock.reserved.max; 2201 for_each_mem_region(region) { 2202 nid = memblock_get_region_node(region); 2203 start = region->base; 2204 end = start + region->size; 2205 2206 if (memblock_is_nomap(region)) 2207 reserve_bootmem_region(start, end, nid); 2208 2209 memblock_set_node(start, region->size, &memblock.reserved, nid); 2210 } 2211 /* 2212 * 'max' is changed means memblock.reserved has been doubled its 2213 * array, which may result a new reserved region before current 2214 * 'start'. Now we should repeat the procedure to set its node id. 
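 * For example (hypothetical sequence): if memblock_set_node() forces the
 * reserved array to double and the newly reserved array happens to sit below
 * the region currently being processed, that new reserved region would be
 * missed by this pass, so the loop restarts from the beginning.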
2215 */ 2216 if (max_reserved != memblock.reserved.max) 2217 goto repeat; 2218 2219 /* 2220 * initialize struct pages for reserved regions that don't have 2221 * the MEMBLOCK_RSRV_NOINIT flag set 2222 */ 2223 for_each_reserved_mem_region(region) { 2224 if (!memblock_is_reserved_noinit(region)) { 2225 nid = memblock_get_region_node(region); 2226 start = region->base; 2227 end = start + region->size; 2228 2229 if (!numa_valid_node(nid)) 2230 nid = early_pfn_to_nid(PFN_DOWN(start)); 2231 2232 reserve_bootmem_region(start, end, nid); 2233 } 2234 } 2235 } 2236 2237 static unsigned long __init free_low_memory_core_early(void) 2238 { 2239 unsigned long count = 0; 2240 phys_addr_t start, end; 2241 u64 i; 2242 2243 memblock_clear_hotplug(0, -1); 2244 2245 memmap_init_reserved_pages(); 2246 2247 /* 2248 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id 2249 * because in some case like Node0 doesn't have RAM installed 2250 * low ram will be on Node1 2251 */ 2252 for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, 2253 NULL) 2254 count += __free_memory_core(start, end); 2255 2256 return count; 2257 } 2258 2259 static int reset_managed_pages_done __initdata; 2260 2261 static void __init reset_node_managed_pages(pg_data_t *pgdat) 2262 { 2263 struct zone *z; 2264 2265 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) 2266 atomic_long_set(&z->managed_pages, 0); 2267 } 2268 2269 void __init reset_all_zones_managed_pages(void) 2270 { 2271 struct pglist_data *pgdat; 2272 2273 if (reset_managed_pages_done) 2274 return; 2275 2276 for_each_online_pgdat(pgdat) 2277 reset_node_managed_pages(pgdat); 2278 2279 reset_managed_pages_done = 1; 2280 } 2281 2282 /** 2283 * memblock_free_all - release free pages to the buddy allocator 2284 */ 2285 void __init memblock_free_all(void) 2286 { 2287 unsigned long pages; 2288 2289 free_unused_memmap(); 2290 reset_all_zones_managed_pages(); 2291 2292 pages = free_low_memory_core_early(); 2293 totalram_pages_add(pages); 2294 } 2295 2296 /* Keep a table to reserve named memory */ 2297 #define RESERVE_MEM_MAX_ENTRIES 8 2298 #define RESERVE_MEM_NAME_SIZE 16 2299 struct reserve_mem_table { 2300 char name[RESERVE_MEM_NAME_SIZE]; 2301 phys_addr_t start; 2302 phys_addr_t size; 2303 }; 2304 static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES]; 2305 static int reserved_mem_count; 2306 static DEFINE_MUTEX(reserve_mem_lock); 2307 2308 /* Add wildcard region with a lookup name */ 2309 static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size, 2310 const char *name) 2311 { 2312 struct reserve_mem_table *map; 2313 2314 map = &reserved_mem_table[reserved_mem_count++]; 2315 map->start = start; 2316 map->size = size; 2317 strscpy(map->name, name); 2318 } 2319 2320 static struct reserve_mem_table *reserve_mem_find_by_name_nolock(const char *name) 2321 { 2322 struct reserve_mem_table *map; 2323 int i; 2324 2325 for (i = 0; i < reserved_mem_count; i++) { 2326 map = &reserved_mem_table[i]; 2327 if (!map->size) 2328 continue; 2329 if (strcmp(name, map->name) == 0) 2330 return map; 2331 } 2332 return NULL; 2333 } 2334 2335 /** 2336 * reserve_mem_find_by_name - Find reserved memory region with a given name 2337 * @name: The name that is attached to a reserved memory region 2338 * @start: If found, holds the start address 2339 * @size: If found, holds the size of the address. 2340 * 2341 * @start and @size are only updated if @name is found. 2342 * 2343 * Returns: 1 if found or 0 if not found. 
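 *
 * Example (illustrative sketch): a consumer that booted with something like
 * "reserve_mem=2M:4096:trace" on the kernel command line could locate its
 * block with:
 *
 *	phys_addr_t start, size;
 *
 *	if (reserve_mem_find_by_name("trace", &start, &size))
 *		buf = memremap(start, size, MEMREMAP_WB);
 *
 * "buf" is a hypothetical caller-side pointer; the name, start and size come
 * from the reserve_mem= parameter parsed further below.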
2344 */
2345 int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
2346 {
2347 struct reserve_mem_table *map;
2348
2349 guard(mutex)(&reserve_mem_lock);
2350 map = reserve_mem_find_by_name_nolock(name);
2351 if (!map)
2352 return 0;
2353
2354 *start = map->start;
2355 *size = map->size;
2356 return 1;
2357 }
2358 EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);
2359
2360 /**
2361 * reserve_mem_release_by_name - Release reserved memory region with a given name
2362 * @name: The name that is attached to a reserved memory region
2363 *
2364 * Forcibly release the pages in the reserved memory region so that the memory
2365 * can be used as free memory. Once released, the size of the reserved region becomes 0.
2366 *
2367 * Returns: 1 if released or 0 if not found.
2368 */
2369 int reserve_mem_release_by_name(const char *name)
2370 {
2371 char buf[RESERVE_MEM_NAME_SIZE + 12];
2372 struct reserve_mem_table *map;
2373 void *start, *end;
2374
2375 guard(mutex)(&reserve_mem_lock);
2376 map = reserve_mem_find_by_name_nolock(name);
2377 if (!map)
2378 return 0;
2379
2380 start = phys_to_virt(map->start);
2381 end = start + map->size - 1;
2382 snprintf(buf, sizeof(buf), "reserve_mem:%s", name);
2383 free_reserved_area(start, end, 0, buf);
2384 map->size = 0;
2385
2386 return 1;
2387 }
2388
2389 /*
2390 * Parse reserve_mem=nn:align:name
2391 */
2392 static int __init reserve_mem(char *p)
2393 {
2394 phys_addr_t start, size, align, tmp;
2395 char *name;
2396 char *oldp;
2397 int len;
2398
2399 if (!p)
2400 return -EINVAL;
2401
2402 /* Check if there's room for more reserved memory */
2403 if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
2404 return -EBUSY;
2405
2406 oldp = p;
2407 size = memparse(p, &p);
2408 if (!size || p == oldp)
2409 return -EINVAL;
2410
2411 if (*p != ':')
2412 return -EINVAL;
2413
2414 align = memparse(p+1, &p);
2415 if (*p != ':')
2416 return -EINVAL;
2417
2418 /*
2419 * memblock_phys_alloc() doesn't like a zero size align,
2420 * but it is OK for this command to have it.
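 * For instance (hypothetical command line), "reserve_mem=1M:0:oops" parses
 * with align == 0; the clamp below raises it to SMP_CACHE_BYTES before the
 * allocation is attempted.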
2421 */
2422 if (align < SMP_CACHE_BYTES)
2423 align = SMP_CACHE_BYTES;
2424
2425 name = p + 1;
2426 len = strlen(name);
2427
2428 /* name needs to have length but not too big */
2429 if (!len || len >= RESERVE_MEM_NAME_SIZE)
2430 return -EINVAL;
2431
2432 /* Make sure that name has text */
2433 for (p = name; *p; p++) {
2434 if (!isspace(*p))
2435 break;
2436 }
2437 if (!*p)
2438 return -EINVAL;
2439
2440 /* Make sure the name is not already used */
2441 if (reserve_mem_find_by_name(name, &start, &tmp))
2442 return -EBUSY;
2443
2444 start = memblock_phys_alloc(size, align);
2445 if (!start)
2446 return -ENOMEM;
2447
2448 reserved_mem_add(start, size, name);
2449
2450 return 1;
2451 }
2452 __setup("reserve_mem=", reserve_mem);
2453
2454 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
2455 static const char * const flagname[] = {
2456 [ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
2457 [ilog2(MEMBLOCK_MIRROR)] = "MIRROR",
2458 [ilog2(MEMBLOCK_NOMAP)] = "NOMAP",
2459 [ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG",
2460 [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT",
2461 };
2462
2463 static int memblock_debug_show(struct seq_file *m, void *private)
2464 {
2465 struct memblock_type *type = m->private;
2466 struct memblock_region *reg;
2467 int i, j, nid;
2468 unsigned int count = ARRAY_SIZE(flagname);
2469 phys_addr_t end;
2470
2471 for (i = 0; i < type->cnt; i++) {
2472 reg = &type->regions[i];
2473 end = reg->base + reg->size - 1;
2474 nid = memblock_get_region_node(reg);
2475
2476 seq_printf(m, "%4d: ", i);
2477 seq_printf(m, "%pa..%pa ", &reg->base, &end);
2478 if (numa_valid_node(nid))
2479 seq_printf(m, "%4d ", nid);
2480 else
2481 seq_printf(m, "%4c ", 'x');
2482 if (reg->flags) {
2483 for (j = 0; j < count; j++) {
2484 if (reg->flags & (1U << j)) {
2485 seq_printf(m, "%s\n", flagname[j]);
2486 break;
2487 }
2488 }
2489 if (j == count)
2490 seq_printf(m, "%s\n", "UNKNOWN");
2491 } else {
2492 seq_printf(m, "%s\n", "NONE");
2493 }
2494 }
2495 return 0;
2496 }
2497 DEFINE_SHOW_ATTRIBUTE(memblock_debug);
2498
2499 static int __init memblock_init_debugfs(void)
2500 {
2501 struct dentry *root = debugfs_create_dir("memblock", NULL);
2502
2503 debugfs_create_file("memory", 0444, root,
2504 &memblock.memory, &memblock_debug_fops);
2505 debugfs_create_file("reserved", 0444, root,
2506 &memblock.reserved, &memblock_debug_fops);
2507 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
2508 debugfs_create_file("physmem", 0444, root, &physmem,
2509 &memblock_debug_fops);
2510 #endif
2511
2512 return 0;
2513 }
2514 __initcall(memblock_init_debugfs);
2515
2516 #endif /* CONFIG_DEBUG_FS */
2517
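
/*
 * Example (illustrative only, assuming CONFIG_DEBUG_FS and
 * CONFIG_ARCH_KEEP_MEMBLOCK are enabled and debugfs is mounted at
 * /sys/kernel/debug):
 *
 *	# cat /sys/kernel/debug/memblock/memory
 *	   0: 0x0000000080000000..0x00000000bfffffff    0 NONE
 *	   1: 0x0000000100000000..0x000000017fffffff    0 NONE
 *
 * Each line comes from memblock_debug_show() above: region index, physical
 * range, node id (or 'x' when no valid node is set) and the first matching
 * flag name, or NONE when no flags are set. The addresses shown here are
 * made up for illustration.
 */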