// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/poison.h>
#include <linux/pfn.h>
#include <linux/debugfs.h>
#include <linux/kmemleak.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>
#include <linux/mutex.h>

#include <asm/sections.h>
#include <linux/io.h>

#include "internal.h"

#define INIT_MEMBLOCK_REGIONS			128
#define INIT_PHYSMEM_REGIONS			4

#ifndef INIT_MEMBLOCK_RESERVED_REGIONS
# define INIT_MEMBLOCK_RESERVED_REGIONS		INIT_MEMBLOCK_REGIONS
#endif

#ifndef INIT_MEMBLOCK_MEMORY_REGIONS
#define INIT_MEMBLOCK_MEMORY_REGIONS		INIT_MEMBLOCK_REGIONS
#endif

/**
 * DOC: memblock overview
 *
 * Memblock is a method of managing memory regions during the early
 * boot period when the usual kernel memory allocators are not up and
 * running.
 *
 * Memblock views the system memory as collections of contiguous
 * regions. There are several types of these collections:
 *
 * * ``memory`` - describes the physical memory available to the
 *   kernel; this may differ from the actual physical memory installed
 *   in the system, for instance when the memory is restricted with
 *   the ``mem=`` command line parameter
 * * ``reserved`` - describes the regions that were allocated
 * * ``physmem`` - describes the actual physical memory available during
 *   boot regardless of the possible restrictions and memory hot(un)plug;
 *   the ``physmem`` type is only available on some architectures.
 *
 * Each region is represented by struct memblock_region that
 * defines the region extents, its attributes and NUMA node id on NUMA
 * systems. Every memory type is described by the struct memblock_type
 * which contains an array of memory regions along with
 * the allocator metadata. The "memory" and "reserved" types are nicely
 * wrapped with struct memblock. This structure is statically
 * initialized at build time. The region arrays are initially sized to
 * %INIT_MEMBLOCK_MEMORY_REGIONS for "memory" and
 * %INIT_MEMBLOCK_RESERVED_REGIONS for "reserved". The region array
 * for "physmem" is initially sized to %INIT_PHYSMEM_REGIONS.
 * The memblock_allow_resize() enables automatic resizing of the region
 * arrays during addition of new regions. This feature should be used
 * with care so that memory allocated for the region array will not
 * overlap with areas that should be reserved, for example initrd.
 *
 * The early architecture setup should tell memblock what the physical
 * memory layout is by using memblock_add() or memblock_add_node()
 * functions. The first function does not assign the region to a NUMA
 * node and it is appropriate for UMA systems. Yet, it is possible to
 * use it on NUMA systems as well and assign the region to a NUMA node
 * later in the setup process using memblock_set_node(). The
 * memblock_add_node() performs such an assignment directly.
 *
 * Once memblock is set up, the memory can be allocated using one of the
 * API variants:
 *
 * * memblock_phys_alloc*() - these functions return the **physical**
 *   address of the allocated memory
 * * memblock_alloc*() - these functions return the **virtual** address
 *   of the allocated memory.
 *
 * Note that both API variants use implicit assumptions about allowed
 * memory ranges and the fallback methods. Consult the documentation
 * of memblock_alloc_internal() and memblock_alloc_range_nid()
 * functions for more elaborate description.
 *
 * As the system boot progresses, the architecture specific mem_init()
 * function frees all the memory to the buddy page allocator.
 *
 * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the
 * memblock data structures (except "physmem") will be discarded after the
 * system initialization completes.
 */
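
/*
 * Illustrative sketch (not part of the kernel build): a typical early boot
 * sequence following the overview above.  The base address, sizes and the
 * "early_buf" name are made up for the example.
 *
 *	// describe the physical memory layout reported by firmware
 *	memblock_add(0x80000000, SZ_512M);
 *
 *	// protect ranges that must not be handed out, e.g. the initrd
 *	memblock_reserve(initrd_base, initrd_size);
 *
 *	// allocate zeroed early memory; returns a virtual address
 *	void *early_buf = memblock_alloc(SZ_64K, SMP_CACHE_BYTES);
 *
 *	// later, mem_init() releases everything else to the buddy allocator
 */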

#ifndef CONFIG_NUMA
struct pglist_data __refdata contig_page_data;
EXPORT_SYMBOL(contig_page_data);
#endif

unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
unsigned long long max_possible_pfn;

static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
#endif

struct memblock memblock __initdata_memblock = {
	.memory.regions		= memblock_memory_init_regions,
	.memory.max		= INIT_MEMBLOCK_MEMORY_REGIONS,
	.memory.name		= "memory",

	.reserved.regions	= memblock_reserved_init_regions,
	.reserved.max		= INIT_MEMBLOCK_RESERVED_REGIONS,
	.reserved.name		= "reserved",

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem = {
	.regions	= memblock_physmem_init_regions,
	.max		= INIT_PHYSMEM_REGIONS,
	.name		= "physmem",
};
#endif

/*
 * keep a pointer to &memblock.memory in the text section to use it in
 * __next_mem_range() and its helpers.
 * For architectures that do not keep memblock data after init, this
 * pointer will be reset to NULL at memblock_discard()
 */
static __refdata struct memblock_type *memblock_memory = &memblock.memory;

#define for_each_memblock_type(i, memblock_type, rgn)			\
	for (i = 0, rgn = &memblock_type->regions[0];			\
	     i < memblock_type->cnt;					\
	     i++, rgn = &memblock_type->regions[i])

#define memblock_dbg(fmt, ...)						\
	do {								\
		if (memblock_debug)					\
			pr_info(fmt, ##__VA_ARGS__);			\
	} while (0)

static int memblock_debug __initdata_memblock;
static bool system_has_some_mirror __initdata_memblock;
static int memblock_can_resize __initdata_memblock;
static int memblock_memory_in_slab __initdata_memblock;
static int memblock_reserved_in_slab __initdata_memblock;

bool __init_memblock memblock_has_mirror(void)
{
	return system_has_some_mirror;
}

static enum memblock_flags __init_memblock choose_memblock_flags(void)
{
	return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
}

/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
{
	return *size = min(*size, PHYS_ADDR_MAX - base);
}

/*
 * Address comparison utilities
 */
unsigned long __init_memblock
memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2,
		       phys_addr_t size2)
{
	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size)
{
	unsigned long i;

	memblock_cap_size(base, &size);

	for (i = 0; i < type->cnt; i++)
		if (memblock_addrs_overlap(base, size, type->regions[i].base,
					   type->regions[i].size))
			return true;
	return false;
}

/**
 * __memblock_find_range_bottom_up - find free area utility in bottom-up
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Utility called from memblock_find_in_range_node(), find free area bottom-up.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
				phys_addr_t size, phys_addr_t align, int nid,
				enum memblock_flags flags)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) {
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		cand = round_up(this_start, align);
		if (cand < this_end && this_end - cand >= size)
			return cand;
	}

	return 0;
}

/**
 * __memblock_find_range_top_down - find free area utility, in top-down
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Utility called from memblock_find_in_range_node(), find free area top-down.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
			       phys_addr_t size, phys_addr_t align, int nid,
			       enum memblock_flags flags)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end,
					NULL) {
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		if (this_end < size)
			continue;

		cand = round_down(this_end - size, align);
		if (cand >= this_start)
			return cand;
	}

	return 0;
}

/**
 * memblock_find_in_range_node - find free area in given range and node
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Find @size free area aligned to @align in the specified range and node.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid,
					enum memblock_flags flags)
{
	/* pump up @end */
	if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
	    end == MEMBLOCK_ALLOC_NOLEAKTRACE)
		end = memblock.current_limit;

	/* avoid allocating the first page */
	start = max_t(phys_addr_t, start, PAGE_SIZE);
	end = max(start, end);

	if (memblock_bottom_up())
		return __memblock_find_range_bottom_up(start, end, size, align,
						       nid, flags);
	else
		return __memblock_find_range_top_down(start, end, size, align,
						      nid, flags);
}
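
/*
 * Worked example (illustrative numbers only): with a single free range
 * [0x1000, 0x9000), size = 0x2000 and align = 0x1000, the bottom-up helper
 * returns round_up(0x1000, 0x1000) = 0x1000, while the top-down helper
 * returns round_down(0x9000 - 0x2000, 0x1000) = 0x7000.  Which of the two
 * is used depends on memblock_bottom_up().
 */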

/**
 * memblock_find_in_range - find free area in given range
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 *
 * Find @size free area aligned to @align in the specified range.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
					phys_addr_t end, phys_addr_t size,
					phys_addr_t align)
{
	phys_addr_t ret;
	enum memblock_flags flags = choose_memblock_flags();

again:
	ret = memblock_find_in_range_node(size, align, start, end,
					  NUMA_NO_NODE, flags);

	if (!ret && (flags & MEMBLOCK_MIRROR)) {
		pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
			&size);
		flags &= ~MEMBLOCK_MIRROR;
		goto again;
	}

	return ret;
}

static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
{
	type->total_size -= type->regions[r].size;
	memmove(&type->regions[r], &type->regions[r + 1],
		(type->cnt - (r + 1)) * sizeof(type->regions[r]));
	type->cnt--;

	/* Special case for empty arrays */
	if (type->cnt == 0) {
		WARN_ON(type->total_size != 0);
		type->regions[0].base = 0;
		type->regions[0].size = 0;
		type->regions[0].flags = 0;
		memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
	}
}

#ifndef CONFIG_ARCH_KEEP_MEMBLOCK
/**
 * memblock_discard - discard memory and reserved arrays if they were allocated
 */
void __init memblock_discard(void)
{
	phys_addr_t addr, size;

	if (memblock.reserved.regions != memblock_reserved_init_regions) {
		addr = __pa(memblock.reserved.regions);
		size = PAGE_ALIGN(sizeof(struct memblock_region) *
				  memblock.reserved.max);
		if (memblock_reserved_in_slab)
			kfree(memblock.reserved.regions);
		else
			memblock_free_late(addr, size);
	}

	if (memblock.memory.regions != memblock_memory_init_regions) {
		addr = __pa(memblock.memory.regions);
		size = PAGE_ALIGN(sizeof(struct memblock_region) *
				  memblock.memory.max);
		if (memblock_memory_in_slab)
			kfree(memblock.memory.regions);
		else
			memblock_free_late(addr, size);
	}

	memblock_memory = NULL;
}
#endif

/**
 * memblock_double_array - double the size of the memblock regions array
 * @type: memblock type of the regions array being doubled
 * @new_area_start: starting address of memory range to avoid overlap with
 * @new_area_size: size of memory range to avoid overlap with
 *
 * Double the size of the @type regions array. If memblock is being used to
 * allocate memory for a new reserved regions array and there is a previously
 * allocated memory range [@new_area_start, @new_area_start + @new_area_size]
 * waiting to be reserved, ensure the memory used by the new array does
 * not overlap.
 *
 * Return:
 * 0 on success, -1 on failure.
 */
static int __init_memblock memblock_double_array(struct memblock_type *type,
						phys_addr_t new_area_start,
						phys_addr_t new_area_size)
{
	struct memblock_region *new_array, *old_array;
	phys_addr_t old_alloc_size, new_alloc_size;
	phys_addr_t old_size, new_size, addr, new_end;
	int use_slab = slab_is_available();
	int *in_slab;

	/* We don't allow resizing until we know about the reserved regions
	 * of memory that aren't suitable for allocation
	 */
	if (!memblock_can_resize)
		panic("memblock: cannot resize %s array\n", type->name);

	/* Calculate new doubled size */
	old_size = type->max * sizeof(struct memblock_region);
	new_size = old_size << 1;
	/*
	 * We need to allocate a new array aligned to PAGE_SIZE,
	 * so we can free it completely later.
	 */
	old_alloc_size = PAGE_ALIGN(old_size);
	new_alloc_size = PAGE_ALIGN(new_size);

	/* Retrieve the slab flag */
	if (type == &memblock.memory)
		in_slab = &memblock_memory_in_slab;
	else
		in_slab = &memblock_reserved_in_slab;

	/* Try to find some space for it */
	if (use_slab) {
		new_array = kmalloc(new_size, GFP_KERNEL);
		addr = new_array ? __pa(new_array) : 0;
	} else {
		/* only exclude range when trying to double reserved.regions */
		if (type != &memblock.reserved)
			new_area_start = new_area_size = 0;

		addr = memblock_find_in_range(new_area_start + new_area_size,
						memblock.current_limit,
						new_alloc_size, PAGE_SIZE);
		if (!addr && new_area_size)
			addr = memblock_find_in_range(0,
				min(new_area_start, memblock.current_limit),
				new_alloc_size, PAGE_SIZE);

		if (addr) {
			/* The memory may not have been accepted, yet. */
			accept_memory(addr, new_alloc_size);

			new_array = __va(addr);
		} else {
			new_array = NULL;
		}
	}
	if (!addr) {
		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
		       type->name, type->max, type->max * 2);
		return -1;
	}

	new_end = addr + new_size - 1;
	memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]",
			type->name, type->max * 2, &addr, &new_end);

	/*
	 * Found space, we now need to move the array over before we add the
	 * reserved region since it may be our reserved array itself that is
	 * full.
	 */
	memcpy(new_array, type->regions, old_size);
	memset(new_array + type->max, 0, old_size);
	old_array = type->regions;
	type->regions = new_array;
	type->max <<= 1;

	/* Free old array. We needn't free it if the array is the static one */
	if (*in_slab)
		kfree(old_array);
	else if (old_array != memblock_memory_init_regions &&
		 old_array != memblock_reserved_init_regions)
		memblock_free(old_array, old_alloc_size);

	/*
	 * Reserve the new array if that comes from the memblock.  Otherwise, we
	 * needn't do it
	 */
	if (!use_slab)
		BUG_ON(memblock_reserve_kern(addr, new_alloc_size));

	/* Update slab flag */
	*in_slab = use_slab;

	return 0;
}

/**
 * memblock_merge_regions - merge neighboring compatible regions
 * @type: memblock type to scan
 * @start_rgn: start scanning from (@start_rgn - 1)
 * @end_rgn: end scanning at (@end_rgn - 1)
 * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn)
 */
static void __init_memblock memblock_merge_regions(struct memblock_type *type,
						   unsigned long start_rgn,
						   unsigned long end_rgn)
{
	int i = 0;
	if (start_rgn)
		i = start_rgn - 1;
	end_rgn = min(end_rgn, type->cnt - 1);
	while (i < end_rgn) {
		struct memblock_region *this = &type->regions[i];
		struct memblock_region *next = &type->regions[i + 1];

		if (this->base + this->size != next->base ||
		    memblock_get_region_node(this) !=
		    memblock_get_region_node(next) ||
		    this->flags != next->flags) {
			BUG_ON(this->base + this->size > next->base);
			i++;
			continue;
		}

		this->size += next->size;
		/* move forward from next + 1, index of which is i + 2 */
		memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next));
		type->cnt--;
		end_rgn--;
	}
}

/**
 * memblock_insert_region - insert new memblock region
 * @type: memblock type to insert into
 * @idx: index for the insertion point
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: node id of the new region
 * @flags: flags of the new region
 *
 * Insert new memblock region [@base, @base + @size) into @type at @idx.
 * @type must already have extra room to accommodate the new region.
 */
static void __init_memblock memblock_insert_region(struct memblock_type *type,
						   int idx, phys_addr_t base,
						   phys_addr_t size,
						   int nid,
						   enum memblock_flags flags)
{
	struct memblock_region *rgn = &type->regions[idx];

	BUG_ON(type->cnt >= type->max);
	memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
	rgn->base = base;
	rgn->size = size;
	rgn->flags = flags;
	memblock_set_region_node(rgn, nid);
	type->cnt++;
	type->total_size += size;
}

/**
 * memblock_add_range - add new memblock region
 * @type: memblock type to add new region into
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base, @base + @size) into @type.  The new region
 * is allowed to overlap with existing ones - overlaps don't affect already
 * existing regions.  @type is guaranteed to be minimal (all neighbouring
 * compatible regions are merged) after the addition.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_add_range(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size,
				int nid, enum memblock_flags flags)
{
	bool insert = false;
	phys_addr_t obase = base;
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx, nr_new, start_rgn = -1, end_rgn;
	struct memblock_region *rgn;

	if (!size)
		return 0;

	/* special case for empty array */
	if (type->regions[0].size == 0) {
		WARN_ON(type->cnt != 0 || type->total_size);
		type->regions[0].base = base;
		type->regions[0].size = size;
		type->regions[0].flags = flags;
		memblock_set_region_node(&type->regions[0], nid);
		type->total_size = size;
		type->cnt = 1;
		return 0;
	}

	/*
	 * The worst case is when new range overlaps all existing regions,
	 * then we'll need type->cnt + 1 empty regions in @type. So if
	 * type->cnt * 2 + 1 is less than or equal to type->max, we know
	 * that there are enough empty regions in @type, and we can insert
	 * regions directly.
	 */
	if (type->cnt * 2 + 1 <= type->max)
		insert = true;

repeat:
	/*
	 * The following is executed twice.  Once with %false @insert and
	 * then with %true.  The first counts the number of regions needed
	 * to accommodate the new area.  The second actually inserts them.
	 */
	base = obase;
	nr_new = 0;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps.  If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
#ifdef CONFIG_NUMA
			WARN_ON(nid != memblock_get_region_node(rgn));
#endif
			WARN_ON(flags != MEMBLOCK_NONE && flags != rgn->flags);
			nr_new++;
			if (insert) {
				if (start_rgn == -1)
					start_rgn = idx;
				end_rgn = idx + 1;
				memblock_insert_region(type, idx++, base,
						       rbase - base, nid,
						       flags);
			}
		}
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
	}

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert) {
			if (start_rgn == -1)
				start_rgn = idx;
			end_rgn = idx + 1;
			memblock_insert_region(type, idx, base, end - base,
					       nid, flags);
		}
	}

	if (!nr_new)
		return 0;

	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
	 */
	if (!insert) {
		while (type->cnt + nr_new > type->max)
			if (memblock_double_array(type, obase, size) < 0)
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type, start_rgn, end_rgn);
		return 0;
	}
}
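
/*
 * Illustrative sketch (hypothetical addresses): because memblock_add_range()
 * both tolerates overlap and merges compatible neighbours, the two calls
 * below leave a single "memory" region [0x1000, 0x4000):
 *
 *	memblock_add(0x1000, 0x2000);	// [0x1000, 0x3000)
 *	memblock_add(0x2000, 0x2000);	// overlaps, extends to 0x4000
 */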

/**
 * memblock_add_node - add new memblock region within a NUMA node
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base, @base + @size) to the "memory"
 * type. See memblock_add_range() description for more details.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
				      int nid, enum memblock_flags flags)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
		     &base, &end, nid, flags, (void *)_RET_IP_);

	return memblock_add_range(&memblock.memory, base, size, nid, flags);
}

/**
 * memblock_add - add new memblock region
 * @base: base address of the new region
 * @size: size of the new region
 *
 * Add new memblock region [@base, @base + @size) to the "memory"
 * type. See memblock_add_range() description for more details.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
}

/**
 * memblock_validate_numa_coverage - check if amount of memory with
 * no node ID assigned is less than a threshold
 * @threshold_bytes: maximal memory size that can have unassigned node
 * ID (in bytes).
 *
 * A buggy firmware may report memory that does not belong to any node.
 * Check if amount of such memory is below @threshold_bytes.
 *
 * Return: true on success, false on failure.
 */
bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_bytes)
{
	unsigned long nr_pages = 0;
	unsigned long start_pfn, end_pfn, mem_size_mb;
	int nid, i;

	/* calculate lost pages */
	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
		if (!numa_valid_node(nid))
			nr_pages += end_pfn - start_pfn;
	}

	if ((nr_pages << PAGE_SHIFT) > threshold_bytes) {
		mem_size_mb = memblock_phys_mem_size() >> 20;
		pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n",
		       (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb);
		return false;
	}

	return true;
}

/**
 * memblock_isolate_range - isolate given range into disjoint memblocks
 * @type: memblock type to isolate range for
 * @base: base of range to isolate
 * @size: size of range to isolate
 * @start_rgn: out parameter for the start of isolated region
 * @end_rgn: out parameter for the end of isolated region
 *
 * Walk @type and ensure that regions don't cross the boundaries defined by
 * [@base, @base + @size).  Crossing regions are split at the boundaries,
 * which may create at most two more regions.  The index of the first
 * region inside the range is returned in *@start_rgn and the index of the
 * first region after the range is returned in *@end_rgn.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_isolate_range(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size,
					int *start_rgn, int *end_rgn)
{
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx;
	struct memblock_region *rgn;

	*start_rgn = *end_rgn = 0;

	if (!size)
		return 0;

	/* we'll create at most two more regions */
	while (type->cnt + 2 > type->max)
		if (memblock_double_array(type, base, size) < 0)
			return -ENOMEM;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;

		if (rbase < base) {
			/*
			 * @rgn intersects from below.  Split and continue
			 * to process the next region - the new top half.
			 */
			rgn->base = base;
			rgn->size -= base - rbase;
			type->total_size -= base - rbase;
			memblock_insert_region(type, idx, rbase, base - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else if (rend > end) {
			/*
			 * @rgn intersects from above.  Split and redo the
			 * current region - the new bottom half.
			 */
			rgn->base = end;
			rgn->size -= end - rbase;
			type->total_size -= end - rbase;
			memblock_insert_region(type, idx--, rbase, end - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else {
			/* @rgn is fully contained, record it */
			if (!*end_rgn)
				*start_rgn = idx;
			*end_rgn = idx + 1;
		}
	}

	return 0;
}
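
/*
 * Worked example (hypothetical layout): if "memory" holds one region
 * [0x0, 0x4000) and memblock_isolate_range() is called with base = 0x1000
 * and size = 0x2000, the region is split into [0x0, 0x1000),
 * [0x1000, 0x3000) and [0x3000, 0x4000), with *start_rgn = 1 and
 * *end_rgn = 2, so that callers can operate on the middle region only.
 */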

static int __init_memblock memblock_remove_range(struct memblock_type *type,
					  phys_addr_t base, phys_addr_t size)
{
	int start_rgn, end_rgn;
	int i, ret;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = end_rgn - 1; i >= start_rgn; i--)
		memblock_remove_region(type, i);
	return 0;
}

int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_remove_range(&memblock.memory, base, size);
}

/**
 * memblock_free - free boot memory allocation
 * @ptr: starting address of the boot memory allocation
 * @size: size of the boot memory block in bytes
 *
 * Free boot memory block previously allocated by memblock_alloc_xx() API.
 * The freed memory will not be released to the buddy allocator.
 */
void __init_memblock memblock_free(void *ptr, size_t size)
{
	if (ptr)
		memblock_phys_free(__pa(ptr), size);
}

/**
 * memblock_phys_free - free boot memory block
 * @base: phys starting address of the boot memory block
 * @size: size of the boot memory block in bytes
 *
 * Free boot memory block previously allocated by memblock_phys_alloc_xx() API.
 * The freed memory will not be released to the buddy allocator.
 */
int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	kmemleak_free_part_phys(base, size);
	return memblock_remove_range(&memblock.reserved, base, size);
}

int __init_memblock __memblock_reserve(phys_addr_t base, phys_addr_t size,
				       int nid, enum memblock_flags flags)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
		     &base, &end, nid, flags, (void *)_RET_IP_);

	return memblock_add_range(&memblock.reserved, base, size, nid, flags);
}

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0);
}
#endif

/**
 * memblock_setclr_flag - set or clear flag for a memory region
 * @type: memblock type to set/clear flag for
 * @base: base address of the region
 * @size: size of the region
 * @set: set or clear the flag
 * @flag: the flag to update
 *
 * This function isolates region [@base, @base + @size), and sets/clears @flag.
 *
 * Return: 0 on success, -errno on failure.
 */
static int __init_memblock memblock_setclr_flag(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size, int set, int flag)
{
	int i, ret, start_rgn, end_rgn;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++) {
		struct memblock_region *r = &type->regions[i];

		if (set)
			r->flags |= flag;
		else
			r->flags &= ~flag;
	}

	memblock_merge_regions(type, start_rgn, end_rgn);
	return 0;
}

/**
 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_HOTPLUG);
}

/**
 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_HOTPLUG);
}

/**
 * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
{
	if (!mirrored_kernelcore)
		return 0;

	system_has_some_mirror = true;

	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_MIRROR);
}

/**
 * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * The memory regions marked with %MEMBLOCK_NOMAP will not be added to the
 * direct mapping of the physical memory. These regions will still be
 * covered by the memory map. The struct page representing NOMAP memory
 * frames in the memory map will be PageReserved().
 *
 * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from
 * memblock, the caller must inform kmemleak to ignore that memory.
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_NOMAP);
}

/**
 * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP);
}

/**
 * memblock_reserved_mark_noinit - Mark a reserved memory region with flag
 * MEMBLOCK_RSRV_NOINIT which results in the struct pages not being initialized
 * for this region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * struct pages will not be initialized for reserved memory regions marked with
 * %MEMBLOCK_RSRV_NOINIT.
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.reserved, base, size, 1,
				    MEMBLOCK_RSRV_NOINIT);
}

static bool should_skip_region(struct memblock_type *type,
			       struct memblock_region *m,
			       int nid, int flags)
{
	int m_nid = memblock_get_region_node(m);

	/* we never skip regions when iterating memblock.reserved or physmem */
	if (type != memblock_memory)
		return false;

	/* only memory regions are associated with nodes, check it */
	if (numa_valid_node(nid) && nid != m_nid)
		return true;

	/* skip hotpluggable memory regions if needed */
	if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
	    !(flags & MEMBLOCK_HOTPLUG))
		return true;

	/* if we want mirror memory skip non-mirror memory regions */
	if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
		return true;

	/* skip nomap memory unless we were asked for it explicitly */
	if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
		return true;

	/* skip driver-managed memory unless we were asked for it explicitly */
	if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m))
		return true;

	return false;
}

/**
 * __next_mem_range - next function for for_each_free_mem_range() etc.
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Find the first area from *@idx which matches @nid, fill the out
 * parameters, and update *@idx for the next iteration.  The lower 32bit of
 * *@idx contains index into type_a and the upper 32bit indexes the
 * areas before each region in type_b.  For example, if type_b regions
 * look like the following,
 *
 *	0:[0-16), 1:[32-48), 2:[128-130)
 *
 * The upper 32bit indexes the following regions.
 *
 *	0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
 *
 * As both region arrays are sorted, the function advances the two indices
 * in lockstep and returns each intersection.
 */
void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
		      struct memblock_type *type_a,
		      struct memblock_type *type_b, phys_addr_t *out_start,
		      phys_addr_t *out_end, int *out_nid)
{
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;

	for (; idx_a < type_a->cnt; idx_a++) {
		struct memblock_region *m = &type_a->regions[idx_a];

		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
		int m_nid = memblock_get_region_node(m);

		if (should_skip_region(type_a, m, nid, flags))
			continue;

		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a++;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b < type_b->cnt + 1; idx_b++) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
				r->base : PHYS_ADDR_MAX;

			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */
			if (r_start >= m_end)
				break;
			/* if the two regions intersect, we're done */
			if (m_start < r_end) {
				if (out_start)
					*out_start =
						max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
					*out_nid = m_nid;
				/*
				 * The region which ends first is
				 * advanced for the next iteration.
				 */
				if (m_end <= r_end)
					idx_a++;
				else
					idx_b++;
				*idx = (u32)idx_a | (u64)idx_b << 32;
				return;
			}
		}
	}

	/* signal end of iteration */
	*idx = ULLONG_MAX;
}

/**
 * __next_mem_range_rev - generic next function for for_each_*_range_rev()
 *
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Finds the next range from type_a which is not marked as unsuitable
 * in type_b.
 *
 * Reverse of __next_mem_range().
 */
void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
					  enum memblock_flags flags,
					  struct memblock_type *type_a,
					  struct memblock_type *type_b,
					  phys_addr_t *out_start,
					  phys_addr_t *out_end, int *out_nid)
{
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;

	if (*idx == (u64)ULLONG_MAX) {
		idx_a = type_a->cnt - 1;
		if (type_b != NULL)
			idx_b = type_b->cnt;
		else
			idx_b = 0;
	}

	for (; idx_a >= 0; idx_a--) {
		struct memblock_region *m = &type_a->regions[idx_a];

		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
		int m_nid = memblock_get_region_node(m);

		if (should_skip_region(type_a, m, nid, flags))
			continue;

		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a--;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b >= 0; idx_b--) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
				r->base : PHYS_ADDR_MAX;
			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */
			if (r_end <= m_start)
				break;
			/* if the two regions intersect, we're done */
			if (m_end > r_start) {
				if (out_start)
					*out_start = max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
					*out_nid = m_nid;
				if (m_start >= r_start)
					idx_a--;
				else
					idx_b--;
				*idx = (u32)idx_a | (u64)idx_b << 32;
				return;
			}
		}
	}
	/* signal end of iteration */
	*idx = ULLONG_MAX;
}
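
/*
 * Illustrative sketch: __next_mem_range() is normally used through the
 * for_each_free_mem_range() family of iterators, e.g. to walk all memory
 * that is not reserved (this loop is only an example and is not used
 * elsewhere in this file):
 *
 *	phys_addr_t start, end;
 *	u64 i;
 *
 *	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
 *				&start, &end, NULL)
 *		pr_info("free: [%pa-%pa)\n", &start, &end);
 */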

/*
 * Common iterator interface used to define for_each_mem_pfn_range().
 */
void __init_memblock __next_mem_pfn_range(int *idx, int nid,
				unsigned long *out_start_pfn,
				unsigned long *out_end_pfn, int *out_nid)
{
	struct memblock_type *type = &memblock.memory;
	struct memblock_region *r;
	int r_nid;

	while (++*idx < type->cnt) {
		r = &type->regions[*idx];
		r_nid = memblock_get_region_node(r);

		if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
			continue;
		if (!numa_valid_node(nid) || nid == r_nid)
			break;
	}
	if (*idx >= type->cnt) {
		*idx = -1;
		return;
	}

	if (out_start_pfn)
		*out_start_pfn = PFN_UP(r->base);
	if (out_end_pfn)
		*out_end_pfn = PFN_DOWN(r->base + r->size);
	if (out_nid)
		*out_nid = r_nid;
}
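
/*
 * Illustrative sketch: __next_mem_pfn_range() backs for_each_mem_pfn_range(),
 * used for example to walk PFN ranges per node (compare
 * memblock_validate_numa_coverage() above):
 *
 *	unsigned long start_pfn, end_pfn;
 *	int i, nid;
 *
 *	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
 *		pr_info("node %d: pfns [%lx, %lx)\n", nid, start_pfn, end_pfn);
 */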

/**
 * memblock_set_node - set node ID on memblock regions
 * @base: base of area to set node ID for
 * @size: size of area to set node ID for
 * @type: memblock type to set node ID for
 * @nid: node ID to set
 *
 * Set the nid of memblock @type regions in [@base, @base + @size) to @nid.
 * Regions which cross the area boundaries are split as necessary.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
				      struct memblock_type *type, int nid)
{
#ifdef CONFIG_NUMA
	int start_rgn, end_rgn;
	int i, ret;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++)
		memblock_set_region_node(&type->regions[i], nid);

	memblock_merge_regions(type, start_rgn, end_rgn);
#endif
	return 0;
}

#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/**
 * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
 *
 * @idx: pointer to u64 loop variable
 * @zone: zone in which all of the memory blocks reside
 * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL
 * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL
 *
 * This function is meant to be a zone/pfn specific wrapper for the
 * for_each_mem_range type iterators. Specifically they are used in the
 * deferred memory init routines and as such we were duplicating much of
 * this logic throughout the code. So instead of having it in multiple
 * locations it seemed like it would make more sense to centralize this to
 * one new iterator that does everything they need.
 */
void __init_memblock
__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
			     unsigned long *out_spfn, unsigned long *out_epfn)
{
	int zone_nid = zone_to_nid(zone);
	phys_addr_t spa, epa;

	__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
			 &memblock.memory, &memblock.reserved,
			 &spa, &epa, NULL);

	while (*idx != U64_MAX) {
		unsigned long epfn = PFN_DOWN(epa);
		unsigned long spfn = PFN_UP(spa);

		/*
		 * Verify the end is at least past the start of the zone and
		 * that we have at least one PFN to initialize.
		 */
		if (zone->zone_start_pfn < epfn && spfn < epfn) {
			/* if we went too far just stop searching */
			if (zone_end_pfn(zone) <= spfn) {
				*idx = U64_MAX;
				break;
			}

			if (out_spfn)
				*out_spfn = max(zone->zone_start_pfn, spfn);
			if (out_epfn)
				*out_epfn = min(zone_end_pfn(zone), epfn);

			return;
		}

		__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
				 &memblock.memory, &memblock.reserved,
				 &spa, &epa, NULL);
	}

	/* signal end of iteration */
	if (out_spfn)
		*out_spfn = ULONG_MAX;
	if (out_epfn)
		*out_epfn = 0;
}

#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */

/**
 * memblock_alloc_range_nid - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @start: the lower bound of the memory region to allocate (phys address)
 * @end: the upper bound of the memory region to allocate (phys address)
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @exact_nid: control the allocation fall back to other nodes
 *
 * The allocation is performed from memory region limited by
 * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE.
 *
 * If the specified node can not hold the requested memory and @exact_nid
 * is false, the allocation falls back to any node in the system.
 *
 * For systems with memory mirroring, the allocation is attempted first
 * from the regions with mirroring enabled and then retried from any
 * memory region.
 *
 * In addition, the allocated boot memory block is registered with
 * kmemleak_alloc_phys() so that it is never reported as a leak.
 *
 * Return:
 * Physical address of allocated memory block on success, %0 on failure.
 */
phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid,
					bool exact_nid)
{
	enum memblock_flags flags = choose_memblock_flags();
	phys_addr_t found;

	/*
	 * Detect any accidental use of these APIs after slab is ready, as at
	 * this moment memblock may be deinitialized already and its
	 * internal data may be destroyed (after execution of memblock_free_all)
	 */
	if (WARN_ON_ONCE(slab_is_available())) {
		void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);

		return vaddr ? virt_to_phys(vaddr) : 0;
	}

	if (!align) {
		/* Can't use WARNs this early in boot on powerpc */
		dump_stack();
		align = SMP_CACHE_BYTES;
	}

again:
	found = memblock_find_in_range_node(size, align, start, end, nid,
					    flags);
	if (found && !__memblock_reserve(found, size, nid, MEMBLOCK_RSRV_KERN))
		goto done;

	if (numa_valid_node(nid) && !exact_nid) {
		found = memblock_find_in_range_node(size, align, start,
						    end, NUMA_NO_NODE,
						    flags);
		if (found && !memblock_reserve_kern(found, size))
			goto done;
	}

	if (flags & MEMBLOCK_MIRROR) {
		flags &= ~MEMBLOCK_MIRROR;
		pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
			&size);
		goto again;
	}

	return 0;

done:
	/*
	 * Skip kmemleak for those places like kasan_init() and
	 * early_pgtable_alloc() due to high volume.
	 */
	if (end != MEMBLOCK_ALLOC_NOLEAKTRACE)
		/*
		 * Memblock allocated blocks are never reported as
		 * leaks.  This is because many of these blocks are
		 * only referred via the physical address which is
		 * not looked up by kmemleak.
		 */
		kmemleak_alloc_phys(found, size, 0);

	/*
	 * Some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP,
	 * require memory to be accepted before it can be used by the
	 * guest.
	 *
	 * Accept the memory of the allocated buffer.
	 */
	accept_memory(found, size);

	return found;
}

/**
 * memblock_phys_alloc_range - allocate a memory block inside specified range
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @start: the lower bound of the memory region to allocate (physical address)
 * @end: the upper bound of the memory region to allocate (physical address)
 *
 * Allocate @size bytes between @start and @end.
 *
 * Return: physical address of the allocated memory block on success,
 * %0 on failure.
 */
phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
					     phys_addr_t align,
					     phys_addr_t start,
					     phys_addr_t end)
{
	memblock_dbg("%s: %llu bytes align=0x%llx from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, &start, &end,
		     (void *)_RET_IP_);
	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
					false);
}

/**
 * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Allocates memory block from the specified NUMA node. If the node
 * has no available memory, attempts to allocate from any node in the
 * system.
 *
 * Return: physical address of the allocated memory block on success,
 * %0 on failure.
 */
phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	return memblock_alloc_range_nid(size, align, 0,
					MEMBLOCK_ALLOC_ACCESSIBLE, nid, false);
}
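
/*
 * Illustrative sketch (hypothetical constraint): callers that need a buffer
 * in a specific physical window, for instance below 4 GiB, can bound the
 * allocation explicitly; the size and alignment here are made up:
 *
 *	phys_addr_t pa;
 *
 *	pa = memblock_phys_alloc_range(SZ_1M, SZ_2M, 0, SZ_4G);
 *	if (!pa)
 *		pr_warn("no suitable memory below 4 GiB\n");
 */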

/**
 * memblock_alloc_internal - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region to allocate (phys address)
 * @max_addr: the upper bound of the memory region to allocate (phys address)
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @exact_nid: control the allocation fall back to other nodes
 *
 * Allocates memory block using memblock_alloc_range_nid() and
 * converts the returned physical address to virtual.
 *
 * The @min_addr limit is dropped if it can not be satisfied and the allocation
 * will fall back to memory below @min_addr. Other constraints, such
 * as node and mirrored memory will be handled again in
 * memblock_alloc_range_nid().
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
static void * __init memblock_alloc_internal(
				phys_addr_t size, phys_addr_t align,
				phys_addr_t min_addr, phys_addr_t max_addr,
				int nid, bool exact_nid)
{
	phys_addr_t alloc;

	if (max_addr > memblock.current_limit)
		max_addr = memblock.current_limit;

	alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
					 exact_nid);

	/* retry allocation without lower limit */
	if (!alloc && min_addr)
		alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
						 exact_nid);

	if (!alloc)
		return NULL;

	return phys_to_virt(alloc);
}

/**
 * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node
 * without zeroing memory
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	  is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public function, provides additional debug information (including caller
 * info), if enabled. Does not zero allocated memory.
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
void * __init memblock_alloc_exact_nid_raw(
			phys_addr_t size, phys_addr_t align,
			phys_addr_t min_addr, phys_addr_t max_addr,
			int nid)
{
	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, nid, &min_addr,
		     &max_addr, (void *)_RET_IP_);

	return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
				       true);
}

/**
 * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
 * memory and without panicking
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	  is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public function, provides additional debug information (including caller
 * info), if enabled. Does not zero allocated memory, does not panic if request
 * cannot be satisfied.
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
void * __init memblock_alloc_try_nid_raw(
			phys_addr_t size, phys_addr_t align,
			phys_addr_t min_addr, phys_addr_t max_addr,
			int nid)
{
	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, nid, &min_addr,
		     &max_addr, (void *)_RET_IP_);

	return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
				       false);
}

/**
 * memblock_alloc_try_nid - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	  is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public function, provides additional debug information (including caller
 * info), if enabled. This function zeroes the allocated memory.
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
void * __init memblock_alloc_try_nid(
			phys_addr_t size, phys_addr_t align,
			phys_addr_t min_addr, phys_addr_t max_addr,
			int nid)
{
	void *ptr;

	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, nid, &min_addr,
		     &max_addr, (void *)_RET_IP_);
	ptr = memblock_alloc_internal(size, align,
				      min_addr, max_addr, nid, false);
	if (ptr)
		memset(ptr, 0, size);

	return ptr;
}

/**
 * __memblock_alloc_or_panic - Try to allocate memory and panic on failure
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @func: caller func name
 *
 * This function attempts to allocate memory using memblock_alloc,
 * and in case of failure, it calls panic with the formatted message.
 * This function should not be used directly, please use the macro memblock_alloc_or_panic.
 */
void *__init __memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align,
				       const char *func)
{
	void *addr = memblock_alloc(size, align);

	if (unlikely(!addr))
		panic("%s: Failed to allocate %pap bytes\n", func, &size);
	return addr;
}

/**
 * memblock_free_late - free pages directly to buddy allocator
 * @base: phys starting address of the boot memory block
 * @size: size of the boot memory block in bytes
 *
 * This is only useful when the memblock allocator has already been torn
 * down, but we are still initializing the system.  Pages are released directly
 * to the buddy allocator.
1731 */ 1732 void __init memblock_free_late(phys_addr_t base, phys_addr_t size) 1733 { 1734 phys_addr_t cursor, end; 1735 1736 end = base + size - 1; 1737 memblock_dbg("%s: [%pa-%pa] %pS\n", 1738 __func__, &base, &end, (void *)_RET_IP_); 1739 kmemleak_free_part_phys(base, size); 1740 cursor = PFN_UP(base); 1741 end = PFN_DOWN(base + size); 1742 1743 for (; cursor < end; cursor++) { 1744 memblock_free_pages(pfn_to_page(cursor), cursor, 0); 1745 totalram_pages_inc(); 1746 } 1747 } 1748 1749 /* 1750 * Remaining API functions 1751 */ 1752 1753 phys_addr_t __init_memblock memblock_phys_mem_size(void) 1754 { 1755 return memblock.memory.total_size; 1756 } 1757 1758 phys_addr_t __init_memblock memblock_reserved_size(void) 1759 { 1760 return memblock.reserved.total_size; 1761 } 1762 1763 phys_addr_t __init_memblock memblock_reserved_kern_size(phys_addr_t limit, int nid) 1764 { 1765 struct memblock_region *r; 1766 phys_addr_t total = 0; 1767 1768 for_each_reserved_mem_region(r) { 1769 phys_addr_t size = r->size; 1770 1771 if (r->base > limit) 1772 break; 1773 1774 if (r->base + r->size > limit) 1775 size = limit - r->base; 1776 1777 if (nid == memblock_get_region_node(r) || !numa_valid_node(nid)) 1778 if (r->flags & MEMBLOCK_RSRV_KERN) 1779 total += size; 1780 } 1781 1782 return total; 1783 } 1784 1785 /** 1786 * memblock_estimated_nr_free_pages - return estimated number of free pages 1787 * from memblock point of view 1788 * 1789 * During bootup, subsystems might need a rough estimate of the number of free 1790 * pages in the whole system, before precise numbers are available from the 1791 * buddy. Especially with CONFIG_DEFERRED_STRUCT_PAGE_INIT, the numbers 1792 * obtained from the buddy might be very imprecise during bootup. 1793 * 1794 * Return: 1795 * An estimated number of free pages from memblock point of view. 
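 *
 * Illustrative sketch (hypothetical early caller, divisor chosen
 * arbitrarily): the estimate can be used to scale an early data structure
 * before the buddy allocator is ready, for example:
 *
 *	unsigned long nr_free = memblock_estimated_nr_free_pages();
 *	unsigned long nr_buckets = max(1UL, nr_free / 1024);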
1796 */ 1797 unsigned long __init memblock_estimated_nr_free_pages(void) 1798 { 1799 return PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size()); 1800 } 1801 1802 /* lowest address */ 1803 phys_addr_t __init_memblock memblock_start_of_DRAM(void) 1804 { 1805 return memblock.memory.regions[0].base; 1806 } 1807 1808 phys_addr_t __init_memblock memblock_end_of_DRAM(void) 1809 { 1810 int idx = memblock.memory.cnt - 1; 1811 1812 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); 1813 } 1814 1815 static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) 1816 { 1817 phys_addr_t max_addr = PHYS_ADDR_MAX; 1818 struct memblock_region *r; 1819 1820 /* 1821 * translate the memory @limit size into the max address within one of 1822 * the memory memblock regions, if the @limit exceeds the total size 1823 * of those regions, max_addr will keep original value PHYS_ADDR_MAX 1824 */ 1825 for_each_mem_region(r) { 1826 if (limit <= r->size) { 1827 max_addr = r->base + limit; 1828 break; 1829 } 1830 limit -= r->size; 1831 } 1832 1833 return max_addr; 1834 } 1835 1836 void __init memblock_enforce_memory_limit(phys_addr_t limit) 1837 { 1838 phys_addr_t max_addr; 1839 1840 if (!limit) 1841 return; 1842 1843 max_addr = __find_max_addr(limit); 1844 1845 /* @limit exceeds the total size of the memory, do nothing */ 1846 if (max_addr == PHYS_ADDR_MAX) 1847 return; 1848 1849 /* truncate both memory and reserved regions */ 1850 memblock_remove_range(&memblock.memory, max_addr, 1851 PHYS_ADDR_MAX); 1852 memblock_remove_range(&memblock.reserved, max_addr, 1853 PHYS_ADDR_MAX); 1854 } 1855 1856 void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) 1857 { 1858 int start_rgn, end_rgn; 1859 int i, ret; 1860 1861 if (!size) 1862 return; 1863 1864 if (!memblock_memory->total_size) { 1865 pr_warn("%s: No memory registered yet\n", __func__); 1866 return; 1867 } 1868 1869 ret = memblock_isolate_range(&memblock.memory, base, size, 1870 &start_rgn, &end_rgn); 1871 if (ret) 1872 return; 1873 1874 /* remove all the MAP regions */ 1875 for (i = memblock.memory.cnt - 1; i >= end_rgn; i--) 1876 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1877 memblock_remove_region(&memblock.memory, i); 1878 1879 for (i = start_rgn - 1; i >= 0; i--) 1880 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1881 memblock_remove_region(&memblock.memory, i); 1882 1883 /* truncate the reserved regions */ 1884 memblock_remove_range(&memblock.reserved, 0, base); 1885 memblock_remove_range(&memblock.reserved, 1886 base + size, PHYS_ADDR_MAX); 1887 } 1888 1889 void __init memblock_mem_limit_remove_map(phys_addr_t limit) 1890 { 1891 phys_addr_t max_addr; 1892 1893 if (!limit) 1894 return; 1895 1896 max_addr = __find_max_addr(limit); 1897 1898 /* @limit exceeds the total size of the memory, do nothing */ 1899 if (max_addr == PHYS_ADDR_MAX) 1900 return; 1901 1902 memblock_cap_memory_range(0, max_addr); 1903 } 1904 1905 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) 1906 { 1907 unsigned int left = 0, right = type->cnt; 1908 1909 do { 1910 unsigned int mid = (right + left) / 2; 1911 1912 if (addr < type->regions[mid].base) 1913 right = mid; 1914 else if (addr >= (type->regions[mid].base + 1915 type->regions[mid].size)) 1916 left = mid + 1; 1917 else 1918 return mid; 1919 } while (left < right); 1920 return -1; 1921 } 1922 1923 bool __init_memblock memblock_is_reserved(phys_addr_t addr) 1924 { 1925 return memblock_search(&memblock.reserved, addr) != 
-1;
1926 }
1927
1928 bool __init_memblock memblock_is_memory(phys_addr_t addr)
1929 {
1930 	return memblock_search(&memblock.memory, addr) != -1;
1931 }
1932
1933 bool __init_memblock memblock_is_map_memory(phys_addr_t addr)
1934 {
1935 	int i = memblock_search(&memblock.memory, addr);
1936
1937 	if (i == -1)
1938 		return false;
1939 	return !memblock_is_nomap(&memblock.memory.regions[i]);
1940 }
1941
1942 int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
1943 			 unsigned long *start_pfn, unsigned long *end_pfn)
1944 {
1945 	struct memblock_type *type = &memblock.memory;
1946 	int mid = memblock_search(type, PFN_PHYS(pfn));
1947
1948 	if (mid == -1)
1949 		return NUMA_NO_NODE;
1950
1951 	*start_pfn = PFN_DOWN(type->regions[mid].base);
1952 	*end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size);
1953
1954 	return memblock_get_region_node(&type->regions[mid]);
1955 }
1956
1957 /**
1958  * memblock_is_region_memory - check if a region is a subset of memory
1959  * @base: base of region to check
1960  * @size: size of region to check
1961  *
1962  * Check if the region [@base, @base + @size) is a subset of a memory block.
1963  *
1964  * Return:
1965  * true if the region is a subset of a memory block, false otherwise.
1966  */
1967 bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
1968 {
1969 	int idx = memblock_search(&memblock.memory, base);
1970 	phys_addr_t end = base + memblock_cap_size(base, &size);
1971
1972 	if (idx == -1)
1973 		return false;
1974 	return (memblock.memory.regions[idx].base +
1975 		memblock.memory.regions[idx].size) >= end;
1976 }
1977
1978 /**
1979  * memblock_is_region_reserved - check if a region intersects reserved memory
1980  * @base: base of region to check
1981  * @size: size of region to check
1982  *
1983  * Check if the region [@base, @base + @size) intersects a reserved
1984  * memory block.
1985  *
1986  * Return:
1987  * True if they intersect, false if not.
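 *
 * Illustrative sketch (hypothetical @cand_base/@cand_size, not a caller in
 * this file): early placement code can use this check to skip candidate
 * ranges that collide with existing reservations:
 *
 *	if (!memblock_is_region_reserved(cand_base, cand_size))
 *		memblock_reserve(cand_base, cand_size);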
1988 */ 1989 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) 1990 { 1991 return memblock_overlaps_region(&memblock.reserved, base, size); 1992 } 1993 1994 void __init_memblock memblock_trim_memory(phys_addr_t align) 1995 { 1996 phys_addr_t start, end, orig_start, orig_end; 1997 struct memblock_region *r; 1998 1999 for_each_mem_region(r) { 2000 orig_start = r->base; 2001 orig_end = r->base + r->size; 2002 start = round_up(orig_start, align); 2003 end = round_down(orig_end, align); 2004 2005 if (start == orig_start && end == orig_end) 2006 continue; 2007 2008 if (start < end) { 2009 r->base = start; 2010 r->size = end - start; 2011 } else { 2012 memblock_remove_region(&memblock.memory, 2013 r - memblock.memory.regions); 2014 r--; 2015 } 2016 } 2017 } 2018 2019 void __init_memblock memblock_set_current_limit(phys_addr_t limit) 2020 { 2021 memblock.current_limit = limit; 2022 } 2023 2024 phys_addr_t __init_memblock memblock_get_current_limit(void) 2025 { 2026 return memblock.current_limit; 2027 } 2028 2029 static void __init_memblock memblock_dump(struct memblock_type *type) 2030 { 2031 phys_addr_t base, end, size; 2032 enum memblock_flags flags; 2033 int idx; 2034 struct memblock_region *rgn; 2035 2036 pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); 2037 2038 for_each_memblock_type(idx, type, rgn) { 2039 char nid_buf[32] = ""; 2040 2041 base = rgn->base; 2042 size = rgn->size; 2043 end = base + size - 1; 2044 flags = rgn->flags; 2045 #ifdef CONFIG_NUMA 2046 if (numa_valid_node(memblock_get_region_node(rgn))) 2047 snprintf(nid_buf, sizeof(nid_buf), " on node %d", 2048 memblock_get_region_node(rgn)); 2049 #endif 2050 pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n", 2051 type->name, idx, &base, &end, &size, nid_buf, flags); 2052 } 2053 } 2054 2055 static void __init_memblock __memblock_dump_all(void) 2056 { 2057 pr_info("MEMBLOCK configuration:\n"); 2058 pr_info(" memory size = %pa reserved size = %pa\n", 2059 &memblock.memory.total_size, 2060 &memblock.reserved.total_size); 2061 2062 memblock_dump(&memblock.memory); 2063 memblock_dump(&memblock.reserved); 2064 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 2065 memblock_dump(&physmem); 2066 #endif 2067 } 2068 2069 void __init_memblock memblock_dump_all(void) 2070 { 2071 if (memblock_debug) 2072 __memblock_dump_all(); 2073 } 2074 2075 void __init memblock_allow_resize(void) 2076 { 2077 memblock_can_resize = 1; 2078 } 2079 2080 static int __init early_memblock(char *p) 2081 { 2082 if (p && strstr(p, "debug")) 2083 memblock_debug = 1; 2084 return 0; 2085 } 2086 early_param("memblock", early_memblock); 2087 2088 static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) 2089 { 2090 struct page *start_pg, *end_pg; 2091 phys_addr_t pg, pgend; 2092 2093 /* 2094 * Convert start_pfn/end_pfn to a struct page pointer. 2095 */ 2096 start_pg = pfn_to_page(start_pfn - 1) + 1; 2097 end_pg = pfn_to_page(end_pfn - 1) + 1; 2098 2099 /* 2100 * Convert to physical addresses, and round start upwards and end 2101 * downwards. 2102 */ 2103 pg = PAGE_ALIGN(__pa(start_pg)); 2104 pgend = PAGE_ALIGN_DOWN(__pa(end_pg)); 2105 2106 /* 2107 * If there are free pages between these, free the section of the 2108 * memmap array. 2109 */ 2110 if (pg < pgend) 2111 memblock_phys_free(pg, pgend - pg); 2112 } 2113 2114 /* 2115 * The mem_map array can get very big. Free the unused area of the memory map. 
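 *
 * For example (illustrative layout): with banks at 0x00000000-0x1fffffff
 * and 0x80000000-0x9fffffff and a flat memory map, the struct pages that
 * would describe the hole between 512MiB and 2GiB are never used, so the
 * corresponding part of the memmap can be returned to memblock as free
 * memory.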
2116  */
2117 static void __init free_unused_memmap(void)
2118 {
2119 	unsigned long start, end, prev_end = 0;
2120 	int i;
2121
2122 	if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) ||
2123 	    IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
2124 		return;
2125
2126 	/*
2127 	 * This relies on each bank being in address order.
2128 	 * The banks are sorted previously in bootmem_init().
2129 	 */
2130 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
2131 #ifdef CONFIG_SPARSEMEM
2132 		/*
2133 		 * Take care not to free memmap entries that don't exist
2134 		 * due to SPARSEMEM sections which aren't present.
2135 		 */
2136 		start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
2137 #endif
2138 		/*
2139 		 * Align down here since many operations in VM subsystem
2140 		 * presume that there are no holes in the memory map inside
2141 		 * a pageblock
2142 		 */
2143 		start = pageblock_start_pfn(start);
2144
2145 		/*
2146 		 * If we had a previous bank, and there is a space
2147 		 * between the current bank and the previous, free it.
2148 		 */
2149 		if (prev_end && prev_end < start)
2150 			free_memmap(prev_end, start);
2151
2152 		/*
2153 		 * Align up here since many operations in VM subsystem
2154 		 * presume that there are no holes in the memory map inside
2155 		 * a pageblock
2156 		 */
2157 		prev_end = pageblock_align(end);
2158 	}
2159
2160 #ifdef CONFIG_SPARSEMEM
2161 	if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) {
2162 		prev_end = pageblock_align(end);
2163 		free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION));
2164 	}
2165 #endif
2166 }
2167
2168 static void __init __free_pages_memory(unsigned long start, unsigned long end)
2169 {
2170 	int order;
2171
2172 	while (start < end) {
2173 		/*
2174 		 * Free the pages in the largest chunks alignment allows.
2175 		 *
2176 		 * __ffs() behaviour is undefined for 0. start == 0 is
2177 		 * MAX_PAGE_ORDER-aligned, set order to MAX_PAGE_ORDER for
2178 		 * the case.
2179 		 */
2180 		if (start)
2181 			order = min_t(int, MAX_PAGE_ORDER, __ffs(start));
2182 		else
2183 			order = MAX_PAGE_ORDER;
2184
2185 		while (start + (1UL << order) > end)
2186 			order--;
2187
2188 		memblock_free_pages(pfn_to_page(start), start, order);
2189
2190 		start += (1UL << order);
2191 	}
2192 }
2193
2194 static unsigned long __init __free_memory_core(phys_addr_t start,
2195 				 phys_addr_t end)
2196 {
2197 	unsigned long start_pfn = PFN_UP(start);
2198 	unsigned long end_pfn = PFN_DOWN(end);
2199
2200 	if (!IS_ENABLED(CONFIG_HIGHMEM) && end_pfn > max_low_pfn)
2201 		end_pfn = max_low_pfn;
2202
2203 	if (start_pfn >= end_pfn)
2204 		return 0;
2205
2206 	__free_pages_memory(start_pfn, end_pfn);
2207
2208 	return end_pfn - start_pfn;
2209 }
2210
2211 static void __init memmap_init_reserved_pages(void)
2212 {
2213 	struct memblock_region *region;
2214 	phys_addr_t start, end;
2215 	int nid;
2216 	unsigned long max_reserved;
2217
2218 	/*
2219 	 * set nid on all reserved pages and also treat struct
2220 	 * pages for the NOMAP regions as PageReserved
2221 	 */
2222 repeat:
2223 	max_reserved = memblock.reserved.max;
2224 	for_each_mem_region(region) {
2225 		nid = memblock_get_region_node(region);
2226 		start = region->base;
2227 		end = start + region->size;
2228
2229 		if (memblock_is_nomap(region))
2230 			reserve_bootmem_region(start, end, nid);
2231
2232 		memblock_set_node(start, region->size, &memblock.reserved, nid);
2233 	}
2234 	/*
2235 	 * If 'max' has changed, the memblock.reserved region array had to be
2236 	 * doubled, which may have created a new reserved region before the
2237 	 * current 'start'. Repeat the procedure to set its node id as well.
2238 */ 2239 if (max_reserved != memblock.reserved.max) 2240 goto repeat; 2241 2242 /* 2243 * initialize struct pages for reserved regions that don't have 2244 * the MEMBLOCK_RSRV_NOINIT flag set 2245 */ 2246 for_each_reserved_mem_region(region) { 2247 if (!memblock_is_reserved_noinit(region)) { 2248 nid = memblock_get_region_node(region); 2249 start = region->base; 2250 end = start + region->size; 2251 2252 if (!numa_valid_node(nid)) 2253 nid = early_pfn_to_nid(PFN_DOWN(start)); 2254 2255 reserve_bootmem_region(start, end, nid); 2256 } 2257 } 2258 } 2259 2260 static unsigned long __init free_low_memory_core_early(void) 2261 { 2262 unsigned long count = 0; 2263 phys_addr_t start, end; 2264 u64 i; 2265 2266 memblock_clear_hotplug(0, -1); 2267 2268 memmap_init_reserved_pages(); 2269 2270 /* 2271 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id 2272 * because in some case like Node0 doesn't have RAM installed 2273 * low ram will be on Node1 2274 */ 2275 for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, 2276 NULL) 2277 count += __free_memory_core(start, end); 2278 2279 return count; 2280 } 2281 2282 static int reset_managed_pages_done __initdata; 2283 2284 static void __init reset_node_managed_pages(pg_data_t *pgdat) 2285 { 2286 struct zone *z; 2287 2288 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) 2289 atomic_long_set(&z->managed_pages, 0); 2290 } 2291 2292 void __init reset_all_zones_managed_pages(void) 2293 { 2294 struct pglist_data *pgdat; 2295 2296 if (reset_managed_pages_done) 2297 return; 2298 2299 for_each_online_pgdat(pgdat) 2300 reset_node_managed_pages(pgdat); 2301 2302 reset_managed_pages_done = 1; 2303 } 2304 2305 /** 2306 * memblock_free_all - release free pages to the buddy allocator 2307 */ 2308 void __init memblock_free_all(void) 2309 { 2310 unsigned long pages; 2311 2312 free_unused_memmap(); 2313 reset_all_zones_managed_pages(); 2314 2315 pages = free_low_memory_core_early(); 2316 totalram_pages_add(pages); 2317 } 2318 2319 /* Keep a table to reserve named memory */ 2320 #define RESERVE_MEM_MAX_ENTRIES 8 2321 #define RESERVE_MEM_NAME_SIZE 16 2322 struct reserve_mem_table { 2323 char name[RESERVE_MEM_NAME_SIZE]; 2324 phys_addr_t start; 2325 phys_addr_t size; 2326 }; 2327 static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES]; 2328 static int reserved_mem_count; 2329 static DEFINE_MUTEX(reserve_mem_lock); 2330 2331 /* Add wildcard region with a lookup name */ 2332 static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size, 2333 const char *name) 2334 { 2335 struct reserve_mem_table *map; 2336 2337 map = &reserved_mem_table[reserved_mem_count++]; 2338 map->start = start; 2339 map->size = size; 2340 strscpy(map->name, name); 2341 } 2342 2343 static struct reserve_mem_table *reserve_mem_find_by_name_nolock(const char *name) 2344 { 2345 struct reserve_mem_table *map; 2346 int i; 2347 2348 for (i = 0; i < reserved_mem_count; i++) { 2349 map = &reserved_mem_table[i]; 2350 if (!map->size) 2351 continue; 2352 if (strcmp(name, map->name) == 0) 2353 return map; 2354 } 2355 return NULL; 2356 } 2357 2358 /** 2359 * reserve_mem_find_by_name - Find reserved memory region with a given name 2360 * @name: The name that is attached to a reserved memory region 2361 * @start: If found, holds the start address 2362 * @size: If found, holds the size of the address. 2363 * 2364 * @start and @size are only updated if @name is found. 2365 * 2366 * Returns: 1 if found or 0 if not found. 
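 *
 * Illustrative sketch (hypothetical driver code; "trace" is just an example
 * name that would match a reserve_mem=...:...:trace boot parameter):
 *
 *	phys_addr_t start, size;
 *	void *vaddr;
 *
 *	if (reserve_mem_find_by_name("trace", &start, &size))
 *		vaddr = memremap(start, size, MEMREMAP_WB);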
2367  */
2368 int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
2369 {
2370 	struct reserve_mem_table *map;
2371
2372 	guard(mutex)(&reserve_mem_lock);
2373 	map = reserve_mem_find_by_name_nolock(name);
2374 	if (!map)
2375 		return 0;
2376
2377 	*start = map->start;
2378 	*size = map->size;
2379 	return 1;
2380 }
2381 EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);
2382
2383 /**
2384  * reserve_mem_release_by_name - Release reserved memory region with a given name
2385  * @name: The name that is attached to a reserved memory region
2386  *
2387  * Forcibly release the pages in the reserved memory region so that they can
2388  * be reused as free memory. After the release, the size of the reserved region becomes 0.
2389  *
2390  * Returns: 1 if released or 0 if not found.
2391  */
2392 int reserve_mem_release_by_name(const char *name)
2393 {
2394 	char buf[RESERVE_MEM_NAME_SIZE + 12];
2395 	struct reserve_mem_table *map;
2396 	void *start, *end;
2397
2398 	guard(mutex)(&reserve_mem_lock);
2399 	map = reserve_mem_find_by_name_nolock(name);
2400 	if (!map)
2401 		return 0;
2402
2403 	start = phys_to_virt(map->start);
2404 	end = start + map->size - 1;
2405 	snprintf(buf, sizeof(buf), "reserve_mem:%s", name);
2406 	free_reserved_area(start, end, 0, buf);
2407 	map->size = 0;
2408
2409 	return 1;
2410 }
2411
2412 /*
2413  * Parse reserve_mem=nn:align:name
2414  */
2415 static int __init reserve_mem(char *p)
2416 {
2417 	phys_addr_t start, size, align, tmp;
2418 	char *name;
2419 	char *oldp;
2420 	int len;
2421
2422 	if (!p)
2423 		return -EINVAL;
2424
2425 	/* Check if there's room for more reserved memory */
2426 	if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
2427 		return -EBUSY;
2428
2429 	oldp = p;
2430 	size = memparse(p, &p);
2431 	if (!size || p == oldp)
2432 		return -EINVAL;
2433
2434 	if (*p != ':')
2435 		return -EINVAL;
2436
2437 	align = memparse(p+1, &p);
2438 	if (*p != ':')
2439 		return -EINVAL;
2440
2441 	/*
2442 	 * memblock_phys_alloc() complains about an alignment of zero, but it
2443 	 * is OK for this command line option to specify one.
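	 *
	 * For example (illustrative values), "reserve_mem=2M:0:trace" is
	 * accepted here even though its alignment field is zero; the zero is
	 * silently raised to SMP_CACHE_BYTES below.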
2444 	 */
2445 	if (align < SMP_CACHE_BYTES)
2446 		align = SMP_CACHE_BYTES;
2447
2448 	name = p + 1;
2449 	len = strlen(name);
2450
2451 	/* name needs to have length but not too big */
2452 	if (!len || len >= RESERVE_MEM_NAME_SIZE)
2453 		return -EINVAL;
2454
2455 	/* Make sure that name has text */
2456 	for (p = name; *p; p++) {
2457 		if (!isspace(*p))
2458 			break;
2459 	}
2460 	if (!*p)
2461 		return -EINVAL;
2462
2463 	/* Make sure the name is not already used */
2464 	if (reserve_mem_find_by_name(name, &start, &tmp))
2465 		return -EBUSY;
2466
2467 	start = memblock_phys_alloc(size, align);
2468 	if (!start)
2469 		return -ENOMEM;
2470
2471 	reserved_mem_add(start, size, name);
2472
2473 	return 1;
2474 }
2475 __setup("reserve_mem=", reserve_mem);
2476
2477 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
2478 static const char * const flagname[] = {
2479 	[ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
2480 	[ilog2(MEMBLOCK_MIRROR)] = "MIRROR",
2481 	[ilog2(MEMBLOCK_NOMAP)] = "NOMAP",
2482 	[ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG",
2483 	[ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT",
2484 	[ilog2(MEMBLOCK_RSRV_KERN)] = "RSV_KERN",
2485 };
2486
2487 static int memblock_debug_show(struct seq_file *m, void *private)
2488 {
2489 	struct memblock_type *type = m->private;
2490 	struct memblock_region *reg;
2491 	int i, j, nid;
2492 	unsigned int count = ARRAY_SIZE(flagname);
2493 	phys_addr_t end;
2494
2495 	for (i = 0; i < type->cnt; i++) {
2496 		reg = &type->regions[i];
2497 		end = reg->base + reg->size - 1;
2498 		nid = memblock_get_region_node(reg);
2499
2500 		seq_printf(m, "%4d: ", i);
2501 		seq_printf(m, "%pa..%pa ", &reg->base, &end);
2502 		if (numa_valid_node(nid))
2503 			seq_printf(m, "%4d ", nid);
2504 		else
2505 			seq_printf(m, "%4c ", 'x');
2506 		if (reg->flags) {
2507 			for (j = 0; j < count; j++) {
2508 				if (reg->flags & (1U << j)) {
2509 					seq_printf(m, "%s\n", flagname[j]);
2510 					break;
2511 				}
2512 			}
2513 			if (j == count)
2514 				seq_printf(m, "%s\n", "UNKNOWN");
2515 		} else {
2516 			seq_printf(m, "%s\n", "NONE");
2517 		}
2518 	}
2519 	return 0;
2520 }
2521 DEFINE_SHOW_ATTRIBUTE(memblock_debug);
2522
2523 static int __init memblock_init_debugfs(void)
2524 {
2525 	struct dentry *root = debugfs_create_dir("memblock", NULL);
2526
2527 	debugfs_create_file("memory", 0444, root,
2528 			    &memblock.memory, &memblock_debug_fops);
2529 	debugfs_create_file("reserved", 0444, root,
2530 			    &memblock.reserved, &memblock_debug_fops);
2531 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
2532 	debugfs_create_file("physmem", 0444, root, &physmem,
2533 			    &memblock_debug_fops);
2534 #endif
2535
2536 	return 0;
2537 }
2538 __initcall(memblock_init_debugfs);
2539
2540 #endif /* CONFIG_DEBUG_FS && CONFIG_ARCH_KEEP_MEMBLOCK */
2541
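
/*
 * Example of the output produced by memblock_debug_show() above
 * (illustrative addresses, sizes and flags; the actual contents depend on
 * the platform):
 *
 *	# cat /sys/kernel/debug/memblock/memory
 *	   0: 0x0000000080000000..0x00000000bfffffff    0 NONE
 *	   1: 0x00000000c0000000..0x00000000c00fffff    0 NOMAP
 */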