1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Procedures for maintaining information about logical memory blocks. 4 * 5 * Peter Bergner, IBM Corp. June 2001. 6 * Copyright (C) 2001 Peter Bergner. 7 */ 8 9 #include <linux/kernel.h> 10 #include <linux/slab.h> 11 #include <linux/init.h> 12 #include <linux/bitops.h> 13 #include <linux/poison.h> 14 #include <linux/pfn.h> 15 #include <linux/debugfs.h> 16 #include <linux/kmemleak.h> 17 #include <linux/seq_file.h> 18 #include <linux/memblock.h> 19 20 #include <asm/sections.h> 21 #include <linux/io.h> 22 23 #include "internal.h" 24 25 #define INIT_MEMBLOCK_REGIONS 128 26 #define INIT_PHYSMEM_REGIONS 4 27 28 #ifndef INIT_MEMBLOCK_RESERVED_REGIONS 29 # define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS 30 #endif 31 32 #ifndef INIT_MEMBLOCK_MEMORY_REGIONS 33 #define INIT_MEMBLOCK_MEMORY_REGIONS INIT_MEMBLOCK_REGIONS 34 #endif 35 36 /** 37 * DOC: memblock overview 38 * 39 * Memblock is a method of managing memory regions during the early 40 * boot period when the usual kernel memory allocators are not up and 41 * running. 42 * 43 * Memblock views the system memory as collections of contiguous 44 * regions. There are several types of these collections: 45 * 46 * * ``memory`` - describes the physical memory available to the 47 * kernel; this may differ from the actual physical memory installed 48 * in the system, for instance when the memory is restricted with 49 * ``mem=`` command line parameter 50 * * ``reserved`` - describes the regions that were allocated 51 * * ``physmem`` - describes the actual physical memory available during 52 * boot regardless of the possible restrictions and memory hot(un)plug; 53 * the ``physmem`` type is only available on some architectures. 54 * 55 * Each region is represented by struct memblock_region that 56 * defines the region extents, its attributes and NUMA node id on NUMA 57 * systems. 
Every memory type is described by the struct memblock_type 58 * which contains an array of memory regions along with 59 * the allocator metadata. The "memory" and "reserved" types are nicely 60 * wrapped with struct memblock. This structure is statically 61 * initialized at build time. The region arrays are initially sized to 62 * %INIT_MEMBLOCK_MEMORY_REGIONS for "memory" and 63 * %INIT_MEMBLOCK_RESERVED_REGIONS for "reserved". The region array 64 * for "physmem" is initially sized to %INIT_PHYSMEM_REGIONS. 65 * The memblock_allow_resize() enables automatic resizing of the region 66 * arrays during addition of new regions. This feature should be used 67 * with care so that memory allocated for the region array will not 68 * overlap with areas that should be reserved, for example initrd. 69 * 70 * The early architecture setup should tell memblock what the physical 71 * memory layout is by using memblock_add() or memblock_add_node() 72 * functions. The first function does not assign the region to a NUMA 73 * node and it is appropriate for UMA systems. Yet, it is possible to 74 * use it on NUMA systems as well and assign the region to a NUMA node 75 * later in the setup process using memblock_set_node(). The 76 * memblock_add_node() performs such an assignment directly. 77 * 78 * Once memblock is setup the memory can be allocated using one of the 79 * API variants: 80 * 81 * * memblock_phys_alloc*() - these functions return the **physical** 82 * address of the allocated memory 83 * * memblock_alloc*() - these functions return the **virtual** address 84 * of the allocated memory. 85 * 86 * Note, that both API variants use implicit assumptions about allowed 87 * memory ranges and the fallback methods. Consult the documentation 88 * of memblock_alloc_internal() and memblock_alloc_range_nid() 89 * functions for more elaborate description. 
 *
 * As the system boot progresses, the architecture specific mem_init()
 * function frees all the memory to the buddy page allocator.
 *
 * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the
 * memblock data structures (except "physmem") will be discarded after the
 * system initialization completes.
 */

#ifndef CONFIG_NUMA
/* NOTE(review): presumably the single node descriptor of !NUMA builds - confirm */
struct pglist_data __refdata contig_page_data;
EXPORT_SYMBOL(contig_page_data);
#endif

/*
 * Page frame number bookkeeping. These globals are only defined here; nothing
 * in this part of the file assigns them.
 */
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
unsigned long long max_possible_pfn;

/*
 * Statically allocated initial region arrays. The initializers below point
 * the "memory", "reserved" and "physmem" types at them;
 * memblock_double_array() swaps in a larger array once one of them fills up.
 * Unlike the first two, the "physmem" array is not annotated
 * __initdata_memblock.
 */
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
#endif

/*
 * The memblock state: the "memory" and "reserved" types start out on the
 * static arrays above, allocation direction defaults to top-down and the
 * allocation limit to %MEMBLOCK_ALLOC_ANYWHERE.
 */
struct memblock memblock __initdata_memblock = {
	.memory.regions		= memblock_memory_init_regions,
	.memory.max		= INIT_MEMBLOCK_MEMORY_REGIONS,
	.memory.name		= "memory",

	.reserved.regions	= memblock_reserved_init_regions,
	.reserved.max		= INIT_MEMBLOCK_RESERVED_REGIONS,
	.reserved.name		= "reserved",

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
/* The "physmem" type lives outside struct memblock, on its own static array. */
struct memblock_type physmem = {
	.regions		= memblock_physmem_init_regions,
	.max			= INIT_PHYSMEM_REGIONS,
	.name			= "physmem",
};
#endif

/*
 * keep a pointer to &memblock.memory in the text section to use it in
 * __next_mem_range() and its helpers.
139 * For architectures that do not keep memblock data after init, this 140 * pointer will be reset to NULL at memblock_discard() 141 */ 142 static __refdata struct memblock_type *memblock_memory = &memblock.memory; 143 144 #define for_each_memblock_type(i, memblock_type, rgn) \ 145 for (i = 0, rgn = &memblock_type->regions[0]; \ 146 i < memblock_type->cnt; \ 147 i++, rgn = &memblock_type->regions[i]) 148 149 #define memblock_dbg(fmt, ...) \ 150 do { \ 151 if (memblock_debug) \ 152 pr_info(fmt, ##__VA_ARGS__); \ 153 } while (0) 154 155 static int memblock_debug __initdata_memblock; 156 static bool system_has_some_mirror __initdata_memblock; 157 static int memblock_can_resize __initdata_memblock; 158 static int memblock_memory_in_slab __initdata_memblock; 159 static int memblock_reserved_in_slab __initdata_memblock; 160 161 bool __init_memblock memblock_has_mirror(void) 162 { 163 return system_has_some_mirror; 164 } 165 166 static enum memblock_flags __init_memblock choose_memblock_flags(void) 167 { 168 return system_has_some_mirror ? 
MEMBLOCK_MIRROR : MEMBLOCK_NONE; 169 } 170 171 /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ 172 static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) 173 { 174 return *size = min(*size, PHYS_ADDR_MAX - base); 175 } 176 177 /* 178 * Address comparison utilities 179 */ 180 unsigned long __init_memblock 181 memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, 182 phys_addr_t size2) 183 { 184 return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); 185 } 186 187 bool __init_memblock memblock_overlaps_region(struct memblock_type *type, 188 phys_addr_t base, phys_addr_t size) 189 { 190 unsigned long i; 191 192 memblock_cap_size(base, &size); 193 194 for (i = 0; i < type->cnt; i++) 195 if (memblock_addrs_overlap(base, size, type->regions[i].base, 196 type->regions[i].size)) 197 return true; 198 return false; 199 } 200 201 /** 202 * __memblock_find_range_bottom_up - find free area utility in bottom-up 203 * @start: start of candidate range 204 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or 205 * %MEMBLOCK_ALLOC_ACCESSIBLE 206 * @size: size of free area to find 207 * @align: alignment of free area to find 208 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 209 * @flags: pick from blocks based on memory attributes 210 * 211 * Utility called from memblock_find_in_range_node(), find free area bottom-up. 212 * 213 * Return: 214 * Found address on success, 0 on failure. 
215 */ 216 static phys_addr_t __init_memblock 217 __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end, 218 phys_addr_t size, phys_addr_t align, int nid, 219 enum memblock_flags flags) 220 { 221 phys_addr_t this_start, this_end, cand; 222 u64 i; 223 224 for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) { 225 this_start = clamp(this_start, start, end); 226 this_end = clamp(this_end, start, end); 227 228 cand = round_up(this_start, align); 229 if (cand < this_end && this_end - cand >= size) 230 return cand; 231 } 232 233 return 0; 234 } 235 236 /** 237 * __memblock_find_range_top_down - find free area utility, in top-down 238 * @start: start of candidate range 239 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or 240 * %MEMBLOCK_ALLOC_ACCESSIBLE 241 * @size: size of free area to find 242 * @align: alignment of free area to find 243 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 244 * @flags: pick from blocks based on memory attributes 245 * 246 * Utility called from memblock_find_in_range_node(), find free area top-down. 247 * 248 * Return: 249 * Found address on success, 0 on failure. 
250 */ 251 static phys_addr_t __init_memblock 252 __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, 253 phys_addr_t size, phys_addr_t align, int nid, 254 enum memblock_flags flags) 255 { 256 phys_addr_t this_start, this_end, cand; 257 u64 i; 258 259 for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end, 260 NULL) { 261 this_start = clamp(this_start, start, end); 262 this_end = clamp(this_end, start, end); 263 264 if (this_end < size) 265 continue; 266 267 cand = round_down(this_end - size, align); 268 if (cand >= this_start) 269 return cand; 270 } 271 272 return 0; 273 } 274 275 /** 276 * memblock_find_in_range_node - find free area in given range and node 277 * @size: size of free area to find 278 * @align: alignment of free area to find 279 * @start: start of candidate range 280 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or 281 * %MEMBLOCK_ALLOC_ACCESSIBLE 282 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 283 * @flags: pick from blocks based on memory attributes 284 * 285 * Find @size free area aligned to @align in the specified range and node. 286 * 287 * Return: 288 * Found address on success, 0 on failure. 
289 */ 290 static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, 291 phys_addr_t align, phys_addr_t start, 292 phys_addr_t end, int nid, 293 enum memblock_flags flags) 294 { 295 /* pump up @end */ 296 if (end == MEMBLOCK_ALLOC_ACCESSIBLE || 297 end == MEMBLOCK_ALLOC_NOLEAKTRACE) 298 end = memblock.current_limit; 299 300 /* avoid allocating the first page */ 301 start = max_t(phys_addr_t, start, PAGE_SIZE); 302 end = max(start, end); 303 304 if (memblock_bottom_up()) 305 return __memblock_find_range_bottom_up(start, end, size, align, 306 nid, flags); 307 else 308 return __memblock_find_range_top_down(start, end, size, align, 309 nid, flags); 310 } 311 312 /** 313 * memblock_find_in_range - find free area in given range 314 * @start: start of candidate range 315 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or 316 * %MEMBLOCK_ALLOC_ACCESSIBLE 317 * @size: size of free area to find 318 * @align: alignment of free area to find 319 * 320 * Find @size free area aligned to @align in the specified range. 321 * 322 * Return: 323 * Found address on success, 0 on failure. 
324 */ 325 static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, 326 phys_addr_t end, phys_addr_t size, 327 phys_addr_t align) 328 { 329 phys_addr_t ret; 330 enum memblock_flags flags = choose_memblock_flags(); 331 332 again: 333 ret = memblock_find_in_range_node(size, align, start, end, 334 NUMA_NO_NODE, flags); 335 336 if (!ret && (flags & MEMBLOCK_MIRROR)) { 337 pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", 338 &size); 339 flags &= ~MEMBLOCK_MIRROR; 340 goto again; 341 } 342 343 return ret; 344 } 345 346 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) 347 { 348 type->total_size -= type->regions[r].size; 349 memmove(&type->regions[r], &type->regions[r + 1], 350 (type->cnt - (r + 1)) * sizeof(type->regions[r])); 351 type->cnt--; 352 353 /* Special case for empty arrays */ 354 if (type->cnt == 0) { 355 WARN_ON(type->total_size != 0); 356 type->regions[0].base = 0; 357 type->regions[0].size = 0; 358 type->regions[0].flags = 0; 359 memblock_set_region_node(&type->regions[0], MAX_NUMNODES); 360 } 361 } 362 363 #ifndef CONFIG_ARCH_KEEP_MEMBLOCK 364 /** 365 * memblock_discard - discard memory and reserved arrays if they were allocated 366 */ 367 void __init memblock_discard(void) 368 { 369 phys_addr_t addr, size; 370 371 if (memblock.reserved.regions != memblock_reserved_init_regions) { 372 addr = __pa(memblock.reserved.regions); 373 size = PAGE_ALIGN(sizeof(struct memblock_region) * 374 memblock.reserved.max); 375 if (memblock_reserved_in_slab) 376 kfree(memblock.reserved.regions); 377 else 378 memblock_free_late(addr, size); 379 } 380 381 if (memblock.memory.regions != memblock_memory_init_regions) { 382 addr = __pa(memblock.memory.regions); 383 size = PAGE_ALIGN(sizeof(struct memblock_region) * 384 memblock.memory.max); 385 if (memblock_memory_in_slab) 386 kfree(memblock.memory.regions); 387 else 388 memblock_free_late(addr, size); 389 } 390 391 memblock_memory = NULL; 
392 } 393 #endif 394 395 /** 396 * memblock_double_array - double the size of the memblock regions array 397 * @type: memblock type of the regions array being doubled 398 * @new_area_start: starting address of memory range to avoid overlap with 399 * @new_area_size: size of memory range to avoid overlap with 400 * 401 * Double the size of the @type regions array. If memblock is being used to 402 * allocate memory for a new reserved regions array and there is a previously 403 * allocated memory range [@new_area_start, @new_area_start + @new_area_size] 404 * waiting to be reserved, ensure the memory used by the new array does 405 * not overlap. 406 * 407 * Return: 408 * 0 on success, -1 on failure. 409 */ 410 static int __init_memblock memblock_double_array(struct memblock_type *type, 411 phys_addr_t new_area_start, 412 phys_addr_t new_area_size) 413 { 414 struct memblock_region *new_array, *old_array; 415 phys_addr_t old_alloc_size, new_alloc_size; 416 phys_addr_t old_size, new_size, addr, new_end; 417 int use_slab = slab_is_available(); 418 int *in_slab; 419 420 /* We don't allow resizing until we know about the reserved regions 421 * of memory that aren't suitable for allocation 422 */ 423 if (!memblock_can_resize) 424 panic("memblock: cannot resize %s array\n", type->name); 425 426 /* Calculate new doubled size */ 427 old_size = type->max * sizeof(struct memblock_region); 428 new_size = old_size << 1; 429 /* 430 * We need to allocated new one align to PAGE_SIZE, 431 * so we can free them completely later. 432 */ 433 old_alloc_size = PAGE_ALIGN(old_size); 434 new_alloc_size = PAGE_ALIGN(new_size); 435 436 /* Retrieve the slab flag */ 437 if (type == &memblock.memory) 438 in_slab = &memblock_memory_in_slab; 439 else 440 in_slab = &memblock_reserved_in_slab; 441 442 /* Try to find some space for it */ 443 if (use_slab) { 444 new_array = kmalloc(new_size, GFP_KERNEL); 445 addr = new_array ? 
__pa(new_array) : 0; 446 } else { 447 /* only exclude range when trying to double reserved.regions */ 448 if (type != &memblock.reserved) 449 new_area_start = new_area_size = 0; 450 451 addr = memblock_find_in_range(new_area_start + new_area_size, 452 memblock.current_limit, 453 new_alloc_size, PAGE_SIZE); 454 if (!addr && new_area_size) 455 addr = memblock_find_in_range(0, 456 min(new_area_start, memblock.current_limit), 457 new_alloc_size, PAGE_SIZE); 458 459 new_array = addr ? __va(addr) : NULL; 460 } 461 if (!addr) { 462 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", 463 type->name, type->max, type->max * 2); 464 return -1; 465 } 466 467 new_end = addr + new_size - 1; 468 memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]", 469 type->name, type->max * 2, &addr, &new_end); 470 471 /* 472 * Found space, we now need to move the array over before we add the 473 * reserved region since it may be our reserved array itself that is 474 * full. 475 */ 476 memcpy(new_array, type->regions, old_size); 477 memset(new_array + type->max, 0, old_size); 478 old_array = type->regions; 479 type->regions = new_array; 480 type->max <<= 1; 481 482 /* Free old array. We needn't free it if the array is the static one */ 483 if (*in_slab) 484 kfree(old_array); 485 else if (old_array != memblock_memory_init_regions && 486 old_array != memblock_reserved_init_regions) 487 memblock_free(old_array, old_alloc_size); 488 489 /* 490 * Reserve the new array if that comes from the memblock. 
Otherwise, we 491 * needn't do it 492 */ 493 if (!use_slab) 494 BUG_ON(memblock_reserve(addr, new_alloc_size)); 495 496 /* Update slab flag */ 497 *in_slab = use_slab; 498 499 return 0; 500 } 501 502 /** 503 * memblock_merge_regions - merge neighboring compatible regions 504 * @type: memblock type to scan 505 * @start_rgn: start scanning from (@start_rgn - 1) 506 * @end_rgn: end scanning at (@end_rgn - 1) 507 * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn) 508 */ 509 static void __init_memblock memblock_merge_regions(struct memblock_type *type, 510 unsigned long start_rgn, 511 unsigned long end_rgn) 512 { 513 int i = 0; 514 if (start_rgn) 515 i = start_rgn - 1; 516 end_rgn = min(end_rgn, type->cnt - 1); 517 while (i < end_rgn) { 518 struct memblock_region *this = &type->regions[i]; 519 struct memblock_region *next = &type->regions[i + 1]; 520 521 if (this->base + this->size != next->base || 522 memblock_get_region_node(this) != 523 memblock_get_region_node(next) || 524 this->flags != next->flags) { 525 BUG_ON(this->base + this->size > next->base); 526 i++; 527 continue; 528 } 529 530 this->size += next->size; 531 /* move forward from next + 1, index of which is i + 2 */ 532 memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); 533 type->cnt--; 534 end_rgn--; 535 } 536 } 537 538 /** 539 * memblock_insert_region - insert new memblock region 540 * @type: memblock type to insert into 541 * @idx: index for the insertion point 542 * @base: base address of the new region 543 * @size: size of the new region 544 * @nid: node id of the new region 545 * @flags: flags of the new region 546 * 547 * Insert new memblock region [@base, @base + @size) into @type at @idx. 548 * @type must already have extra room to accommodate the new region. 
549 */ 550 static void __init_memblock memblock_insert_region(struct memblock_type *type, 551 int idx, phys_addr_t base, 552 phys_addr_t size, 553 int nid, 554 enum memblock_flags flags) 555 { 556 struct memblock_region *rgn = &type->regions[idx]; 557 558 BUG_ON(type->cnt >= type->max); 559 memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); 560 rgn->base = base; 561 rgn->size = size; 562 rgn->flags = flags; 563 memblock_set_region_node(rgn, nid); 564 type->cnt++; 565 type->total_size += size; 566 } 567 568 /** 569 * memblock_add_range - add new memblock region 570 * @type: memblock type to add new region into 571 * @base: base address of the new region 572 * @size: size of the new region 573 * @nid: nid of the new region 574 * @flags: flags of the new region 575 * 576 * Add new memblock region [@base, @base + @size) into @type. The new region 577 * is allowed to overlap with existing ones - overlaps don't affect already 578 * existing regions. @type is guaranteed to be minimal (all neighbouring 579 * compatible regions are merged) after the addition. 580 * 581 * Return: 582 * 0 on success, -errno on failure. 
 */
static int __init_memblock memblock_add_range(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size,
				int nid, enum memblock_flags flags)
{
	bool insert = false;
	phys_addr_t obase = base;
	/* @size is capped first, so @end cannot wrap past PHYS_ADDR_MAX */
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx, nr_new, start_rgn = -1, end_rgn;
	struct memblock_region *rgn;

	if (!size)
		return 0;

	/* special case for empty array */
	if (type->regions[0].size == 0) {
		WARN_ON(type->cnt != 0 || type->total_size);
		type->regions[0].base = base;
		type->regions[0].size = size;
		type->regions[0].flags = flags;
		memblock_set_region_node(&type->regions[0], nid);
		type->total_size = size;
		type->cnt = 1;
		return 0;
	}

	/*
	 * The worst case is when new range overlaps all existing regions,
	 * then we'll need type->cnt + 1 empty regions in @type. So if
	 * type->cnt * 2 + 1 is less than or equal to type->max, we know
	 * that there is enough empty regions in @type, and we can insert
	 * regions directly.
	 */
	if (type->cnt * 2 + 1 <= type->max)
		insert = true;

repeat:
	/*
	 * The following is executed twice. Once with %false @insert and
	 * then with %true. The first counts the number of regions needed
	 * to accommodate the new area. The second actually inserts them.
	 */
	base = obase;
	nr_new = 0;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		/* @rgn starts at or after the new range ends: nothing further overlaps */
		if (rbase >= end)
			break;
		/* @rgn lies entirely below what is left of the new range */
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps. If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
#ifdef CONFIG_NUMA
			WARN_ON(nid != memblock_get_region_node(rgn));
#endif
			WARN_ON(flags != rgn->flags);
			nr_new++;
			if (insert) {
				/* remember the span of inserted regions for the merge pass */
				if (start_rgn == -1)
					start_rgn = idx;
				end_rgn = idx + 1;
				/*
				 * The insertion shifts @rgn up to idx + 1; the
				 * post-increment makes @idx follow it so that
				 * the loop's own increment moves on to the
				 * region after @rgn.
				 */
				memblock_insert_region(type, idx++, base,
						       rbase - base, nid,
						       flags);
			}
		}
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
	}

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert) {
			if (start_rgn == -1)
				start_rgn = idx;
			end_rgn = idx + 1;
			memblock_insert_region(type, idx, base, end - base,
					       nid, flags);
		}
	}

	/* the new range was already fully covered by existing regions */
	if (!nr_new)
		return 0;

	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
	 */
	if (!insert) {
		while (type->cnt + nr_new > type->max)
			if (memblock_double_array(type, obase, size) < 0)
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type, start_rgn, end_rgn);
		return 0;
	}
}

/**
 * memblock_add_node - add new memblock region within a NUMA node
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base, @base + @size) to the "memory"
 * type. See memblock_add_range() description for more details
 *
 * Return:
 * 0 on success, -errno on failure.
702 */ 703 int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, 704 int nid, enum memblock_flags flags) 705 { 706 phys_addr_t end = base + size - 1; 707 708 memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__, 709 &base, &end, nid, flags, (void *)_RET_IP_); 710 711 return memblock_add_range(&memblock.memory, base, size, nid, flags); 712 } 713 714 /** 715 * memblock_add - add new memblock region 716 * @base: base address of the new region 717 * @size: size of the new region 718 * 719 * Add new memblock region [@base, @base + @size) to the "memory" 720 * type. See memblock_add_range() description for mode details 721 * 722 * Return: 723 * 0 on success, -errno on failure. 724 */ 725 int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) 726 { 727 phys_addr_t end = base + size - 1; 728 729 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, 730 &base, &end, (void *)_RET_IP_); 731 732 return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0); 733 } 734 735 /** 736 * memblock_validate_numa_coverage - check if amount of memory with 737 * no node ID assigned is less than a threshold 738 * @threshold_bytes: maximal number of pages that can have unassigned node 739 * ID (in bytes). 740 * 741 * A buggy firmware may report memory that does not belong to any node. 742 * Check if amount of such memory is below @threshold_bytes. 743 * 744 * Return: true on success, false on failure. 
745 */ 746 bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_bytes) 747 { 748 unsigned long nr_pages = 0; 749 unsigned long start_pfn, end_pfn, mem_size_mb; 750 int nid, i; 751 752 /* calculate lose page */ 753 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { 754 if (!numa_valid_node(nid)) 755 nr_pages += end_pfn - start_pfn; 756 } 757 758 if ((nr_pages << PAGE_SHIFT) >= threshold_bytes) { 759 mem_size_mb = memblock_phys_mem_size() >> 20; 760 pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n", 761 (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb); 762 return false; 763 } 764 765 return true; 766 } 767 768 769 /** 770 * memblock_isolate_range - isolate given range into disjoint memblocks 771 * @type: memblock type to isolate range for 772 * @base: base of range to isolate 773 * @size: size of range to isolate 774 * @start_rgn: out parameter for the start of isolated region 775 * @end_rgn: out parameter for the end of isolated region 776 * 777 * Walk @type and ensure that regions don't cross the boundaries defined by 778 * [@base, @base + @size). Crossing regions are split at the boundaries, 779 * which may create at most two more regions. The index of the first 780 * region inside the range is returned in *@start_rgn and the index of the 781 * first region after the range is returned in *@end_rgn. 782 * 783 * Return: 784 * 0 on success, -errno on failure. 
 */
static int __init_memblock memblock_isolate_range(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size,
					int *start_rgn, int *end_rgn)
{
	/* @size is capped first, so @end cannot wrap past PHYS_ADDR_MAX */
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx;
	struct memblock_region *rgn;

	/* an empty result is reported as start == end == 0 */
	*start_rgn = *end_rgn = 0;

	if (!size)
		return 0;

	/* we'll create at most two more regions */
	while (type->cnt + 2 > type->max)
		if (memblock_double_array(type, base, size) < 0)
			return -ENOMEM;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		/* @rgn starts at or after the range ends: done */
		if (rbase >= end)
			break;
		/* @rgn lies entirely below the range */
		if (rend <= base)
			continue;

		if (rbase < base) {
			/*
			 * @rgn intersects from below. Split and continue
			 * to process the next region - the new top half.
			 */
			rgn->base = base;
			rgn->size -= base - rbase;
			/* memblock_insert_region() adds the bottom half's size back */
			type->total_size -= base - rbase;
			memblock_insert_region(type, idx, rbase, base - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else if (rend > end) {
			/*
			 * @rgn intersects from above. Split and redo the
			 * current region - the new bottom half.
			 */
			rgn->base = end;
			rgn->size -= end - rbase;
			/* memblock_insert_region() adds the bottom half's size back */
			type->total_size -= end - rbase;
			/* idx-- cancels the loop increment so index @idx is visited again */
			memblock_insert_region(type, idx--, rbase, end - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else {
			/* @rgn is fully contained, record it */
			if (!*end_rgn)
				*start_rgn = idx;
			*end_rgn = idx + 1;
		}
	}

	return 0;
}

/*
 * Remove [@base, @base + @size) from @type: isolate the range so that it is
 * made of whole regions, then delete those regions from the highest index
 * down.
 *
 * Returns 0 on success or the error from memblock_isolate_range().
 */
static int __init_memblock memblock_remove_range(struct memblock_type *type,
					  phys_addr_t base, phys_addr_t size)
{
	int start_rgn, end_rgn;
	int i, ret;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	/* walk backwards so that removals do not shift indices still to visit */
	for (i = end_rgn - 1; i >= start_rgn; i--)
		memblock_remove_region(type, i);
	return 0;
}

/*
 * Remove [@base, @base + @size) from the "memory" type.
 * Returns 0 on success, -errno on failure.
 */
int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
	/* inclusive last byte of the range; used by the debug print only */
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_remove_range(&memblock.memory, base, size);
}

/**
 * memblock_free - free boot memory allocation
 * @ptr: starting address of the  boot memory allocation
 * @size: size of the boot memory block in bytes
 *
 * Free boot memory block previously allocated by memblock_alloc_xx() API.
 * The freeing memory will not be released to the buddy allocator.
 */
void __init_memblock memblock_free(void *ptr, size_t size)
{
	/* a NULL @ptr is silently ignored */
	if (ptr)
		memblock_phys_free(__pa(ptr), size);
}

/**
 * memblock_phys_free - free boot memory block
 * @base: phys starting address of the  boot memory block
 * @size: size of the boot memory block in bytes
 *
 * Free boot memory block previously allocated by memblock_phys_alloc_xx() API.
 * The freeing memory will not be released to the buddy allocator.
892 */ 893 int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) 894 { 895 phys_addr_t end = base + size - 1; 896 897 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, 898 &base, &end, (void *)_RET_IP_); 899 900 kmemleak_free_part_phys(base, size); 901 return memblock_remove_range(&memblock.reserved, base, size); 902 } 903 904 int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) 905 { 906 phys_addr_t end = base + size - 1; 907 908 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, 909 &base, &end, (void *)_RET_IP_); 910 911 return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0); 912 } 913 914 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 915 int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size) 916 { 917 phys_addr_t end = base + size - 1; 918 919 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, 920 &base, &end, (void *)_RET_IP_); 921 922 return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0); 923 } 924 #endif 925 926 /** 927 * memblock_setclr_flag - set or clear flag for a memory region 928 * @type: memblock type to set/clear flag for 929 * @base: base address of the region 930 * @size: size of the region 931 * @set: set or clear the flag 932 * @flag: the flag to update 933 * 934 * This function isolates region [@base, @base + @size), and sets/clears flag 935 * 936 * Return: 0 on success, -errno on failure. 
937 */ 938 static int __init_memblock memblock_setclr_flag(struct memblock_type *type, 939 phys_addr_t base, phys_addr_t size, int set, int flag) 940 { 941 int i, ret, start_rgn, end_rgn; 942 943 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); 944 if (ret) 945 return ret; 946 947 for (i = start_rgn; i < end_rgn; i++) { 948 struct memblock_region *r = &type->regions[i]; 949 950 if (set) 951 r->flags |= flag; 952 else 953 r->flags &= ~flag; 954 } 955 956 memblock_merge_regions(type, start_rgn, end_rgn); 957 return 0; 958 } 959 960 /** 961 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG. 962 * @base: the base phys addr of the region 963 * @size: the size of the region 964 * 965 * Return: 0 on success, -errno on failure. 966 */ 967 int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size) 968 { 969 return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_HOTPLUG); 970 } 971 972 /** 973 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region. 974 * @base: the base phys addr of the region 975 * @size: the size of the region 976 * 977 * Return: 0 on success, -errno on failure. 978 */ 979 int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size) 980 { 981 return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_HOTPLUG); 982 } 983 984 /** 985 * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR. 986 * @base: the base phys addr of the region 987 * @size: the size of the region 988 * 989 * Return: 0 on success, -errno on failure. 990 */ 991 int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size) 992 { 993 if (!mirrored_kernelcore) 994 return 0; 995 996 system_has_some_mirror = true; 997 998 return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_MIRROR); 999 } 1000 1001 /** 1002 * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP. 
1003 * @base: the base phys addr of the region 1004 * @size: the size of the region 1005 * 1006 * The memory regions marked with %MEMBLOCK_NOMAP will not be added to the 1007 * direct mapping of the physical memory. These regions will still be 1008 * covered by the memory map. The struct page representing NOMAP memory 1009 * frames in the memory map will be PageReserved() 1010 * 1011 * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from 1012 * memblock, the caller must inform kmemleak to ignore that memory 1013 * 1014 * Return: 0 on success, -errno on failure. 1015 */ 1016 int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size) 1017 { 1018 return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_NOMAP); 1019 } 1020 1021 /** 1022 * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region. 1023 * @base: the base phys addr of the region 1024 * @size: the size of the region 1025 * 1026 * Return: 0 on success, -errno on failure. 1027 */ 1028 int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size) 1029 { 1030 return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP); 1031 } 1032 1033 /** 1034 * memblock_reserved_mark_noinit - Mark a reserved memory region with flag 1035 * MEMBLOCK_RSRV_NOINIT which results in the struct pages not being initialized 1036 * for this region. 1037 * @base: the base phys addr of the region 1038 * @size: the size of the region 1039 * 1040 * struct pages will not be initialized for reserved memory regions marked with 1041 * %MEMBLOCK_RSRV_NOINIT. 1042 * 1043 * Return: 0 on success, -errno on failure. 
1044 */ 1045 int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size) 1046 { 1047 return memblock_setclr_flag(&memblock.reserved, base, size, 1, 1048 MEMBLOCK_RSRV_NOINIT); 1049 } 1050 1051 static bool should_skip_region(struct memblock_type *type, 1052 struct memblock_region *m, 1053 int nid, int flags) 1054 { 1055 int m_nid = memblock_get_region_node(m); 1056 1057 /* we never skip regions when iterating memblock.reserved or physmem */ 1058 if (type != memblock_memory) 1059 return false; 1060 1061 /* only memory regions are associated with nodes, check it */ 1062 if (numa_valid_node(nid) && nid != m_nid) 1063 return true; 1064 1065 /* skip hotpluggable memory regions if needed */ 1066 if (movable_node_is_enabled() && memblock_is_hotpluggable(m) && 1067 !(flags & MEMBLOCK_HOTPLUG)) 1068 return true; 1069 1070 /* if we want mirror memory skip non-mirror memory regions */ 1071 if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m)) 1072 return true; 1073 1074 /* skip nomap memory unless we were asked for it explicitly */ 1075 if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m)) 1076 return true; 1077 1078 /* skip driver-managed memory unless we were asked for it explicitly */ 1079 if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m)) 1080 return true; 1081 1082 return false; 1083 } 1084 1085 /** 1086 * __next_mem_range - next function for for_each_free_mem_range() etc. 
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Find the first area from *@idx which matches @nid, fill the out
 * parameters, and update *@idx for the next iteration. The lower 32 bits of
 * *@idx contain the index into type_a and the upper 32 bits index the
 * areas before each region in type_b. For example, if type_b regions
 * look like the following,
 *
 *	0:[0-16), 1:[32-48), 2:[128-130)
 *
 * The upper 32 bits index the following regions.
 *
 *	0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
 *
 * As both region arrays are sorted, the function advances the two indices
 * in lockstep and returns each intersection.
1110 */ 1111 void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, 1112 struct memblock_type *type_a, 1113 struct memblock_type *type_b, phys_addr_t *out_start, 1114 phys_addr_t *out_end, int *out_nid) 1115 { 1116 int idx_a = *idx & 0xffffffff; 1117 int idx_b = *idx >> 32; 1118 1119 for (; idx_a < type_a->cnt; idx_a++) { 1120 struct memblock_region *m = &type_a->regions[idx_a]; 1121 1122 phys_addr_t m_start = m->base; 1123 phys_addr_t m_end = m->base + m->size; 1124 int m_nid = memblock_get_region_node(m); 1125 1126 if (should_skip_region(type_a, m, nid, flags)) 1127 continue; 1128 1129 if (!type_b) { 1130 if (out_start) 1131 *out_start = m_start; 1132 if (out_end) 1133 *out_end = m_end; 1134 if (out_nid) 1135 *out_nid = m_nid; 1136 idx_a++; 1137 *idx = (u32)idx_a | (u64)idx_b << 32; 1138 return; 1139 } 1140 1141 /* scan areas before each reservation */ 1142 for (; idx_b < type_b->cnt + 1; idx_b++) { 1143 struct memblock_region *r; 1144 phys_addr_t r_start; 1145 phys_addr_t r_end; 1146 1147 r = &type_b->regions[idx_b]; 1148 r_start = idx_b ? r[-1].base + r[-1].size : 0; 1149 r_end = idx_b < type_b->cnt ? 1150 r->base : PHYS_ADDR_MAX; 1151 1152 /* 1153 * if idx_b advanced past idx_a, 1154 * break out to advance idx_a 1155 */ 1156 if (r_start >= m_end) 1157 break; 1158 /* if the two regions intersect, we're done */ 1159 if (m_start < r_end) { 1160 if (out_start) 1161 *out_start = 1162 max(m_start, r_start); 1163 if (out_end) 1164 *out_end = min(m_end, r_end); 1165 if (out_nid) 1166 *out_nid = m_nid; 1167 /* 1168 * The region which ends first is 1169 * advanced for the next iteration. 
1170 */ 1171 if (m_end <= r_end) 1172 idx_a++; 1173 else 1174 idx_b++; 1175 *idx = (u32)idx_a | (u64)idx_b << 32; 1176 return; 1177 } 1178 } 1179 } 1180 1181 /* signal end of iteration */ 1182 *idx = ULLONG_MAX; 1183 } 1184 1185 /** 1186 * __next_mem_range_rev - generic next function for for_each_*_range_rev() 1187 * 1188 * @idx: pointer to u64 loop variable 1189 * @nid: node selector, %NUMA_NO_NODE for all nodes 1190 * @flags: pick from blocks based on memory attributes 1191 * @type_a: pointer to memblock_type from where the range is taken 1192 * @type_b: pointer to memblock_type which excludes memory from being taken 1193 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL 1194 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL 1195 * @out_nid: ptr to int for nid of the range, can be %NULL 1196 * 1197 * Finds the next range from type_a which is not marked as unsuitable 1198 * in type_b. 1199 * 1200 * Reverse of __next_mem_range(). 1201 */ 1202 void __init_memblock __next_mem_range_rev(u64 *idx, int nid, 1203 enum memblock_flags flags, 1204 struct memblock_type *type_a, 1205 struct memblock_type *type_b, 1206 phys_addr_t *out_start, 1207 phys_addr_t *out_end, int *out_nid) 1208 { 1209 int idx_a = *idx & 0xffffffff; 1210 int idx_b = *idx >> 32; 1211 1212 if (*idx == (u64)ULLONG_MAX) { 1213 idx_a = type_a->cnt - 1; 1214 if (type_b != NULL) 1215 idx_b = type_b->cnt; 1216 else 1217 idx_b = 0; 1218 } 1219 1220 for (; idx_a >= 0; idx_a--) { 1221 struct memblock_region *m = &type_a->regions[idx_a]; 1222 1223 phys_addr_t m_start = m->base; 1224 phys_addr_t m_end = m->base + m->size; 1225 int m_nid = memblock_get_region_node(m); 1226 1227 if (should_skip_region(type_a, m, nid, flags)) 1228 continue; 1229 1230 if (!type_b) { 1231 if (out_start) 1232 *out_start = m_start; 1233 if (out_end) 1234 *out_end = m_end; 1235 if (out_nid) 1236 *out_nid = m_nid; 1237 idx_a--; 1238 *idx = (u32)idx_a | (u64)idx_b << 32; 1239 return; 1240 
} 1241 1242 /* scan areas before each reservation */ 1243 for (; idx_b >= 0; idx_b--) { 1244 struct memblock_region *r; 1245 phys_addr_t r_start; 1246 phys_addr_t r_end; 1247 1248 r = &type_b->regions[idx_b]; 1249 r_start = idx_b ? r[-1].base + r[-1].size : 0; 1250 r_end = idx_b < type_b->cnt ? 1251 r->base : PHYS_ADDR_MAX; 1252 /* 1253 * if idx_b advanced past idx_a, 1254 * break out to advance idx_a 1255 */ 1256 1257 if (r_end <= m_start) 1258 break; 1259 /* if the two regions intersect, we're done */ 1260 if (m_end > r_start) { 1261 if (out_start) 1262 *out_start = max(m_start, r_start); 1263 if (out_end) 1264 *out_end = min(m_end, r_end); 1265 if (out_nid) 1266 *out_nid = m_nid; 1267 if (m_start >= r_start) 1268 idx_a--; 1269 else 1270 idx_b--; 1271 *idx = (u32)idx_a | (u64)idx_b << 32; 1272 return; 1273 } 1274 } 1275 } 1276 /* signal end of iteration */ 1277 *idx = ULLONG_MAX; 1278 } 1279 1280 /* 1281 * Common iterator interface used to define for_each_mem_pfn_range(). 1282 */ 1283 void __init_memblock __next_mem_pfn_range(int *idx, int nid, 1284 unsigned long *out_start_pfn, 1285 unsigned long *out_end_pfn, int *out_nid) 1286 { 1287 struct memblock_type *type = &memblock.memory; 1288 struct memblock_region *r; 1289 int r_nid; 1290 1291 while (++*idx < type->cnt) { 1292 r = &type->regions[*idx]; 1293 r_nid = memblock_get_region_node(r); 1294 1295 if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) 1296 continue; 1297 if (!numa_valid_node(nid) || nid == r_nid) 1298 break; 1299 } 1300 if (*idx >= type->cnt) { 1301 *idx = -1; 1302 return; 1303 } 1304 1305 if (out_start_pfn) 1306 *out_start_pfn = PFN_UP(r->base); 1307 if (out_end_pfn) 1308 *out_end_pfn = PFN_DOWN(r->base + r->size); 1309 if (out_nid) 1310 *out_nid = r_nid; 1311 } 1312 1313 /** 1314 * memblock_set_node - set node ID on memblock regions 1315 * @base: base of area to set node ID for 1316 * @size: size of area to set node ID for 1317 * @type: memblock type to set node ID for 1318 * @nid: node ID to 
set 1319 * 1320 * Set the nid of memblock @type regions in [@base, @base + @size) to @nid. 1321 * Regions which cross the area boundaries are split as necessary. 1322 * 1323 * Return: 1324 * 0 on success, -errno on failure. 1325 */ 1326 int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, 1327 struct memblock_type *type, int nid) 1328 { 1329 #ifdef CONFIG_NUMA 1330 int start_rgn, end_rgn; 1331 int i, ret; 1332 1333 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); 1334 if (ret) 1335 return ret; 1336 1337 for (i = start_rgn; i < end_rgn; i++) 1338 memblock_set_region_node(&type->regions[i], nid); 1339 1340 memblock_merge_regions(type, start_rgn, end_rgn); 1341 #endif 1342 return 0; 1343 } 1344 1345 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 1346 /** 1347 * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone() 1348 * 1349 * @idx: pointer to u64 loop variable 1350 * @zone: zone in which all of the memory blocks reside 1351 * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL 1352 * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL 1353 * 1354 * This function is meant to be a zone/pfn specific wrapper for the 1355 * for_each_mem_range type iterators. Specifically they are used in the 1356 * deferred memory init routines and as such we were duplicating much of 1357 * this logic throughout the code. So instead of having it in multiple 1358 * locations it seemed like it would make more sense to centralize this to 1359 * one new iterator that does everything they need. 
1360 */ 1361 void __init_memblock 1362 __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, 1363 unsigned long *out_spfn, unsigned long *out_epfn) 1364 { 1365 int zone_nid = zone_to_nid(zone); 1366 phys_addr_t spa, epa; 1367 1368 __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, 1369 &memblock.memory, &memblock.reserved, 1370 &spa, &epa, NULL); 1371 1372 while (*idx != U64_MAX) { 1373 unsigned long epfn = PFN_DOWN(epa); 1374 unsigned long spfn = PFN_UP(spa); 1375 1376 /* 1377 * Verify the end is at least past the start of the zone and 1378 * that we have at least one PFN to initialize. 1379 */ 1380 if (zone->zone_start_pfn < epfn && spfn < epfn) { 1381 /* if we went too far just stop searching */ 1382 if (zone_end_pfn(zone) <= spfn) { 1383 *idx = U64_MAX; 1384 break; 1385 } 1386 1387 if (out_spfn) 1388 *out_spfn = max(zone->zone_start_pfn, spfn); 1389 if (out_epfn) 1390 *out_epfn = min(zone_end_pfn(zone), epfn); 1391 1392 return; 1393 } 1394 1395 __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, 1396 &memblock.memory, &memblock.reserved, 1397 &spa, &epa, NULL); 1398 } 1399 1400 /* signal end of iteration */ 1401 if (out_spfn) 1402 *out_spfn = ULONG_MAX; 1403 if (out_epfn) 1404 *out_epfn = 0; 1405 } 1406 1407 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ 1408 1409 /** 1410 * memblock_alloc_range_nid - allocate boot memory block 1411 * @size: size of memory block to be allocated in bytes 1412 * @align: alignment of the region and block's size 1413 * @start: the lower bound of the memory region to allocate (phys address) 1414 * @end: the upper bound of the memory region to allocate (phys address) 1415 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1416 * @exact_nid: control the allocation fall back to other nodes 1417 * 1418 * The allocation is performed from memory region limited by 1419 * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE. 
1420 * 1421 * If the specified node can not hold the requested memory and @exact_nid 1422 * is false, the allocation falls back to any node in the system. 1423 * 1424 * For systems with memory mirroring, the allocation is attempted first 1425 * from the regions with mirroring enabled and then retried from any 1426 * memory region. 1427 * 1428 * In addition, function using kmemleak_alloc_phys for allocated boot 1429 * memory block, it is never reported as leaks. 1430 * 1431 * Return: 1432 * Physical address of allocated memory block on success, %0 on failure. 1433 */ 1434 phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, 1435 phys_addr_t align, phys_addr_t start, 1436 phys_addr_t end, int nid, 1437 bool exact_nid) 1438 { 1439 enum memblock_flags flags = choose_memblock_flags(); 1440 phys_addr_t found; 1441 1442 /* 1443 * Detect any accidental use of these APIs after slab is ready, as at 1444 * this moment memblock may be deinitialized already and its 1445 * internal data may be destroyed (after execution of memblock_free_all) 1446 */ 1447 if (WARN_ON_ONCE(slab_is_available())) { 1448 void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid); 1449 1450 return vaddr ? 
virt_to_phys(vaddr) : 0; 1451 } 1452 1453 if (!align) { 1454 /* Can't use WARNs this early in boot on powerpc */ 1455 dump_stack(); 1456 align = SMP_CACHE_BYTES; 1457 } 1458 1459 again: 1460 found = memblock_find_in_range_node(size, align, start, end, nid, 1461 flags); 1462 if (found && !memblock_reserve(found, size)) 1463 goto done; 1464 1465 if (numa_valid_node(nid) && !exact_nid) { 1466 found = memblock_find_in_range_node(size, align, start, 1467 end, NUMA_NO_NODE, 1468 flags); 1469 if (found && !memblock_reserve(found, size)) 1470 goto done; 1471 } 1472 1473 if (flags & MEMBLOCK_MIRROR) { 1474 flags &= ~MEMBLOCK_MIRROR; 1475 pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", 1476 &size); 1477 goto again; 1478 } 1479 1480 return 0; 1481 1482 done: 1483 /* 1484 * Skip kmemleak for those places like kasan_init() and 1485 * early_pgtable_alloc() due to high volume. 1486 */ 1487 if (end != MEMBLOCK_ALLOC_NOLEAKTRACE) 1488 /* 1489 * Memblock allocated blocks are never reported as 1490 * leaks. This is because many of these blocks are 1491 * only referred via the physical address which is 1492 * not looked up by kmemleak. 1493 */ 1494 kmemleak_alloc_phys(found, size, 0); 1495 1496 /* 1497 * Some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP, 1498 * require memory to be accepted before it can be used by the 1499 * guest. 1500 * 1501 * Accept the memory of the allocated buffer. 1502 */ 1503 accept_memory(found, found + size); 1504 1505 return found; 1506 } 1507 1508 /** 1509 * memblock_phys_alloc_range - allocate a memory block inside specified range 1510 * @size: size of memory block to be allocated in bytes 1511 * @align: alignment of the region and block's size 1512 * @start: the lower bound of the memory region to allocate (physical address) 1513 * @end: the upper bound of the memory region to allocate (physical address) 1514 * 1515 * Allocate @size bytes in the between @start and @end. 
1516 * 1517 * Return: physical address of the allocated memory block on success, 1518 * %0 on failure. 1519 */ 1520 phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size, 1521 phys_addr_t align, 1522 phys_addr_t start, 1523 phys_addr_t end) 1524 { 1525 memblock_dbg("%s: %llu bytes align=0x%llx from=%pa max_addr=%pa %pS\n", 1526 __func__, (u64)size, (u64)align, &start, &end, 1527 (void *)_RET_IP_); 1528 return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE, 1529 false); 1530 } 1531 1532 /** 1533 * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node 1534 * @size: size of memory block to be allocated in bytes 1535 * @align: alignment of the region and block's size 1536 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1537 * 1538 * Allocates memory block from the specified NUMA node. If the node 1539 * has no available memory, attempts to allocated from any node in the 1540 * system. 1541 * 1542 * Return: physical address of the allocated memory block on success, 1543 * %0 on failure. 1544 */ 1545 phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) 1546 { 1547 return memblock_alloc_range_nid(size, align, 0, 1548 MEMBLOCK_ALLOC_ACCESSIBLE, nid, false); 1549 } 1550 1551 /** 1552 * memblock_alloc_internal - allocate boot memory block 1553 * @size: size of memory block to be allocated in bytes 1554 * @align: alignment of the region and block's size 1555 * @min_addr: the lower bound of the memory region to allocate (phys address) 1556 * @max_addr: the upper bound of the memory region to allocate (phys address) 1557 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1558 * @exact_nid: control the allocation fall back to other nodes 1559 * 1560 * Allocates memory block using memblock_alloc_range_nid() and 1561 * converts the returned physical address to virtual. 
1562 * 1563 * The @min_addr limit is dropped if it can not be satisfied and the allocation 1564 * will fall back to memory below @min_addr. Other constraints, such 1565 * as node and mirrored memory will be handled again in 1566 * memblock_alloc_range_nid(). 1567 * 1568 * Return: 1569 * Virtual address of allocated memory block on success, NULL on failure. 1570 */ 1571 static void * __init memblock_alloc_internal( 1572 phys_addr_t size, phys_addr_t align, 1573 phys_addr_t min_addr, phys_addr_t max_addr, 1574 int nid, bool exact_nid) 1575 { 1576 phys_addr_t alloc; 1577 1578 1579 if (max_addr > memblock.current_limit) 1580 max_addr = memblock.current_limit; 1581 1582 alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid, 1583 exact_nid); 1584 1585 /* retry allocation without lower limit */ 1586 if (!alloc && min_addr) 1587 alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid, 1588 exact_nid); 1589 1590 if (!alloc) 1591 return NULL; 1592 1593 return phys_to_virt(alloc); 1594 } 1595 1596 /** 1597 * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node 1598 * without zeroing memory 1599 * @size: size of memory block to be allocated in bytes 1600 * @align: alignment of the region and block's size 1601 * @min_addr: the lower bound of the memory region from where the allocation 1602 * is preferred (phys address) 1603 * @max_addr: the upper bound of the memory region from where the allocation 1604 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to 1605 * allocate only from memory limited by memblock.current_limit value 1606 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1607 * 1608 * Public function, provides additional debug information (including caller 1609 * info), if enabled. Does not zero allocated memory. 1610 * 1611 * Return: 1612 * Virtual address of allocated memory block on success, NULL on failure. 
1613 */ 1614 void * __init memblock_alloc_exact_nid_raw( 1615 phys_addr_t size, phys_addr_t align, 1616 phys_addr_t min_addr, phys_addr_t max_addr, 1617 int nid) 1618 { 1619 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", 1620 __func__, (u64)size, (u64)align, nid, &min_addr, 1621 &max_addr, (void *)_RET_IP_); 1622 1623 return memblock_alloc_internal(size, align, min_addr, max_addr, nid, 1624 true); 1625 } 1626 1627 /** 1628 * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing 1629 * memory and without panicking 1630 * @size: size of memory block to be allocated in bytes 1631 * @align: alignment of the region and block's size 1632 * @min_addr: the lower bound of the memory region from where the allocation 1633 * is preferred (phys address) 1634 * @max_addr: the upper bound of the memory region from where the allocation 1635 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to 1636 * allocate only from memory limited by memblock.current_limit value 1637 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1638 * 1639 * Public function, provides additional debug information (including caller 1640 * info), if enabled. Does not zero allocated memory, does not panic if request 1641 * cannot be satisfied. 1642 * 1643 * Return: 1644 * Virtual address of allocated memory block on success, NULL on failure. 
1645 */ 1646 void * __init memblock_alloc_try_nid_raw( 1647 phys_addr_t size, phys_addr_t align, 1648 phys_addr_t min_addr, phys_addr_t max_addr, 1649 int nid) 1650 { 1651 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", 1652 __func__, (u64)size, (u64)align, nid, &min_addr, 1653 &max_addr, (void *)_RET_IP_); 1654 1655 return memblock_alloc_internal(size, align, min_addr, max_addr, nid, 1656 false); 1657 } 1658 1659 /** 1660 * memblock_alloc_try_nid - allocate boot memory block 1661 * @size: size of memory block to be allocated in bytes 1662 * @align: alignment of the region and block's size 1663 * @min_addr: the lower bound of the memory region from where the allocation 1664 * is preferred (phys address) 1665 * @max_addr: the upper bound of the memory region from where the allocation 1666 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to 1667 * allocate only from memory limited by memblock.current_limit value 1668 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1669 * 1670 * Public function, provides additional debug information (including caller 1671 * info), if enabled. This function zeroes the allocated memory. 1672 * 1673 * Return: 1674 * Virtual address of allocated memory block on success, NULL on failure. 
1675 */ 1676 void * __init memblock_alloc_try_nid( 1677 phys_addr_t size, phys_addr_t align, 1678 phys_addr_t min_addr, phys_addr_t max_addr, 1679 int nid) 1680 { 1681 void *ptr; 1682 1683 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", 1684 __func__, (u64)size, (u64)align, nid, &min_addr, 1685 &max_addr, (void *)_RET_IP_); 1686 ptr = memblock_alloc_internal(size, align, 1687 min_addr, max_addr, nid, false); 1688 if (ptr) 1689 memset(ptr, 0, size); 1690 1691 return ptr; 1692 } 1693 1694 /** 1695 * memblock_free_late - free pages directly to buddy allocator 1696 * @base: phys starting address of the boot memory block 1697 * @size: size of the boot memory block in bytes 1698 * 1699 * This is only useful when the memblock allocator has already been torn 1700 * down, but we are still initializing the system. Pages are released directly 1701 * to the buddy allocator. 1702 */ 1703 void __init memblock_free_late(phys_addr_t base, phys_addr_t size) 1704 { 1705 phys_addr_t cursor, end; 1706 1707 end = base + size - 1; 1708 memblock_dbg("%s: [%pa-%pa] %pS\n", 1709 __func__, &base, &end, (void *)_RET_IP_); 1710 kmemleak_free_part_phys(base, size); 1711 cursor = PFN_UP(base); 1712 end = PFN_DOWN(base + size); 1713 1714 for (; cursor < end; cursor++) { 1715 memblock_free_pages(pfn_to_page(cursor), cursor, 0); 1716 totalram_pages_inc(); 1717 } 1718 } 1719 1720 /* 1721 * Remaining API functions 1722 */ 1723 1724 phys_addr_t __init_memblock memblock_phys_mem_size(void) 1725 { 1726 return memblock.memory.total_size; 1727 } 1728 1729 phys_addr_t __init_memblock memblock_reserved_size(void) 1730 { 1731 return memblock.reserved.total_size; 1732 } 1733 1734 /* lowest address */ 1735 phys_addr_t __init_memblock memblock_start_of_DRAM(void) 1736 { 1737 return memblock.memory.regions[0].base; 1738 } 1739 1740 phys_addr_t __init_memblock memblock_end_of_DRAM(void) 1741 { 1742 int idx = memblock.memory.cnt - 1; 1743 1744 return 
(memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); 1745 } 1746 1747 static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) 1748 { 1749 phys_addr_t max_addr = PHYS_ADDR_MAX; 1750 struct memblock_region *r; 1751 1752 /* 1753 * translate the memory @limit size into the max address within one of 1754 * the memory memblock regions, if the @limit exceeds the total size 1755 * of those regions, max_addr will keep original value PHYS_ADDR_MAX 1756 */ 1757 for_each_mem_region(r) { 1758 if (limit <= r->size) { 1759 max_addr = r->base + limit; 1760 break; 1761 } 1762 limit -= r->size; 1763 } 1764 1765 return max_addr; 1766 } 1767 1768 void __init memblock_enforce_memory_limit(phys_addr_t limit) 1769 { 1770 phys_addr_t max_addr; 1771 1772 if (!limit) 1773 return; 1774 1775 max_addr = __find_max_addr(limit); 1776 1777 /* @limit exceeds the total size of the memory, do nothing */ 1778 if (max_addr == PHYS_ADDR_MAX) 1779 return; 1780 1781 /* truncate both memory and reserved regions */ 1782 memblock_remove_range(&memblock.memory, max_addr, 1783 PHYS_ADDR_MAX); 1784 memblock_remove_range(&memblock.reserved, max_addr, 1785 PHYS_ADDR_MAX); 1786 } 1787 1788 void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) 1789 { 1790 int start_rgn, end_rgn; 1791 int i, ret; 1792 1793 if (!size) 1794 return; 1795 1796 if (!memblock_memory->total_size) { 1797 pr_warn("%s: No memory registered yet\n", __func__); 1798 return; 1799 } 1800 1801 ret = memblock_isolate_range(&memblock.memory, base, size, 1802 &start_rgn, &end_rgn); 1803 if (ret) 1804 return; 1805 1806 /* remove all the MAP regions */ 1807 for (i = memblock.memory.cnt - 1; i >= end_rgn; i--) 1808 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1809 memblock_remove_region(&memblock.memory, i); 1810 1811 for (i = start_rgn - 1; i >= 0; i--) 1812 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1813 memblock_remove_region(&memblock.memory, i); 1814 1815 /* truncate the 
reserved regions */ 1816 memblock_remove_range(&memblock.reserved, 0, base); 1817 memblock_remove_range(&memblock.reserved, 1818 base + size, PHYS_ADDR_MAX); 1819 } 1820 1821 void __init memblock_mem_limit_remove_map(phys_addr_t limit) 1822 { 1823 phys_addr_t max_addr; 1824 1825 if (!limit) 1826 return; 1827 1828 max_addr = __find_max_addr(limit); 1829 1830 /* @limit exceeds the total size of the memory, do nothing */ 1831 if (max_addr == PHYS_ADDR_MAX) 1832 return; 1833 1834 memblock_cap_memory_range(0, max_addr); 1835 } 1836 1837 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) 1838 { 1839 unsigned int left = 0, right = type->cnt; 1840 1841 do { 1842 unsigned int mid = (right + left) / 2; 1843 1844 if (addr < type->regions[mid].base) 1845 right = mid; 1846 else if (addr >= (type->regions[mid].base + 1847 type->regions[mid].size)) 1848 left = mid + 1; 1849 else 1850 return mid; 1851 } while (left < right); 1852 return -1; 1853 } 1854 1855 bool __init_memblock memblock_is_reserved(phys_addr_t addr) 1856 { 1857 return memblock_search(&memblock.reserved, addr) != -1; 1858 } 1859 1860 bool __init_memblock memblock_is_memory(phys_addr_t addr) 1861 { 1862 return memblock_search(&memblock.memory, addr) != -1; 1863 } 1864 1865 bool __init_memblock memblock_is_map_memory(phys_addr_t addr) 1866 { 1867 int i = memblock_search(&memblock.memory, addr); 1868 1869 if (i == -1) 1870 return false; 1871 return !memblock_is_nomap(&memblock.memory.regions[i]); 1872 } 1873 1874 int __init_memblock memblock_search_pfn_nid(unsigned long pfn, 1875 unsigned long *start_pfn, unsigned long *end_pfn) 1876 { 1877 struct memblock_type *type = &memblock.memory; 1878 int mid = memblock_search(type, PFN_PHYS(pfn)); 1879 1880 if (mid == -1) 1881 return NUMA_NO_NODE; 1882 1883 *start_pfn = PFN_DOWN(type->regions[mid].base); 1884 *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size); 1885 1886 return 
memblock_get_region_node(&type->regions[mid]); 1887 } 1888 1889 /** 1890 * memblock_is_region_memory - check if a region is a subset of memory 1891 * @base: base of region to check 1892 * @size: size of region to check 1893 * 1894 * Check if the region [@base, @base + @size) is a subset of a memory block. 1895 * 1896 * Return: 1897 * 0 if false, non-zero if true 1898 */ 1899 bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) 1900 { 1901 int idx = memblock_search(&memblock.memory, base); 1902 phys_addr_t end = base + memblock_cap_size(base, &size); 1903 1904 if (idx == -1) 1905 return false; 1906 return (memblock.memory.regions[idx].base + 1907 memblock.memory.regions[idx].size) >= end; 1908 } 1909 1910 /** 1911 * memblock_is_region_reserved - check if a region intersects reserved memory 1912 * @base: base of region to check 1913 * @size: size of region to check 1914 * 1915 * Check if the region [@base, @base + @size) intersects a reserved 1916 * memory block. 1917 * 1918 * Return: 1919 * True if they intersect, false if not. 
1920 */ 1921 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) 1922 { 1923 return memblock_overlaps_region(&memblock.reserved, base, size); 1924 } 1925 1926 void __init_memblock memblock_trim_memory(phys_addr_t align) 1927 { 1928 phys_addr_t start, end, orig_start, orig_end; 1929 struct memblock_region *r; 1930 1931 for_each_mem_region(r) { 1932 orig_start = r->base; 1933 orig_end = r->base + r->size; 1934 start = round_up(orig_start, align); 1935 end = round_down(orig_end, align); 1936 1937 if (start == orig_start && end == orig_end) 1938 continue; 1939 1940 if (start < end) { 1941 r->base = start; 1942 r->size = end - start; 1943 } else { 1944 memblock_remove_region(&memblock.memory, 1945 r - memblock.memory.regions); 1946 r--; 1947 } 1948 } 1949 } 1950 1951 void __init_memblock memblock_set_current_limit(phys_addr_t limit) 1952 { 1953 memblock.current_limit = limit; 1954 } 1955 1956 phys_addr_t __init_memblock memblock_get_current_limit(void) 1957 { 1958 return memblock.current_limit; 1959 } 1960 1961 static void __init_memblock memblock_dump(struct memblock_type *type) 1962 { 1963 phys_addr_t base, end, size; 1964 enum memblock_flags flags; 1965 int idx; 1966 struct memblock_region *rgn; 1967 1968 pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); 1969 1970 for_each_memblock_type(idx, type, rgn) { 1971 char nid_buf[32] = ""; 1972 1973 base = rgn->base; 1974 size = rgn->size; 1975 end = base + size - 1; 1976 flags = rgn->flags; 1977 #ifdef CONFIG_NUMA 1978 if (numa_valid_node(memblock_get_region_node(rgn))) 1979 snprintf(nid_buf, sizeof(nid_buf), " on node %d", 1980 memblock_get_region_node(rgn)); 1981 #endif 1982 pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n", 1983 type->name, idx, &base, &end, &size, nid_buf, flags); 1984 } 1985 } 1986 1987 static void __init_memblock __memblock_dump_all(void) 1988 { 1989 pr_info("MEMBLOCK configuration:\n"); 1990 pr_info(" memory size = %pa reserved size = %pa\n", 1991 
&memblock.memory.total_size, 1992 &memblock.reserved.total_size); 1993 1994 memblock_dump(&memblock.memory); 1995 memblock_dump(&memblock.reserved); 1996 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 1997 memblock_dump(&physmem); 1998 #endif 1999 } 2000 2001 void __init_memblock memblock_dump_all(void) 2002 { 2003 if (memblock_debug) 2004 __memblock_dump_all(); 2005 } 2006 2007 void __init memblock_allow_resize(void) 2008 { 2009 memblock_can_resize = 1; 2010 } 2011 2012 static int __init early_memblock(char *p) 2013 { 2014 if (p && strstr(p, "debug")) 2015 memblock_debug = 1; 2016 return 0; 2017 } 2018 early_param("memblock", early_memblock); 2019 2020 static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) 2021 { 2022 struct page *start_pg, *end_pg; 2023 phys_addr_t pg, pgend; 2024 2025 /* 2026 * Convert start_pfn/end_pfn to a struct page pointer. 2027 */ 2028 start_pg = pfn_to_page(start_pfn - 1) + 1; 2029 end_pg = pfn_to_page(end_pfn - 1) + 1; 2030 2031 /* 2032 * Convert to physical addresses, and round start upwards and end 2033 * downwards. 2034 */ 2035 pg = PAGE_ALIGN(__pa(start_pg)); 2036 pgend = PAGE_ALIGN_DOWN(__pa(end_pg)); 2037 2038 /* 2039 * If there are free pages between these, free the section of the 2040 * memmap array. 2041 */ 2042 if (pg < pgend) 2043 memblock_phys_free(pg, pgend - pg); 2044 } 2045 2046 /* 2047 * The mem_map array can get very big. Free the unused area of the memory map. 2048 */ 2049 static void __init free_unused_memmap(void) 2050 { 2051 unsigned long start, end, prev_end = 0; 2052 int i; 2053 2054 if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) || 2055 IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) 2056 return; 2057 2058 /* 2059 * This relies on each bank being in address order. 2060 * The banks are sorted previously in bootmem_init(). 
2061 */ 2062 for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { 2063 #ifdef CONFIG_SPARSEMEM 2064 /* 2065 * Take care not to free memmap entries that don't exist 2066 * due to SPARSEMEM sections which aren't present. 2067 */ 2068 start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); 2069 #endif 2070 /* 2071 * Align down here since many operations in VM subsystem 2072 * presume that there are no holes in the memory map inside 2073 * a pageblock 2074 */ 2075 start = pageblock_start_pfn(start); 2076 2077 /* 2078 * If we had a previous bank, and there is a space 2079 * between the current bank and the previous, free it. 2080 */ 2081 if (prev_end && prev_end < start) 2082 free_memmap(prev_end, start); 2083 2084 /* 2085 * Align up here since many operations in VM subsystem 2086 * presume that there are no holes in the memory map inside 2087 * a pageblock 2088 */ 2089 prev_end = pageblock_align(end); 2090 } 2091 2092 #ifdef CONFIG_SPARSEMEM 2093 if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) { 2094 prev_end = pageblock_align(end); 2095 free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); 2096 } 2097 #endif 2098 } 2099 2100 static void __init __free_pages_memory(unsigned long start, unsigned long end) 2101 { 2102 int order; 2103 2104 while (start < end) { 2105 /* 2106 * Free the pages in the largest chunks alignment allows. 2107 * 2108 * __ffs() behaviour is undefined for 0. start == 0 is 2109 * MAX_PAGE_ORDER-aligned, set order to MAX_PAGE_ORDER for 2110 * the case. 
2111 */ 2112 if (start) 2113 order = min_t(int, MAX_PAGE_ORDER, __ffs(start)); 2114 else 2115 order = MAX_PAGE_ORDER; 2116 2117 while (start + (1UL << order) > end) 2118 order--; 2119 2120 memblock_free_pages(pfn_to_page(start), start, order); 2121 2122 start += (1UL << order); 2123 } 2124 } 2125 2126 static unsigned long __init __free_memory_core(phys_addr_t start, 2127 phys_addr_t end) 2128 { 2129 unsigned long start_pfn = PFN_UP(start); 2130 unsigned long end_pfn = min_t(unsigned long, 2131 PFN_DOWN(end), max_low_pfn); 2132 2133 if (start_pfn >= end_pfn) 2134 return 0; 2135 2136 __free_pages_memory(start_pfn, end_pfn); 2137 2138 return end_pfn - start_pfn; 2139 } 2140 2141 static void __init memmap_init_reserved_pages(void) 2142 { 2143 struct memblock_region *region; 2144 phys_addr_t start, end; 2145 int nid; 2146 2147 /* 2148 * set nid on all reserved pages and also treat struct 2149 * pages for the NOMAP regions as PageReserved 2150 */ 2151 for_each_mem_region(region) { 2152 nid = memblock_get_region_node(region); 2153 start = region->base; 2154 end = start + region->size; 2155 2156 if (memblock_is_nomap(region)) 2157 reserve_bootmem_region(start, end, nid); 2158 2159 memblock_set_node(start, end, &memblock.reserved, nid); 2160 } 2161 2162 /* 2163 * initialize struct pages for reserved regions that don't have 2164 * the MEMBLOCK_RSRV_NOINIT flag set 2165 */ 2166 for_each_reserved_mem_region(region) { 2167 if (!memblock_is_reserved_noinit(region)) { 2168 nid = memblock_get_region_node(region); 2169 start = region->base; 2170 end = start + region->size; 2171 2172 if (!numa_valid_node(nid)) 2173 nid = early_pfn_to_nid(PFN_DOWN(start)); 2174 2175 reserve_bootmem_region(start, end, nid); 2176 } 2177 } 2178 } 2179 2180 static unsigned long __init free_low_memory_core_early(void) 2181 { 2182 unsigned long count = 0; 2183 phys_addr_t start, end; 2184 u64 i; 2185 2186 memblock_clear_hotplug(0, -1); 2187 2188 memmap_init_reserved_pages(); 2189 2190 /* 2191 * We need to 
use NUMA_NO_NODE instead of NODE_DATA(0)->node_id 2192 * because in some case like Node0 doesn't have RAM installed 2193 * low ram will be on Node1 2194 */ 2195 for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, 2196 NULL) 2197 count += __free_memory_core(start, end); 2198 2199 return count; 2200 } 2201 2202 static int reset_managed_pages_done __initdata; 2203 2204 static void __init reset_node_managed_pages(pg_data_t *pgdat) 2205 { 2206 struct zone *z; 2207 2208 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) 2209 atomic_long_set(&z->managed_pages, 0); 2210 } 2211 2212 void __init reset_all_zones_managed_pages(void) 2213 { 2214 struct pglist_data *pgdat; 2215 2216 if (reset_managed_pages_done) 2217 return; 2218 2219 for_each_online_pgdat(pgdat) 2220 reset_node_managed_pages(pgdat); 2221 2222 reset_managed_pages_done = 1; 2223 } 2224 2225 /** 2226 * memblock_free_all - release free pages to the buddy allocator 2227 */ 2228 void __init memblock_free_all(void) 2229 { 2230 unsigned long pages; 2231 2232 free_unused_memmap(); 2233 reset_all_zones_managed_pages(); 2234 2235 pages = free_low_memory_core_early(); 2236 totalram_pages_add(pages); 2237 } 2238 2239 /* Keep a table to reserve named memory */ 2240 #define RESERVE_MEM_MAX_ENTRIES 8 2241 #define RESERVE_MEM_NAME_SIZE 16 2242 struct reserve_mem_table { 2243 char name[RESERVE_MEM_NAME_SIZE]; 2244 phys_addr_t start; 2245 phys_addr_t size; 2246 }; 2247 static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES]; 2248 static int reserved_mem_count; 2249 2250 /* Add wildcard region with a lookup name */ 2251 static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size, 2252 const char *name) 2253 { 2254 struct reserve_mem_table *map; 2255 2256 map = &reserved_mem_table[reserved_mem_count++]; 2257 map->start = start; 2258 map->size = size; 2259 strscpy(map->name, name); 2260 } 2261 2262 /** 2263 * reserve_mem_find_by_name - Find reserved memory region 
with a given name 2264 * @name: The name that is attached to a reserved memory region 2265 * @start: If found, holds the start address 2266 * @size: If found, holds the size of the address. 2267 * 2268 * @start and @size are only updated if @name is found. 2269 * 2270 * Returns: 1 if found or 0 if not found. 2271 */ 2272 int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size) 2273 { 2274 struct reserve_mem_table *map; 2275 int i; 2276 2277 for (i = 0; i < reserved_mem_count; i++) { 2278 map = &reserved_mem_table[i]; 2279 if (!map->size) 2280 continue; 2281 if (strcmp(name, map->name) == 0) { 2282 *start = map->start; 2283 *size = map->size; 2284 return 1; 2285 } 2286 } 2287 return 0; 2288 } 2289 EXPORT_SYMBOL_GPL(reserve_mem_find_by_name); 2290 2291 /* 2292 * Parse reserve_mem=nn:align:name 2293 */ 2294 static int __init reserve_mem(char *p) 2295 { 2296 phys_addr_t start, size, align, tmp; 2297 char *name; 2298 char *oldp; 2299 int len; 2300 2301 if (!p) 2302 return -EINVAL; 2303 2304 /* Check if there's room for more reserved memory */ 2305 if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES) 2306 return -EBUSY; 2307 2308 oldp = p; 2309 size = memparse(p, &p); 2310 if (!size || p == oldp) 2311 return -EINVAL; 2312 2313 if (*p != ':') 2314 return -EINVAL; 2315 2316 align = memparse(p+1, &p); 2317 if (*p != ':') 2318 return -EINVAL; 2319 2320 /* 2321 * memblock_phys_alloc() doesn't like a zero size align, 2322 * but it is OK for this command to have it. 
2323 */ 2324 if (align < SMP_CACHE_BYTES) 2325 align = SMP_CACHE_BYTES; 2326 2327 name = p + 1; 2328 len = strlen(name); 2329 2330 /* name needs to have length but not too big */ 2331 if (!len || len >= RESERVE_MEM_NAME_SIZE) 2332 return -EINVAL; 2333 2334 /* Make sure that name has text */ 2335 for (p = name; *p; p++) { 2336 if (!isspace(*p)) 2337 break; 2338 } 2339 if (!*p) 2340 return -EINVAL; 2341 2342 /* Make sure the name is not already used */ 2343 if (reserve_mem_find_by_name(name, &start, &tmp)) 2344 return -EBUSY; 2345 2346 start = memblock_phys_alloc(size, align); 2347 if (!start) 2348 return -ENOMEM; 2349 2350 reserved_mem_add(start, size, name); 2351 2352 return 1; 2353 } 2354 __setup("reserve_mem=", reserve_mem); 2355 2356 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK) 2357 static const char * const flagname[] = { 2358 [ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG", 2359 [ilog2(MEMBLOCK_MIRROR)] = "MIRROR", 2360 [ilog2(MEMBLOCK_NOMAP)] = "NOMAP", 2361 [ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG", 2362 [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT", 2363 }; 2364 2365 static int memblock_debug_show(struct seq_file *m, void *private) 2366 { 2367 struct memblock_type *type = m->private; 2368 struct memblock_region *reg; 2369 int i, j, nid; 2370 unsigned int count = ARRAY_SIZE(flagname); 2371 phys_addr_t end; 2372 2373 for (i = 0; i < type->cnt; i++) { 2374 reg = &type->regions[i]; 2375 end = reg->base + reg->size - 1; 2376 nid = memblock_get_region_node(reg); 2377 2378 seq_printf(m, "%4d: ", i); 2379 seq_printf(m, "%pa..%pa ", ®->base, &end); 2380 if (numa_valid_node(nid)) 2381 seq_printf(m, "%4d ", nid); 2382 else 2383 seq_printf(m, "%4c ", 'x'); 2384 if (reg->flags) { 2385 for (j = 0; j < count; j++) { 2386 if (reg->flags & (1U << j)) { 2387 seq_printf(m, "%s\n", flagname[j]); 2388 break; 2389 } 2390 } 2391 if (j == count) 2392 seq_printf(m, "%s\n", "UNKNOWN"); 2393 } else { 2394 seq_printf(m, "%s\n", "NONE"); 2395 } 2396 } 2397 return 0; 2398 } 
2399 DEFINE_SHOW_ATTRIBUTE(memblock_debug); 2400 2401 static int __init memblock_init_debugfs(void) 2402 { 2403 struct dentry *root = debugfs_create_dir("memblock", NULL); 2404 2405 debugfs_create_file("memory", 0444, root, 2406 &memblock.memory, &memblock_debug_fops); 2407 debugfs_create_file("reserved", 0444, root, 2408 &memblock.reserved, &memblock_debug_fops); 2409 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 2410 debugfs_create_file("physmem", 0444, root, &physmem, 2411 &memblock_debug_fops); 2412 #endif 2413 2414 return 0; 2415 } 2416 __initcall(memblock_init_debugfs); 2417 2418 #endif /* CONFIG_DEBUG_FS */ 2419