// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/poison.h>
#include <linux/pfn.h>
#include <linux/debugfs.h>
#include <linux/kmemleak.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>
#include <linux/mutex.h>

#ifdef CONFIG_KEXEC_HANDOVER
#include <linux/libfdt.h>
#include <linux/kexec_handover.h>
#endif /* CONFIG_KEXEC_HANDOVER */

#include <asm/sections.h>
#include <linux/io.h>

#include "internal.h"

#define INIT_MEMBLOCK_REGIONS			128
#define INIT_PHYSMEM_REGIONS			4

#ifndef INIT_MEMBLOCK_RESERVED_REGIONS
# define INIT_MEMBLOCK_RESERVED_REGIONS		INIT_MEMBLOCK_REGIONS
#endif

#ifndef INIT_MEMBLOCK_MEMORY_REGIONS
#define INIT_MEMBLOCK_MEMORY_REGIONS		INIT_MEMBLOCK_REGIONS
#endif

/**
 * DOC: memblock overview
 *
 * Memblock is a method of managing memory regions during the early
 * boot period when the usual kernel memory allocators are not up and
 * running.
 *
 * Memblock views the system memory as collections of contiguous
 * regions. There are several types of these collections:
 *
 * * ``memory`` - describes the physical memory available to the
 *   kernel; this may differ from the actual physical memory installed
 *   in the system, for instance when the memory is restricted with
 *   ``mem=`` command line parameter
 * * ``reserved`` - describes the regions that were allocated
 * * ``physmem`` - describes the actual physical memory available during
 *   boot regardless of the possible restrictions and memory hot(un)plug;
 *   the ``physmem`` type is only available on some architectures.
 *
 * Each region is represented by struct memblock_region that
 * defines the region extents, its attributes and NUMA node id on NUMA
 * systems. Every memory type is described by the struct memblock_type
 * which contains an array of memory regions along with
 * the allocator metadata. The "memory" and "reserved" types are nicely
 * wrapped with struct memblock. This structure is statically
 * initialized at build time. The region arrays are initially sized to
 * %INIT_MEMBLOCK_MEMORY_REGIONS for "memory" and
 * %INIT_MEMBLOCK_RESERVED_REGIONS for "reserved". The region array
 * for "physmem" is initially sized to %INIT_PHYSMEM_REGIONS.
 * The memblock_allow_resize() enables automatic resizing of the region
 * arrays during addition of new regions. This feature should be used
 * with care so that memory allocated for the region array will not
 * overlap with areas that should be reserved, for example initrd.
 *
 * The early architecture setup should tell memblock what the physical
 * memory layout is by using memblock_add() or memblock_add_node()
 * functions. The first function does not assign the region to a NUMA
 * node and it is appropriate for UMA systems. Yet, it is possible to
 * use it on NUMA systems as well and assign the region to a NUMA node
 * later in the setup process using memblock_set_node(). The
 * memblock_add_node() performs such an assignment directly.
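 *
 * As a hedged illustration (not lifted from any particular architecture;
 * base, size, initrd_start and initrd_size are placeholders), an early
 * setup path might look like::
 *
 *	memblock_add(base, size);
 *	memblock_reserve(initrd_start, initrd_size);
 *	memblock_allow_resize();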
 *
 * Once memblock is set up, the memory can be allocated using one of the
 * API variants:
 *
 * * memblock_phys_alloc*() - these functions return the **physical**
 *   address of the allocated memory
 * * memblock_alloc*() - these functions return the **virtual** address
 *   of the allocated memory.
 *
 * Note, that both API variants use implicit assumptions about allowed
 * memory ranges and the fallback methods. Consult the documentation
 * of memblock_alloc_internal() and memblock_alloc_range_nid()
 * functions for more elaborate description.
 *
 * As the system boot progresses, the architecture specific mem_init()
 * function frees all the memory to the buddy page allocator.
 *
 * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the
 * memblock data structures (except "physmem") will be discarded after the
 * system initialization completes.
 */

#ifndef CONFIG_NUMA
struct pglist_data __refdata contig_page_data;
EXPORT_SYMBOL(contig_page_data);
#endif

unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
unsigned long long max_possible_pfn;

#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
/* When set to true, only allocate from MEMBLOCK_KHO_SCRATCH ranges */
static bool kho_scratch_only;
#else
#define kho_scratch_only	false
#endif

static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
#endif

struct memblock memblock __initdata_memblock = {
	.memory.regions		= memblock_memory_init_regions,
	.memory.max		= INIT_MEMBLOCK_MEMORY_REGIONS,
	.memory.name		= "memory",

	.reserved.regions	= memblock_reserved_init_regions,
	.reserved.max		= INIT_MEMBLOCK_RESERVED_REGIONS,
	.reserved.name		= "reserved",

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem = {
	.regions		= memblock_physmem_init_regions,
	.max			= INIT_PHYSMEM_REGIONS,
	.name			= "physmem",
};
#endif

/*
 * keep a pointer to &memblock.memory in the text section to use it in
 * __next_mem_range() and its helpers.
 * For architectures that do not keep memblock data after init, this
 * pointer will be reset to NULL at memblock_discard()
 */
static __refdata struct memblock_type *memblock_memory = &memblock.memory;

#define for_each_memblock_type(i, memblock_type, rgn)			\
	for (i = 0, rgn = &memblock_type->regions[0];			\
	     i < memblock_type->cnt;					\
	     i++, rgn = &memblock_type->regions[i])

#define memblock_dbg(fmt, ...)						\
	do {								\
		if (memblock_debug)					\
			pr_info(fmt, ##__VA_ARGS__);			\
	} while (0)

static int memblock_debug __initdata_memblock;
static bool system_has_some_mirror __initdata_memblock;
static int memblock_can_resize __initdata_memblock;
static int memblock_memory_in_slab __initdata_memblock;
static int memblock_reserved_in_slab __initdata_memblock;

bool __init_memblock memblock_has_mirror(void)
{
	return system_has_some_mirror;
}

static enum memblock_flags __init_memblock choose_memblock_flags(void)
{
	/* skip non-scratch memory for kho early boot allocations */
	if (kho_scratch_only)
		return MEMBLOCK_KHO_SCRATCH;

	return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
}

/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
{
	return *size = min(*size, PHYS_ADDR_MAX - base);
}

/*
 * Address comparison utilities
 */
unsigned long __init_memblock
memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2,
		       phys_addr_t size2)
{
	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
					      phys_addr_t base, phys_addr_t size)
{
	unsigned long i;

	memblock_cap_size(base, &size);

	for (i = 0; i < type->cnt; i++)
		if (memblock_addrs_overlap(base, size, type->regions[i].base,
					   type->regions[i].size))
			return true;
	return false;
}

/**
 * __memblock_find_range_bottom_up - find free area utility in bottom-up
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Utility called from memblock_find_in_range_node(), find free area bottom-up.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
				phys_addr_t size, phys_addr_t align, int nid,
				enum memblock_flags flags)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) {
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		cand = round_up(this_start, align);
		if (cand < this_end && this_end - cand >= size)
			return cand;
	}

	return 0;
}

/**
 * __memblock_find_range_top_down - find free area utility, in top-down
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Utility called from memblock_find_in_range_node(), find free area top-down.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
			       phys_addr_t size, phys_addr_t align, int nid,
			       enum memblock_flags flags)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end,
					NULL) {
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		if (this_end < size)
			continue;

		cand = round_down(this_end - size, align);
		if (cand >= this_start)
			return cand;
	}

	return 0;
}

/**
 * memblock_find_in_range_node - find free area in given range and node
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Find @size free area aligned to @align in the specified range and node.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid,
					enum memblock_flags flags)
{
	/* pump up @end */
	if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
	    end == MEMBLOCK_ALLOC_NOLEAKTRACE)
		end = memblock.current_limit;

	/* avoid allocating the first page */
	start = max_t(phys_addr_t, start, PAGE_SIZE);
	end = max(start, end);

	if (memblock_bottom_up())
		return __memblock_find_range_bottom_up(start, end, size, align,
						       nid, flags);
	else
		return __memblock_find_range_top_down(start, end, size, align,
						      nid, flags);
}

/**
 * memblock_find_in_range - find free area in given range
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 *
 * Find @size free area aligned to @align in the specified range.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
					phys_addr_t end, phys_addr_t size,
					phys_addr_t align)
{
	phys_addr_t ret;
	enum memblock_flags flags = choose_memblock_flags();

again:
	ret = memblock_find_in_range_node(size, align, start, end,
					  NUMA_NO_NODE, flags);

	if (!ret && (flags & MEMBLOCK_MIRROR)) {
		pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
			&size);
		flags &= ~MEMBLOCK_MIRROR;
		goto again;
	}

	return ret;
}

static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
{
	type->total_size -= type->regions[r].size;
	memmove(&type->regions[r], &type->regions[r + 1],
		(type->cnt - (r + 1)) * sizeof(type->regions[r]));
	type->cnt--;

	/* Special case for empty arrays */
	if (type->cnt == 0) {
		WARN_ON(type->total_size != 0);
		type->regions[0].base = 0;
		type->regions[0].size = 0;
		type->regions[0].flags = 0;
		memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
	}
}

#ifndef CONFIG_ARCH_KEEP_MEMBLOCK
/**
 * memblock_discard - discard memory and reserved arrays if they were allocated
 */
void __init memblock_discard(void)
{
	phys_addr_t addr, size;

	if (memblock.reserved.regions != memblock_reserved_init_regions) {
		addr = __pa(memblock.reserved.regions);
		size = PAGE_ALIGN(sizeof(struct memblock_region) *
				  memblock.reserved.max);
		if (memblock_reserved_in_slab)
			kfree(memblock.reserved.regions);
		else
			memblock_free_late(addr, size);
	}

	if (memblock.memory.regions != memblock_memory_init_regions) {
		addr = __pa(memblock.memory.regions);
		size = PAGE_ALIGN(sizeof(struct memblock_region) *
				  memblock.memory.max);
		if (memblock_memory_in_slab)
			kfree(memblock.memory.regions);
		else
			memblock_free_late(addr, size);
	}

	memblock_memory = NULL;
}
#endif

/**
 * memblock_double_array - double the size of the memblock regions array
 * @type: memblock type of the regions array being doubled
 * @new_area_start: starting address of memory range to avoid overlap with
 * @new_area_size: size of memory range to avoid overlap with
 *
 * Double the size of the @type regions array. If memblock is being used to
 * allocate memory for a new reserved regions array and there is a previously
 * allocated memory range [@new_area_start, @new_area_start + @new_area_size]
 * waiting to be reserved, ensure the memory used by the new array does
 * not overlap.
 *
 * Return:
 * 0 on success, -1 on failure.
 */
static int __init_memblock memblock_double_array(struct memblock_type *type,
						 phys_addr_t new_area_start,
						 phys_addr_t new_area_size)
{
	struct memblock_region *new_array, *old_array;
	phys_addr_t old_alloc_size, new_alloc_size;
	phys_addr_t old_size, new_size, addr, new_end;
	int use_slab = slab_is_available();
	int *in_slab;

	/* We don't allow resizing until we know about the reserved regions
	 * of memory that aren't suitable for allocation
	 */
	if (!memblock_can_resize)
		panic("memblock: cannot resize %s array\n", type->name);

	/* Calculate new doubled size */
	old_size = type->max * sizeof(struct memblock_region);
	new_size = old_size << 1;
	/*
	 * We need to allocate the new array aligned to PAGE_SIZE,
	 * so we can free it completely later.
	 */
	old_alloc_size = PAGE_ALIGN(old_size);
	new_alloc_size = PAGE_ALIGN(new_size);

	/* Retrieve the slab flag */
	if (type == &memblock.memory)
		in_slab = &memblock_memory_in_slab;
	else
		in_slab = &memblock_reserved_in_slab;

	/* Try to find some space for it */
	if (use_slab) {
		new_array = kmalloc(new_size, GFP_KERNEL);
		addr = new_array ? __pa(new_array) : 0;
	} else {
		/* only exclude range when trying to double reserved.regions */
		if (type != &memblock.reserved)
			new_area_start = new_area_size = 0;

		addr = memblock_find_in_range(new_area_start + new_area_size,
						memblock.current_limit,
						new_alloc_size, PAGE_SIZE);
		if (!addr && new_area_size)
			addr = memblock_find_in_range(0,
				min(new_area_start, memblock.current_limit),
				new_alloc_size, PAGE_SIZE);

		if (addr) {
			/* The memory may not have been accepted, yet. */
			accept_memory(addr, new_alloc_size);

			new_array = __va(addr);
		} else {
			new_array = NULL;
		}
	}
	if (!addr) {
		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
		       type->name, type->max, type->max * 2);
		return -1;
	}

	new_end = addr + new_size - 1;
	memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]",
			type->name, type->max * 2, &addr, &new_end);

	/*
	 * Found space, we now need to move the array over before we add the
	 * reserved region since it may be our reserved array itself that is
	 * full.
	 */
	memcpy(new_array, type->regions, old_size);
	memset(new_array + type->max, 0, old_size);
	old_array = type->regions;
	type->regions = new_array;
	type->max <<= 1;

	/* Free old array. We needn't free it if the array is the static one */
	if (*in_slab)
		kfree(old_array);
	else if (old_array != memblock_memory_init_regions &&
		 old_array != memblock_reserved_init_regions)
		memblock_free(old_array, old_alloc_size);

	/*
	 * Reserve the new array if that comes from the memblock.  Otherwise, we
	 * needn't do it
	 */
	if (!use_slab)
		BUG_ON(memblock_reserve_kern(addr, new_alloc_size));

	/* Update slab flag */
	*in_slab = use_slab;

	return 0;
}

/**
 * memblock_merge_regions - merge neighboring compatible regions
 * @type: memblock type to scan
 * @start_rgn: start scanning from (@start_rgn - 1)
 * @end_rgn: end scanning at (@end_rgn - 1)
 * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn)
 */
static void __init_memblock memblock_merge_regions(struct memblock_type *type,
						   unsigned long start_rgn,
						   unsigned long end_rgn)
{
	int i = 0;

	if (start_rgn)
		i = start_rgn - 1;
	end_rgn = min(end_rgn, type->cnt - 1);
	while (i < end_rgn) {
		struct memblock_region *this = &type->regions[i];
		struct memblock_region *next = &type->regions[i + 1];

		if (this->base + this->size != next->base ||
		    memblock_get_region_node(this) !=
		    memblock_get_region_node(next) ||
		    this->flags != next->flags) {
			BUG_ON(this->base + this->size > next->base);
			i++;
			continue;
		}

		this->size += next->size;
		/* move forward from next + 1, index of which is i + 2 */
		memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next));
		type->cnt--;
		end_rgn--;
	}
}

/**
 * memblock_insert_region - insert new memblock region
 * @type: memblock type to insert into
 * @idx: index for the insertion point
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: node id of the new region
 * @flags: flags of the new region
 *
 * Insert new memblock region [@base, @base + @size) into @type at @idx.
 * @type must already have extra room to accommodate the new region.
 */
static void __init_memblock memblock_insert_region(struct memblock_type *type,
						   int idx, phys_addr_t base,
						   phys_addr_t size,
						   int nid,
						   enum memblock_flags flags)
{
	struct memblock_region *rgn = &type->regions[idx];

	BUG_ON(type->cnt >= type->max);
	memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
	rgn->base = base;
	rgn->size = size;
	rgn->flags = flags;
	memblock_set_region_node(rgn, nid);
	type->cnt++;
	type->total_size += size;
}

/**
 * memblock_add_range - add new memblock region
 * @type: memblock type to add new region into
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base, @base + @size) into @type.  The new region
 * is allowed to overlap with existing ones - overlaps don't affect already
 * existing regions.  @type is guaranteed to be minimal (all neighbouring
 * compatible regions are merged) after the addition.
 *
 * Return:
 * 0 on success, -errno on failure.
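 *
 * For example (an illustrative case, not taken from a caller in this file),
 * adding [0x1000, 0x3000) to a @type that already holds [0x2000, 0x4000)
 * with the same nid and flags leaves a single merged region::
 *
 *	before:  [0x2000, 0x4000)
 *	add:     [0x1000, 0x3000)
 *	after:   [0x1000, 0x4000)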
 */
static int __init_memblock memblock_add_range(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size,
				int nid, enum memblock_flags flags)
{
	bool insert = false;
	phys_addr_t obase = base;
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx, nr_new, start_rgn = -1, end_rgn;
	struct memblock_region *rgn;

	if (!size)
		return 0;

	/* special case for empty array */
	if (type->regions[0].size == 0) {
		WARN_ON(type->cnt != 0 || type->total_size);
		type->regions[0].base = base;
		type->regions[0].size = size;
		type->regions[0].flags = flags;
		memblock_set_region_node(&type->regions[0], nid);
		type->total_size = size;
		type->cnt = 1;
		return 0;
	}

	/*
	 * The worst case is when new range overlaps all existing regions,
	 * then we'll need type->cnt + 1 empty regions in @type. So if
	 * type->cnt * 2 + 1 is less than or equal to type->max, we know
	 * that there are enough empty regions in @type, and we can insert
	 * regions directly.
	 */
	if (type->cnt * 2 + 1 <= type->max)
		insert = true;

repeat:
	/*
	 * The following is executed twice.  Once with %false @insert and
	 * then with %true.  The first counts the number of regions needed
	 * to accommodate the new area.  The second actually inserts them.
	 */
	base = obase;
	nr_new = 0;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps.  If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
#ifdef CONFIG_NUMA
			WARN_ON(nid != memblock_get_region_node(rgn));
#endif
			WARN_ON(flags != MEMBLOCK_NONE && flags != rgn->flags);
			nr_new++;
			if (insert) {
				if (start_rgn == -1)
					start_rgn = idx;
				end_rgn = idx + 1;
				memblock_insert_region(type, idx++, base,
						       rbase - base, nid,
						       flags);
			}
		}
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
	}

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert) {
			if (start_rgn == -1)
				start_rgn = idx;
			end_rgn = idx + 1;
			memblock_insert_region(type, idx, base, end - base,
					       nid, flags);
		}
	}

	if (!nr_new)
		return 0;

	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
	 */
	if (!insert) {
		while (type->cnt + nr_new > type->max)
			if (memblock_double_array(type, obase, size) < 0)
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type, start_rgn, end_rgn);
		return 0;
	}
}

/**
 * memblock_add_node - add new memblock region within a NUMA node
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base, @base + @size) to the "memory"
 * type. See memblock_add_range() description for more details.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
				      int nid, enum memblock_flags flags)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
		     &base, &end, nid, flags, (void *)_RET_IP_);

	return memblock_add_range(&memblock.memory, base, size, nid, flags);
}

/**
 * memblock_add - add new memblock region
 * @base: base address of the new region
 * @size: size of the new region
 *
 * Add new memblock region [@base, @base + @size) to the "memory"
 * type. See memblock_add_range() description for more details.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
}

/**
 * memblock_validate_numa_coverage - check if amount of memory with
 * no node ID assigned is less than a threshold
 * @threshold_bytes: maximal memory size that can have unassigned node
 * ID (in bytes).
 *
 * A buggy firmware may report memory that does not belong to any node.
 * Check if amount of such memory is below @threshold_bytes.
 *
 * Return: true on success, false on failure.
 */
bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_bytes)
{
	unsigned long nr_pages = 0;
	unsigned long start_pfn, end_pfn, mem_size_mb;
	int nid, i;

	/* calculate the number of pages without a valid node ID */
	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
		if (!numa_valid_node(nid))
			nr_pages += end_pfn - start_pfn;
	}

	if ((nr_pages << PAGE_SHIFT) > threshold_bytes) {
		mem_size_mb = memblock_phys_mem_size() >> 20;
		pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n",
		       (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb);
		return false;
	}

	return true;
}


/**
 * memblock_isolate_range - isolate given range into disjoint memblocks
 * @type: memblock type to isolate range for
 * @base: base of range to isolate
 * @size: size of range to isolate
 * @start_rgn: out parameter for the start of isolated region
 * @end_rgn: out parameter for the end of isolated region
 *
 * Walk @type and ensure that regions don't cross the boundaries defined by
 * [@base, @base + @size).  Crossing regions are split at the boundaries,
 * which may create at most two more regions.  The index of the first
 * region inside the range is returned in *@start_rgn and the index of the
 * first region after the range is returned in *@end_rgn.
 *
 * Return:
 * 0 on success, -errno on failure.
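 *
 * For example (an illustrative case, not taken from this file), isolating
 * [0x1000, 0x3000) from a @type holding a single region [0x0, 0x4000)
 * splits it into [0x0, 0x1000), [0x1000, 0x3000) and [0x3000, 0x4000),
 * with *@start_rgn and *@end_rgn set to 1 and 2 so that they select only
 * the middle region.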
 */
static int __init_memblock memblock_isolate_range(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size,
					int *start_rgn, int *end_rgn)
{
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx;
	struct memblock_region *rgn;

	*start_rgn = *end_rgn = 0;

	if (!size)
		return 0;

	/* we'll create at most two more regions */
	while (type->cnt + 2 > type->max)
		if (memblock_double_array(type, base, size) < 0)
			return -ENOMEM;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;

		if (rbase < base) {
			/*
			 * @rgn intersects from below.  Split and continue
			 * to process the next region - the new top half.
			 */
			rgn->base = base;
			rgn->size -= base - rbase;
			type->total_size -= base - rbase;
			memblock_insert_region(type, idx, rbase, base - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else if (rend > end) {
			/*
			 * @rgn intersects from above.  Split and redo the
			 * current region - the new bottom half.
			 */
			rgn->base = end;
			rgn->size -= end - rbase;
			type->total_size -= end - rbase;
			memblock_insert_region(type, idx--, rbase, end - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else {
			/* @rgn is fully contained, record it */
			if (!*end_rgn)
				*start_rgn = idx;
			*end_rgn = idx + 1;
		}
	}

	return 0;
}

static int __init_memblock memblock_remove_range(struct memblock_type *type,
						 phys_addr_t base, phys_addr_t size)
{
	int start_rgn, end_rgn;
	int i, ret;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = end_rgn - 1; i >= start_rgn; i--)
		memblock_remove_region(type, i);
	return 0;
}

int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_remove_range(&memblock.memory, base, size);
}

/**
 * memblock_free - free boot memory allocation
 * @ptr: starting address of the boot memory allocation
 * @size: size of the boot memory block in bytes
 *
 * Free boot memory block previously allocated by memblock_alloc_xx() API.
 * The freed memory will not be released to the buddy allocator.
 */
void __init_memblock memblock_free(void *ptr, size_t size)
{
	if (ptr)
		memblock_phys_free(__pa(ptr), size);
}

/**
 * memblock_phys_free - free boot memory block
 * @base: phys starting address of the boot memory block
 * @size: size of the boot memory block in bytes
 *
 * Free boot memory block previously allocated by memblock_phys_alloc_xx() API.
 * The freed memory will not be released to the buddy allocator.
 */
int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	kmemleak_free_part_phys(base, size);
	return memblock_remove_range(&memblock.reserved, base, size);
}

int __init_memblock __memblock_reserve(phys_addr_t base, phys_addr_t size,
				       int nid, enum memblock_flags flags)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
		     &base, &end, nid, flags, (void *)_RET_IP_);

	return memblock_add_range(&memblock.reserved, base, size, nid, flags);
}

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0);
}
#endif

#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
__init void memblock_set_kho_scratch_only(void)
{
	kho_scratch_only = true;
}

__init void memblock_clear_kho_scratch_only(void)
{
	kho_scratch_only = false;
}

__init void memmap_init_kho_scratch_pages(void)
{
	phys_addr_t start, end;
	unsigned long pfn;
	int nid;
	u64 i;

	if (!IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT))
		return;

	/*
	 * Initialize struct pages for free scratch memory.
	 * The struct pages for reserved scratch memory will be set up in
	 * reserve_bootmem_region()
	 */
	__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
			     MEMBLOCK_KHO_SCRATCH, &start, &end, &nid) {
		for (pfn = PFN_UP(start); pfn < PFN_DOWN(end); pfn++)
			init_deferred_page(pfn, nid);
	}
}
#endif

/**
 * memblock_setclr_flag - set or clear flag for a memory region
 * @type: memblock type to set/clear flag for
 * @base: base address of the region
 * @size: size of the region
 * @set: set or clear the flag
 * @flag: the flag to update
 *
 * This function isolates region [@base, @base + @size), and sets/clears flag
 *
 * Return: 0 on success, -errno on failure.
 */
static int __init_memblock memblock_setclr_flag(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size, int set, int flag)
{
	int i, ret, start_rgn, end_rgn;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++) {
		struct memblock_region *r = &type->regions[i];

		if (set)
			r->flags |= flag;
		else
			r->flags &= ~flag;
	}

	memblock_merge_regions(type, start_rgn, end_rgn);
	return 0;
}

/**
 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_HOTPLUG);
}

/**
 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_HOTPLUG);
}

/**
 * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
{
	if (!mirrored_kernelcore)
		return 0;

	system_has_some_mirror = true;

	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_MIRROR);
}

/**
 * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * The memory regions marked with %MEMBLOCK_NOMAP will not be added to the
 * direct mapping of the physical memory. These regions will still be
 * covered by the memory map. The struct page representing NOMAP memory
 * frames in the memory map will be PageReserved()
 *
 * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from
 * memblock, the caller must inform kmemleak to ignore that memory
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_NOMAP);
}

/**
 * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP);
}

/**
 * memblock_reserved_mark_noinit - Mark a reserved memory region with flag
 * MEMBLOCK_RSRV_NOINIT which results in the struct pages not being initialized
 * for this region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * struct pages will not be initialized for reserved memory regions marked with
 * %MEMBLOCK_RSRV_NOINIT.
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.reserved, base, size, 1,
				    MEMBLOCK_RSRV_NOINIT);
}

/**
 * memblock_mark_kho_scratch - Mark a memory region as MEMBLOCK_KHO_SCRATCH.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Only memory regions marked with %MEMBLOCK_KHO_SCRATCH will be considered
 * for allocations during early boot with kexec handover.
 *
 * Return: 0 on success, -errno on failure.
 */
__init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 1,
				    MEMBLOCK_KHO_SCRATCH);
}

/**
 * memblock_clear_kho_scratch - Clear MEMBLOCK_KHO_SCRATCH flag for a
 * specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
__init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 0,
				    MEMBLOCK_KHO_SCRATCH);
}

static bool should_skip_region(struct memblock_type *type,
			       struct memblock_region *m,
			       int nid, int flags)
{
	int m_nid = memblock_get_region_node(m);

	/* we never skip regions when iterating memblock.reserved or physmem */
	if (type != memblock_memory)
		return false;

	/* only memory regions are associated with nodes, check it */
	if (numa_valid_node(nid) && nid != m_nid)
		return true;

	/* skip hotpluggable memory regions if needed */
	if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
	    !(flags & MEMBLOCK_HOTPLUG))
		return true;

	/* if we want mirror memory skip non-mirror memory regions */
	if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
		return true;

	/* skip nomap memory unless we were asked for it explicitly */
	if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
		return true;

	/* skip driver-managed memory unless we were asked for it explicitly */
	if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m))
		return true;

	/*
	 * In early alloc during kexec handover, we can only consider
	 * MEMBLOCK_KHO_SCRATCH regions for the allocations
	 */
	if ((flags & MEMBLOCK_KHO_SCRATCH) && !memblock_is_kho_scratch(m))
		return true;

	return false;
}

/**
 * __next_mem_range - next function for for_each_free_mem_range() etc.
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Find the first area from *@idx which matches @nid, fill the out
 * parameters, and update *@idx for the next iteration.  The lower 32bit of
 * *@idx contains index into type_a and the upper 32bit indexes the
 * areas before each region in type_b.  For example, if type_b regions
 * look like the following,
 *
 *	0:[0-16), 1:[32-48), 2:[128-130)
 *
 * The upper 32bit indexes the following regions.
 *
 *	0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
 *
 * As both region arrays are sorted, the function advances the two indices
 * in lockstep and returns each intersection.
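 *
 * As an illustration of the encoding (not a case taken from a caller in
 * this file), an *@idx value of ``(2ULL << 32) | 1`` means the walk resumes
 * at @type_a region 1 and at the gap preceding @type_b region 2.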
 */
void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
		      struct memblock_type *type_a,
		      struct memblock_type *type_b, phys_addr_t *out_start,
		      phys_addr_t *out_end, int *out_nid)
{
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;

	for (; idx_a < type_a->cnt; idx_a++) {
		struct memblock_region *m = &type_a->regions[idx_a];

		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
		int m_nid = memblock_get_region_node(m);

		if (should_skip_region(type_a, m, nid, flags))
			continue;

		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a++;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b < type_b->cnt + 1; idx_b++) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
				r->base : PHYS_ADDR_MAX;

			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */
			if (r_start >= m_end)
				break;
			/* if the two regions intersect, we're done */
			if (m_start < r_end) {
				if (out_start)
					*out_start =
						max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
					*out_nid = m_nid;
				/*
				 * The region which ends first is
				 * advanced for the next iteration.
				 */
				if (m_end <= r_end)
					idx_a++;
				else
					idx_b++;
				*idx = (u32)idx_a | (u64)idx_b << 32;
				return;
			}
		}
	}

	/* signal end of iteration */
	*idx = ULLONG_MAX;
}

/**
 * __next_mem_range_rev - generic next function for for_each_*_range_rev()
 *
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Finds the next range from type_a which is not marked as unsuitable
 * in type_b.
 *
 * Reverse of __next_mem_range().
 */
void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
					  enum memblock_flags flags,
					  struct memblock_type *type_a,
					  struct memblock_type *type_b,
					  phys_addr_t *out_start,
					  phys_addr_t *out_end, int *out_nid)
{
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;

	if (*idx == (u64)ULLONG_MAX) {
		idx_a = type_a->cnt - 1;
		if (type_b != NULL)
			idx_b = type_b->cnt;
		else
			idx_b = 0;
	}

	for (; idx_a >= 0; idx_a--) {
		struct memblock_region *m = &type_a->regions[idx_a];

		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
		int m_nid = memblock_get_region_node(m);

		if (should_skip_region(type_a, m, nid, flags))
			continue;

		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a--;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b >= 0; idx_b--) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
				r->base : PHYS_ADDR_MAX;
			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */
			if (r_end <= m_start)
				break;
			/* if the two regions intersect, we're done */
			if (m_end > r_start) {
				if (out_start)
					*out_start = max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
					*out_nid = m_nid;
				if (m_start >= r_start)
					idx_a--;
				else
					idx_b--;
				*idx = (u32)idx_a | (u64)idx_b << 32;
				return;
			}
		}
	}
	/* signal end of iteration */
	*idx = ULLONG_MAX;
}

/*
 * Common iterator interface used to define for_each_mem_pfn_range().
 */
void __init_memblock __next_mem_pfn_range(int *idx, int nid,
					  unsigned long *out_start_pfn,
					  unsigned long *out_end_pfn, int *out_nid)
{
	struct memblock_type *type = &memblock.memory;
	struct memblock_region *r;
	int r_nid;

	while (++*idx < type->cnt) {
		r = &type->regions[*idx];
		r_nid = memblock_get_region_node(r);

		if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
			continue;
		if (!numa_valid_node(nid) || nid == r_nid)
			break;
	}
	if (*idx >= type->cnt) {
		*idx = -1;
		return;
	}

	if (out_start_pfn)
		*out_start_pfn = PFN_UP(r->base);
	if (out_end_pfn)
		*out_end_pfn = PFN_DOWN(r->base + r->size);
	if (out_nid)
		*out_nid = r_nid;
}

/**
 * memblock_set_node - set node ID on memblock regions
 * @base: base of area to set node ID for
 * @size: size of area to set node ID for
 * @type: memblock type to set node ID for
 * @nid: node ID to set
 *
 * Set the nid of memblock @type regions in [@base, @base + @size) to @nid.
 * Regions which cross the area boundaries are split as necessary.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
				      struct memblock_type *type, int nid)
{
#ifdef CONFIG_NUMA
	int start_rgn, end_rgn;
	int i, ret;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++)
		memblock_set_region_node(&type->regions[i], nid);

	memblock_merge_regions(type, start_rgn, end_rgn);
#endif
	return 0;
}

#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/**
 * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
 *
 * @idx: pointer to u64 loop variable
 * @zone: zone in which all of the memory blocks reside
 * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL
 * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL
 *
 * This function is meant to be a zone/pfn specific wrapper for the
 * for_each_mem_range type iterators. Specifically they are used in the
 * deferred memory init routines and as such we were duplicating much of
 * this logic throughout the code. So instead of having it in multiple
 * locations it seemed like it would make more sense to centralize this to
 * one new iterator that does everything they need.
 */
void __init_memblock
__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
			     unsigned long *out_spfn, unsigned long *out_epfn)
{
	int zone_nid = zone_to_nid(zone);
	phys_addr_t spa, epa;

	__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
			 &memblock.memory, &memblock.reserved,
			 &spa, &epa, NULL);

	while (*idx != U64_MAX) {
		unsigned long epfn = PFN_DOWN(epa);
		unsigned long spfn = PFN_UP(spa);

		/*
		 * Verify the end is at least past the start of the zone and
		 * that we have at least one PFN to initialize.
		 */
		if (zone->zone_start_pfn < epfn && spfn < epfn) {
			/* if we went too far just stop searching */
			if (zone_end_pfn(zone) <= spfn) {
				*idx = U64_MAX;
				break;
			}

			if (out_spfn)
				*out_spfn = max(zone->zone_start_pfn, spfn);
			if (out_epfn)
				*out_epfn = min(zone_end_pfn(zone), epfn);

			return;
		}

		__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
				 &memblock.memory, &memblock.reserved,
				 &spa, &epa, NULL);
	}

	/* signal end of iteration */
	if (out_spfn)
		*out_spfn = ULONG_MAX;
	if (out_epfn)
		*out_epfn = 0;
}

#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */

/**
 * memblock_alloc_range_nid - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @start: the lower bound of the memory region to allocate (phys address)
 * @end: the upper bound of the memory region to allocate (phys address)
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @exact_nid: control whether the allocation may fall back to other nodes
 *
 * The allocation is performed from memory region limited by
 * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE.
 *
 * If the specified node can not hold the requested memory and @exact_nid
 * is false, the allocation falls back to any node in the system.
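 *
 * For instance (an illustrative sketch, not a call site from this file),
 * a request such as::
 *
 *	memblock_alloc_range_nid(SZ_64K, SZ_4K, 0,
 *				 MEMBLOCK_ALLOC_ACCESSIBLE, 1, false);
 *
 * first looks for 64K on node 1 and, if that fails, retries the search
 * with %NUMA_NO_NODE before giving up.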
 *
 * For systems with memory mirroring, the allocation is attempted first
 * from the regions with mirroring enabled and then retried from any
 * memory region.
 *
 * In addition, the allocated boot memory block is registered with kmemleak
 * via kmemleak_alloc_phys(), so it is never reported as a leak.
 *
 * Return:
 * Physical address of allocated memory block on success, %0 on failure.
 */
phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid,
					bool exact_nid)
{
	enum memblock_flags flags = choose_memblock_flags();
	phys_addr_t found;

	/*
	 * Detect any accidental use of these APIs after slab is ready, as at
	 * this moment memblock may be deinitialized already and its
	 * internal data may be destroyed (after execution of memblock_free_all)
	 */
	if (WARN_ON_ONCE(slab_is_available())) {
		void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);

		return vaddr ? virt_to_phys(vaddr) : 0;
	}

	if (!align) {
		/* Can't use WARNs this early in boot on powerpc */
		dump_stack();
		align = SMP_CACHE_BYTES;
	}

again:
	found = memblock_find_in_range_node(size, align, start, end, nid,
					    flags);
	if (found && !__memblock_reserve(found, size, nid, MEMBLOCK_RSRV_KERN))
		goto done;

	if (numa_valid_node(nid) && !exact_nid) {
		found = memblock_find_in_range_node(size, align, start,
						    end, NUMA_NO_NODE,
						    flags);
		if (found && !memblock_reserve_kern(found, size))
			goto done;
	}

	if (flags & MEMBLOCK_MIRROR) {
		flags &= ~MEMBLOCK_MIRROR;
		pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
			&size);
		goto again;
	}

	return 0;

done:
	/*
	 * Skip kmemleak for those places like kasan_init() and
	 * early_pgtable_alloc() due to high volume.
	 */
	if (end != MEMBLOCK_ALLOC_NOLEAKTRACE)
		/*
		 * Memblock allocated blocks are never reported as
		 * leaks. This is because many of these blocks are
		 * only referred via the physical address which is
		 * not looked up by kmemleak.
		 */
		kmemleak_alloc_phys(found, size, 0);

	/*
	 * Some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP,
	 * require memory to be accepted before it can be used by the
	 * guest.
	 *
	 * Accept the memory of the allocated buffer.
	 */
	accept_memory(found, size);

	return found;
}

/**
 * memblock_phys_alloc_range - allocate a memory block inside specified range
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @start: the lower bound of the memory region to allocate (physical address)
 * @end: the upper bound of the memory region to allocate (physical address)
 *
 * Allocate @size bytes in the range between @start and @end.
 *
 * Return: physical address of the allocated memory block on success,
 * %0 on failure.
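 *
 * A hypothetical use (illustration only, not a call site from this file)::
 *
 *	paddr = memblock_phys_alloc_range(SZ_1M, SZ_1M, 0, SZ_4G);
 *
 * asks for one naturally aligned megabyte somewhere below 4G.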
 */
phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
					     phys_addr_t align,
					     phys_addr_t start,
					     phys_addr_t end)
{
	memblock_dbg("%s: %llu bytes align=0x%llx from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, &start, &end,
		     (void *)_RET_IP_);
	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
					false);
}

/**
 * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Allocates memory block from the specified NUMA node. If the node
 * has no available memory, attempts to allocate from any node in the
 * system.
 *
 * Return: physical address of the allocated memory block on success,
 * %0 on failure.
 */
phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	return memblock_alloc_range_nid(size, align, 0,
					MEMBLOCK_ALLOC_ACCESSIBLE, nid, false);
}

/**
 * memblock_alloc_internal - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region to allocate (phys address)
 * @max_addr: the upper bound of the memory region to allocate (phys address)
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @exact_nid: control whether the allocation may fall back to other nodes
 *
 * Allocates memory block using memblock_alloc_range_nid() and
 * converts the returned physical address to virtual.
 *
 * The @min_addr limit is dropped if it can not be satisfied and the allocation
 * will fall back to memory below @min_addr. Other constraints, such
 * as node and mirrored memory will be handled again in
 * memblock_alloc_range_nid().
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
static void * __init memblock_alloc_internal(
				phys_addr_t size, phys_addr_t align,
				phys_addr_t min_addr, phys_addr_t max_addr,
				int nid, bool exact_nid)
{
	phys_addr_t alloc;

	if (max_addr > memblock.current_limit)
		max_addr = memblock.current_limit;

	alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
					 exact_nid);

	/* retry allocation without lower limit */
	if (!alloc && min_addr)
		alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
						 exact_nid);

	if (!alloc)
		return NULL;

	return phys_to_virt(alloc);
}

/**
 * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node
 * without zeroing memory
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	      is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public function, provides additional debug information (including caller
 * info), if enabled. Does not zero allocated memory.
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
void * __init memblock_alloc_exact_nid_raw(
			phys_addr_t size, phys_addr_t align,
			phys_addr_t min_addr, phys_addr_t max_addr,
			int nid)
{
	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, nid, &min_addr,
		     &max_addr, (void *)_RET_IP_);

	return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
				       true);
}

/**
 * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
 * memory and without panicking
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	      is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public function, provides additional debug information (including caller
 * info), if enabled. Does not zero allocated memory, does not panic if request
 * cannot be satisfied.
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
1741 */ 1742 void * __init memblock_alloc_try_nid_raw( 1743 phys_addr_t size, phys_addr_t align, 1744 phys_addr_t min_addr, phys_addr_t max_addr, 1745 int nid) 1746 { 1747 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", 1748 __func__, (u64)size, (u64)align, nid, &min_addr, 1749 &max_addr, (void *)_RET_IP_); 1750 1751 return memblock_alloc_internal(size, align, min_addr, max_addr, nid, 1752 false); 1753 } 1754 1755 /** 1756 * memblock_alloc_try_nid - allocate boot memory block 1757 * @size: size of memory block to be allocated in bytes 1758 * @align: alignment of the region and block's size 1759 * @min_addr: the lower bound of the memory region from where the allocation 1760 * is preferred (phys address) 1761 * @max_addr: the upper bound of the memory region from where the allocation 1762 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to 1763 * allocate only from memory limited by memblock.current_limit value 1764 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1765 * 1766 * Public function, provides additional debug information (including caller 1767 * info), if enabled. This function zeroes the allocated memory. 1768 * 1769 * Return: 1770 * Virtual address of allocated memory block on success, NULL on failure. 1771 */ 1772 void * __init memblock_alloc_try_nid( 1773 phys_addr_t size, phys_addr_t align, 1774 phys_addr_t min_addr, phys_addr_t max_addr, 1775 int nid) 1776 { 1777 void *ptr; 1778 1779 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", 1780 __func__, (u64)size, (u64)align, nid, &min_addr, 1781 &max_addr, (void *)_RET_IP_); 1782 ptr = memblock_alloc_internal(size, align, 1783 min_addr, max_addr, nid, false); 1784 if (ptr) 1785 memset(ptr, 0, size); 1786 1787 return ptr; 1788 } 1789 1790 /** 1791 * __memblock_alloc_or_panic - Try to allocate memory and panic on failure 1792 * @size: size of memory block to be allocated in bytes 1793 * @align: alignment of the region and block's size 1794 * @func: caller func name 1795 * 1796 * This function attempts to allocate memory using memblock_alloc, 1797 * and in case of failure, it calls panic with the formatted message. 1798 * This function should not be used directly, please use the macro memblock_alloc_or_panic. 1799 */ 1800 void *__init __memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align, 1801 const char *func) 1802 { 1803 void *addr = memblock_alloc(size, align); 1804 1805 if (unlikely(!addr)) 1806 panic("%s: Failed to allocate %pap bytes\n", func, &size); 1807 return addr; 1808 } 1809 1810 /** 1811 * memblock_free_late - free pages directly to buddy allocator 1812 * @base: phys starting address of the boot memory block 1813 * @size: size of the boot memory block in bytes 1814 * 1815 * This is only useful when the memblock allocator has already been torn 1816 * down, but we are still initializing the system. Pages are released directly 1817 * to the buddy allocator. 
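 *
 * Hypothetical example (the physical address variable and size are invented):
 * hand a no-longer-needed early reservation back to the page allocator late
 * in boot:
 *
 *	memblock_free_late(early_fw_table_phys, SZ_64K);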
1818 */ 1819 void __init memblock_free_late(phys_addr_t base, phys_addr_t size) 1820 { 1821 phys_addr_t cursor, end; 1822 1823 end = base + size - 1; 1824 memblock_dbg("%s: [%pa-%pa] %pS\n", 1825 __func__, &base, &end, (void *)_RET_IP_); 1826 kmemleak_free_part_phys(base, size); 1827 cursor = PFN_UP(base); 1828 end = PFN_DOWN(base + size); 1829 1830 for (; cursor < end; cursor++) { 1831 memblock_free_pages(pfn_to_page(cursor), cursor, 0); 1832 totalram_pages_inc(); 1833 } 1834 } 1835 1836 /* 1837 * Remaining API functions 1838 */ 1839 1840 phys_addr_t __init_memblock memblock_phys_mem_size(void) 1841 { 1842 return memblock.memory.total_size; 1843 } 1844 1845 phys_addr_t __init_memblock memblock_reserved_size(void) 1846 { 1847 return memblock.reserved.total_size; 1848 } 1849 1850 phys_addr_t __init_memblock memblock_reserved_kern_size(phys_addr_t limit, int nid) 1851 { 1852 struct memblock_region *r; 1853 phys_addr_t total = 0; 1854 1855 for_each_reserved_mem_region(r) { 1856 phys_addr_t size = r->size; 1857 1858 if (r->base > limit) 1859 break; 1860 1861 if (r->base + r->size > limit) 1862 size = limit - r->base; 1863 1864 if (nid == memblock_get_region_node(r) || !numa_valid_node(nid)) 1865 if (r->flags & MEMBLOCK_RSRV_KERN) 1866 total += size; 1867 } 1868 1869 return total; 1870 } 1871 1872 /** 1873 * memblock_estimated_nr_free_pages - return estimated number of free pages 1874 * from memblock point of view 1875 * 1876 * During bootup, subsystems might need a rough estimate of the number of free 1877 * pages in the whole system, before precise numbers are available from the 1878 * buddy. Especially with CONFIG_DEFERRED_STRUCT_PAGE_INIT, the numbers 1879 * obtained from the buddy might be very imprecise during bootup. 1880 * 1881 * Return: 1882 * An estimated number of free pages from memblock point of view. 
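 *
 * Sketch of a typical consumer (the scaling factor and variable names are
 * made up for illustration): sizing an early hash table from the estimate:
 *
 *	unsigned long nr_free = memblock_estimated_nr_free_pages();
 *	unsigned long nr_entries = clamp(nr_free / 256, 1024UL, 1UL << 20);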
1883 */ 1884 unsigned long __init memblock_estimated_nr_free_pages(void) 1885 { 1886 return PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size()); 1887 } 1888 1889 /* lowest address */ 1890 phys_addr_t __init_memblock memblock_start_of_DRAM(void) 1891 { 1892 return memblock.memory.regions[0].base; 1893 } 1894 1895 phys_addr_t __init_memblock memblock_end_of_DRAM(void) 1896 { 1897 int idx = memblock.memory.cnt - 1; 1898 1899 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); 1900 } 1901 1902 static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) 1903 { 1904 phys_addr_t max_addr = PHYS_ADDR_MAX; 1905 struct memblock_region *r; 1906 1907 /* 1908 * translate the memory @limit size into the max address within one of 1909 * the memory memblock regions, if the @limit exceeds the total size 1910 * of those regions, max_addr will keep original value PHYS_ADDR_MAX 1911 */ 1912 for_each_mem_region(r) { 1913 if (limit <= r->size) { 1914 max_addr = r->base + limit; 1915 break; 1916 } 1917 limit -= r->size; 1918 } 1919 1920 return max_addr; 1921 } 1922 1923 void __init memblock_enforce_memory_limit(phys_addr_t limit) 1924 { 1925 phys_addr_t max_addr; 1926 1927 if (!limit) 1928 return; 1929 1930 max_addr = __find_max_addr(limit); 1931 1932 /* @limit exceeds the total size of the memory, do nothing */ 1933 if (max_addr == PHYS_ADDR_MAX) 1934 return; 1935 1936 /* truncate both memory and reserved regions */ 1937 memblock_remove_range(&memblock.memory, max_addr, 1938 PHYS_ADDR_MAX); 1939 memblock_remove_range(&memblock.reserved, max_addr, 1940 PHYS_ADDR_MAX); 1941 } 1942 1943 void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) 1944 { 1945 int start_rgn, end_rgn; 1946 int i, ret; 1947 1948 if (!size) 1949 return; 1950 1951 if (!memblock_memory->total_size) { 1952 pr_warn("%s: No memory registered yet\n", __func__); 1953 return; 1954 } 1955 1956 ret = memblock_isolate_range(&memblock.memory, base, size, 1957 &start_rgn, &end_rgn); 1958 if (ret) 1959 return; 1960 1961 /* remove all the MAP regions */ 1962 for (i = memblock.memory.cnt - 1; i >= end_rgn; i--) 1963 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1964 memblock_remove_region(&memblock.memory, i); 1965 1966 for (i = start_rgn - 1; i >= 0; i--) 1967 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1968 memblock_remove_region(&memblock.memory, i); 1969 1970 /* truncate the reserved regions */ 1971 memblock_remove_range(&memblock.reserved, 0, base); 1972 memblock_remove_range(&memblock.reserved, 1973 base + size, PHYS_ADDR_MAX); 1974 } 1975 1976 void __init memblock_mem_limit_remove_map(phys_addr_t limit) 1977 { 1978 phys_addr_t max_addr; 1979 1980 if (!limit) 1981 return; 1982 1983 max_addr = __find_max_addr(limit); 1984 1985 /* @limit exceeds the total size of the memory, do nothing */ 1986 if (max_addr == PHYS_ADDR_MAX) 1987 return; 1988 1989 memblock_cap_memory_range(0, max_addr); 1990 } 1991 1992 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) 1993 { 1994 unsigned int left = 0, right = type->cnt; 1995 1996 do { 1997 unsigned int mid = (right + left) / 2; 1998 1999 if (addr < type->regions[mid].base) 2000 right = mid; 2001 else if (addr >= (type->regions[mid].base + 2002 type->regions[mid].size)) 2003 left = mid + 1; 2004 else 2005 return mid; 2006 } while (left < right); 2007 return -1; 2008 } 2009 2010 bool __init_memblock memblock_is_reserved(phys_addr_t addr) 2011 { 2012 return memblock_search(&memblock.reserved, addr) != 
-1;
2013 }
2014 
2015 bool __init_memblock memblock_is_memory(phys_addr_t addr)
2016 {
2017 	return memblock_search(&memblock.memory, addr) != -1;
2018 }
2019 
2020 bool __init_memblock memblock_is_map_memory(phys_addr_t addr)
2021 {
2022 	int i = memblock_search(&memblock.memory, addr);
2023 
2024 	if (i == -1)
2025 		return false;
2026 	return !memblock_is_nomap(&memblock.memory.regions[i]);
2027 }
2028 
2029 int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
2030 			 unsigned long *start_pfn, unsigned long *end_pfn)
2031 {
2032 	struct memblock_type *type = &memblock.memory;
2033 	int mid = memblock_search(type, PFN_PHYS(pfn));
2034 
2035 	if (mid == -1)
2036 		return NUMA_NO_NODE;
2037 
2038 	*start_pfn = PFN_DOWN(type->regions[mid].base);
2039 	*end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size);
2040 
2041 	return memblock_get_region_node(&type->regions[mid]);
2042 }
2043 
2044 /**
2045  * memblock_is_region_memory - check if a region is a subset of memory
2046  * @base: base of region to check
2047  * @size: size of region to check
2048  *
2049  * Check if the region [@base, @base + @size) is a subset of a memory block.
2050  *
2051  * Return:
2052  * true if the region is a subset of a memory block, false otherwise.
2053  */
2054 bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
2055 {
2056 	int idx = memblock_search(&memblock.memory, base);
2057 	phys_addr_t end = base + memblock_cap_size(base, &size);
2058 
2059 	if (idx == -1)
2060 		return false;
2061 	return (memblock.memory.regions[idx].base +
2062 		memblock.memory.regions[idx].size) >= end;
2063 }
2064 
2065 /**
2066  * memblock_is_region_reserved - check if a region intersects reserved memory
2067  * @base: base of region to check
2068  * @size: size of region to check
2069  *
2070  * Check if the region [@base, @base + @size) intersects a reserved
2071  * memory block.
2072  *
2073  * Return:
2074  * True if they intersect, false if not.
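 *
 * Typical (illustrative) use is sanity-checking a firmware- or
 * device-tree-provided range before handing it to a driver:
 *
 *	if (memblock_is_region_reserved(base, size))
 *		pr_warn("range %pa (+%pa) overlaps reserved memory\n",
 *			&base, &size);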
2075 */ 2076 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) 2077 { 2078 return memblock_overlaps_region(&memblock.reserved, base, size); 2079 } 2080 2081 void __init_memblock memblock_trim_memory(phys_addr_t align) 2082 { 2083 phys_addr_t start, end, orig_start, orig_end; 2084 struct memblock_region *r; 2085 2086 for_each_mem_region(r) { 2087 orig_start = r->base; 2088 orig_end = r->base + r->size; 2089 start = round_up(orig_start, align); 2090 end = round_down(orig_end, align); 2091 2092 if (start == orig_start && end == orig_end) 2093 continue; 2094 2095 if (start < end) { 2096 r->base = start; 2097 r->size = end - start; 2098 } else { 2099 memblock_remove_region(&memblock.memory, 2100 r - memblock.memory.regions); 2101 r--; 2102 } 2103 } 2104 } 2105 2106 void __init_memblock memblock_set_current_limit(phys_addr_t limit) 2107 { 2108 memblock.current_limit = limit; 2109 } 2110 2111 phys_addr_t __init_memblock memblock_get_current_limit(void) 2112 { 2113 return memblock.current_limit; 2114 } 2115 2116 static void __init_memblock memblock_dump(struct memblock_type *type) 2117 { 2118 phys_addr_t base, end, size; 2119 enum memblock_flags flags; 2120 int idx; 2121 struct memblock_region *rgn; 2122 2123 pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); 2124 2125 for_each_memblock_type(idx, type, rgn) { 2126 char nid_buf[32] = ""; 2127 2128 base = rgn->base; 2129 size = rgn->size; 2130 end = base + size - 1; 2131 flags = rgn->flags; 2132 #ifdef CONFIG_NUMA 2133 if (numa_valid_node(memblock_get_region_node(rgn))) 2134 snprintf(nid_buf, sizeof(nid_buf), " on node %d", 2135 memblock_get_region_node(rgn)); 2136 #endif 2137 pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n", 2138 type->name, idx, &base, &end, &size, nid_buf, flags); 2139 } 2140 } 2141 2142 static void __init_memblock __memblock_dump_all(void) 2143 { 2144 pr_info("MEMBLOCK configuration:\n"); 2145 pr_info(" memory size = %pa reserved size = %pa\n", 2146 &memblock.memory.total_size, 2147 &memblock.reserved.total_size); 2148 2149 memblock_dump(&memblock.memory); 2150 memblock_dump(&memblock.reserved); 2151 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 2152 memblock_dump(&physmem); 2153 #endif 2154 } 2155 2156 void __init_memblock memblock_dump_all(void) 2157 { 2158 if (memblock_debug) 2159 __memblock_dump_all(); 2160 } 2161 2162 void __init memblock_allow_resize(void) 2163 { 2164 memblock_can_resize = 1; 2165 } 2166 2167 static int __init early_memblock(char *p) 2168 { 2169 if (p && strstr(p, "debug")) 2170 memblock_debug = 1; 2171 return 0; 2172 } 2173 early_param("memblock", early_memblock); 2174 2175 static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) 2176 { 2177 struct page *start_pg, *end_pg; 2178 phys_addr_t pg, pgend; 2179 2180 /* 2181 * Convert start_pfn/end_pfn to a struct page pointer. 2182 */ 2183 start_pg = pfn_to_page(start_pfn - 1) + 1; 2184 end_pg = pfn_to_page(end_pfn - 1) + 1; 2185 2186 /* 2187 * Convert to physical addresses, and round start upwards and end 2188 * downwards. 2189 */ 2190 pg = PAGE_ALIGN(__pa(start_pg)); 2191 pgend = PAGE_ALIGN_DOWN(__pa(end_pg)); 2192 2193 /* 2194 * If there are free pages between these, free the section of the 2195 * memmap array. 2196 */ 2197 if (pg < pgend) 2198 memblock_phys_free(pg, pgend - pg); 2199 } 2200 2201 /* 2202 * The mem_map array can get very big. Free the unused area of the memory map. 
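 * This only has an effect when the architecture provides its own pfn_valid()
 * (CONFIG_HAVE_ARCH_PFN_VALID) and the memory map is not virtually mapped
 * (i.e. !CONFIG_SPARSEMEM_VMEMMAP); see the early return at the top of
 * free_unused_memmap() below.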
2203 */ 2204 static void __init free_unused_memmap(void) 2205 { 2206 unsigned long start, end, prev_end = 0; 2207 int i; 2208 2209 if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) || 2210 IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) 2211 return; 2212 2213 /* 2214 * This relies on each bank being in address order. 2215 * The banks are sorted previously in bootmem_init(). 2216 */ 2217 for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { 2218 #ifdef CONFIG_SPARSEMEM 2219 /* 2220 * Take care not to free memmap entries that don't exist 2221 * due to SPARSEMEM sections which aren't present. 2222 */ 2223 start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); 2224 #endif 2225 /* 2226 * Align down here since many operations in VM subsystem 2227 * presume that there are no holes in the memory map inside 2228 * a pageblock 2229 */ 2230 start = pageblock_start_pfn(start); 2231 2232 /* 2233 * If we had a previous bank, and there is a space 2234 * between the current bank and the previous, free it. 2235 */ 2236 if (prev_end && prev_end < start) 2237 free_memmap(prev_end, start); 2238 2239 /* 2240 * Align up here since many operations in VM subsystem 2241 * presume that there are no holes in the memory map inside 2242 * a pageblock 2243 */ 2244 prev_end = pageblock_align(end); 2245 } 2246 2247 #ifdef CONFIG_SPARSEMEM 2248 if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) { 2249 prev_end = pageblock_align(end); 2250 free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); 2251 } 2252 #endif 2253 } 2254 2255 static void __init __free_pages_memory(unsigned long start, unsigned long end) 2256 { 2257 int order; 2258 2259 while (start < end) { 2260 /* 2261 * Free the pages in the largest chunks alignment allows. 2262 * 2263 * __ffs() behaviour is undefined for 0. start == 0 is 2264 * MAX_PAGE_ORDER-aligned, set order to MAX_PAGE_ORDER for 2265 * the case. 2266 */ 2267 if (start) 2268 order = min_t(int, MAX_PAGE_ORDER, __ffs(start)); 2269 else 2270 order = MAX_PAGE_ORDER; 2271 2272 while (start + (1UL << order) > end) 2273 order--; 2274 2275 memblock_free_pages(pfn_to_page(start), start, order); 2276 2277 start += (1UL << order); 2278 } 2279 } 2280 2281 static unsigned long __init __free_memory_core(phys_addr_t start, 2282 phys_addr_t end) 2283 { 2284 unsigned long start_pfn = PFN_UP(start); 2285 unsigned long end_pfn = PFN_DOWN(end); 2286 2287 if (!IS_ENABLED(CONFIG_HIGHMEM) && end_pfn > max_low_pfn) 2288 end_pfn = max_low_pfn; 2289 2290 if (start_pfn >= end_pfn) 2291 return 0; 2292 2293 __free_pages_memory(start_pfn, end_pfn); 2294 2295 return end_pfn - start_pfn; 2296 } 2297 2298 static void __init memmap_init_reserved_pages(void) 2299 { 2300 struct memblock_region *region; 2301 phys_addr_t start, end; 2302 int nid; 2303 unsigned long max_reserved; 2304 2305 /* 2306 * set nid on all reserved pages and also treat struct 2307 * pages for the NOMAP regions as PageReserved 2308 */ 2309 repeat: 2310 max_reserved = memblock.reserved.max; 2311 for_each_mem_region(region) { 2312 nid = memblock_get_region_node(region); 2313 start = region->base; 2314 end = start + region->size; 2315 2316 if (memblock_is_nomap(region)) 2317 reserve_bootmem_region(start, end, nid); 2318 2319 memblock_set_node(start, region->size, &memblock.reserved, nid); 2320 } 2321 /* 2322 * 'max' is changed means memblock.reserved has been doubled its 2323 * array, which may result a new reserved region before current 2324 * 'start'. Now we should repeat the procedure to set its node id. 
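	 * (In other words: if the reserved array had to be doubled while we
	 * were walking the memory regions, the memory that now holds the
	 * enlarged array is itself a new reserved region, possibly located
	 * before the ranges already processed, so its node id would never be
	 * set unless the walk is restarted.)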
2325 */ 2326 if (max_reserved != memblock.reserved.max) 2327 goto repeat; 2328 2329 /* 2330 * initialize struct pages for reserved regions that don't have 2331 * the MEMBLOCK_RSRV_NOINIT flag set 2332 */ 2333 for_each_reserved_mem_region(region) { 2334 if (!memblock_is_reserved_noinit(region)) { 2335 nid = memblock_get_region_node(region); 2336 start = region->base; 2337 end = start + region->size; 2338 2339 if (!numa_valid_node(nid)) 2340 nid = early_pfn_to_nid(PFN_DOWN(start)); 2341 2342 reserve_bootmem_region(start, end, nid); 2343 } 2344 } 2345 } 2346 2347 static unsigned long __init free_low_memory_core_early(void) 2348 { 2349 unsigned long count = 0; 2350 phys_addr_t start, end; 2351 u64 i; 2352 2353 memblock_clear_hotplug(0, -1); 2354 2355 memmap_init_reserved_pages(); 2356 2357 /* 2358 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id 2359 * because in some case like Node0 doesn't have RAM installed 2360 * low ram will be on Node1 2361 */ 2362 for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, 2363 NULL) 2364 count += __free_memory_core(start, end); 2365 2366 return count; 2367 } 2368 2369 static int reset_managed_pages_done __initdata; 2370 2371 static void __init reset_node_managed_pages(pg_data_t *pgdat) 2372 { 2373 struct zone *z; 2374 2375 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) 2376 atomic_long_set(&z->managed_pages, 0); 2377 } 2378 2379 void __init reset_all_zones_managed_pages(void) 2380 { 2381 struct pglist_data *pgdat; 2382 2383 if (reset_managed_pages_done) 2384 return; 2385 2386 for_each_online_pgdat(pgdat) 2387 reset_node_managed_pages(pgdat); 2388 2389 reset_managed_pages_done = 1; 2390 } 2391 2392 /** 2393 * memblock_free_all - release free pages to the buddy allocator 2394 */ 2395 void __init memblock_free_all(void) 2396 { 2397 unsigned long pages; 2398 2399 free_unused_memmap(); 2400 reset_all_zones_managed_pages(); 2401 2402 memblock_clear_kho_scratch_only(); 2403 pages = free_low_memory_core_early(); 2404 totalram_pages_add(pages); 2405 } 2406 2407 /* Keep a table to reserve named memory */ 2408 #define RESERVE_MEM_MAX_ENTRIES 8 2409 #define RESERVE_MEM_NAME_SIZE 16 2410 struct reserve_mem_table { 2411 char name[RESERVE_MEM_NAME_SIZE]; 2412 phys_addr_t start; 2413 phys_addr_t size; 2414 }; 2415 static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES]; 2416 static int reserved_mem_count; 2417 static DEFINE_MUTEX(reserve_mem_lock); 2418 2419 /* Add wildcard region with a lookup name */ 2420 static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size, 2421 const char *name) 2422 { 2423 struct reserve_mem_table *map; 2424 2425 map = &reserved_mem_table[reserved_mem_count++]; 2426 map->start = start; 2427 map->size = size; 2428 strscpy(map->name, name); 2429 } 2430 2431 static struct reserve_mem_table *reserve_mem_find_by_name_nolock(const char *name) 2432 { 2433 struct reserve_mem_table *map; 2434 int i; 2435 2436 for (i = 0; i < reserved_mem_count; i++) { 2437 map = &reserved_mem_table[i]; 2438 if (!map->size) 2439 continue; 2440 if (strcmp(name, map->name) == 0) 2441 return map; 2442 } 2443 return NULL; 2444 } 2445 2446 /** 2447 * reserve_mem_find_by_name - Find reserved memory region with a given name 2448 * @name: The name that is attached to a reserved memory region 2449 * @start: If found, holds the start address 2450 * @size: If found, holds the size of the address. 2451 * 2452 * @start and @size are only updated if @name is found. 
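 *
 * Illustrative use (the region name is whatever was given on the
 * reserve_mem= command line, e.g. "trace" from reserve_mem=12M:4K:trace):
 *
 *	phys_addr_t start, size;
 *
 *	if (reserve_mem_find_by_name("trace", &start, &size))
 *		pr_info("reserved region at %pa, %pa bytes\n", &start, &size);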
2453 * 2454 * Returns: 1 if found or 0 if not found. 2455 */ 2456 int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size) 2457 { 2458 struct reserve_mem_table *map; 2459 2460 guard(mutex)(&reserve_mem_lock); 2461 map = reserve_mem_find_by_name_nolock(name); 2462 if (!map) 2463 return 0; 2464 2465 *start = map->start; 2466 *size = map->size; 2467 return 1; 2468 } 2469 EXPORT_SYMBOL_GPL(reserve_mem_find_by_name); 2470 2471 /** 2472 * reserve_mem_release_by_name - Release reserved memory region with a given name 2473 * @name: The name that is attatched to a reserved memory region 2474 * 2475 * Forcibly release the pages in the reserved memory region so that those memory 2476 * can be used as free memory. After released the reserved region size becomes 0. 2477 * 2478 * Returns: 1 if released or 0 if not found. 2479 */ 2480 int reserve_mem_release_by_name(const char *name) 2481 { 2482 char buf[RESERVE_MEM_NAME_SIZE + 12]; 2483 struct reserve_mem_table *map; 2484 void *start, *end; 2485 2486 guard(mutex)(&reserve_mem_lock); 2487 map = reserve_mem_find_by_name_nolock(name); 2488 if (!map) 2489 return 0; 2490 2491 start = phys_to_virt(map->start); 2492 end = start + map->size - 1; 2493 snprintf(buf, sizeof(buf), "reserve_mem:%s", name); 2494 free_reserved_area(start, end, 0, buf); 2495 map->size = 0; 2496 2497 return 1; 2498 } 2499 2500 #ifdef CONFIG_KEXEC_HANDOVER 2501 #define MEMBLOCK_KHO_FDT "memblock" 2502 #define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1" 2503 #define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1" 2504 static struct page *kho_fdt; 2505 2506 static int reserve_mem_kho_finalize(struct kho_serialization *ser) 2507 { 2508 int err = 0, i; 2509 2510 for (i = 0; i < reserved_mem_count; i++) { 2511 struct reserve_mem_table *map = &reserved_mem_table[i]; 2512 2513 err |= kho_preserve_phys(map->start, map->size); 2514 } 2515 2516 err |= kho_preserve_folio(page_folio(kho_fdt)); 2517 err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt)); 2518 2519 return notifier_from_errno(err); 2520 } 2521 2522 static int reserve_mem_kho_notifier(struct notifier_block *self, 2523 unsigned long cmd, void *v) 2524 { 2525 switch (cmd) { 2526 case KEXEC_KHO_FINALIZE: 2527 return reserve_mem_kho_finalize((struct kho_serialization *)v); 2528 case KEXEC_KHO_ABORT: 2529 return NOTIFY_DONE; 2530 default: 2531 return NOTIFY_BAD; 2532 } 2533 } 2534 2535 static struct notifier_block reserve_mem_kho_nb = { 2536 .notifier_call = reserve_mem_kho_notifier, 2537 }; 2538 2539 static int __init prepare_kho_fdt(void) 2540 { 2541 int err = 0, i; 2542 void *fdt; 2543 2544 kho_fdt = alloc_page(GFP_KERNEL); 2545 if (!kho_fdt) 2546 return -ENOMEM; 2547 2548 fdt = page_to_virt(kho_fdt); 2549 2550 err |= fdt_create(fdt, PAGE_SIZE); 2551 err |= fdt_finish_reservemap(fdt); 2552 2553 err |= fdt_begin_node(fdt, ""); 2554 err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE); 2555 for (i = 0; i < reserved_mem_count; i++) { 2556 struct reserve_mem_table *map = &reserved_mem_table[i]; 2557 2558 err |= fdt_begin_node(fdt, map->name); 2559 err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE); 2560 err |= fdt_property(fdt, "start", &map->start, sizeof(map->start)); 2561 err |= fdt_property(fdt, "size", &map->size, sizeof(map->size)); 2562 err |= fdt_end_node(fdt); 2563 } 2564 err |= fdt_end_node(fdt); 2565 2566 err |= fdt_finish(fdt); 2567 2568 if (err) { 2569 pr_err("failed to prepare memblock FDT for KHO: %d\n", err); 2570 put_page(kho_fdt); 
2571 kho_fdt = NULL; 2572 } 2573 2574 return err; 2575 } 2576 2577 static int __init reserve_mem_init(void) 2578 { 2579 int err; 2580 2581 if (!kho_is_enabled() || !reserved_mem_count) 2582 return 0; 2583 2584 err = prepare_kho_fdt(); 2585 if (err) 2586 return err; 2587 2588 err = register_kho_notifier(&reserve_mem_kho_nb); 2589 if (err) { 2590 put_page(kho_fdt); 2591 kho_fdt = NULL; 2592 } 2593 2594 return err; 2595 } 2596 late_initcall(reserve_mem_init); 2597 2598 static void *__init reserve_mem_kho_retrieve_fdt(void) 2599 { 2600 phys_addr_t fdt_phys; 2601 static void *fdt; 2602 int err; 2603 2604 if (fdt) 2605 return fdt; 2606 2607 err = kho_retrieve_subtree(MEMBLOCK_KHO_FDT, &fdt_phys); 2608 if (err) { 2609 if (err != -ENOENT) 2610 pr_warn("failed to retrieve FDT '%s' from KHO: %d\n", 2611 MEMBLOCK_KHO_FDT, err); 2612 return NULL; 2613 } 2614 2615 fdt = phys_to_virt(fdt_phys); 2616 2617 err = fdt_node_check_compatible(fdt, 0, MEMBLOCK_KHO_NODE_COMPATIBLE); 2618 if (err) { 2619 pr_warn("FDT '%s' is incompatible with '%s': %d\n", 2620 MEMBLOCK_KHO_FDT, MEMBLOCK_KHO_NODE_COMPATIBLE, err); 2621 fdt = NULL; 2622 } 2623 2624 return fdt; 2625 } 2626 2627 static bool __init reserve_mem_kho_revive(const char *name, phys_addr_t size, 2628 phys_addr_t align) 2629 { 2630 int err, len_start, len_size, offset; 2631 const phys_addr_t *p_start, *p_size; 2632 const void *fdt; 2633 2634 fdt = reserve_mem_kho_retrieve_fdt(); 2635 if (!fdt) 2636 return false; 2637 2638 offset = fdt_subnode_offset(fdt, 0, name); 2639 if (offset < 0) { 2640 pr_warn("FDT '%s' has no child '%s': %d\n", 2641 MEMBLOCK_KHO_FDT, name, offset); 2642 return false; 2643 } 2644 err = fdt_node_check_compatible(fdt, offset, RESERVE_MEM_KHO_NODE_COMPATIBLE); 2645 if (err) { 2646 pr_warn("Node '%s' is incompatible with '%s': %d\n", 2647 name, RESERVE_MEM_KHO_NODE_COMPATIBLE, err); 2648 return false; 2649 } 2650 2651 p_start = fdt_getprop(fdt, offset, "start", &len_start); 2652 p_size = fdt_getprop(fdt, offset, "size", &len_size); 2653 if (!p_start || len_start != sizeof(*p_start) || !p_size || 2654 len_size != sizeof(*p_size)) { 2655 return false; 2656 } 2657 2658 if (*p_start & (align - 1)) { 2659 pr_warn("KHO reserve-mem '%s' has wrong alignment (0x%lx, 0x%lx)\n", 2660 name, (long)align, (long)*p_start); 2661 return false; 2662 } 2663 2664 if (*p_size != size) { 2665 pr_warn("KHO reserve-mem '%s' has wrong size (0x%lx != 0x%lx)\n", 2666 name, (long)*p_size, (long)size); 2667 return false; 2668 } 2669 2670 reserved_mem_add(*p_start, size, name); 2671 pr_info("Revived memory reservation '%s' from KHO\n", name); 2672 2673 return true; 2674 } 2675 #else 2676 static bool __init reserve_mem_kho_revive(const char *name, phys_addr_t size, 2677 phys_addr_t align) 2678 { 2679 return false; 2680 } 2681 #endif /* CONFIG_KEXEC_HANDOVER */ 2682 2683 /* 2684 * Parse reserve_mem=nn:align:name 2685 */ 2686 static int __init reserve_mem(char *p) 2687 { 2688 phys_addr_t start, size, align, tmp; 2689 char *name; 2690 char *oldp; 2691 int len; 2692 2693 if (!p) 2694 return -EINVAL; 2695 2696 /* Check if there's room for more reserved memory */ 2697 if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES) 2698 return -EBUSY; 2699 2700 oldp = p; 2701 size = memparse(p, &p); 2702 if (!size || p == oldp) 2703 return -EINVAL; 2704 2705 if (*p != ':') 2706 return -EINVAL; 2707 2708 align = memparse(p+1, &p); 2709 if (*p != ':') 2710 return -EINVAL; 2711 2712 /* 2713 * memblock_phys_alloc() doesn't like a zero size align, 2714 * but it is OK for this command to have 
it.
2715 	 */
2716 	if (align < SMP_CACHE_BYTES)
2717 		align = SMP_CACHE_BYTES;
2718 
2719 	name = p + 1;
2720 	len = strlen(name);
2721 
2722 	/* name needs to have length but not too big */
2723 	if (!len || len >= RESERVE_MEM_NAME_SIZE)
2724 		return -EINVAL;
2725 
2726 	/* Make sure that name has text */
2727 	for (p = name; *p; p++) {
2728 		if (!isspace(*p))
2729 			break;
2730 	}
2731 	if (!*p)
2732 		return -EINVAL;
2733 
2734 	/* Make sure the name is not already used */
2735 	if (reserve_mem_find_by_name(name, &start, &tmp))
2736 		return -EBUSY;
2737 
2738 	/* Pick previous allocations up from KHO if available */
2739 	if (reserve_mem_kho_revive(name, size, align))
2740 		return 1;
2741 
2742 	/* TODO: Allocation must be outside of scratch region */
2743 	start = memblock_phys_alloc(size, align);
2744 	if (!start)
2745 		return -ENOMEM;
2746 
2747 	reserved_mem_add(start, size, name);
2748 
2749 	return 1;
2750 }
2751 __setup("reserve_mem=", reserve_mem);
2752 
2753 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
2754 static const char * const flagname[] = {
2755 	[ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
2756 	[ilog2(MEMBLOCK_MIRROR)] = "MIRROR",
2757 	[ilog2(MEMBLOCK_NOMAP)] = "NOMAP",
2758 	[ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG",
2759 	[ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT",
2760 	[ilog2(MEMBLOCK_RSRV_KERN)] = "RSV_KERN",
2761 	[ilog2(MEMBLOCK_KHO_SCRATCH)] = "KHO_SCRATCH",
2762 };
2763 
2764 static int memblock_debug_show(struct seq_file *m, void *private)
2765 {
2766 	struct memblock_type *type = m->private;
2767 	struct memblock_region *reg;
2768 	int i, j, nid;
2769 	unsigned int count = ARRAY_SIZE(flagname);
2770 	phys_addr_t end;
2771 
2772 	for (i = 0; i < type->cnt; i++) {
2773 		reg = &type->regions[i];
2774 		end = reg->base + reg->size - 1;
2775 		nid = memblock_get_region_node(reg);
2776 
2777 		seq_printf(m, "%4d: ", i);
2778 		seq_printf(m, "%pa..%pa ", &reg->base, &end);
2779 		if (numa_valid_node(nid))
2780 			seq_printf(m, "%4d ", nid);
2781 		else
2782 			seq_printf(m, "%4c ", 'x');
2783 		if (reg->flags) {
2784 			for (j = 0; j < count; j++) {
2785 				if (reg->flags & (1U << j)) {
2786 					seq_printf(m, "%s\n", flagname[j]);
2787 					break;
2788 				}
2789 			}
2790 			if (j == count)
2791 				seq_printf(m, "%s\n", "UNKNOWN");
2792 		} else {
2793 			seq_printf(m, "%s\n", "NONE");
2794 		}
2795 	}
2796 	return 0;
2797 }
2798 DEFINE_SHOW_ATTRIBUTE(memblock_debug);
2799 
2800 static int __init memblock_init_debugfs(void)
2801 {
2802 	struct dentry *root = debugfs_create_dir("memblock", NULL);
2803 
2804 	debugfs_create_file("memory", 0444, root,
2805 			    &memblock.memory, &memblock_debug_fops);
2806 	debugfs_create_file("reserved", 0444, root,
2807 			    &memblock.reserved, &memblock_debug_fops);
2808 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
2809 	debugfs_create_file("physmem", 0444, root, &physmem,
2810 			    &memblock_debug_fops);
2811 #endif
2812 
2813 	return 0;
2814 }
2815 __initcall(memblock_init_debugfs);
2816 
2817 #endif /* CONFIG_DEBUG_FS */
2818 
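/*
 * Quick usage note: passing "memblock=debug" on the kernel command line
 * enables the memblock_dbg() tracing above, and when both CONFIG_DEBUG_FS and
 * CONFIG_ARCH_KEEP_MEMBLOCK are enabled the final region lists can be
 * inspected at runtime via debugfs (typically mounted at /sys/kernel/debug):
 *
 *	# cat /sys/kernel/debug/memblock/memory
 *	# cat /sys/kernel/debug/memblock/reserved
 */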