// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/poison.h>
#include <linux/pfn.h>
#include <linux/debugfs.h>
#include <linux/kmemleak.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>
#include <linux/mutex.h>

#ifdef CONFIG_KEXEC_HANDOVER
#include <linux/libfdt.h>
#include <linux/kexec_handover.h>
#endif /* CONFIG_KEXEC_HANDOVER */

#include <asm/sections.h>
#include <linux/io.h>

#include "internal.h"

#define INIT_MEMBLOCK_REGIONS			128
#define INIT_PHYSMEM_REGIONS			4

#ifndef INIT_MEMBLOCK_RESERVED_REGIONS
# define INIT_MEMBLOCK_RESERVED_REGIONS		INIT_MEMBLOCK_REGIONS
#endif

#ifndef INIT_MEMBLOCK_MEMORY_REGIONS
#define INIT_MEMBLOCK_MEMORY_REGIONS		INIT_MEMBLOCK_REGIONS
#endif

/**
 * DOC: memblock overview
 *
 * Memblock is a method of managing memory regions during the early
 * boot period when the usual kernel memory allocators are not up and
 * running.
 *
 * Memblock views the system memory as collections of contiguous
 * regions. There are several types of these collections:
 *
 * * ``memory`` - describes the physical memory available to the
 *   kernel; this may differ from the actual physical memory installed
 *   in the system, for instance when the memory is restricted with
 *   the ``mem=`` command line parameter
 * * ``reserved`` - describes the regions that were allocated
 * * ``physmem`` - describes the actual physical memory available during
 *   boot regardless of the possible restrictions and memory hot(un)plug;
 *   the ``physmem`` type is only available on some architectures.
 *
 * Each region is represented by struct memblock_region that
 * defines the region extents, its attributes and NUMA node id on NUMA
 * systems. Every memory type is described by the struct memblock_type
 * which contains an array of memory regions along with
 * the allocator metadata. The "memory" and "reserved" types are nicely
 * wrapped with struct memblock. This structure is statically
 * initialized at build time. The region arrays are initially sized to
 * %INIT_MEMBLOCK_MEMORY_REGIONS for "memory" and
 * %INIT_MEMBLOCK_RESERVED_REGIONS for "reserved". The region array
 * for "physmem" is initially sized to %INIT_PHYSMEM_REGIONS.
 * memblock_allow_resize() enables automatic resizing of the region
 * arrays during addition of new regions. This feature should be used
 * with care so that memory allocated for the region array will not
 * overlap with areas that should be reserved, for example initrd.
 *
 * The early architecture setup should tell memblock what the physical
 * memory layout is by using memblock_add() or memblock_add_node()
 * functions. The first function does not assign the region to a NUMA
 * node and it is appropriate for UMA systems. Yet, it is possible to
 * use it on NUMA systems as well and assign the region to a NUMA node
 * later in the setup process using memblock_set_node().
 * memblock_add_node() performs such an assignment directly.
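 *
 * For example, a minimal early setup path (illustrative sketch only; the
 * actual call sites, ranges and node ids are architecture specific) might
 * register memory and assign it to a node roughly like this::
 *
 *	memblock_add(base, size);
 *	memblock_set_node(base, size, &memblock.memory, nid);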
 *
 * Once memblock is set up the memory can be allocated using one of the
 * API variants:
 *
 * * memblock_phys_alloc*() - these functions return the **physical**
 *   address of the allocated memory
 * * memblock_alloc*() - these functions return the **virtual** address
 *   of the allocated memory.
 *
 * Note that both API variants use implicit assumptions about allowed
 * memory ranges and the fallback methods. Consult the documentation
 * of memblock_alloc_internal() and memblock_alloc_range_nid()
 * functions for a more elaborate description.
 *
 * As the system boot progresses, the architecture specific mem_init()
 * function frees all the memory to the buddy page allocator.
 *
 * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the
 * memblock data structures (except "physmem") will be discarded after the
 * system initialization completes.
 */

#ifndef CONFIG_NUMA
struct pglist_data __refdata contig_page_data;
EXPORT_SYMBOL(contig_page_data);
#endif

unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
unsigned long long max_possible_pfn;

#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
/* When set to true, only allocate from MEMBLOCK_KHO_SCRATCH ranges */
static bool kho_scratch_only;
#else
#define kho_scratch_only false
#endif

static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
#endif

struct memblock memblock __initdata_memblock = {
	.memory.regions		= memblock_memory_init_regions,
	.memory.max		= INIT_MEMBLOCK_MEMORY_REGIONS,
	.memory.name		= "memory",

	.reserved.regions	= memblock_reserved_init_regions,
	.reserved.max		= INIT_MEMBLOCK_RESERVED_REGIONS,
	.reserved.name		= "reserved",

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem = {
	.regions		= memblock_physmem_init_regions,
	.max			= INIT_PHYSMEM_REGIONS,
	.name			= "physmem",
};
#endif

/*
 * keep a pointer to &memblock.memory in the text section to use it in
 * __next_mem_range() and its helpers.
 * For architectures that do not keep memblock data after init, this
 * pointer will be reset to NULL at memblock_discard()
 */
static __refdata struct memblock_type *memblock_memory = &memblock.memory;

#define for_each_memblock_type(i, memblock_type, rgn)			\
	for (i = 0, rgn = &memblock_type->regions[0];			\
	     i < memblock_type->cnt;					\
	     i++, rgn = &memblock_type->regions[i])

#define memblock_dbg(fmt, ...)						\
	do {								\
		if (memblock_debug)					\
			pr_info(fmt, ##__VA_ARGS__);			\
	} while (0)

static int memblock_debug __initdata_memblock;
static bool system_has_some_mirror __initdata_memblock;
static int memblock_can_resize __initdata_memblock;
static int memblock_memory_in_slab __initdata_memblock;
static int memblock_reserved_in_slab __initdata_memblock;

bool __init_memblock memblock_has_mirror(void)
{
	return system_has_some_mirror;
}

static enum memblock_flags __init_memblock choose_memblock_flags(void)
{
	/* skip non-scratch memory for kho early boot allocations */
	if (kho_scratch_only)
		return MEMBLOCK_KHO_SCRATCH;

	return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
}

/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
{
	return *size = min(*size, PHYS_ADDR_MAX - base);
}

/*
 * Address comparison utilities
 */
unsigned long __init_memblock
memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2,
		       phys_addr_t size2)
{
	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
					      phys_addr_t base, phys_addr_t size)
{
	unsigned long i;

	memblock_cap_size(base, &size);

	for (i = 0; i < type->cnt; i++)
		if (memblock_addrs_overlap(base, size, type->regions[i].base,
					   type->regions[i].size))
			return true;
	return false;
}

/**
 * __memblock_find_range_bottom_up - find free area utility in bottom-up
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Utility called from memblock_find_in_range_node(), find free area bottom-up.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
				phys_addr_t size, phys_addr_t align, int nid,
				enum memblock_flags flags)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) {
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		cand = round_up(this_start, align);
		if (cand < this_end && this_end - cand >= size)
			return cand;
	}

	return 0;
}

/**
 * __memblock_find_range_top_down - find free area utility, in top-down
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Utility called from memblock_find_in_range_node(), find free area top-down.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
			       phys_addr_t size, phys_addr_t align, int nid,
			       enum memblock_flags flags)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end,
					NULL) {
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		if (this_end < size)
			continue;

		cand = round_down(this_end - size, align);
		if (cand >= this_start)
			return cand;
	}

	return 0;
}

/**
 * memblock_find_in_range_node - find free area in given range and node
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Find @size free area aligned to @align in the specified range and node.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid,
					enum memblock_flags flags)
{
	/* pump up @end */
	if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
	    end == MEMBLOCK_ALLOC_NOLEAKTRACE)
		end = memblock.current_limit;

	/* avoid allocating the first page */
	start = max_t(phys_addr_t, start, PAGE_SIZE);
	end = max(start, end);

	if (memblock_bottom_up())
		return __memblock_find_range_bottom_up(start, end, size, align,
						       nid, flags);
	else
		return __memblock_find_range_top_down(start, end, size, align,
						      nid, flags);
}

/**
 * memblock_find_in_range - find free area in given range
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
 *       %MEMBLOCK_ALLOC_ACCESSIBLE
 * @size: size of free area to find
 * @align: alignment of free area to find
 *
 * Find @size free area aligned to @align in the specified range.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
					phys_addr_t end, phys_addr_t size,
					phys_addr_t align)
{
	phys_addr_t ret;
	enum memblock_flags flags = choose_memblock_flags();

again:
	ret = memblock_find_in_range_node(size, align, start, end,
					  NUMA_NO_NODE, flags);

	if (!ret && (flags & MEMBLOCK_MIRROR)) {
		pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
				    &size);
		flags &= ~MEMBLOCK_MIRROR;
		goto again;
	}

	return ret;
}

static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
{
	type->total_size -= type->regions[r].size;
	memmove(&type->regions[r], &type->regions[r + 1],
		(type->cnt - (r + 1)) * sizeof(type->regions[r]));
	type->cnt--;

	/* Special case for empty arrays */
	if (type->cnt == 0) {
		WARN_ON(type->total_size != 0);
		type->regions[0].base = 0;
		type->regions[0].size = 0;
		type->regions[0].flags = 0;
		memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
	}
}

#ifndef CONFIG_ARCH_KEEP_MEMBLOCK
/**
 * memblock_discard - discard memory and reserved arrays if they were allocated
 */
void __init memblock_discard(void)
{
	phys_addr_t addr, size;

	if (memblock.reserved.regions != memblock_reserved_init_regions) {
		addr = __pa(memblock.reserved.regions);
		size = PAGE_ALIGN(sizeof(struct memblock_region) *
				  memblock.reserved.max);
		if (memblock_reserved_in_slab)
			kfree(memblock.reserved.regions);
		else
			memblock_free_late(addr, size);
	}

	if (memblock.memory.regions != memblock_memory_init_regions) {
		addr = __pa(memblock.memory.regions);
		size = PAGE_ALIGN(sizeof(struct memblock_region) *
				  memblock.memory.max);
		if (memblock_memory_in_slab)
			kfree(memblock.memory.regions);
		else
			memblock_free_late(addr, size);
	}

	memblock_memory = NULL;
}
#endif

/**
 * memblock_double_array - double the size of the memblock regions array
 * @type: memblock type of the regions array being doubled
 * @new_area_start: starting address of memory range to avoid overlap with
 * @new_area_size: size of memory range to avoid overlap with
 *
 * Double the size of the @type regions array. If memblock is being used to
 * allocate memory for a new reserved regions array and there is a previously
 * allocated memory range [@new_area_start, @new_area_start + @new_area_size]
 * waiting to be reserved, ensure the memory used by the new array does
 * not overlap.
 *
 * Return:
 * 0 on success, -1 on failure.
 */
static int __init_memblock memblock_double_array(struct memblock_type *type,
						 phys_addr_t new_area_start,
						 phys_addr_t new_area_size)
{
	struct memblock_region *new_array, *old_array;
	phys_addr_t old_alloc_size, new_alloc_size;
	phys_addr_t old_size, new_size, addr, new_end;
	int use_slab = slab_is_available();
	int *in_slab;

	/*
	 * We don't allow resizing until we know about the reserved regions
	 * of memory that aren't suitable for allocation
	 */
	if (!memblock_can_resize)
		panic("memblock: cannot resize %s array\n", type->name);

	/* Calculate new doubled size */
	old_size = type->max * sizeof(struct memblock_region);
	new_size = old_size << 1;
	/*
	 * We need to allocate the new array aligned to PAGE_SIZE,
	 * so that we can free it completely later.
	 */
	old_alloc_size = PAGE_ALIGN(old_size);
	new_alloc_size = PAGE_ALIGN(new_size);

	/* Retrieve the slab flag */
	if (type == &memblock.memory)
		in_slab = &memblock_memory_in_slab;
	else
		in_slab = &memblock_reserved_in_slab;

	/* Try to find some space for it */
	if (use_slab) {
		new_array = kmalloc(new_size, GFP_KERNEL);
		addr = new_array ? __pa(new_array) : 0;
	} else {
		/* only exclude range when trying to double reserved.regions */
		if (type != &memblock.reserved)
			new_area_start = new_area_size = 0;

		addr = memblock_find_in_range(new_area_start + new_area_size,
						memblock.current_limit,
						new_alloc_size, PAGE_SIZE);
		if (!addr && new_area_size)
			addr = memblock_find_in_range(0,
				min(new_area_start, memblock.current_limit),
				new_alloc_size, PAGE_SIZE);

		if (addr) {
			/* The memory may not have been accepted, yet. */
			accept_memory(addr, new_alloc_size);

			new_array = __va(addr);
		} else {
			new_array = NULL;
		}
	}
	if (!addr) {
		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
		       type->name, type->max, type->max * 2);
		return -1;
	}

	new_end = addr + new_size - 1;
	memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]",
			type->name, type->max * 2, &addr, &new_end);

	/*
	 * Found space, we now need to move the array over before we add the
	 * reserved region since it may be our reserved array itself that is
	 * full.
	 */
	memcpy(new_array, type->regions, old_size);
	memset(new_array + type->max, 0, old_size);
	old_array = type->regions;
	type->regions = new_array;
	type->max <<= 1;

	/* Free old array. We needn't free it if the array is the static one */
	if (*in_slab)
		kfree(old_array);
	else if (old_array != memblock_memory_init_regions &&
		 old_array != memblock_reserved_init_regions)
		memblock_free(old_array, old_alloc_size);

	/*
	 * Reserve the new array if that comes from the memblock. Otherwise, we
	 * needn't do it.
	 */
	if (!use_slab)
		BUG_ON(memblock_reserve_kern(addr, new_alloc_size));

	/* Update slab flag */
	*in_slab = use_slab;

	return 0;
}

/**
 * memblock_merge_regions - merge neighboring compatible regions
 * @type: memblock type to scan
 * @start_rgn: start scanning from (@start_rgn - 1)
 * @end_rgn: end scanning at (@end_rgn - 1)
 *
 * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn)
 */
static void __init_memblock memblock_merge_regions(struct memblock_type *type,
						   unsigned long start_rgn,
						   unsigned long end_rgn)
{
	int i = 0;

	if (start_rgn)
		i = start_rgn - 1;
	end_rgn = min(end_rgn, type->cnt - 1);
	while (i < end_rgn) {
		struct memblock_region *this = &type->regions[i];
		struct memblock_region *next = &type->regions[i + 1];

		if (this->base + this->size != next->base ||
		    memblock_get_region_node(this) !=
		    memblock_get_region_node(next) ||
		    this->flags != next->flags) {
			BUG_ON(this->base + this->size > next->base);
			i++;
			continue;
		}

		this->size += next->size;
		/* move forward from next + 1, index of which is i + 2 */
		memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next));
		type->cnt--;
		end_rgn--;
	}
}

/**
 * memblock_insert_region - insert new memblock region
 * @type: memblock type to insert into
 * @idx: index for the insertion point
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: node id of the new region
 * @flags: flags of the new region
 *
 * Insert new memblock region [@base, @base + @size) into @type at @idx.
 * @type must already have extra room to accommodate the new region.
 */
static void __init_memblock memblock_insert_region(struct memblock_type *type,
						   int idx, phys_addr_t base,
						   phys_addr_t size,
						   int nid,
						   enum memblock_flags flags)
{
	struct memblock_region *rgn = &type->regions[idx];

	BUG_ON(type->cnt >= type->max);
	memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
	rgn->base = base;
	rgn->size = size;
	rgn->flags = flags;
	memblock_set_region_node(rgn, nid);
	type->cnt++;
	type->total_size += size;
}

/**
 * memblock_add_range - add new memblock region
 * @type: memblock type to add new region into
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base, @base + @size) into @type.  The new region
 * is allowed to overlap with existing ones - overlaps don't affect already
 * existing regions.  @type is guaranteed to be minimal (all neighbouring
 * compatible regions are merged) after the addition.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_add_range(struct memblock_type *type,
					      phys_addr_t base, phys_addr_t size,
					      int nid, enum memblock_flags flags)
{
	bool insert = false;
	phys_addr_t obase = base;
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx, nr_new, start_rgn = -1, end_rgn;
	struct memblock_region *rgn;

	if (!size)
		return 0;

	/* special case for empty array */
	if (type->regions[0].size == 0) {
		WARN_ON(type->cnt != 0 || type->total_size);
		type->regions[0].base = base;
		type->regions[0].size = size;
		type->regions[0].flags = flags;
		memblock_set_region_node(&type->regions[0], nid);
		type->total_size = size;
		type->cnt = 1;
		return 0;
	}

	/*
	 * The worst case is when the new range overlaps all existing regions,
	 * then we'll need type->cnt + 1 empty regions in @type. So if
	 * type->cnt * 2 + 1 is less than or equal to type->max, we know
	 * that there are enough empty regions in @type, and we can insert
	 * regions directly.
	 */
	if (type->cnt * 2 + 1 <= type->max)
		insert = true;

repeat:
	/*
	 * The following is executed twice.  Once with %false @insert and
	 * then with %true.  The first counts the number of regions needed
	 * to accommodate the new area.  The second actually inserts them.
	 */
	base = obase;
	nr_new = 0;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps.  If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
#ifdef CONFIG_NUMA
			WARN_ON(nid != memblock_get_region_node(rgn));
#endif
			WARN_ON(flags != MEMBLOCK_NONE && flags != rgn->flags);
			nr_new++;
			if (insert) {
				if (start_rgn == -1)
					start_rgn = idx;
				end_rgn = idx + 1;
				memblock_insert_region(type, idx++, base,
						       rbase - base, nid,
						       flags);
			}
		}
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
	}

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert) {
			if (start_rgn == -1)
				start_rgn = idx;
			end_rgn = idx + 1;
			memblock_insert_region(type, idx, base, end - base,
					       nid, flags);
		}
	}

	if (!nr_new)
		return 0;

	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
	 */
	if (!insert) {
		while (type->cnt + nr_new > type->max)
			if (memblock_double_array(type, obase, size) < 0)
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type, start_rgn, end_rgn);
		return 0;
	}
}

/**
 * memblock_add_node - add new memblock region within a NUMA node
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base, @base + @size) to the "memory"
 * type. See memblock_add_range() description for more details.
 *
 * Return:
 * 0 on success, -errno on failure.
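 *
 * For instance (illustrative sketch only; the range and node id are
 * hypothetical), registering a range known to belong to node 1 could look
 * like::
 *
 *	memblock_add_node(base, size, 1, MEMBLOCK_NONE);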
 */
int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
				      int nid, enum memblock_flags flags)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
		     &base, &end, nid, flags, (void *)_RET_IP_);

	return memblock_add_range(&memblock.memory, base, size, nid, flags);
}

/**
 * memblock_add - add new memblock region
 * @base: base address of the new region
 * @size: size of the new region
 *
 * Add new memblock region [@base, @base + @size) to the "memory"
 * type. See memblock_add_range() description for more details.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
}

/**
 * memblock_validate_numa_coverage - check if amount of memory with
 * no node ID assigned is less than a threshold
 * @threshold_bytes: maximal memory size that can have unassigned node
 * ID (in bytes).
 *
 * A buggy firmware may report memory that does not belong to any node.
 * Check if the amount of such memory is below @threshold_bytes.
 *
 * Return: true on success, false on failure.
 */
bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_bytes)
{
	unsigned long nr_pages = 0;
	unsigned long start_pfn, end_pfn, mem_size_mb;
	int nid, i;

	/* calculate the amount of memory with no node ID assigned */
	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
		if (!numa_valid_node(nid))
			nr_pages += end_pfn - start_pfn;
	}

	if ((nr_pages << PAGE_SHIFT) > threshold_bytes) {
		mem_size_mb = memblock_phys_mem_size() / SZ_1M;
		pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n",
		       (nr_pages << PAGE_SHIFT) / SZ_1M, mem_size_mb);
		return false;
	}

	return true;
}

/**
 * memblock_isolate_range - isolate given range into disjoint memblocks
 * @type: memblock type to isolate range for
 * @base: base of range to isolate
 * @size: size of range to isolate
 * @start_rgn: out parameter for the start of isolated region
 * @end_rgn: out parameter for the end of isolated region
 *
 * Walk @type and ensure that regions don't cross the boundaries defined by
 * [@base, @base + @size).  Crossing regions are split at the boundaries,
 * which may create at most two more regions.  The index of the first
 * region inside the range is returned in *@start_rgn and the index of the
 * first region after the range is returned in *@end_rgn.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_isolate_range(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size,
					int *start_rgn, int *end_rgn)
{
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx;
	struct memblock_region *rgn;

	*start_rgn = *end_rgn = 0;

	if (!size)
		return 0;

	/* we'll create at most two more regions */
	while (type->cnt + 2 > type->max)
		if (memblock_double_array(type, base, size) < 0)
			return -ENOMEM;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;

		if (rbase < base) {
			/*
			 * @rgn intersects from below.  Split and continue
			 * to process the next region - the new top half.
			 */
			rgn->base = base;
			rgn->size -= base - rbase;
			type->total_size -= base - rbase;
			memblock_insert_region(type, idx, rbase, base - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else if (rend > end) {
			/*
			 * @rgn intersects from above.  Split and redo the
			 * current region - the new bottom half.
			 */
			rgn->base = end;
			rgn->size -= end - rbase;
			type->total_size -= end - rbase;
			memblock_insert_region(type, idx--, rbase, end - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else {
			/* @rgn is fully contained, record it */
			if (!*end_rgn)
				*start_rgn = idx;
			*end_rgn = idx + 1;
		}
	}

	return 0;
}

static int __init_memblock memblock_remove_range(struct memblock_type *type,
						 phys_addr_t base, phys_addr_t size)
{
	int start_rgn, end_rgn;
	int i, ret;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = end_rgn - 1; i >= start_rgn; i--)
		memblock_remove_region(type, i);
	return 0;
}

int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_remove_range(&memblock.memory, base, size);
}

/**
 * memblock_free - free boot memory allocation
 * @ptr: starting address of the boot memory allocation
 * @size: size of the boot memory block in bytes
 *
 * Free boot memory block previously allocated by memblock_alloc_xx() API.
 * The freed memory will not be released to the buddy allocator.
 */
void __init_memblock memblock_free(void *ptr, size_t size)
{
	if (ptr)
		memblock_phys_free(__pa(ptr), size);
}

/**
 * memblock_phys_free - free boot memory block
 * @base: phys starting address of the boot memory block
 * @size: size of the boot memory block in bytes
 *
 * Free boot memory block previously allocated by memblock_phys_alloc_xx() API.
 * The freed memory will not be released to the buddy allocator.
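 *
 * A typical (illustrative) pairing, assuming the memblock_phys_alloc()
 * helper from memblock.h and a hypothetical @size, would be::
 *
 *	phys_addr_t pa = memblock_phys_alloc(size, SMP_CACHE_BYTES);
 *
 *	if (pa)
 *		memblock_phys_free(pa, size);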
 */
int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	kmemleak_free_part_phys(base, size);
	return memblock_remove_range(&memblock.reserved, base, size);
}

int __init_memblock __memblock_reserve(phys_addr_t base, phys_addr_t size,
				       int nid, enum memblock_flags flags)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
		     &base, &end, nid, flags, (void *)_RET_IP_);

	return memblock_add_range(&memblock.reserved, base, size, nid, flags);
}

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size - 1;

	memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0);
}
#endif

#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
__init void memblock_set_kho_scratch_only(void)
{
	kho_scratch_only = true;
}

__init void memblock_clear_kho_scratch_only(void)
{
	kho_scratch_only = false;
}

__init void memmap_init_kho_scratch_pages(void)
{
	phys_addr_t start, end;
	unsigned long pfn;
	int nid;
	u64 i;

	if (!IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT))
		return;

	/*
	 * Initialize struct pages for free scratch memory.
	 * The struct pages for reserved scratch memory will be set up in
	 * reserve_bootmem_region()
	 */
	__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
			     MEMBLOCK_KHO_SCRATCH, &start, &end, &nid) {
		for (pfn = PFN_UP(start); pfn < PFN_DOWN(end); pfn++)
			init_deferred_page(pfn, nid);
	}
}
#endif

/**
 * memblock_setclr_flag - set or clear flag for a memory region
 * @type: memblock type to set/clear flag for
 * @base: base address of the region
 * @size: size of the region
 * @set: set or clear the flag
 * @flag: the flag to update
 *
 * This function isolates the region [@base, @base + @size), and sets/clears
 * @flag on it.
 *
 * Return: 0 on success, -errno on failure.
 */
static int __init_memblock memblock_setclr_flag(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size, int set, int flag)
{
	int i, ret, start_rgn, end_rgn;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++) {
		struct memblock_region *r = &type->regions[i];

		if (set)
			r->flags |= flag;
		else
			r->flags &= ~flag;
	}

	memblock_merge_regions(type, start_rgn, end_rgn);
	return 0;
}

/**
 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_HOTPLUG);
}

/**
 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_HOTPLUG);
}

/**
 * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
{
	if (!mirrored_kernelcore)
		return 0;

	system_has_some_mirror = true;

	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_MIRROR);
}

/**
 * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * The memory regions marked with %MEMBLOCK_NOMAP will not be added to the
 * direct mapping of the physical memory. These regions will still be
 * covered by the memory map. The struct page representing NOMAP memory
 * frames in the memory map will be PageReserved().
 *
 * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from
 * memblock, the caller must inform kmemleak to ignore that memory.
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_NOMAP);
}

/**
 * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP);
}

/**
 * memblock_reserved_mark_noinit - Mark a reserved memory region with flag
 * MEMBLOCK_RSRV_NOINIT
 *
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * The struct pages for the reserved regions marked %MEMBLOCK_RSRV_NOINIT will
 * not be fully initialized to allow the caller to optimize their initialization.
 *
 * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, setting this flag
 * completely bypasses the initialization of struct pages for such region.
 *
 * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is disabled, struct pages in this
 * region will be initialized with default values but won't be marked as
 * reserved.
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.reserved, base, size, 1,
				    MEMBLOCK_RSRV_NOINIT);
}

/**
 * memblock_mark_kho_scratch - Mark a memory region as MEMBLOCK_KHO_SCRATCH.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Only memory regions marked with %MEMBLOCK_KHO_SCRATCH will be considered
 * for allocations during early boot with kexec handover.
 *
 * Return: 0 on success, -errno on failure.
 */
__init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 1,
				    MEMBLOCK_KHO_SCRATCH);
}

/**
 * memblock_clear_kho_scratch - Clear MEMBLOCK_KHO_SCRATCH flag for a
 * specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
__init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
{
	return memblock_setclr_flag(&memblock.memory, base, size, 0,
				    MEMBLOCK_KHO_SCRATCH);
}

static bool should_skip_region(struct memblock_type *type,
			       struct memblock_region *m,
			       int nid, int flags)
{
	int m_nid = memblock_get_region_node(m);

	/* we never skip regions when iterating memblock.reserved or physmem */
	if (type != memblock_memory)
		return false;

	/* only memory regions are associated with nodes, check it */
	if (numa_valid_node(nid) && nid != m_nid)
		return true;

	/* skip hotpluggable memory regions if needed */
	if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
	    !(flags & MEMBLOCK_HOTPLUG))
		return true;

	/* if we want mirror memory skip non-mirror memory regions */
	if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
		return true;

	/* skip nomap memory unless we were asked for it explicitly */
	if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
		return true;

	/* skip driver-managed memory unless we were asked for it explicitly */
	if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m))
		return true;

	/*
	 * In early alloc during kexec handover, we can only consider
	 * MEMBLOCK_KHO_SCRATCH regions for the allocations
	 */
	if ((flags & MEMBLOCK_KHO_SCRATCH) && !memblock_is_kho_scratch(m))
		return true;

	return false;
}

/**
 * __next_mem_range - next function for for_each_free_mem_range() etc.
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Find the first area from *@idx which matches @nid, fill the out
 * parameters, and update *@idx for the next iteration.  The lower 32bit of
 * *@idx contains index into type_a and the upper 32bit indexes the
 * areas before each region in type_b.  For example, if type_b regions
 * look like the following,
 *
 *	0:[0-16), 1:[32-48), 2:[128-130)
 *
 * The upper 32bit indexes the following regions.
 *
 *	0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
 *
 * As both region arrays are sorted, the function advances the two indices
 * in lockstep and returns each intersection.
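 *
 * Callers normally do not use this function directly; they go through the
 * for_each_free_mem_range() style wrappers, e.g. (illustrative sketch)::
 *
 *	phys_addr_t start, end;
 *	u64 i;
 *
 *	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
 *				&start, &end, NULL)
 *		pr_info("free: [%pa-%pa]\n", &start, &end);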
 */
void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
		      struct memblock_type *type_a,
		      struct memblock_type *type_b, phys_addr_t *out_start,
		      phys_addr_t *out_end, int *out_nid)
{
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;

	for (; idx_a < type_a->cnt; idx_a++) {
		struct memblock_region *m = &type_a->regions[idx_a];

		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
		int m_nid = memblock_get_region_node(m);

		if (should_skip_region(type_a, m, nid, flags))
			continue;

		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a++;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b < type_b->cnt + 1; idx_b++) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
				r->base : PHYS_ADDR_MAX;

			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */
			if (r_start >= m_end)
				break;
			/* if the two regions intersect, we're done */
			if (m_start < r_end) {
				if (out_start)
					*out_start =
						max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
					*out_nid = m_nid;
				/*
				 * The region which ends first is
				 * advanced for the next iteration.
				 */
				if (m_end <= r_end)
					idx_a++;
				else
					idx_b++;
				*idx = (u32)idx_a | (u64)idx_b << 32;
				return;
			}
		}
	}

	/* signal end of iteration */
	*idx = ULLONG_MAX;
}

/**
 * __next_mem_range_rev - generic next function for for_each_*_range_rev()
 *
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @flags: pick from blocks based on memory attributes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Finds the next range from type_a which is not marked as unsuitable
 * in type_b.
 *
 * Reverse of __next_mem_range().
 */
void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
					  enum memblock_flags flags,
					  struct memblock_type *type_a,
					  struct memblock_type *type_b,
					  phys_addr_t *out_start,
					  phys_addr_t *out_end, int *out_nid)
{
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;

	if (*idx == (u64)ULLONG_MAX) {
		idx_a = type_a->cnt - 1;
		if (type_b != NULL)
			idx_b = type_b->cnt;
		else
			idx_b = 0;
	}

	for (; idx_a >= 0; idx_a--) {
		struct memblock_region *m = &type_a->regions[idx_a];

		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
		int m_nid = memblock_get_region_node(m);

		if (should_skip_region(type_a, m, nid, flags))
			continue;

		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a--;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b >= 0; idx_b--) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
				r->base : PHYS_ADDR_MAX;
			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */
			if (r_end <= m_start)
				break;
			/* if the two regions intersect, we're done */
			if (m_end > r_start) {
				if (out_start)
					*out_start = max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
					*out_nid = m_nid;
				if (m_start >= r_start)
					idx_a--;
				else
					idx_b--;
				*idx = (u32)idx_a | (u64)idx_b << 32;
				return;
			}
		}
	}
	/* signal end of iteration */
	*idx = ULLONG_MAX;
}

/*
 * Common iterator interface used to define for_each_mem_pfn_range().
 */
void __init_memblock __next_mem_pfn_range(int *idx, int nid,
					  unsigned long *out_start_pfn,
					  unsigned long *out_end_pfn, int *out_nid)
{
	struct memblock_type *type = &memblock.memory;
	struct memblock_region *r;
	int r_nid;

	while (++*idx < type->cnt) {
		r = &type->regions[*idx];
		r_nid = memblock_get_region_node(r);

		if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
			continue;
		if (!numa_valid_node(nid) || nid == r_nid)
			break;
	}
	if (*idx >= type->cnt) {
		*idx = -1;
		return;
	}

	if (out_start_pfn)
		*out_start_pfn = PFN_UP(r->base);
	if (out_end_pfn)
		*out_end_pfn = PFN_DOWN(r->base + r->size);
	if (out_nid)
		*out_nid = r_nid;
}

/**
 * memblock_set_node - set node ID on memblock regions
 * @base: base of area to set node ID for
 * @size: size of area to set node ID for
 * @type: memblock type to set node ID for
 * @nid: node ID to set
 *
 * Set the nid of memblock @type regions in [@base, @base + @size) to @nid.
 * Regions which cross the area boundaries are split as necessary.
 *
 * Return:
 * 0 on success, -errno on failure.
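 *
 * For example (illustrative sketch only; @start, @end and @nid are
 * hypothetical values discovered by the architecture), assigning a span of
 * already registered memory to a node could look like::
 *
 *	memblock_set_node(start, end - start, &memblock.memory, nid);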
 */
int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
				      struct memblock_type *type, int nid)
{
#ifdef CONFIG_NUMA
	int start_rgn, end_rgn;
	int i, ret;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++)
		memblock_set_region_node(&type->regions[i], nid);

	memblock_merge_regions(type, start_rgn, end_rgn);
#endif
	return 0;
}

#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/**
 * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
 *
 * @idx: pointer to u64 loop variable
 * @zone: zone in which all of the memory blocks reside
 * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL
 * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL
 *
 * This function is meant to be a zone/pfn specific wrapper for the
 * for_each_mem_range type iterators. Specifically they are used in the
 * deferred memory init routines and as such we were duplicating much of
 * this logic throughout the code. So instead of having it in multiple
 * locations it seemed like it would make more sense to centralize this to
 * one new iterator that does everything they need.
 */
void __init_memblock
__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
			     unsigned long *out_spfn, unsigned long *out_epfn)
{
	int zone_nid = zone_to_nid(zone);
	phys_addr_t spa, epa;

	__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
			 &memblock.memory, &memblock.reserved,
			 &spa, &epa, NULL);

	while (*idx != U64_MAX) {
		unsigned long epfn = PFN_DOWN(epa);
		unsigned long spfn = PFN_UP(spa);

		/*
		 * Verify the end is at least past the start of the zone and
		 * that we have at least one PFN to initialize.
		 */
		if (zone->zone_start_pfn < epfn && spfn < epfn) {
			/* if we went too far just stop searching */
			if (zone_end_pfn(zone) <= spfn) {
				*idx = U64_MAX;
				break;
			}

			if (out_spfn)
				*out_spfn = max(zone->zone_start_pfn, spfn);
			if (out_epfn)
				*out_epfn = min(zone_end_pfn(zone), epfn);

			return;
		}

		__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
				 &memblock.memory, &memblock.reserved,
				 &spa, &epa, NULL);
	}

	/* signal end of iteration */
	if (out_spfn)
		*out_spfn = ULONG_MAX;
	if (out_epfn)
		*out_epfn = 0;
}

#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */

/**
 * memblock_alloc_range_nid - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @start: the lower bound of the memory region to allocate (phys address)
 * @end: the upper bound of the memory region to allocate (phys address)
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @exact_nid: control whether the allocation may fall back to other nodes
 *
 * The allocation is performed from memory region limited by
 * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE.
 *
 * If the specified node cannot hold the requested memory and @exact_nid
 * is false, the allocation falls back to any node in the system.
 *
 * For systems with memory mirroring, the allocation is attempted first
 * from the regions with mirroring enabled and then retried from any
 * memory region.
 *
 * In addition, the allocated boot memory block is registered with
 * kmemleak_alloc_phys() so that it is never reported as a leak.
 *
 * Return:
 * Physical address of allocated memory block on success, %0 on failure.
 */
phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid,
					bool exact_nid)
{
	enum memblock_flags flags = choose_memblock_flags();
	phys_addr_t found;

	/*
	 * Detect any accidental use of these APIs after slab is ready, as at
	 * this moment memblock may be deinitialized already and its
	 * internal data may be destroyed (after execution of memblock_free_all)
	 */
	if (WARN_ON_ONCE(slab_is_available())) {
		void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);

		return vaddr ? virt_to_phys(vaddr) : 0;
	}

	if (!align) {
		/* Can't use WARNs this early in boot on powerpc */
		dump_stack();
		align = SMP_CACHE_BYTES;
	}

again:
	found = memblock_find_in_range_node(size, align, start, end, nid,
					    flags);
	if (found && !__memblock_reserve(found, size, nid, MEMBLOCK_RSRV_KERN))
		goto done;

	if (numa_valid_node(nid) && !exact_nid) {
		found = memblock_find_in_range_node(size, align, start,
						    end, NUMA_NO_NODE,
						    flags);
		if (found && !memblock_reserve_kern(found, size))
			goto done;
	}

	if (flags & MEMBLOCK_MIRROR) {
		flags &= ~MEMBLOCK_MIRROR;
		pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
				    &size);
		goto again;
	}

	return 0;

done:
	/*
	 * Skip kmemleak for those places like kasan_init() and
	 * early_pgtable_alloc() due to high volume.
	 */
	if (end != MEMBLOCK_ALLOC_NOLEAKTRACE)
		/*
		 * Memblock allocated blocks are never reported as
		 * leaks. This is because many of these blocks are
		 * only referred via the physical address which is
		 * not looked up by kmemleak.
		 */
		kmemleak_alloc_phys(found, size, 0);

	/*
	 * Some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP,
	 * require memory to be accepted before it can be used by the
	 * guest.
	 *
	 * Accept the memory of the allocated buffer.
	 */
	accept_memory(found, size);

	return found;
}

/**
 * memblock_phys_alloc_range - allocate a memory block inside specified range
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @start: the lower bound of the memory region to allocate (physical address)
 * @end: the upper bound of the memory region to allocate (physical address)
 *
 * Allocate @size bytes in the range between @start and @end.
 *
 * Return: physical address of the allocated memory block on success,
 * %0 on failure.
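 *
 * For example (illustrative sketch only, assuming SZ_4G from linux/sizes.h),
 * a page-sized, page-aligned allocation restricted to the first 4G could be
 * requested as::
 *
 *	phys_addr_t pa = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE,
 *						   0, SZ_4G);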
 */
phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
					     phys_addr_t align,
					     phys_addr_t start,
					     phys_addr_t end)
{
	memblock_dbg("%s: %llu bytes align=0x%llx from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, &start, &end,
		     (void *)_RET_IP_);
	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
					false);
}

/**
 * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Allocates memory block from the specified NUMA node. If the node
 * has no available memory, attempts to allocate from any node in the
 * system.
 *
 * Return: physical address of the allocated memory block on success,
 * %0 on failure.
 */
phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	return memblock_alloc_range_nid(size, align, 0,
					MEMBLOCK_ALLOC_ACCESSIBLE, nid, false);
}

/**
 * memblock_alloc_internal - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region to allocate (phys address)
 * @max_addr: the upper bound of the memory region to allocate (phys address)
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @exact_nid: control whether the allocation may fall back to other nodes
 *
 * Allocates memory block using memblock_alloc_range_nid() and
 * converts the returned physical address to virtual.
 *
 * The @min_addr limit is dropped if it cannot be satisfied and the allocation
 * will fall back to memory below @min_addr. Other constraints, such
 * as node and mirrored memory will be handled again in
 * memblock_alloc_range_nid().
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
static void * __init memblock_alloc_internal(
				phys_addr_t size, phys_addr_t align,
				phys_addr_t min_addr, phys_addr_t max_addr,
				int nid, bool exact_nid)
{
	phys_addr_t alloc;

	if (max_addr > memblock.current_limit)
		max_addr = memblock.current_limit;

	alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
					 exact_nid);

	/* retry allocation without lower limit */
	if (!alloc && min_addr)
		alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
						 exact_nid);

	if (!alloc)
		return NULL;

	return phys_to_virt(alloc);
}

/**
 * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node
 * without zeroing memory
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	      is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public function, provides additional debug information (including caller
 * info), if enabled. Does not zero allocated memory.
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
void * __init memblock_alloc_exact_nid_raw(
			phys_addr_t size, phys_addr_t align,
			phys_addr_t min_addr, phys_addr_t max_addr,
			int nid)
{
	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, nid, &min_addr,
		     &max_addr, (void *)_RET_IP_);

	return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
				       true);
}

/**
 * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
 * memory and without panicking
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	      is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public function, provides additional debug information (including caller
 * info), if enabled. Does not zero allocated memory, does not panic if request
 * cannot be satisfied.
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
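 *
 * Because the memory is not zeroed, a caller that needs defined contents
 * must initialize it explicitly, e.g. (illustrative sketch with a
 * hypothetical @size and @nid)::
 *
 *	void *buf = memblock_alloc_try_nid_raw(size, SMP_CACHE_BYTES, 0,
 *					       MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 *
 *	if (buf)
 *		memset(buf, 0, size);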
1748 */ 1749 void * __init memblock_alloc_try_nid_raw( 1750 phys_addr_t size, phys_addr_t align, 1751 phys_addr_t min_addr, phys_addr_t max_addr, 1752 int nid) 1753 { 1754 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", 1755 __func__, (u64)size, (u64)align, nid, &min_addr, 1756 &max_addr, (void *)_RET_IP_); 1757 1758 return memblock_alloc_internal(size, align, min_addr, max_addr, nid, 1759 false); 1760 } 1761 1762 /** 1763 * memblock_alloc_try_nid - allocate boot memory block 1764 * @size: size of memory block to be allocated in bytes 1765 * @align: alignment of the region and block's size 1766 * @min_addr: the lower bound of the memory region from where the allocation 1767 * is preferred (phys address) 1768 * @max_addr: the upper bound of the memory region from where the allocation 1769 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to 1770 * allocate only from memory limited by memblock.current_limit value 1771 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1772 * 1773 * Public function, provides additional debug information (including caller 1774 * info), if enabled. This function zeroes the allocated memory. 1775 * 1776 * Return: 1777 * Virtual address of allocated memory block on success, NULL on failure. 1778 */ 1779 void * __init memblock_alloc_try_nid( 1780 phys_addr_t size, phys_addr_t align, 1781 phys_addr_t min_addr, phys_addr_t max_addr, 1782 int nid) 1783 { 1784 void *ptr; 1785 1786 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", 1787 __func__, (u64)size, (u64)align, nid, &min_addr, 1788 &max_addr, (void *)_RET_IP_); 1789 ptr = memblock_alloc_internal(size, align, 1790 min_addr, max_addr, nid, false); 1791 if (ptr) 1792 memset(ptr, 0, size); 1793 1794 return ptr; 1795 } 1796 1797 /** 1798 * __memblock_alloc_or_panic - Try to allocate memory and panic on failure 1799 * @size: size of memory block to be allocated in bytes 1800 * @align: alignment of the region and block's size 1801 * @func: caller func name 1802 * 1803 * This function attempts to allocate memory using memblock_alloc, 1804 * and in case of failure, it calls panic with the formatted message. 1805 * This function should not be used directly, please use the macro memblock_alloc_or_panic. 1806 */ 1807 void *__init __memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align, 1808 const char *func) 1809 { 1810 void *addr = memblock_alloc(size, align); 1811 1812 if (unlikely(!addr)) 1813 panic("%s: Failed to allocate %pap bytes\n", func, &size); 1814 return addr; 1815 } 1816 1817 /** 1818 * memblock_free_late - free pages directly to buddy allocator 1819 * @base: phys starting address of the boot memory block 1820 * @size: size of the boot memory block in bytes 1821 * 1822 * This is only useful when the memblock allocator has already been torn 1823 * down, but we are still initializing the system. Pages are released directly 1824 * to the buddy allocator. 
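 *
 * Illustrative sketch (hypothetical caller, not taken from this file): a
 * region that was reserved early but is no longer needed once the buddy
 * allocator is up can be handed back page by page; "early_log_phys" and
 * "early_log_size" are assumed caller-side values:
 *
 *	memblock_free_late(early_log_phys, early_log_size);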
1825 */ 1826 void __init memblock_free_late(phys_addr_t base, phys_addr_t size) 1827 { 1828 phys_addr_t cursor, end; 1829 1830 end = base + size - 1; 1831 memblock_dbg("%s: [%pa-%pa] %pS\n", 1832 __func__, &base, &end, (void *)_RET_IP_); 1833 kmemleak_free_part_phys(base, size); 1834 cursor = PFN_UP(base); 1835 end = PFN_DOWN(base + size); 1836 1837 for (; cursor < end; cursor++) { 1838 memblock_free_pages(pfn_to_page(cursor), cursor, 0); 1839 totalram_pages_inc(); 1840 } 1841 } 1842 1843 /* 1844 * Remaining API functions 1845 */ 1846 1847 phys_addr_t __init_memblock memblock_phys_mem_size(void) 1848 { 1849 return memblock.memory.total_size; 1850 } 1851 1852 phys_addr_t __init_memblock memblock_reserved_size(void) 1853 { 1854 return memblock.reserved.total_size; 1855 } 1856 1857 phys_addr_t __init_memblock memblock_reserved_kern_size(phys_addr_t limit, int nid) 1858 { 1859 struct memblock_region *r; 1860 phys_addr_t total = 0; 1861 1862 for_each_reserved_mem_region(r) { 1863 phys_addr_t size = r->size; 1864 1865 if (r->base > limit) 1866 break; 1867 1868 if (r->base + r->size > limit) 1869 size = limit - r->base; 1870 1871 if (nid == memblock_get_region_node(r) || !numa_valid_node(nid)) 1872 if (r->flags & MEMBLOCK_RSRV_KERN) 1873 total += size; 1874 } 1875 1876 return total; 1877 } 1878 1879 /** 1880 * memblock_estimated_nr_free_pages - return estimated number of free pages 1881 * from memblock point of view 1882 * 1883 * During bootup, subsystems might need a rough estimate of the number of free 1884 * pages in the whole system, before precise numbers are available from the 1885 * buddy. Especially with CONFIG_DEFERRED_STRUCT_PAGE_INIT, the numbers 1886 * obtained from the buddy might be very imprecise during bootup. 1887 * 1888 * Return: 1889 * An estimated number of free pages from memblock point of view. 
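 *
 * Illustrative sketch (hypothetical caller, not taken from this file): an
 * early subsystem might scale a table from the estimate instead of waiting
 * for exact buddy counters; the 1/64 scaling is an assumed, caller-chosen
 * heuristic:
 *
 *	table_entries = roundup_pow_of_two(memblock_estimated_nr_free_pages() / 64);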
1890 */ 1891 unsigned long __init memblock_estimated_nr_free_pages(void) 1892 { 1893 return PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size()); 1894 } 1895 1896 /* lowest address */ 1897 phys_addr_t __init_memblock memblock_start_of_DRAM(void) 1898 { 1899 return memblock.memory.regions[0].base; 1900 } 1901 1902 phys_addr_t __init_memblock memblock_end_of_DRAM(void) 1903 { 1904 int idx = memblock.memory.cnt - 1; 1905 1906 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); 1907 } 1908 1909 static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) 1910 { 1911 phys_addr_t max_addr = PHYS_ADDR_MAX; 1912 struct memblock_region *r; 1913 1914 /* 1915 * translate the memory @limit size into the max address within one of 1916 * the memory memblock regions, if the @limit exceeds the total size 1917 * of those regions, max_addr will keep original value PHYS_ADDR_MAX 1918 */ 1919 for_each_mem_region(r) { 1920 if (limit <= r->size) { 1921 max_addr = r->base + limit; 1922 break; 1923 } 1924 limit -= r->size; 1925 } 1926 1927 return max_addr; 1928 } 1929 1930 void __init memblock_enforce_memory_limit(phys_addr_t limit) 1931 { 1932 phys_addr_t max_addr; 1933 1934 if (!limit) 1935 return; 1936 1937 max_addr = __find_max_addr(limit); 1938 1939 /* @limit exceeds the total size of the memory, do nothing */ 1940 if (max_addr == PHYS_ADDR_MAX) 1941 return; 1942 1943 /* truncate both memory and reserved regions */ 1944 memblock_remove_range(&memblock.memory, max_addr, 1945 PHYS_ADDR_MAX); 1946 memblock_remove_range(&memblock.reserved, max_addr, 1947 PHYS_ADDR_MAX); 1948 } 1949 1950 void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) 1951 { 1952 int start_rgn, end_rgn; 1953 int i, ret; 1954 1955 if (!size) 1956 return; 1957 1958 if (!memblock_memory->total_size) { 1959 pr_warn("%s: No memory registered yet\n", __func__); 1960 return; 1961 } 1962 1963 ret = memblock_isolate_range(&memblock.memory, base, size, 1964 &start_rgn, &end_rgn); 1965 if (ret) 1966 return; 1967 1968 /* remove all the MAP regions */ 1969 for (i = memblock.memory.cnt - 1; i >= end_rgn; i--) 1970 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1971 memblock_remove_region(&memblock.memory, i); 1972 1973 for (i = start_rgn - 1; i >= 0; i--) 1974 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1975 memblock_remove_region(&memblock.memory, i); 1976 1977 /* truncate the reserved regions */ 1978 memblock_remove_range(&memblock.reserved, 0, base); 1979 memblock_remove_range(&memblock.reserved, 1980 base + size, PHYS_ADDR_MAX); 1981 } 1982 1983 void __init memblock_mem_limit_remove_map(phys_addr_t limit) 1984 { 1985 phys_addr_t max_addr; 1986 1987 if (!limit) 1988 return; 1989 1990 max_addr = __find_max_addr(limit); 1991 1992 /* @limit exceeds the total size of the memory, do nothing */ 1993 if (max_addr == PHYS_ADDR_MAX) 1994 return; 1995 1996 memblock_cap_memory_range(0, max_addr); 1997 } 1998 1999 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) 2000 { 2001 unsigned int left = 0, right = type->cnt; 2002 2003 do { 2004 unsigned int mid = (right + left) / 2; 2005 2006 if (addr < type->regions[mid].base) 2007 right = mid; 2008 else if (addr >= (type->regions[mid].base + 2009 type->regions[mid].size)) 2010 left = mid + 1; 2011 else 2012 return mid; 2013 } while (left < right); 2014 return -1; 2015 } 2016 2017 bool __init_memblock memblock_is_reserved(phys_addr_t addr) 2018 { 2019 return memblock_search(&memblock.reserved, addr) != 
-1; 2020 } 2021 2022 bool __init_memblock memblock_is_memory(phys_addr_t addr) 2023 { 2024 return memblock_search(&memblock.memory, addr) != -1; 2025 } 2026 2027 bool __init_memblock memblock_is_map_memory(phys_addr_t addr) 2028 { 2029 int i = memblock_search(&memblock.memory, addr); 2030 2031 if (i == -1) 2032 return false; 2033 return !memblock_is_nomap(&memblock.memory.regions[i]); 2034 } 2035 2036 int __init_memblock memblock_search_pfn_nid(unsigned long pfn, 2037 unsigned long *start_pfn, unsigned long *end_pfn) 2038 { 2039 struct memblock_type *type = &memblock.memory; 2040 int mid = memblock_search(type, PFN_PHYS(pfn)); 2041 2042 if (mid == -1) 2043 return NUMA_NO_NODE; 2044 2045 *start_pfn = PFN_DOWN(type->regions[mid].base); 2046 *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size); 2047 2048 return memblock_get_region_node(&type->regions[mid]); 2049 } 2050 2051 /** 2052 * memblock_is_region_memory - check if a region is a subset of memory 2053 * @base: base of region to check 2054 * @size: size of region to check 2055 * 2056 * Check if the region [@base, @base + @size) is a subset of a memory block. 2057 * 2058 * Return: 2059 * True if the region is a subset of a memory block, false if not. 2060 */ 2061 bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) 2062 { 2063 int idx = memblock_search(&memblock.memory, base); 2064 phys_addr_t end = base + memblock_cap_size(base, &size); 2065 2066 if (idx == -1) 2067 return false; 2068 return (memblock.memory.regions[idx].base + 2069 memblock.memory.regions[idx].size) >= end; 2070 } 2071 2072 /** 2073 * memblock_is_region_reserved - check if a region intersects reserved memory 2074 * @base: base of region to check 2075 * @size: size of region to check 2076 * 2077 * Check if the region [@base, @base + @size) intersects a reserved 2078 * memory block. 2079 * 2080 * Return: 2081 * True if they intersect, false if not.
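 *
 * Illustrative sketch (hypothetical caller, not taken from this file): an
 * early setup path could combine this check with memblock_is_region_memory()
 * before claiming a candidate range; "base" and "size" are assumed
 * caller-provided values:
 *
 *	if (memblock_is_region_memory(base, size) &&
 *	    !memblock_is_region_reserved(base, size))
 *		memblock_reserve(base, size);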
2082 */ 2083 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) 2084 { 2085 return memblock_overlaps_region(&memblock.reserved, base, size); 2086 } 2087 2088 void __init_memblock memblock_trim_memory(phys_addr_t align) 2089 { 2090 phys_addr_t start, end, orig_start, orig_end; 2091 struct memblock_region *r; 2092 2093 for_each_mem_region(r) { 2094 orig_start = r->base; 2095 orig_end = r->base + r->size; 2096 start = round_up(orig_start, align); 2097 end = round_down(orig_end, align); 2098 2099 if (start == orig_start && end == orig_end) 2100 continue; 2101 2102 if (start < end) { 2103 r->base = start; 2104 r->size = end - start; 2105 } else { 2106 memblock_remove_region(&memblock.memory, 2107 r - memblock.memory.regions); 2108 r--; 2109 } 2110 } 2111 } 2112 2113 void __init_memblock memblock_set_current_limit(phys_addr_t limit) 2114 { 2115 memblock.current_limit = limit; 2116 } 2117 2118 phys_addr_t __init_memblock memblock_get_current_limit(void) 2119 { 2120 return memblock.current_limit; 2121 } 2122 2123 static void __init_memblock memblock_dump(struct memblock_type *type) 2124 { 2125 phys_addr_t base, end, size; 2126 enum memblock_flags flags; 2127 int idx; 2128 struct memblock_region *rgn; 2129 2130 pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); 2131 2132 for_each_memblock_type(idx, type, rgn) { 2133 char nid_buf[32] = ""; 2134 2135 base = rgn->base; 2136 size = rgn->size; 2137 end = base + size - 1; 2138 flags = rgn->flags; 2139 #ifdef CONFIG_NUMA 2140 if (numa_valid_node(memblock_get_region_node(rgn))) 2141 snprintf(nid_buf, sizeof(nid_buf), " on node %d", 2142 memblock_get_region_node(rgn)); 2143 #endif 2144 pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n", 2145 type->name, idx, &base, &end, &size, nid_buf, flags); 2146 } 2147 } 2148 2149 static void __init_memblock __memblock_dump_all(void) 2150 { 2151 pr_info("MEMBLOCK configuration:\n"); 2152 pr_info(" memory size = %pa reserved size = %pa\n", 2153 &memblock.memory.total_size, 2154 &memblock.reserved.total_size); 2155 2156 memblock_dump(&memblock.memory); 2157 memblock_dump(&memblock.reserved); 2158 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 2159 memblock_dump(&physmem); 2160 #endif 2161 } 2162 2163 void __init_memblock memblock_dump_all(void) 2164 { 2165 if (memblock_debug) 2166 __memblock_dump_all(); 2167 } 2168 2169 void __init memblock_allow_resize(void) 2170 { 2171 memblock_can_resize = 1; 2172 } 2173 2174 static int __init early_memblock(char *p) 2175 { 2176 if (p && strstr(p, "debug")) 2177 memblock_debug = 1; 2178 return 0; 2179 } 2180 early_param("memblock", early_memblock); 2181 2182 static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) 2183 { 2184 struct page *start_pg, *end_pg; 2185 phys_addr_t pg, pgend; 2186 2187 /* 2188 * Convert start_pfn/end_pfn to a struct page pointer. 2189 */ 2190 start_pg = pfn_to_page(start_pfn - 1) + 1; 2191 end_pg = pfn_to_page(end_pfn - 1) + 1; 2192 2193 /* 2194 * Convert to physical addresses, and round start upwards and end 2195 * downwards. 2196 */ 2197 pg = PAGE_ALIGN(__pa(start_pg)); 2198 pgend = PAGE_ALIGN_DOWN(__pa(end_pg)); 2199 2200 /* 2201 * If there are free pages between these, free the section of the 2202 * memmap array. 2203 */ 2204 if (pg < pgend) 2205 memblock_phys_free(pg, pgend - pg); 2206 } 2207 2208 /* 2209 * The mem_map array can get very big. Free the unused area of the memory map. 
2210 */ 2211 static void __init free_unused_memmap(void) 2212 { 2213 unsigned long start, end, prev_end = 0; 2214 int i; 2215 2216 if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) || 2217 IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) 2218 return; 2219 2220 /* 2221 * This relies on each bank being in address order. 2222 * The banks are sorted previously in bootmem_init(). 2223 */ 2224 for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { 2225 #ifdef CONFIG_SPARSEMEM 2226 /* 2227 * Take care not to free memmap entries that don't exist 2228 * due to SPARSEMEM sections which aren't present. 2229 */ 2230 start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); 2231 #endif 2232 /* 2233 * Align down here since many operations in VM subsystem 2234 * presume that there are no holes in the memory map inside 2235 * a pageblock 2236 */ 2237 start = pageblock_start_pfn(start); 2238 2239 /* 2240 * If we had a previous bank, and there is a gap 2241 * between the current bank and the previous one, free it. 2242 */ 2243 if (prev_end && prev_end < start) 2244 free_memmap(prev_end, start); 2245 2246 /* 2247 * Align up here since many operations in VM subsystem 2248 * presume that there are no holes in the memory map inside 2249 * a pageblock 2250 */ 2251 prev_end = pageblock_align(end); 2252 } 2253 2254 #ifdef CONFIG_SPARSEMEM 2255 if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) { 2256 prev_end = pageblock_align(end); 2257 free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); 2258 } 2259 #endif 2260 } 2261 2262 static void __init __free_pages_memory(unsigned long start, unsigned long end) 2263 { 2264 int order; 2265 2266 while (start < end) { 2267 /* 2268 * Free the pages in the largest chunks alignment allows. 2269 * 2270 * __ffs() behaviour is undefined for 0. start == 0 is 2271 * MAX_PAGE_ORDER-aligned, so set order to MAX_PAGE_ORDER in 2272 * that case. 2273 */ 2274 if (start) 2275 order = min_t(int, MAX_PAGE_ORDER, __ffs(start)); 2276 else 2277 order = MAX_PAGE_ORDER; 2278 2279 while (start + (1UL << order) > end) 2280 order--; 2281 2282 memblock_free_pages(pfn_to_page(start), start, order); 2283 2284 start += (1UL << order); 2285 } 2286 } 2287 2288 static unsigned long __init __free_memory_core(phys_addr_t start, 2289 phys_addr_t end) 2290 { 2291 unsigned long start_pfn = PFN_UP(start); 2292 unsigned long end_pfn = PFN_DOWN(end); 2293 2294 if (!IS_ENABLED(CONFIG_HIGHMEM) && end_pfn > max_low_pfn) 2295 end_pfn = max_low_pfn; 2296 2297 if (start_pfn >= end_pfn) 2298 return 0; 2299 2300 __free_pages_memory(start_pfn, end_pfn); 2301 2302 return end_pfn - start_pfn; 2303 } 2304 2305 static void __init memmap_init_reserved_pages(void) 2306 { 2307 struct memblock_region *region; 2308 phys_addr_t start, end; 2309 int nid; 2310 unsigned long max_reserved; 2311 2312 /* 2313 * set nid on all reserved pages and also treat struct 2314 * pages for the NOMAP regions as PageReserved 2315 */ 2316 repeat: 2317 max_reserved = memblock.reserved.max; 2318 for_each_mem_region(region) { 2319 nid = memblock_get_region_node(region); 2320 start = region->base; 2321 end = start + region->size; 2322 2323 if (memblock_is_nomap(region)) 2324 reserve_bootmem_region(start, end, nid); 2325 2326 memblock_set_node(start, region->size, &memblock.reserved, nid); 2327 } 2328 /* 2329 * If 'max' has changed, the memblock.reserved array has been doubled, 2330 * which may have added a new reserved region before the current 2331 * 'start'. Repeat the procedure to set the node id for any such region.
2332 */ 2333 if (max_reserved != memblock.reserved.max) 2334 goto repeat; 2335 2336 /* 2337 * initialize struct pages for reserved regions that don't have 2338 * the MEMBLOCK_RSRV_NOINIT flag set 2339 */ 2340 for_each_reserved_mem_region(region) { 2341 if (!memblock_is_reserved_noinit(region)) { 2342 nid = memblock_get_region_node(region); 2343 start = region->base; 2344 end = start + region->size; 2345 2346 if (!numa_valid_node(nid)) 2347 nid = early_pfn_to_nid(PFN_DOWN(start)); 2348 2349 reserve_bootmem_region(start, end, nid); 2350 } 2351 } 2352 } 2353 2354 static unsigned long __init free_low_memory_core_early(void) 2355 { 2356 unsigned long count = 0; 2357 phys_addr_t start, end; 2358 u64 i; 2359 2360 memblock_clear_hotplug(0, -1); 2361 2362 memmap_init_reserved_pages(); 2363 2364 /* 2365 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id 2366 * because in some cases, such as when Node0 has no RAM installed, 2367 * low RAM will be on Node1 2368 */ 2369 for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, 2370 NULL) 2371 count += __free_memory_core(start, end); 2372 2373 return count; 2374 } 2375 2376 static int reset_managed_pages_done __initdata; 2377 2378 static void __init reset_node_managed_pages(pg_data_t *pgdat) 2379 { 2380 struct zone *z; 2381 2382 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) 2383 atomic_long_set(&z->managed_pages, 0); 2384 } 2385 2386 void __init reset_all_zones_managed_pages(void) 2387 { 2388 struct pglist_data *pgdat; 2389 2390 if (reset_managed_pages_done) 2391 return; 2392 2393 for_each_online_pgdat(pgdat) 2394 reset_node_managed_pages(pgdat); 2395 2396 reset_managed_pages_done = 1; 2397 } 2398 2399 /** 2400 * memblock_free_all - release free pages to the buddy allocator 2401 */ 2402 void __init memblock_free_all(void) 2403 { 2404 unsigned long pages; 2405 2406 free_unused_memmap(); 2407 reset_all_zones_managed_pages(); 2408 2409 memblock_clear_kho_scratch_only(); 2410 pages = free_low_memory_core_early(); 2411 totalram_pages_add(pages); 2412 } 2413 2414 /* Keep a table to reserve named memory */ 2415 #define RESERVE_MEM_MAX_ENTRIES 8 2416 #define RESERVE_MEM_NAME_SIZE 16 2417 struct reserve_mem_table { 2418 char name[RESERVE_MEM_NAME_SIZE]; 2419 phys_addr_t start; 2420 phys_addr_t size; 2421 }; 2422 static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES]; 2423 static int reserved_mem_count; 2424 static DEFINE_MUTEX(reserve_mem_lock); 2425 2426 /* Add wildcard region with a lookup name */ 2427 static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size, 2428 const char *name) 2429 { 2430 struct reserve_mem_table *map; 2431 2432 map = &reserved_mem_table[reserved_mem_count++]; 2433 map->start = start; 2434 map->size = size; 2435 strscpy(map->name, name); 2436 } 2437 2438 static struct reserve_mem_table *reserve_mem_find_by_name_nolock(const char *name) 2439 { 2440 struct reserve_mem_table *map; 2441 int i; 2442 2443 for (i = 0; i < reserved_mem_count; i++) { 2444 map = &reserved_mem_table[i]; 2445 if (!map->size) 2446 continue; 2447 if (strcmp(name, map->name) == 0) 2448 return map; 2449 } 2450 return NULL; 2451 } 2452 2453 /** 2454 * reserve_mem_find_by_name - Find reserved memory region with a given name 2455 * @name: The name that is attached to a reserved memory region 2456 * @start: If found, holds the start address 2457 * @size: If found, holds the size of the region. 2458 * 2459 * @start and @size are only updated if @name is found.
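 *
 * Illustrative sketch (hypothetical consumer, not taken from this file): with
 * "reserve_mem=2M:4096:trace" on the kernel command line, a driver could look
 * the range up by its name; "setup_trace_buffer()" is an assumed caller-side
 * helper:
 *
 *	phys_addr_t start, size;
 *
 *	if (reserve_mem_find_by_name("trace", &start, &size))
 *		setup_trace_buffer(start, size);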
2460 * 2461 * Returns: 1 if found or 0 if not found. 2462 */ 2463 int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size) 2464 { 2465 struct reserve_mem_table *map; 2466 2467 guard(mutex)(&reserve_mem_lock); 2468 map = reserve_mem_find_by_name_nolock(name); 2469 if (!map) 2470 return 0; 2471 2472 *start = map->start; 2473 *size = map->size; 2474 return 1; 2475 } 2476 EXPORT_SYMBOL_GPL(reserve_mem_find_by_name); 2477 2478 /** 2479 * reserve_mem_release_by_name - Release reserved memory region with a given name 2480 * @name: The name that is attached to a reserved memory region 2481 * 2482 * Forcibly release the pages in the reserved memory region so that the memory 2483 * can be used as free memory. After the release, the reserved region size becomes 0. 2484 * 2485 * Returns: 1 if released or 0 if not found. 2486 */ 2487 int reserve_mem_release_by_name(const char *name) 2488 { 2489 char buf[RESERVE_MEM_NAME_SIZE + 12]; 2490 struct reserve_mem_table *map; 2491 void *start, *end; 2492 2493 guard(mutex)(&reserve_mem_lock); 2494 map = reserve_mem_find_by_name_nolock(name); 2495 if (!map) 2496 return 0; 2497 2498 start = phys_to_virt(map->start); 2499 end = start + map->size - 1; 2500 snprintf(buf, sizeof(buf), "reserve_mem:%s", name); 2501 free_reserved_area(start, end, 0, buf); 2502 map->size = 0; 2503 2504 return 1; 2505 } 2506 2507 #ifdef CONFIG_KEXEC_HANDOVER 2508 #define MEMBLOCK_KHO_FDT "memblock" 2509 #define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1" 2510 #define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1" 2511 static struct page *kho_fdt; 2512 2513 static int reserve_mem_kho_finalize(struct kho_serialization *ser) 2514 { 2515 int err = 0, i; 2516 2517 for (i = 0; i < reserved_mem_count; i++) { 2518 struct reserve_mem_table *map = &reserved_mem_table[i]; 2519 2520 err |= kho_preserve_phys(map->start, map->size); 2521 } 2522 2523 err |= kho_preserve_folio(page_folio(kho_fdt)); 2524 err |= kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt)); 2525 2526 return notifier_from_errno(err); 2527 } 2528 2529 static int reserve_mem_kho_notifier(struct notifier_block *self, 2530 unsigned long cmd, void *v) 2531 { 2532 switch (cmd) { 2533 case KEXEC_KHO_FINALIZE: 2534 return reserve_mem_kho_finalize((struct kho_serialization *)v); 2535 case KEXEC_KHO_ABORT: 2536 return NOTIFY_DONE; 2537 default: 2538 return NOTIFY_BAD; 2539 } 2540 } 2541 2542 static struct notifier_block reserve_mem_kho_nb = { 2543 .notifier_call = reserve_mem_kho_notifier, 2544 }; 2545 2546 static int __init prepare_kho_fdt(void) 2547 { 2548 int err = 0, i; 2549 void *fdt; 2550 2551 kho_fdt = alloc_page(GFP_KERNEL); 2552 if (!kho_fdt) 2553 return -ENOMEM; 2554 2555 fdt = page_to_virt(kho_fdt); 2556 2557 err |= fdt_create(fdt, PAGE_SIZE); 2558 err |= fdt_finish_reservemap(fdt); 2559 2560 err |= fdt_begin_node(fdt, ""); 2561 err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE); 2562 for (i = 0; i < reserved_mem_count; i++) { 2563 struct reserve_mem_table *map = &reserved_mem_table[i]; 2564 2565 err |= fdt_begin_node(fdt, map->name); 2566 err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE); 2567 err |= fdt_property(fdt, "start", &map->start, sizeof(map->start)); 2568 err |= fdt_property(fdt, "size", &map->size, sizeof(map->size)); 2569 err |= fdt_end_node(fdt); 2570 } 2571 err |= fdt_end_node(fdt); 2572 2573 err |= fdt_finish(fdt); 2574 2575 if (err) { 2576 pr_err("failed to prepare memblock FDT for KHO: %d\n", err); 2577 put_page(kho_fdt);
2578 kho_fdt = NULL; 2579 } 2580 2581 return err; 2582 } 2583 2584 static int __init reserve_mem_init(void) 2585 { 2586 int err; 2587 2588 if (!kho_is_enabled() || !reserved_mem_count) 2589 return 0; 2590 2591 err = prepare_kho_fdt(); 2592 if (err) 2593 return err; 2594 2595 err = register_kho_notifier(&reserve_mem_kho_nb); 2596 if (err) { 2597 put_page(kho_fdt); 2598 kho_fdt = NULL; 2599 } 2600 2601 return err; 2602 } 2603 late_initcall(reserve_mem_init); 2604 2605 static void *__init reserve_mem_kho_retrieve_fdt(void) 2606 { 2607 phys_addr_t fdt_phys; 2608 static void *fdt; 2609 int err; 2610 2611 if (fdt) 2612 return fdt; 2613 2614 err = kho_retrieve_subtree(MEMBLOCK_KHO_FDT, &fdt_phys); 2615 if (err) { 2616 if (err != -ENOENT) 2617 pr_warn("failed to retrieve FDT '%s' from KHO: %d\n", 2618 MEMBLOCK_KHO_FDT, err); 2619 return NULL; 2620 } 2621 2622 fdt = phys_to_virt(fdt_phys); 2623 2624 err = fdt_node_check_compatible(fdt, 0, MEMBLOCK_KHO_NODE_COMPATIBLE); 2625 if (err) { 2626 pr_warn("FDT '%s' is incompatible with '%s': %d\n", 2627 MEMBLOCK_KHO_FDT, MEMBLOCK_KHO_NODE_COMPATIBLE, err); 2628 fdt = NULL; 2629 } 2630 2631 return fdt; 2632 } 2633 2634 static bool __init reserve_mem_kho_revive(const char *name, phys_addr_t size, 2635 phys_addr_t align) 2636 { 2637 int err, len_start, len_size, offset; 2638 const phys_addr_t *p_start, *p_size; 2639 const void *fdt; 2640 2641 fdt = reserve_mem_kho_retrieve_fdt(); 2642 if (!fdt) 2643 return false; 2644 2645 offset = fdt_subnode_offset(fdt, 0, name); 2646 if (offset < 0) { 2647 pr_warn("FDT '%s' has no child '%s': %d\n", 2648 MEMBLOCK_KHO_FDT, name, offset); 2649 return false; 2650 } 2651 err = fdt_node_check_compatible(fdt, offset, RESERVE_MEM_KHO_NODE_COMPATIBLE); 2652 if (err) { 2653 pr_warn("Node '%s' is incompatible with '%s': %d\n", 2654 name, RESERVE_MEM_KHO_NODE_COMPATIBLE, err); 2655 return false; 2656 } 2657 2658 p_start = fdt_getprop(fdt, offset, "start", &len_start); 2659 p_size = fdt_getprop(fdt, offset, "size", &len_size); 2660 if (!p_start || len_start != sizeof(*p_start) || !p_size || 2661 len_size != sizeof(*p_size)) { 2662 return false; 2663 } 2664 2665 if (*p_start & (align - 1)) { 2666 pr_warn("KHO reserve-mem '%s' has wrong alignment (0x%lx, 0x%lx)\n", 2667 name, (long)align, (long)*p_start); 2668 return false; 2669 } 2670 2671 if (*p_size != size) { 2672 pr_warn("KHO reserve-mem '%s' has wrong size (0x%lx != 0x%lx)\n", 2673 name, (long)*p_size, (long)size); 2674 return false; 2675 } 2676 2677 reserved_mem_add(*p_start, size, name); 2678 pr_info("Revived memory reservation '%s' from KHO\n", name); 2679 2680 return true; 2681 } 2682 #else 2683 static bool __init reserve_mem_kho_revive(const char *name, phys_addr_t size, 2684 phys_addr_t align) 2685 { 2686 return false; 2687 } 2688 #endif /* CONFIG_KEXEC_HANDOVER */ 2689 2690 /* 2691 * Parse reserve_mem=nn:align:name 2692 */ 2693 static int __init reserve_mem(char *p) 2694 { 2695 phys_addr_t start, size, align, tmp; 2696 char *name; 2697 char *oldp; 2698 int len; 2699 2700 if (!p) 2701 return -EINVAL; 2702 2703 /* Check if there's room for more reserved memory */ 2704 if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES) 2705 return -EBUSY; 2706 2707 oldp = p; 2708 size = memparse(p, &p); 2709 if (!size || p == oldp) 2710 return -EINVAL; 2711 2712 if (*p != ':') 2713 return -EINVAL; 2714 2715 align = memparse(p+1, &p); 2716 if (*p != ':') 2717 return -EINVAL; 2718 2719 /* 2720 * memblock_phys_alloc() doesn't like a zero size align, 2721 * but it is OK for this command to have 
it. 2722 */ 2723 if (align < SMP_CACHE_BYTES) 2724 align = SMP_CACHE_BYTES; 2725 2726 name = p + 1; 2727 len = strlen(name); 2728 2729 /* name needs to have length but not too big */ 2730 if (!len || len >= RESERVE_MEM_NAME_SIZE) 2731 return -EINVAL; 2732 2733 /* Make sure that name has text */ 2734 for (p = name; *p; p++) { 2735 if (!isspace(*p)) 2736 break; 2737 } 2738 if (!*p) 2739 return -EINVAL; 2740 2741 /* Make sure the name is not already used */ 2742 if (reserve_mem_find_by_name(name, &start, &tmp)) 2743 return -EBUSY; 2744 2745 /* Pick previous allocations up from KHO if available */ 2746 if (reserve_mem_kho_revive(name, size, align)) 2747 return 1; 2748 2749 /* TODO: Allocation must be outside of scratch region */ 2750 start = memblock_phys_alloc(size, align); 2751 if (!start) 2752 return -ENOMEM; 2753 2754 reserved_mem_add(start, size, name); 2755 2756 return 1; 2757 } 2758 __setup("reserve_mem=", reserve_mem); 2759 2760 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK) 2761 static const char * const flagname[] = { 2762 [ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG", 2763 [ilog2(MEMBLOCK_MIRROR)] = "MIRROR", 2764 [ilog2(MEMBLOCK_NOMAP)] = "NOMAP", 2765 [ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG", 2766 [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT", 2767 [ilog2(MEMBLOCK_RSRV_KERN)] = "RSV_KERN", 2768 [ilog2(MEMBLOCK_KHO_SCRATCH)] = "KHO_SCRATCH", 2769 }; 2770 2771 static int memblock_debug_show(struct seq_file *m, void *private) 2772 { 2773 struct memblock_type *type = m->private; 2774 struct memblock_region *reg; 2775 int i, j, nid; 2776 unsigned int count = ARRAY_SIZE(flagname); 2777 phys_addr_t end; 2778 2779 for (i = 0; i < type->cnt; i++) { 2780 reg = &type->regions[i]; 2781 end = reg->base + reg->size - 1; 2782 nid = memblock_get_region_node(reg); 2783 2784 seq_printf(m, "%4d: ", i); 2785 seq_printf(m, "%pa..%pa ", &reg->base, &end); 2786 if (numa_valid_node(nid)) 2787 seq_printf(m, "%4d ", nid); 2788 else 2789 seq_printf(m, "%4c ", 'x'); 2790 if (reg->flags) { 2791 for (j = 0; j < count; j++) { 2792 if (reg->flags & (1U << j)) { 2793 seq_printf(m, "%s\n", flagname[j]); 2794 break; 2795 } 2796 } 2797 if (j == count) 2798 seq_printf(m, "%s\n", "UNKNOWN"); 2799 } else { 2800 seq_printf(m, "%s\n", "NONE"); 2801 } 2802 } 2803 return 0; 2804 } 2805 DEFINE_SHOW_ATTRIBUTE(memblock_debug); 2806 2807 static int __init memblock_init_debugfs(void) 2808 { 2809 struct dentry *root = debugfs_create_dir("memblock", NULL); 2810 2811 debugfs_create_file("memory", 0444, root, 2812 &memblock.memory, &memblock_debug_fops); 2813 debugfs_create_file("reserved", 0444, root, 2814 &memblock.reserved, &memblock_debug_fops); 2815 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 2816 debugfs_create_file("physmem", 0444, root, &physmem, 2817 &memblock_debug_fops); 2818 #endif 2819 2820 return 0; 2821 } 2822 __initcall(memblock_init_debugfs); 2823 2824 #endif /* CONFIG_DEBUG_FS */ 2825
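/*
 * Example (illustrative; the addresses below are made up): with
 * CONFIG_DEBUG_FS and CONFIG_ARCH_KEEP_MEMBLOCK enabled, the files created
 * above can be read at runtime:
 *
 *	# cat /sys/kernel/debug/memblock/memory
 *	   0: 0x0000000080000000..0x00000000ffffffff    0 NONE
 *
 * Each line shows the region index, the inclusive physical range, the node id
 * (or 'x' when no node is set) and the first matching flag name (or NONE), as
 * produced by memblock_debug_show() above.
 */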