1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Procedures for maintaining information about logical memory blocks.
4 *
5 * Peter Bergner, IBM Corp. June 2001.
6 * Copyright (C) 2001 Peter Bergner.
7 */
8
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/init.h>
12 #include <linux/bitops.h>
13 #include <linux/poison.h>
14 #include <linux/pfn.h>
15 #include <linux/debugfs.h>
16 #include <linux/kmemleak.h>
17 #include <linux/seq_file.h>
18 #include <linux/memblock.h>
19 #include <linux/mutex.h>
20 #include <linux/string_helpers.h>
21
22 #ifdef CONFIG_KEXEC_HANDOVER
23 #include <linux/libfdt.h>
24 #include <linux/kexec_handover.h>
25 #include <linux/kho/abi/memblock.h>
26 #endif /* CONFIG_KEXEC_HANDOVER */
27
28 #include <asm/sections.h>
29 #include <linux/io.h>
30
31 #include "internal.h"
32
33 #define INIT_MEMBLOCK_REGIONS 128
34 #define INIT_PHYSMEM_REGIONS 4
35
36 #ifndef INIT_MEMBLOCK_RESERVED_REGIONS
37 # define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS
38 #endif
39
40 #ifndef INIT_MEMBLOCK_MEMORY_REGIONS
41 #define INIT_MEMBLOCK_MEMORY_REGIONS INIT_MEMBLOCK_REGIONS
42 #endif
43
44 /**
45 * DOC: memblock overview
46 *
47 * Memblock is a method of managing memory regions during the early
48 * boot period when the usual kernel memory allocators are not up and
49 * running.
50 *
51 * Memblock views the system memory as collections of contiguous
52 * regions. There are several types of these collections:
53 *
54 * * ``memory`` - describes the physical memory available to the
55 * kernel; this may differ from the actual physical memory installed
56 * in the system, for instance when the memory is restricted with
57 * ``mem=`` command line parameter
58 * * ``reserved`` - describes the regions that were allocated
59 * * ``physmem`` - describes the actual physical memory available during
60 * boot regardless of the possible restrictions and memory hot(un)plug;
61 * the ``physmem`` type is only available on some architectures.
62 *
63 * Each region is represented by struct memblock_region that
64 * defines the region extents, its attributes and NUMA node id on NUMA
65 * systems. Every memory type is described by the struct memblock_type
66 * which contains an array of memory regions along with
67 * the allocator metadata. The "memory" and "reserved" types are nicely
68 * wrapped with struct memblock. This structure is statically
69 * initialized at build time. The region arrays are initially sized to
70 * %INIT_MEMBLOCK_MEMORY_REGIONS for "memory" and
71 * %INIT_MEMBLOCK_RESERVED_REGIONS for "reserved". The region array
72 * for "physmem" is initially sized to %INIT_PHYSMEM_REGIONS.
73 * The memblock_allow_resize() enables automatic resizing of the region
74 * arrays during addition of new regions. This feature should be used
75 * with care so that memory allocated for the region array will not
76 * overlap with areas that should be reserved, for example initrd.
77 *
78 * The early architecture setup should tell memblock what the physical
79 * memory layout is by using memblock_add() or memblock_add_node()
80 * functions. The first function does not assign the region to a NUMA
81 * node and it is appropriate for UMA systems. Yet, it is possible to
82 * use it on NUMA systems as well and assign the region to a NUMA node
83 * later in the setup process using memblock_set_node(). The
84 * memblock_add_node() performs such an assignment directly.
85 *
 * Once memblock is set up, the memory can be allocated using one of the
 * API variants:
88 *
89 * * memblock_phys_alloc*() - these functions return the **physical**
90 * address of the allocated memory
91 * * memblock_alloc*() - these functions return the **virtual** address
92 * of the allocated memory.
93 *
94 * Note, that both API variants use implicit assumptions about allowed
95 * memory ranges and the fallback methods. Consult the documentation
96 * of memblock_alloc_internal() and memblock_alloc_range_nid()
97 * functions for more elaborate description.
98 *
99 * As the system boot progresses, the architecture specific mem_init()
100 * function frees all the memory to the buddy page allocator.
101 *
102 * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the
103 * memblock data structures (except "physmem") will be discarded after the
104 * system initialization completes.
105 */
106
/*
 * Defined only when the kernel is built without CONFIG_NUMA and exported
 * with EXPORT_SYMBOL().
 * NOTE(review): presumably the one pglist_data that describes all memory on
 * non-NUMA builds - confirm against its users outside this file.
 */
#ifndef CONFIG_NUMA
struct pglist_data __refdata contig_page_data;
EXPORT_SYMBOL(contig_page_data);
#endif
111
/*
 * Global page frame number limits. This file only provides their storage;
 * nothing in the visible part of it assigns them.
 * NOTE(review): presumably initialised by architecture setup code - confirm.
 */
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
unsigned long long max_possible_pfn;
116
#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
/* When set to true, only allocate from MEMBLOCK_KHO_SCRATCH ranges */
static bool kho_scratch_only;
#else
/*
 * Without CONFIG_MEMBLOCK_KHO_SCRATCH the name is a constant, so every test
 * on it (see choose_memblock_flags()) is false at compile time.
 */
#define kho_scratch_only false
#endif
123
/*
 * Statically allocated region arrays that back the "memory" and "reserved"
 * types until memblock_double_array() replaces them with a larger array.
 */
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
/* Initial array for "physmem"; unlike the two above it is not __initdata_memblock */
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
#endif
129
/*
 * The global memblock state. Both types start out pointing at their static
 * init arrays with the matching capacity; bottom_up starts off and
 * current_limit starts as MEMBLOCK_ALLOC_ANYWHERE.
 */
struct memblock memblock __initdata_memblock = {
	.memory.regions		= memblock_memory_init_regions,
	.memory.max		= INIT_MEMBLOCK_MEMORY_REGIONS,
	.memory.name		= "memory",

	.reserved.regions	= memblock_reserved_init_regions,
	.reserved.max		= INIT_MEMBLOCK_RESERVED_REGIONS,
	.reserved.name		= "reserved",

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};
142
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
/*
 * The "physmem" type. It is a standalone memblock_type rather than a member
 * of struct memblock, and it is not placed in __initdata_memblock.
 */
struct memblock_type physmem = {
	.regions		= memblock_physmem_init_regions,
	.max			= INIT_PHYSMEM_REGIONS,
	.name			= "physmem",
};
#endif
150
/*
 * Keep a pointer to &memblock.memory that stays valid to reference from
 * __next_mem_range() and its helpers.
 * For architectures that do not keep memblock data after init (that is,
 * without CONFIG_ARCH_KEEP_MEMBLOCK), memblock_discard() resets this
 * pointer to NULL.
 */
static __refdata struct memblock_type *memblock_memory = &memblock.memory;
158
/*
 * for_each_memblock_type - walk every region of a memblock_type
 * @i: integer lvalue used as the loop index
 * @memblock_type: pointer expression yielding the struct memblock_type to walk
 * @rgn: struct memblock_region pointer lvalue, set to the current region
 *
 * Every parameter is parenthesized so that non-trivial argument expressions
 * (for example "&memblock.reserved") expand correctly. @memblock_type is
 * evaluated more than once, so it must not have side effects.
 */
#define for_each_memblock_type(i, memblock_type, rgn)			\
	for ((i) = 0, (rgn) = &(memblock_type)->regions[0];		\
	     (i) < (memblock_type)->cnt;				\
	     (i)++, (rgn) = &(memblock_type)->regions[(i)])
163
/*
 * memblock_dbg - print a message with pr_info() only while memblock_debug
 * is non-zero. Wrapped in do { } while (0) so it behaves as one statement.
 */
#define memblock_dbg(fmt, ...)						\
	do {								\
		if (memblock_debug)					\
			pr_info(fmt, ##__VA_ARGS__);			\
	} while (0)
169
/* Non-zero enables the memblock_dbg() messages */
static int memblock_debug __initdata_memblock;
/* Returned by memblock_has_mirror(); makes choose_memblock_flags() pick MEMBLOCK_MIRROR */
static bool system_has_some_mirror __initdata_memblock;
/* memblock_double_array() panics while this is zero */
static int memblock_can_resize __initdata_memblock;
/*
 * Set by memblock_double_array() to record whether the current "memory" /
 * "reserved" regions array was obtained from kmalloc() and therefore has to
 * be released with kfree().
 */
static int memblock_memory_in_slab __initdata_memblock;
static int memblock_reserved_in_slab __initdata_memblock;
175
memblock_has_mirror(void)176 bool __init_memblock memblock_has_mirror(void)
177 {
178 return system_has_some_mirror;
179 }
180
choose_memblock_flags(void)181 static enum memblock_flags __init_memblock choose_memblock_flags(void)
182 {
183 /* skip non-scratch memory for kho early boot allocations */
184 if (kho_scratch_only)
185 return MEMBLOCK_KHO_SCRATCH;
186
187 return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
188 }
189
190 /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
memblock_cap_size(phys_addr_t base,phys_addr_t * size)191 static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
192 {
193 return *size = min(*size, PHYS_ADDR_MAX - base);
194 }
195
196 /*
197 * Address comparison utilities
198 */
199 unsigned long __init_memblock
memblock_addrs_overlap(phys_addr_t base1,phys_addr_t size1,phys_addr_t base2,phys_addr_t size2)200 memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2,
201 phys_addr_t size2)
202 {
203 return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
204 }
205
memblock_overlaps_region(struct memblock_type * type,phys_addr_t base,phys_addr_t size)206 bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
207 phys_addr_t base, phys_addr_t size)
208 {
209 unsigned long i;
210
211 memblock_cap_size(base, &size);
212
213 for (i = 0; i < type->cnt; i++)
214 if (memblock_addrs_overlap(base, size, type->regions[i].base,
215 type->regions[i].size))
216 return true;
217 return false;
218 }
219
220 /**
221 * __memblock_find_range_bottom_up - find free area utility in bottom-up
222 * @start: start of candidate range
223 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
224 * %MEMBLOCK_ALLOC_ACCESSIBLE
225 * @size: size of free area to find
226 * @align: alignment of free area to find
227 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
228 * @flags: pick from blocks based on memory attributes
229 *
230 * Utility called from memblock_find_in_range_node(), find free area bottom-up.
231 *
232 * Return:
233 * Found address on success, 0 on failure.
234 */
235 static phys_addr_t __init_memblock
__memblock_find_range_bottom_up(phys_addr_t start,phys_addr_t end,phys_addr_t size,phys_addr_t align,int nid,enum memblock_flags flags)236 __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
237 phys_addr_t size, phys_addr_t align, int nid,
238 enum memblock_flags flags)
239 {
240 phys_addr_t this_start, this_end, cand;
241 u64 i;
242
243 for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) {
244 this_start = clamp(this_start, start, end);
245 this_end = clamp(this_end, start, end);
246
247 cand = round_up(this_start, align);
248 if (cand < this_end && this_end - cand >= size)
249 return cand;
250 }
251
252 return 0;
253 }
254
255 /**
256 * __memblock_find_range_top_down - find free area utility, in top-down
257 * @start: start of candidate range
258 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
259 * %MEMBLOCK_ALLOC_ACCESSIBLE
260 * @size: size of free area to find
261 * @align: alignment of free area to find
262 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
263 * @flags: pick from blocks based on memory attributes
264 *
265 * Utility called from memblock_find_in_range_node(), find free area top-down.
266 *
267 * Return:
268 * Found address on success, 0 on failure.
269 */
270 static phys_addr_t __init_memblock
__memblock_find_range_top_down(phys_addr_t start,phys_addr_t end,phys_addr_t size,phys_addr_t align,int nid,enum memblock_flags flags)271 __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
272 phys_addr_t size, phys_addr_t align, int nid,
273 enum memblock_flags flags)
274 {
275 phys_addr_t this_start, this_end, cand;
276 u64 i;
277
278 for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end,
279 NULL) {
280 this_start = clamp(this_start, start, end);
281 this_end = clamp(this_end, start, end);
282
283 if (this_end < size)
284 continue;
285
286 cand = round_down(this_end - size, align);
287 if (cand >= this_start)
288 return cand;
289 }
290
291 return 0;
292 }
293
294 /**
295 * memblock_find_in_range_node - find free area in given range and node
296 * @size: size of free area to find
297 * @align: alignment of free area to find
298 * @start: start of candidate range
299 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
300 * %MEMBLOCK_ALLOC_ACCESSIBLE
301 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
302 * @flags: pick from blocks based on memory attributes
303 *
304 * Find @size free area aligned to @align in the specified range and node.
305 *
306 * Return:
307 * Found address on success, 0 on failure.
308 */
memblock_find_in_range_node(phys_addr_t size,phys_addr_t align,phys_addr_t start,phys_addr_t end,int nid,enum memblock_flags flags)309 static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
310 phys_addr_t align, phys_addr_t start,
311 phys_addr_t end, int nid,
312 enum memblock_flags flags)
313 {
314 /* pump up @end */
315 if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
316 end == MEMBLOCK_ALLOC_NOLEAKTRACE)
317 end = memblock.current_limit;
318
319 /* avoid allocating the first page */
320 start = max_t(phys_addr_t, start, PAGE_SIZE);
321 end = max(start, end);
322
323 if (memblock_bottom_up())
324 return __memblock_find_range_bottom_up(start, end, size, align,
325 nid, flags);
326 else
327 return __memblock_find_range_top_down(start, end, size, align,
328 nid, flags);
329 }
330
331 /**
332 * memblock_find_in_range - find free area in given range
333 * @start: start of candidate range
334 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
335 * %MEMBLOCK_ALLOC_ACCESSIBLE
336 * @size: size of free area to find
337 * @align: alignment of free area to find
338 *
339 * Find @size free area aligned to @align in the specified range.
340 *
341 * Return:
342 * Found address on success, 0 on failure.
343 */
memblock_find_in_range(phys_addr_t start,phys_addr_t end,phys_addr_t size,phys_addr_t align)344 static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
345 phys_addr_t end, phys_addr_t size,
346 phys_addr_t align)
347 {
348 phys_addr_t ret;
349 enum memblock_flags flags = choose_memblock_flags();
350
351 again:
352 ret = memblock_find_in_range_node(size, align, start, end,
353 NUMA_NO_NODE, flags);
354
355 if (!ret && (flags & MEMBLOCK_MIRROR)) {
356 pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
357 &size);
358 flags &= ~MEMBLOCK_MIRROR;
359 goto again;
360 }
361
362 return ret;
363 }
364
memblock_remove_region(struct memblock_type * type,unsigned long r)365 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
366 {
367 type->total_size -= type->regions[r].size;
368 memmove(&type->regions[r], &type->regions[r + 1],
369 (type->cnt - (r + 1)) * sizeof(type->regions[r]));
370 type->cnt--;
371
372 /* Special case for empty arrays */
373 if (type->cnt == 0) {
374 WARN_ON(type->total_size != 0);
375 type->regions[0].base = 0;
376 type->regions[0].size = 0;
377 type->regions[0].flags = 0;
378 memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
379 }
380 }
381
382 #ifndef CONFIG_ARCH_KEEP_MEMBLOCK
383 /**
384 * memblock_discard - discard memory and reserved arrays if they were allocated
385 */
memblock_discard(void)386 void __init memblock_discard(void)
387 {
388 phys_addr_t size;
389 void *addr;
390
391 if (memblock.reserved.regions != memblock_reserved_init_regions) {
392 addr = memblock.reserved.regions;
393 size = PAGE_ALIGN(sizeof(struct memblock_region) *
394 memblock.reserved.max);
395 if (memblock_reserved_in_slab)
396 kfree(addr);
397 else
398 memblock_free(addr, size);
399 }
400
401 if (memblock.memory.regions != memblock_memory_init_regions) {
402 addr = memblock.memory.regions;
403 size = PAGE_ALIGN(sizeof(struct memblock_region) *
404 memblock.memory.max);
405 if (memblock_memory_in_slab)
406 kfree(addr);
407 else
408 memblock_free(addr, size);
409 }
410
411 memblock_memory = NULL;
412 }
413 #endif
414
415 /**
416 * memblock_double_array - double the size of the memblock regions array
417 * @type: memblock type of the regions array being doubled
418 * @new_area_start: starting address of memory range to avoid overlap with
419 * @new_area_size: size of memory range to avoid overlap with
420 *
421 * Double the size of the @type regions array. If memblock is being used to
422 * allocate memory for a new reserved regions array and there is a previously
423 * allocated memory range [@new_area_start, @new_area_start + @new_area_size]
424 * waiting to be reserved, ensure the memory used by the new array does
425 * not overlap.
426 *
427 * Return:
428 * 0 on success, -1 on failure.
429 */
memblock_double_array(struct memblock_type * type,phys_addr_t new_area_start,phys_addr_t new_area_size)430 static int __init_memblock memblock_double_array(struct memblock_type *type,
431 phys_addr_t new_area_start,
432 phys_addr_t new_area_size)
433 {
434 struct memblock_region *new_array, *old_array;
435 phys_addr_t old_alloc_size, new_alloc_size;
436 phys_addr_t old_size, new_size, addr, new_end;
437 int use_slab = slab_is_available();
438 int *in_slab;
439
440 /* We don't allow resizing until we know about the reserved regions
441 * of memory that aren't suitable for allocation
442 */
443 if (!memblock_can_resize)
444 panic("memblock: cannot resize %s array\n", type->name);
445
446 /* Calculate new doubled size */
447 old_size = type->max * sizeof(struct memblock_region);
448 new_size = old_size << 1;
449 /*
450 * We need to allocated new one align to PAGE_SIZE,
451 * so we can free them completely later.
452 */
453 old_alloc_size = PAGE_ALIGN(old_size);
454 new_alloc_size = PAGE_ALIGN(new_size);
455
456 /* Retrieve the slab flag */
457 if (type == &memblock.memory)
458 in_slab = &memblock_memory_in_slab;
459 else
460 in_slab = &memblock_reserved_in_slab;
461
462 /* Try to find some space for it */
463 if (use_slab) {
464 new_array = kmalloc(new_size, GFP_KERNEL);
465 addr = new_array ? __pa(new_array) : 0;
466 } else {
467 /* only exclude range when trying to double reserved.regions */
468 if (type != &memblock.reserved)
469 new_area_start = new_area_size = 0;
470
471 addr = memblock_find_in_range(new_area_start + new_area_size,
472 memblock.current_limit,
473 new_alloc_size, PAGE_SIZE);
474 if (!addr && new_area_size)
475 addr = memblock_find_in_range(0,
476 min(new_area_start, memblock.current_limit),
477 new_alloc_size, PAGE_SIZE);
478
479 if (addr) {
480 /* The memory may not have been accepted, yet. */
481 accept_memory(addr, new_alloc_size);
482
483 new_array = __va(addr);
484 } else {
485 new_array = NULL;
486 }
487 }
488 if (!addr) {
489 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
490 type->name, type->max, type->max * 2);
491 return -1;
492 }
493
494 new_end = addr + new_size - 1;
495 memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]",
496 type->name, type->max * 2, &addr, &new_end);
497
498 /*
499 * Found space, we now need to move the array over before we add the
500 * reserved region since it may be our reserved array itself that is
501 * full.
502 */
503 memcpy(new_array, type->regions, old_size);
504 memset(new_array + type->max, 0, old_size);
505 old_array = type->regions;
506 type->regions = new_array;
507 type->max <<= 1;
508
509 /* Free old array. We needn't free it if the array is the static one */
510 if (*in_slab)
511 kfree(old_array);
512 else if (old_array != memblock_memory_init_regions &&
513 old_array != memblock_reserved_init_regions)
514 memblock_free(old_array, old_alloc_size);
515
516 /*
517 * Reserve the new array if that comes from the memblock. Otherwise, we
518 * needn't do it
519 */
520 if (!use_slab)
521 BUG_ON(memblock_reserve_kern(addr, new_alloc_size));
522
523 /* Update slab flag */
524 *in_slab = use_slab;
525
526 return 0;
527 }
528
529 /**
530 * memblock_merge_regions - merge neighboring compatible regions
531 * @type: memblock type to scan
532 * @start_rgn: start scanning from (@start_rgn - 1)
533 * @end_rgn: end scanning at (@end_rgn - 1)
534 * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn)
535 */
memblock_merge_regions(struct memblock_type * type,unsigned long start_rgn,unsigned long end_rgn)536 static void __init_memblock memblock_merge_regions(struct memblock_type *type,
537 unsigned long start_rgn,
538 unsigned long end_rgn)
539 {
540 int i = 0;
541 if (start_rgn)
542 i = start_rgn - 1;
543 end_rgn = min(end_rgn, type->cnt - 1);
544 while (i < end_rgn) {
545 struct memblock_region *this = &type->regions[i];
546 struct memblock_region *next = &type->regions[i + 1];
547
548 if (this->base + this->size != next->base ||
549 memblock_get_region_node(this) !=
550 memblock_get_region_node(next) ||
551 this->flags != next->flags) {
552 BUG_ON(this->base + this->size > next->base);
553 i++;
554 continue;
555 }
556
557 this->size += next->size;
558 /* move forward from next + 1, index of which is i + 2 */
559 memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next));
560 type->cnt--;
561 end_rgn--;
562 }
563 }
564
565 /**
566 * memblock_insert_region - insert new memblock region
567 * @type: memblock type to insert into
568 * @idx: index for the insertion point
569 * @base: base address of the new region
570 * @size: size of the new region
571 * @nid: node id of the new region
572 * @flags: flags of the new region
573 *
574 * Insert new memblock region [@base, @base + @size) into @type at @idx.
575 * @type must already have extra room to accommodate the new region.
576 */
memblock_insert_region(struct memblock_type * type,int idx,phys_addr_t base,phys_addr_t size,int nid,enum memblock_flags flags)577 static void __init_memblock memblock_insert_region(struct memblock_type *type,
578 int idx, phys_addr_t base,
579 phys_addr_t size,
580 int nid,
581 enum memblock_flags flags)
582 {
583 struct memblock_region *rgn = &type->regions[idx];
584
585 BUG_ON(type->cnt >= type->max);
586 memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
587 rgn->base = base;
588 rgn->size = size;
589 rgn->flags = flags;
590 memblock_set_region_node(rgn, nid);
591 type->cnt++;
592 type->total_size += size;
593 }
594
/**
 * memblock_add_range - add new memblock region
 * @type: memblock type to add new region into
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base, @base + @size) into @type.  The new region
 * is allowed to overlap with existing ones - overlaps don't affect already
 * existing regions.  @type is guaranteed to be minimal (all neighbouring
 * compatible regions are merged) after the addition.
 *
 * @size is capped so that @base + @size does not wrap; a zero (or fully
 * capped) size is a successful no-op, as is a range that is already
 * completely covered by existing regions.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_add_range(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size,
				int nid, enum memblock_flags flags)
{
	bool insert = false;
	phys_addr_t obase = base;
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx, nr_new, start_rgn = -1, end_rgn;
	struct memblock_region *rgn;

	if (!size)
		return 0;

	/* special case for empty array */
	if (type->regions[0].size == 0) {
		WARN_ON(type->cnt != 0 || type->total_size);
		type->regions[0].base = base;
		type->regions[0].size = size;
		type->regions[0].flags = flags;
		memblock_set_region_node(&type->regions[0], nid);
		type->total_size = size;
		type->cnt = 1;
		return 0;
	}

	/*
	 * The worst case is when new range overlaps all existing regions,
	 * then we'll need type->cnt + 1 empty regions in @type. So if
	 * type->cnt * 2 + 1 is less than or equal to type->max, we know
	 * that there is enough empty regions in @type, and we can insert
	 * regions directly.
	 */
	if (type->cnt * 2 + 1 <= type->max)
		insert = true;

repeat:
	/*
	 * The following is executed at most twice.  When @insert starts out
	 * %false, the first pass only counts the number of regions needed
	 * to accommodate the new area and the second pass, with @insert
	 * %true, actually inserts them.  When the check above already set
	 * @insert, the single pass counts and inserts at once.
	 */
	base = obase;
	nr_new = 0;

	for_each_memblock_type(idx, type, rgn) {
		/*
		 * Cache the extents: once a region is inserted in front of
		 * it, @rgn no longer points at this entry.
		 */
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps.  If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
#ifdef CONFIG_NUMA
			WARN_ON(nid != memblock_get_region_node(rgn));
#endif
			WARN_ON(flags != MEMBLOCK_NONE && flags != rgn->flags);
			nr_new++;
			if (insert) {
				/* remember the span of new entries for the merge below */
				if (start_rgn == -1)
					start_rgn = idx;
				end_rgn = idx + 1;
				/*
				 * The insertion moves the current region to
				 * idx + 1; the post-increment makes @idx
				 * follow it so the loop step goes on to the
				 * region after it.
				 */
				memblock_insert_region(type, idx++, base,
						       rbase - base, nid,
						       flags);
			}
		}
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
	}

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert) {
			if (start_rgn == -1)
				start_rgn = idx;
			end_rgn = idx + 1;
			memblock_insert_region(type, idx, base, end - base,
					       nid, flags);
		}
	}

	/* the new range is already fully covered, nothing to add */
	if (!nr_new)
		return 0;

	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
	 */
	if (!insert) {
		while (type->cnt + nr_new > type->max)
			if (memblock_double_array(type, obase, size) < 0)
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type, start_rgn, end_rgn);
		return 0;
	}
}
716
717 /**
718 * memblock_add_node - add new memblock region within a NUMA node
719 * @base: base address of the new region
720 * @size: size of the new region
721 * @nid: nid of the new region
722 * @flags: flags of the new region
723 *
724 * Add new memblock region [@base, @base + @size) to the "memory"
725 * type. See memblock_add_range() description for mode details
726 *
727 * Return:
728 * 0 on success, -errno on failure.
729 */
memblock_add_node(phys_addr_t base,phys_addr_t size,int nid,enum memblock_flags flags)730 int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
731 int nid, enum memblock_flags flags)
732 {
733 phys_addr_t end = base + size - 1;
734
735 memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
736 &base, &end, nid, flags, (void *)_RET_IP_);
737
738 return memblock_add_range(&memblock.memory, base, size, nid, flags);
739 }
740
741 /**
742 * memblock_add - add new memblock region
743 * @base: base address of the new region
744 * @size: size of the new region
745 *
746 * Add new memblock region [@base, @base + @size) to the "memory"
747 * type. See memblock_add_range() description for mode details
748 *
749 * Return:
750 * 0 on success, -errno on failure.
751 */
memblock_add(phys_addr_t base,phys_addr_t size)752 int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
753 {
754 phys_addr_t end = base + size - 1;
755
756 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
757 &base, &end, (void *)_RET_IP_);
758
759 return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
760 }
761
762 /**
763 * memblock_validate_numa_coverage - check if amount of memory with
764 * no node ID assigned is less than a threshold
765 * @threshold_bytes: maximal memory size that can have unassigned node
766 * ID (in bytes).
767 *
768 * A buggy firmware may report memory that does not belong to any node.
769 * Check if amount of such memory is below @threshold_bytes.
770 *
771 * Return: true on success, false on failure.
772 */
memblock_validate_numa_coverage(unsigned long threshold_bytes)773 bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_bytes)
774 {
775 unsigned long nr_pages = 0;
776 unsigned long start_pfn, end_pfn, mem_size_mb;
777 int nid, i;
778
779 /* calculate lost page */
780 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
781 if (!numa_valid_node(nid))
782 nr_pages += end_pfn - start_pfn;
783 }
784
785 if ((nr_pages << PAGE_SHIFT) > threshold_bytes) {
786 mem_size_mb = memblock_phys_mem_size() / SZ_1M;
787 pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n",
788 (nr_pages << PAGE_SHIFT) / SZ_1M, mem_size_mb);
789 return false;
790 }
791
792 return true;
793 }
794
795
/**
 * memblock_isolate_range - isolate given range into disjoint memblocks
 * @type: memblock type to isolate range for
 * @base: base of range to isolate
 * @size: size of range to isolate
 * @start_rgn: out parameter for the start of isolated region
 * @end_rgn: out parameter for the end of isolated region
 *
 * Walk @type and ensure that regions don't cross the boundaries defined by
 * [@base, @base + @size).  Crossing regions are split at the boundaries,
 * which may create at most two more regions.  The index of the first
 * region inside the range is returned in *@start_rgn and the index of the
 * first region after the range is returned in *@end_rgn.
 *
 * Both outputs are 0 when @size is zero or no region lies inside the range.
 * The regions array is doubled up front if fewer than two free entries
 * remain.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_isolate_range(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size,
					int *start_rgn, int *end_rgn)
{
	/* @size is capped first so that @end cannot wrap around */
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx;
	struct memblock_region *rgn;

	*start_rgn = *end_rgn = 0;

	if (!size)
		return 0;

	/* we'll create at most two more regions */
	while (type->cnt + 2 > type->max)
		if (memblock_double_array(type, base, size) < 0)
			return -ENOMEM;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;

		if (rbase < base) {
			/*
			 * @rgn intersects from below.  Split and continue
			 * to process the next region - the new top half.
			 *
			 * total_size is reduced by the split-off part here
			 * because memblock_insert_region() adds it back.
			 */
			rgn->base = base;
			rgn->size -= base - rbase;
			type->total_size -= base - rbase;
			memblock_insert_region(type, idx, rbase, base - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else if (rend > end) {
			/*
			 * @rgn intersects from above.  Split and redo the
			 * current region - the new bottom half.
			 *
			 * The post-decrement of @idx undoes the loop step,
			 * so the next iteration looks at the inserted
			 * bottom half, which now sits at this index.
			 */
			rgn->base = end;
			rgn->size -= end - rbase;
			type->total_size -= end - rbase;
			memblock_insert_region(type, idx--, rbase, end - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else {
			/* @rgn is fully contained, record it */
			if (!*end_rgn)
				*start_rgn = idx;
			*end_rgn = idx + 1;
		}
	}

	return 0;
}
872
memblock_remove_range(struct memblock_type * type,phys_addr_t base,phys_addr_t size)873 static int __init_memblock memblock_remove_range(struct memblock_type *type,
874 phys_addr_t base, phys_addr_t size)
875 {
876 int start_rgn, end_rgn;
877 int i, ret;
878
879 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
880 if (ret)
881 return ret;
882
883 for (i = end_rgn - 1; i >= start_rgn; i--)
884 memblock_remove_region(type, i);
885 return 0;
886 }
887
memblock_remove(phys_addr_t base,phys_addr_t size)888 int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
889 {
890 phys_addr_t end = base + size - 1;
891
892 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
893 &base, &end, (void *)_RET_IP_);
894
895 return memblock_remove_range(&memblock.memory, base, size);
896 }
897
/*
 * Free the pages that for_each_valid_pfn() yields between PFN_UP(@start)
 * and PFN_DOWN(@end) with free_reserved_page(). Each page is first filled
 * with @poison when (unsigned int)@poison <= 0xFF; any other value,
 * including negative ones, skips the fill.
 *
 * Nothing is freed, and a warning is raised, while deferred_pages_enabled()
 * is true.
 *
 * Returns the number of pages freed.
 */
static unsigned long __free_reserved_area(phys_addr_t start, phys_addr_t end,
					  int poison)
{
	unsigned long nr_freed = 0, pfn;

	if (deferred_pages_enabled()) {
		WARN(1, "Cannot free reserved memory because of deferred initialization of the memory map");
		return 0;
	}

	for_each_valid_pfn(pfn, PFN_UP(start), PFN_DOWN(end)) {
		struct page *page = pfn_to_page(pfn);
		/*
		 * The address is derived from the page rather than from a
		 * kernel virtual address because some architectures use
		 * aliases; going via pfn_to_page() and page_address()
		 * yields a _writeable_ alias for the memset().
		 * The KASAN tag is reset so that the memset() is unchecked:
		 * this memory has not been initialized.
		 */
		void *va = kasan_reset_tag(page_address(page));

		if ((unsigned int)poison <= 0xFF)
			memset(va, poison, PAGE_SIZE);

		free_reserved_page(page);
		nr_freed++;
	}

	return nr_freed;
}
932
free_reserved_area(void * start,void * end,int poison,const char * s)933 unsigned long free_reserved_area(void *start, void *end, int poison, const char *s)
934 {
935 phys_addr_t start_pa, end_pa;
936 unsigned long pages;
937
938 /*
939 * end is the first address past the region and it may be beyond what
940 * __pa() or __pa_symbol() can handle.
941 * Use the address included in the range for the conversion and add back
942 * 1 afterwards.
943 */
944 if (__is_kernel((unsigned long)start)) {
945 start_pa = __pa_symbol(start);
946 end_pa = __pa_symbol(end - 1) + 1;
947 } else {
948 start_pa = __pa(start);
949 end_pa = __pa(end - 1) + 1;
950 }
951
952 if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
953 if (start_pa < end_pa)
954 memblock_remove_range(&memblock.reserved,
955 start_pa, end_pa - start_pa);
956 }
957
958 pages = __free_reserved_area(start_pa, end_pa, poison);
959 if (pages && s)
960 pr_info("Freeing %s memory: %ldK\n", s, K(pages));
961
962 return pages;
963 }
964
965 /**
966 * memblock_free - free boot memory allocation
967 * @ptr: starting address of the boot memory allocation
968 * @size: size of the boot memory block in bytes
969 *
970 * Free boot memory block previously allocated by memblock_alloc_xx() API.
971 * If called after the buddy allocator is available, the memory is released to
972 * the buddy allocator.
973 */
memblock_free(void * ptr,size_t size)974 void __init_memblock memblock_free(void *ptr, size_t size)
975 {
976 if (ptr)
977 memblock_phys_free(__pa(ptr), size);
978 }
979
980 /**
981 * memblock_phys_free - free boot memory block
982 * @base: phys starting address of the boot memory block
983 * @size: size of the boot memory block in bytes
984 *
985 * Free boot memory block previously allocated by memblock_phys_alloc_xx() API.
986 * If called after the buddy allocator is available, the memory is released to
987 * the buddy allocator.
988 */
memblock_phys_free(phys_addr_t base,phys_addr_t size)989 int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size)
990 {
991 phys_addr_t end = base + size - 1;
992 int ret = 0;
993
994 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
995 &base, &end, (void *)_RET_IP_);
996
997 kmemleak_free_part_phys(base, size);
998
999 if (!slab_is_available() || IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
1000 ret = memblock_remove_range(&memblock.reserved, base, size);
1001
1002 if (slab_is_available())
1003 __free_reserved_area(base, base + size, -1);
1004
1005 return ret;
1006 }
1007
__memblock_reserve(phys_addr_t base,phys_addr_t size,int nid,enum memblock_flags flags)1008 int __init_memblock __memblock_reserve(phys_addr_t base, phys_addr_t size,
1009 int nid, enum memblock_flags flags)
1010 {
1011 phys_addr_t end = base + size - 1;
1012
1013 memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
1014 &base, &end, nid, flags, (void *)_RET_IP_);
1015
1016 return memblock_add_range(&memblock.reserved, base, size, nid, flags);
1017 }
1018
1019 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
memblock_physmem_add(phys_addr_t base,phys_addr_t size)1020 int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size)
1021 {
1022 phys_addr_t end = base + size - 1;
1023
1024 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
1025 &base, &end, (void *)_RET_IP_);
1026
1027 return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0);
1028 }
1029 #endif
1030
1031 #ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
memblock_set_kho_scratch_only(void)1032 __init void memblock_set_kho_scratch_only(void)
1033 {
1034 kho_scratch_only = true;
1035 }
1036
memblock_clear_kho_scratch_only(void)1037 __init void memblock_clear_kho_scratch_only(void)
1038 {
1039 kho_scratch_only = false;
1040 }
1041
memmap_init_kho_scratch_pages(void)1042 __init void memmap_init_kho_scratch_pages(void)
1043 {
1044 phys_addr_t start, end;
1045 unsigned long pfn;
1046 int nid;
1047 u64 i;
1048
1049 if (!IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT))
1050 return;
1051
1052 /*
1053 * Initialize struct pages for free scratch memory.
1054 * The struct pages for reserved scratch memory will be set up in
1055 * memmap_init_reserved_pages()
1056 */
1057 __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
1058 MEMBLOCK_KHO_SCRATCH, &start, &end, &nid) {
1059 for (pfn = PFN_UP(start); pfn < PFN_DOWN(end); pfn++)
1060 init_deferred_page(pfn, nid);
1061 }
1062 }
1063 #endif
1064
1065 /**
1066 * memblock_setclr_flag - set or clear flag for a memory region
1067 * @type: memblock type to set/clear flag for
1068 * @base: base address of the region
1069 * @size: size of the region
1070 * @set: set or clear the flag
1071 * @flag: the flag to update
1072 *
1073 * This function isolates region [@base, @base + @size), and sets/clears flag
1074 *
1075 * Return: 0 on success, -errno on failure.
1076 */
memblock_setclr_flag(struct memblock_type * type,phys_addr_t base,phys_addr_t size,int set,int flag)1077 static int __init_memblock memblock_setclr_flag(struct memblock_type *type,
1078 phys_addr_t base, phys_addr_t size, int set, int flag)
1079 {
1080 int i, ret, start_rgn, end_rgn;
1081
1082 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
1083 if (ret)
1084 return ret;
1085
1086 for (i = start_rgn; i < end_rgn; i++) {
1087 struct memblock_region *r = &type->regions[i];
1088
1089 if (set)
1090 r->flags |= flag;
1091 else
1092 r->flags &= ~flag;
1093 }
1094
1095 memblock_merge_regions(type, start_rgn, end_rgn);
1096 return 0;
1097 }
1098
1099 /**
1100 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
1101 * @base: the base phys addr of the region
1102 * @size: the size of the region
1103 *
1104 * Return: 0 on success, -errno on failure.
1105 */
memblock_mark_hotplug(phys_addr_t base,phys_addr_t size)1106 int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
1107 {
1108 return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_HOTPLUG);
1109 }
1110
1111 /**
1112 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
1113 * @base: the base phys addr of the region
1114 * @size: the size of the region
1115 *
1116 * Return: 0 on success, -errno on failure.
1117 */
memblock_clear_hotplug(phys_addr_t base,phys_addr_t size)1118 int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
1119 {
1120 return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_HOTPLUG);
1121 }
1122
1123 /**
1124 * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR.
1125 * @base: the base phys addr of the region
1126 * @size: the size of the region
1127 *
1128 * Return: 0 on success, -errno on failure.
1129 */
memblock_mark_mirror(phys_addr_t base,phys_addr_t size)1130 int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
1131 {
1132 if (!mirrored_kernelcore)
1133 return 0;
1134
1135 system_has_some_mirror = true;
1136
1137 return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_MIRROR);
1138 }
1139
1140 /**
1141 * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
1142 * @base: the base phys addr of the region
1143 * @size: the size of the region
1144 *
1145 * The memory regions marked with %MEMBLOCK_NOMAP will not be added to the
1146 * direct mapping of the physical memory. These regions will still be
1147 * covered by the memory map. The struct page representing NOMAP memory
1148 * frames in the memory map will be PageReserved()
1149 *
1150 * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from
1151 * memblock, the caller must inform kmemleak to ignore that memory
1152 *
1153 * Return: 0 on success, -errno on failure.
1154 */
memblock_mark_nomap(phys_addr_t base,phys_addr_t size)1155 int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
1156 {
1157 return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_NOMAP);
1158 }
1159
1160 /**
1161 * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
1162 * @base: the base phys addr of the region
1163 * @size: the size of the region
1164 *
1165 * Return: 0 on success, -errno on failure.
1166 */
memblock_clear_nomap(phys_addr_t base,phys_addr_t size)1167 int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
1168 {
1169 return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP);
1170 }
1171
1172 /**
1173 * memblock_reserved_mark_noinit - Mark a reserved memory region with flag
1174 * MEMBLOCK_RSRV_NOINIT
1175 *
1176 * @base: the base phys addr of the region
1177 * @size: the size of the region
1178 *
1179 * The struct pages for the reserved regions marked %MEMBLOCK_RSRV_NOINIT will
1180 * not be fully initialized to allow the caller optimize their initialization.
1181 *
1182 * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, setting this flag
1183 * completely bypasses the initialization of struct pages for such region.
1184 *
1185 * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is disabled, struct pages in this
1186 * region will be initialized with default values but won't be marked as
1187 * reserved.
1188 *
1189 * Return: 0 on success, -errno on failure.
1190 */
memblock_reserved_mark_noinit(phys_addr_t base,phys_addr_t size)1191 int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size)
1192 {
1193 return memblock_setclr_flag(&memblock.reserved, base, size, 1,
1194 MEMBLOCK_RSRV_NOINIT);
1195 }
1196
1197 /**
1198 * memblock_reserved_mark_kern - Mark a reserved memory region with flag
1199 * MEMBLOCK_RSRV_KERN
1200 *
1201 * @base: the base phys addr of the region
1202 * @size: the size of the region
1203 *
1204 * Return: 0 on success, -errno on failure.
1205 */
memblock_reserved_mark_kern(phys_addr_t base,phys_addr_t size)1206 int __init_memblock memblock_reserved_mark_kern(phys_addr_t base, phys_addr_t size)
1207 {
1208 return memblock_setclr_flag(&memblock.reserved, base, size, 1,
1209 MEMBLOCK_RSRV_KERN);
1210 }
1211
1212 /**
1213 * memblock_mark_kho_scratch - Mark a memory region as MEMBLOCK_KHO_SCRATCH.
1214 * @base: the base phys addr of the region
1215 * @size: the size of the region
1216 *
1217 * Only memory regions marked with %MEMBLOCK_KHO_SCRATCH will be considered
1218 * for allocations during early boot with kexec handover.
1219 *
1220 * Return: 0 on success, -errno on failure.
1221 */
memblock_mark_kho_scratch(phys_addr_t base,phys_addr_t size)1222 __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
1223 {
1224 return memblock_setclr_flag(&memblock.memory, base, size, 1,
1225 MEMBLOCK_KHO_SCRATCH);
1226 }
1227
1228 /**
1229 * memblock_clear_kho_scratch - Clear MEMBLOCK_KHO_SCRATCH flag for a
1230 * specified region.
1231 * @base: the base phys addr of the region
1232 * @size: the size of the region
1233 *
1234 * Return: 0 on success, -errno on failure.
1235 */
memblock_clear_kho_scratch(phys_addr_t base,phys_addr_t size)1236 __init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
1237 {
1238 return memblock_setclr_flag(&memblock.memory, base, size, 0,
1239 MEMBLOCK_KHO_SCRATCH);
1240 }
1241
should_skip_region(struct memblock_type * type,struct memblock_region * m,int nid,int flags)1242 static bool should_skip_region(struct memblock_type *type,
1243 struct memblock_region *m,
1244 int nid, int flags)
1245 {
1246 int m_nid = memblock_get_region_node(m);
1247
1248 /* we never skip regions when iterating memblock.reserved or physmem */
1249 if (type != memblock_memory)
1250 return false;
1251
1252 /* only memory regions are associated with nodes, check it */
1253 if (numa_valid_node(nid) && nid != m_nid)
1254 return true;
1255
1256 /* skip hotpluggable memory regions if needed */
1257 if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
1258 !(flags & MEMBLOCK_HOTPLUG))
1259 return true;
1260
1261 /* if we want mirror memory skip non-mirror memory regions */
1262 if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
1263 return true;
1264
1265 /* skip nomap memory unless we were asked for it explicitly */
1266 if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
1267 return true;
1268
1269 /* skip driver-managed memory unless we were asked for it explicitly */
1270 if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m))
1271 return true;
1272
1273 /*
1274 * In early alloc during kexec handover, we can only consider
1275 * MEMBLOCK_KHO_SCRATCH regions for the allocations
1276 */
1277 if ((flags & MEMBLOCK_KHO_SCRATCH) && !memblock_is_kho_scratch(m))
1278 return true;
1279
1280 return false;
1281 }
1282
1283 /**
1284 * __next_mem_range - next function for for_each_free_mem_range() etc.
1285 * @idx: pointer to u64 loop variable
1286 * @nid: node selector, %NUMA_NO_NODE for all nodes
1287 * @flags: pick from blocks based on memory attributes
1288 * @type_a: pointer to memblock_type from where the range is taken
1289 * @type_b: pointer to memblock_type which excludes memory from being taken
1290 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
1291 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
1292 * @out_nid: ptr to int for nid of the range, can be %NULL
1293 *
1294 * Find the first area from *@idx which matches @nid, fill the out
1295 * parameters, and update *@idx for the next iteration. The lower 32bit of
1296 * *@idx contains index into type_a and the upper 32bit indexes the
1297 * areas before each region in type_b. For example, if type_b regions
1298 * look like the following,
1299 *
1300 * 0:[0-16), 1:[32-48), 2:[128-130)
1301 *
1302 * The upper 32bit indexes the following regions.
1303 *
1304 * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
1305 *
1306 * As both region arrays are sorted, the function advances the two indices
1307 * in lockstep and returns each intersection.
1308 */
__next_mem_range(u64 * idx,int nid,enum memblock_flags flags,struct memblock_type * type_a,struct memblock_type * type_b,phys_addr_t * out_start,phys_addr_t * out_end,int * out_nid)1309 void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
1310 struct memblock_type *type_a,
1311 struct memblock_type *type_b, phys_addr_t *out_start,
1312 phys_addr_t *out_end, int *out_nid)
1313 {
1314 int idx_a = *idx & 0xffffffff;
1315 int idx_b = *idx >> 32;
1316
1317 for (; idx_a < type_a->cnt; idx_a++) {
1318 struct memblock_region *m = &type_a->regions[idx_a];
1319
1320 phys_addr_t m_start = m->base;
1321 phys_addr_t m_end = m->base + m->size;
1322 int m_nid = memblock_get_region_node(m);
1323
1324 if (should_skip_region(type_a, m, nid, flags))
1325 continue;
1326
1327 if (!type_b) {
1328 if (out_start)
1329 *out_start = m_start;
1330 if (out_end)
1331 *out_end = m_end;
1332 if (out_nid)
1333 *out_nid = m_nid;
1334 idx_a++;
1335 *idx = (u32)idx_a | (u64)idx_b << 32;
1336 return;
1337 }
1338
1339 /* scan areas before each reservation */
1340 for (; idx_b < type_b->cnt + 1; idx_b++) {
1341 struct memblock_region *r;
1342 phys_addr_t r_start;
1343 phys_addr_t r_end;
1344
1345 r = &type_b->regions[idx_b];
1346 r_start = idx_b ? r[-1].base + r[-1].size : 0;
1347 r_end = idx_b < type_b->cnt ?
1348 r->base : PHYS_ADDR_MAX;
1349
1350 /*
1351 * if idx_b advanced past idx_a,
1352 * break out to advance idx_a
1353 */
1354 if (r_start >= m_end)
1355 break;
1356 /* if the two regions intersect, we're done */
1357 if (m_start < r_end) {
1358 if (out_start)
1359 *out_start =
1360 max(m_start, r_start);
1361 if (out_end)
1362 *out_end = min(m_end, r_end);
1363 if (out_nid)
1364 *out_nid = m_nid;
1365 /*
1366 * The region which ends first is
1367 * advanced for the next iteration.
1368 */
1369 if (m_end <= r_end)
1370 idx_a++;
1371 else
1372 idx_b++;
1373 *idx = (u32)idx_a | (u64)idx_b << 32;
1374 return;
1375 }
1376 }
1377 }
1378
1379 /* signal end of iteration */
1380 *idx = ULLONG_MAX;
1381 }
1382
1383 /**
1384 * __next_mem_range_rev - generic next function for for_each_*_range_rev()
1385 *
1386 * @idx: pointer to u64 loop variable
1387 * @nid: node selector, %NUMA_NO_NODE for all nodes
1388 * @flags: pick from blocks based on memory attributes
1389 * @type_a: pointer to memblock_type from where the range is taken
1390 * @type_b: pointer to memblock_type which excludes memory from being taken
1391 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
1392 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
1393 * @out_nid: ptr to int for nid of the range, can be %NULL
1394 *
1395 * Finds the next range from type_a which is not marked as unsuitable
1396 * in type_b.
1397 *
1398 * Reverse of __next_mem_range().
1399 */
__next_mem_range_rev(u64 * idx,int nid,enum memblock_flags flags,struct memblock_type * type_a,struct memblock_type * type_b,phys_addr_t * out_start,phys_addr_t * out_end,int * out_nid)1400 void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
1401 enum memblock_flags flags,
1402 struct memblock_type *type_a,
1403 struct memblock_type *type_b,
1404 phys_addr_t *out_start,
1405 phys_addr_t *out_end, int *out_nid)
1406 {
1407 int idx_a = *idx & 0xffffffff;
1408 int idx_b = *idx >> 32;
1409
1410 if (*idx == (u64)ULLONG_MAX) {
1411 idx_a = type_a->cnt - 1;
1412 if (type_b != NULL)
1413 idx_b = type_b->cnt;
1414 else
1415 idx_b = 0;
1416 }
1417
1418 for (; idx_a >= 0; idx_a--) {
1419 struct memblock_region *m = &type_a->regions[idx_a];
1420
1421 phys_addr_t m_start = m->base;
1422 phys_addr_t m_end = m->base + m->size;
1423 int m_nid = memblock_get_region_node(m);
1424
1425 if (should_skip_region(type_a, m, nid, flags))
1426 continue;
1427
1428 if (!type_b) {
1429 if (out_start)
1430 *out_start = m_start;
1431 if (out_end)
1432 *out_end = m_end;
1433 if (out_nid)
1434 *out_nid = m_nid;
1435 idx_a--;
1436 *idx = (u32)idx_a | (u64)idx_b << 32;
1437 return;
1438 }
1439
1440 /* scan areas before each reservation */
1441 for (; idx_b >= 0; idx_b--) {
1442 struct memblock_region *r;
1443 phys_addr_t r_start;
1444 phys_addr_t r_end;
1445
1446 r = &type_b->regions[idx_b];
1447 r_start = idx_b ? r[-1].base + r[-1].size : 0;
1448 r_end = idx_b < type_b->cnt ?
1449 r->base : PHYS_ADDR_MAX;
1450 /*
1451 * if idx_b advanced past idx_a,
1452 * break out to advance idx_a
1453 */
1454
1455 if (r_end <= m_start)
1456 break;
1457 /* if the two regions intersect, we're done */
1458 if (m_end > r_start) {
1459 if (out_start)
1460 *out_start = max(m_start, r_start);
1461 if (out_end)
1462 *out_end = min(m_end, r_end);
1463 if (out_nid)
1464 *out_nid = m_nid;
1465 if (m_start >= r_start)
1466 idx_a--;
1467 else
1468 idx_b--;
1469 *idx = (u32)idx_a | (u64)idx_b << 32;
1470 return;
1471 }
1472 }
1473 }
1474 /* signal end of iteration */
1475 *idx = ULLONG_MAX;
1476 }
1477
1478 /*
1479 * Common iterator interface used to define for_each_mem_pfn_range().
1480 */
__next_mem_pfn_range(int * idx,int nid,unsigned long * out_start_pfn,unsigned long * out_end_pfn,int * out_nid)1481 void __init_memblock __next_mem_pfn_range(int *idx, int nid,
1482 unsigned long *out_start_pfn,
1483 unsigned long *out_end_pfn, int *out_nid)
1484 {
1485 struct memblock_type *type = &memblock.memory;
1486 struct memblock_region *r;
1487 int r_nid;
1488
1489 while (++*idx < type->cnt) {
1490 r = &type->regions[*idx];
1491 r_nid = memblock_get_region_node(r);
1492
1493 if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
1494 continue;
1495 if (!numa_valid_node(nid) || nid == r_nid)
1496 break;
1497 }
1498 if (*idx >= type->cnt) {
1499 *idx = -1;
1500 return;
1501 }
1502
1503 if (out_start_pfn)
1504 *out_start_pfn = PFN_UP(r->base);
1505 if (out_end_pfn)
1506 *out_end_pfn = PFN_DOWN(r->base + r->size);
1507 if (out_nid)
1508 *out_nid = r_nid;
1509 }
1510
1511 /**
1512 * memblock_set_node - set node ID on memblock regions
1513 * @base: base of area to set node ID for
1514 * @size: size of area to set node ID for
1515 * @type: memblock type to set node ID for
1516 * @nid: node ID to set
1517 *
1518 * Set the nid of memblock @type regions in [@base, @base + @size) to @nid.
1519 * Regions which cross the area boundaries are split as necessary.
1520 *
1521 * Return:
1522 * 0 on success, -errno on failure.
1523 */
memblock_set_node(phys_addr_t base,phys_addr_t size,struct memblock_type * type,int nid)1524 int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
1525 struct memblock_type *type, int nid)
1526 {
1527 #ifdef CONFIG_NUMA
1528 int start_rgn, end_rgn;
1529 int i, ret;
1530
1531 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
1532 if (ret)
1533 return ret;
1534
1535 for (i = start_rgn; i < end_rgn; i++)
1536 memblock_set_region_node(&type->regions[i], nid);
1537
1538 memblock_merge_regions(type, start_rgn, end_rgn);
1539 #endif
1540 return 0;
1541 }
1542
1543 /**
1544 * memblock_alloc_range_nid - allocate boot memory block
1545 * @size: size of memory block to be allocated in bytes
1546 * @align: alignment of the region and block's size
1547 * @start: the lower bound of the memory region to allocate (phys address)
1548 * @end: the upper bound of the memory region to allocate (phys address)
1549 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1550 * @exact_nid: control the allocation fall back to other nodes
1551 *
1552 * The allocation is performed from memory region limited by
1553 * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE.
1554 *
1555 * If the specified node can not hold the requested memory and @exact_nid
1556 * is false, the allocation falls back to any node in the system.
1557 *
1558 * For systems with memory mirroring, the allocation is attempted first
1559 * from the regions with mirroring enabled and then retried from any
1560 * memory region.
1561 *
1562 * In addition, function using kmemleak_alloc_phys for allocated boot
1563 * memory block, it is never reported as leaks.
1564 *
1565 * Return:
1566 * Physical address of allocated memory block on success, %0 on failure.
1567 */
memblock_alloc_range_nid(phys_addr_t size,phys_addr_t align,phys_addr_t start,phys_addr_t end,int nid,bool exact_nid)1568 phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
1569 phys_addr_t align, phys_addr_t start,
1570 phys_addr_t end, int nid,
1571 bool exact_nid)
1572 {
1573 enum memblock_flags flags = choose_memblock_flags();
1574 phys_addr_t found;
1575
1576 /*
1577 * Detect any accidental use of these APIs after slab is ready, as at
1578 * this moment memblock may be deinitialized already and its
1579 * internal data may be destroyed (after execution of memblock_free_all)
1580 */
1581 if (WARN_ON_ONCE(slab_is_available())) {
1582 void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);
1583
1584 return vaddr ? virt_to_phys(vaddr) : 0;
1585 }
1586
1587 if (!align) {
1588 /* Can't use WARNs this early in boot on powerpc */
1589 dump_stack();
1590 align = SMP_CACHE_BYTES;
1591 }
1592
1593 again:
1594 found = memblock_find_in_range_node(size, align, start, end, nid,
1595 flags);
1596 if (found && !__memblock_reserve(found, size, nid, MEMBLOCK_RSRV_KERN))
1597 goto done;
1598
1599 if (numa_valid_node(nid) && !exact_nid) {
1600 found = memblock_find_in_range_node(size, align, start,
1601 end, NUMA_NO_NODE,
1602 flags);
1603 if (found && !memblock_reserve_kern(found, size))
1604 goto done;
1605 }
1606
1607 if (flags & MEMBLOCK_MIRROR) {
1608 flags &= ~MEMBLOCK_MIRROR;
1609 pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
1610 &size);
1611 goto again;
1612 }
1613
1614 return 0;
1615
1616 done:
1617 /*
1618 * Skip kmemleak for those places like kasan_init() and
1619 * early_pgtable_alloc() due to high volume.
1620 */
1621 if (end != MEMBLOCK_ALLOC_NOLEAKTRACE)
1622 /*
1623 * Memblock allocated blocks are never reported as
1624 * leaks. This is because many of these blocks are
1625 * only referred via the physical address which is
1626 * not looked up by kmemleak.
1627 */
1628 kmemleak_alloc_phys(found, size, 0);
1629
1630 /*
1631 * Some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP,
1632 * require memory to be accepted before it can be used by the
1633 * guest.
1634 *
1635 * Accept the memory of the allocated buffer.
1636 */
1637 accept_memory(found, size);
1638
1639 return found;
1640 }
1641
1642 /**
1643 * memblock_phys_alloc_range - allocate a memory block inside specified range
1644 * @size: size of memory block to be allocated in bytes
1645 * @align: alignment of the region and block's size
1646 * @start: the lower bound of the memory region to allocate (physical address)
1647 * @end: the upper bound of the memory region to allocate (physical address)
1648 *
1649 * Allocate @size bytes in the between @start and @end.
1650 *
1651 * Return: physical address of the allocated memory block on success,
1652 * %0 on failure.
1653 */
memblock_phys_alloc_range(phys_addr_t size,phys_addr_t align,phys_addr_t start,phys_addr_t end)1654 phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
1655 phys_addr_t align,
1656 phys_addr_t start,
1657 phys_addr_t end)
1658 {
1659 memblock_dbg("%s: %llu bytes align=0x%llx from=%pa max_addr=%pa %pS\n",
1660 __func__, (u64)size, (u64)align, &start, &end,
1661 (void *)_RET_IP_);
1662 return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
1663 false);
1664 }
1665
1666 /**
1667 * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node
1668 * @size: size of memory block to be allocated in bytes
1669 * @align: alignment of the region and block's size
1670 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1671 *
1672 * Allocates memory block from the specified NUMA node. If the node
1673 * has no available memory, attempts to allocated from any node in the
1674 * system.
1675 *
1676 * Return: physical address of the allocated memory block on success,
1677 * %0 on failure.
1678 */
memblock_phys_alloc_try_nid(phys_addr_t size,phys_addr_t align,int nid)1679 phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
1680 {
1681 return memblock_alloc_range_nid(size, align, 0,
1682 MEMBLOCK_ALLOC_ACCESSIBLE, nid, false);
1683 }
1684
1685 /**
1686 * memblock_alloc_internal - allocate boot memory block
1687 * @size: size of memory block to be allocated in bytes
1688 * @align: alignment of the region and block's size
1689 * @min_addr: the lower bound of the memory region to allocate (phys address)
1690 * @max_addr: the upper bound of the memory region to allocate (phys address)
1691 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1692 * @exact_nid: control the allocation fall back to other nodes
1693 *
1694 * Allocates memory block using memblock_alloc_range_nid() and
1695 * converts the returned physical address to virtual.
1696 *
1697 * The @min_addr limit is dropped if it can not be satisfied and the allocation
1698 * will fall back to memory below @min_addr. Other constraints, such
1699 * as node and mirrored memory will be handled again in
1700 * memblock_alloc_range_nid().
1701 *
1702 * Return:
1703 * Virtual address of allocated memory block on success, NULL on failure.
1704 */
memblock_alloc_internal(phys_addr_t size,phys_addr_t align,phys_addr_t min_addr,phys_addr_t max_addr,int nid,bool exact_nid)1705 static void * __init memblock_alloc_internal(
1706 phys_addr_t size, phys_addr_t align,
1707 phys_addr_t min_addr, phys_addr_t max_addr,
1708 int nid, bool exact_nid)
1709 {
1710 phys_addr_t alloc;
1711
1712
1713 if (max_addr > memblock.current_limit)
1714 max_addr = memblock.current_limit;
1715
1716 alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
1717 exact_nid);
1718
1719 /* retry allocation without lower limit */
1720 if (!alloc && min_addr)
1721 alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
1722 exact_nid);
1723
1724 if (!alloc)
1725 return NULL;
1726
1727 return phys_to_virt(alloc);
1728 }
1729
1730 /**
1731 * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node
1732 * without zeroing memory
1733 * @size: size of memory block to be allocated in bytes
1734 * @align: alignment of the region and block's size
1735 * @min_addr: the lower bound of the memory region from where the allocation
1736 * is preferred (phys address)
1737 * @max_addr: the upper bound of the memory region from where the allocation
1738 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
1739 * allocate only from memory limited by memblock.current_limit value
1740 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1741 *
1742 * Public function, provides additional debug information (including caller
1743 * info), if enabled. Does not zero allocated memory.
1744 *
1745 * Return:
1746 * Virtual address of allocated memory block on success, NULL on failure.
1747 */
memblock_alloc_exact_nid_raw(phys_addr_t size,phys_addr_t align,phys_addr_t min_addr,phys_addr_t max_addr,int nid)1748 void * __init memblock_alloc_exact_nid_raw(
1749 phys_addr_t size, phys_addr_t align,
1750 phys_addr_t min_addr, phys_addr_t max_addr,
1751 int nid)
1752 {
1753 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
1754 __func__, (u64)size, (u64)align, nid, &min_addr,
1755 &max_addr, (void *)_RET_IP_);
1756
1757 return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
1758 true);
1759 }
1760
1761 /**
1762 * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
1763 * memory and without panicking
1764 * @size: size of memory block to be allocated in bytes
1765 * @align: alignment of the region and block's size
1766 * @min_addr: the lower bound of the memory region from where the allocation
1767 * is preferred (phys address)
1768 * @max_addr: the upper bound of the memory region from where the allocation
1769 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
1770 * allocate only from memory limited by memblock.current_limit value
1771 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1772 *
1773 * Public function, provides additional debug information (including caller
1774 * info), if enabled. Does not zero allocated memory, does not panic if request
1775 * cannot be satisfied.
1776 *
1777 * Return:
1778 * Virtual address of allocated memory block on success, NULL on failure.
1779 */
memblock_alloc_try_nid_raw(phys_addr_t size,phys_addr_t align,phys_addr_t min_addr,phys_addr_t max_addr,int nid)1780 void * __init memblock_alloc_try_nid_raw(
1781 phys_addr_t size, phys_addr_t align,
1782 phys_addr_t min_addr, phys_addr_t max_addr,
1783 int nid)
1784 {
1785 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
1786 __func__, (u64)size, (u64)align, nid, &min_addr,
1787 &max_addr, (void *)_RET_IP_);
1788
1789 return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
1790 false);
1791 }
1792
1793 /**
1794 * memblock_alloc_try_nid - allocate boot memory block
1795 * @size: size of memory block to be allocated in bytes
1796 * @align: alignment of the region and block's size
1797 * @min_addr: the lower bound of the memory region from where the allocation
1798 * is preferred (phys address)
1799 * @max_addr: the upper bound of the memory region from where the allocation
1800 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
1801 * allocate only from memory limited by memblock.current_limit value
1802 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1803 *
1804 * Public function, provides additional debug information (including caller
1805 * info), if enabled. This function zeroes the allocated memory.
1806 *
1807 * Return:
1808 * Virtual address of allocated memory block on success, NULL on failure.
1809 */
memblock_alloc_try_nid(phys_addr_t size,phys_addr_t align,phys_addr_t min_addr,phys_addr_t max_addr,int nid)1810 void * __init memblock_alloc_try_nid(
1811 phys_addr_t size, phys_addr_t align,
1812 phys_addr_t min_addr, phys_addr_t max_addr,
1813 int nid)
1814 {
1815 void *ptr;
1816
1817 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
1818 __func__, (u64)size, (u64)align, nid, &min_addr,
1819 &max_addr, (void *)_RET_IP_);
1820 ptr = memblock_alloc_internal(size, align,
1821 min_addr, max_addr, nid, false);
1822 if (ptr)
1823 memset(ptr, 0, size);
1824
1825 return ptr;
1826 }
1827
1828 /**
1829 * __memblock_alloc_or_panic - Try to allocate memory and panic on failure
1830 * @size: size of memory block to be allocated in bytes
1831 * @align: alignment of the region and block's size
1832 * @func: caller func name
1833 *
1834 * This function attempts to allocate memory using memblock_alloc,
1835 * and in case of failure, it calls panic with the formatted message.
1836 * This function should not be used directly, please use the macro memblock_alloc_or_panic.
1837 */
__memblock_alloc_or_panic(phys_addr_t size,phys_addr_t align,const char * func)1838 void *__init __memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align,
1839 const char *func)
1840 {
1841 void *addr = memblock_alloc(size, align);
1842
1843 if (unlikely(!addr))
1844 panic("%s: Failed to allocate %pap bytes\n", func, &size);
1845 return addr;
1846 }
1847
1848 /*
1849 * Remaining API functions
1850 */
1851
memblock_phys_mem_size(void)1852 phys_addr_t __init_memblock memblock_phys_mem_size(void)
1853 {
1854 return memblock.memory.total_size;
1855 }
1856
memblock_reserved_size(void)1857 phys_addr_t __init_memblock memblock_reserved_size(void)
1858 {
1859 return memblock.reserved.total_size;
1860 }
1861
memblock_reserved_kern_size(phys_addr_t limit,int nid)1862 phys_addr_t __init_memblock memblock_reserved_kern_size(phys_addr_t limit, int nid)
1863 {
1864 struct memblock_region *r;
1865 phys_addr_t total = 0;
1866
1867 for_each_reserved_mem_region(r) {
1868 phys_addr_t size = r->size;
1869
1870 if (r->base > limit)
1871 break;
1872
1873 if (r->base + r->size > limit)
1874 size = limit - r->base;
1875
1876 if (nid == memblock_get_region_node(r) || !numa_valid_node(nid))
1877 if (r->flags & MEMBLOCK_RSRV_KERN)
1878 total += size;
1879 }
1880
1881 return total;
1882 }
1883
1884 /**
1885 * memblock_estimated_nr_free_pages - return estimated number of free pages
1886 * from memblock point of view
1887 *
1888 * During bootup, subsystems might need a rough estimate of the number of free
1889 * pages in the whole system, before precise numbers are available from the
1890 * buddy. Especially with CONFIG_DEFERRED_STRUCT_PAGE_INIT, the numbers
1891 * obtained from the buddy might be very imprecise during bootup.
1892 *
1893 * Return:
1894 * An estimated number of free pages from memblock point of view.
1895 */
memblock_estimated_nr_free_pages(void)1896 unsigned long __init memblock_estimated_nr_free_pages(void)
1897 {
1898 return PHYS_PFN(memblock_phys_mem_size() -
1899 memblock_reserved_kern_size(MEMBLOCK_ALLOC_ANYWHERE, NUMA_NO_NODE));
1900 }
1901
1902 /* lowest address */
memblock_start_of_DRAM(void)1903 phys_addr_t __init_memblock memblock_start_of_DRAM(void)
1904 {
1905 return memblock.memory.regions[0].base;
1906 }
1907
memblock_end_of_DRAM(void)1908 phys_addr_t __init_memblock memblock_end_of_DRAM(void)
1909 {
1910 int idx = memblock.memory.cnt - 1;
1911
1912 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
1913 }
1914
__find_max_addr(phys_addr_t limit)1915 static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
1916 {
1917 phys_addr_t max_addr = PHYS_ADDR_MAX;
1918 struct memblock_region *r;
1919
1920 /*
1921 * translate the memory @limit size into the max address within one of
1922 * the memory memblock regions, if the @limit exceeds the total size
1923 * of those regions, max_addr will keep original value PHYS_ADDR_MAX
1924 */
1925 for_each_mem_region(r) {
1926 if (limit <= r->size) {
1927 max_addr = r->base + limit;
1928 break;
1929 }
1930 limit -= r->size;
1931 }
1932
1933 return max_addr;
1934 }
1935
memblock_enforce_memory_limit(phys_addr_t limit)1936 void __init memblock_enforce_memory_limit(phys_addr_t limit)
1937 {
1938 phys_addr_t max_addr;
1939
1940 if (!limit)
1941 return;
1942
1943 max_addr = __find_max_addr(limit);
1944
1945 /* @limit exceeds the total size of the memory, do nothing */
1946 if (max_addr == PHYS_ADDR_MAX)
1947 return;
1948
1949 /* truncate both memory and reserved regions */
1950 memblock_remove_range(&memblock.memory, max_addr,
1951 PHYS_ADDR_MAX);
1952 memblock_remove_range(&memblock.reserved, max_addr,
1953 PHYS_ADDR_MAX);
1954 }
1955
/*
 * Restrict memblock.memory to the window [@base, @base + @size).
 *
 * Memory regions outside the window are removed unless they are marked
 * nomap, and memblock.reserved is truncated to the same window.
 * Nothing is done when @size is zero, when no memory has been registered
 * yet, or when isolating the window fails.
 */
void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
{
	int start_rgn, end_rgn;
	int i, ret;

	if (!size)
		return;

	if (!memblock_memory->total_size) {
		pr_warn("%s: No memory registered yet\n", __func__);
		return;
	}

	/*
	 * NOTE(review): presumably splits regions at the window edges so that
	 * indexes [start_rgn, end_rgn) cover exactly the window -- confirm
	 * against memblock_isolate_range().
	 */
	ret = memblock_isolate_range(&memblock.memory, base, size,
						&start_rgn, &end_rgn);
	if (ret)
		return;

	/* remove all the MAP regions */
	/* above the window first, walking downwards through the indexes */
	for (i = memblock.memory.cnt - 1; i >= end_rgn; i--)
		if (!memblock_is_nomap(&memblock.memory.regions[i]))
			memblock_remove_region(&memblock.memory, i);

	/* then below the window, again from the highest index down */
	for (i = start_rgn - 1; i >= 0; i--)
		if (!memblock_is_nomap(&memblock.memory.regions[i]))
			memblock_remove_region(&memblock.memory, i);

	/* truncate the reserved regions */
	memblock_remove_range(&memblock.reserved, 0, base);
	memblock_remove_range(&memblock.reserved,
			base + size, PHYS_ADDR_MAX);
}
1988
memblock_mem_limit_remove_map(phys_addr_t limit)1989 void __init memblock_mem_limit_remove_map(phys_addr_t limit)
1990 {
1991 phys_addr_t max_addr;
1992
1993 if (!limit)
1994 return;
1995
1996 max_addr = __find_max_addr(limit);
1997
1998 /* @limit exceeds the total size of the memory, do nothing */
1999 if (max_addr == PHYS_ADDR_MAX)
2000 return;
2001
2002 memblock_cap_memory_range(0, max_addr);
2003 }
2004
memblock_search(struct memblock_type * type,phys_addr_t addr)2005 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
2006 {
2007 unsigned int left = 0, right = type->cnt;
2008
2009 do {
2010 unsigned int mid = (right + left) / 2;
2011
2012 if (addr < type->regions[mid].base)
2013 right = mid;
2014 else if (addr >= (type->regions[mid].base +
2015 type->regions[mid].size))
2016 left = mid + 1;
2017 else
2018 return mid;
2019 } while (left < right);
2020 return -1;
2021 }
2022
memblock_is_reserved(phys_addr_t addr)2023 bool __init_memblock memblock_is_reserved(phys_addr_t addr)
2024 {
2025 return memblock_search(&memblock.reserved, addr) != -1;
2026 }
2027
memblock_is_memory(phys_addr_t addr)2028 bool __init_memblock memblock_is_memory(phys_addr_t addr)
2029 {
2030 return memblock_search(&memblock.memory, addr) != -1;
2031 }
2032
memblock_is_map_memory(phys_addr_t addr)2033 bool __init_memblock memblock_is_map_memory(phys_addr_t addr)
2034 {
2035 int i = memblock_search(&memblock.memory, addr);
2036
2037 if (i == -1)
2038 return false;
2039 return !memblock_is_nomap(&memblock.memory.regions[i]);
2040 }
2041
memblock_search_pfn_nid(unsigned long pfn,unsigned long * start_pfn,unsigned long * end_pfn)2042 int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
2043 unsigned long *start_pfn, unsigned long *end_pfn)
2044 {
2045 struct memblock_type *type = &memblock.memory;
2046 int mid = memblock_search(type, PFN_PHYS(pfn));
2047
2048 if (mid == -1)
2049 return NUMA_NO_NODE;
2050
2051 *start_pfn = PFN_DOWN(type->regions[mid].base);
2052 *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size);
2053
2054 return memblock_get_region_node(&type->regions[mid]);
2055 }
2056
2057 /**
2058 * memblock_is_region_memory - check if a region is a subset of memory
2059 * @base: base of region to check
2060 * @size: size of region to check
2061 *
2062 * Check if the region [@base, @base + @size) is a subset of a memory block.
2063 *
2064 * Return:
2065 * 0 if false, non-zero if true
2066 */
memblock_is_region_memory(phys_addr_t base,phys_addr_t size)2067 bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
2068 {
2069 int idx = memblock_search(&memblock.memory, base);
2070 phys_addr_t end = base + memblock_cap_size(base, &size);
2071
2072 if (idx == -1)
2073 return false;
2074 return (memblock.memory.regions[idx].base +
2075 memblock.memory.regions[idx].size) >= end;
2076 }
2077
2078 /**
2079 * memblock_is_region_reserved - check if a region intersects reserved memory
2080 * @base: base of region to check
2081 * @size: size of region to check
2082 *
2083 * Check if the region [@base, @base + @size) intersects a reserved
2084 * memory block.
2085 *
2086 * Return:
2087 * True if they intersect, false if not.
2088 */
memblock_is_region_reserved(phys_addr_t base,phys_addr_t size)2089 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
2090 {
2091 return memblock_overlaps_region(&memblock.reserved, base, size);
2092 }
2093
/*
 * Shrink every memory region so that its base and end are multiples of
 * @align: the base is rounded up and the end rounded down. A region left
 * with no space between the rounded bounds is removed.
 */
void __init_memblock memblock_trim_memory(phys_addr_t align)
{
	phys_addr_t start, end, orig_start, orig_end;
	struct memblock_region *r;

	for_each_mem_region(r) {
		orig_start = r->base;
		orig_end = r->base + r->size;
		start = round_up(orig_start, align);
		end = round_down(orig_end, align);

		/* already aligned at both ends, leave it alone */
		if (start == orig_start && end == orig_end)
			continue;

		if (start < end) {
			r->base = start;
			r->size = end - start;
		} else {
			memblock_remove_region(&memblock.memory,
					       r - memblock.memory.regions);
			/*
			 * Step back before the iterator advances.
			 * NOTE(review): presumably the removal shifts the
			 * following entries down, so this revisits the slot
			 * that now holds the next region -- confirm against
			 * memblock_remove_region().
			 */
			r--;
		}
	}
}
2118
memblock_set_current_limit(phys_addr_t limit)2119 void __init_memblock memblock_set_current_limit(phys_addr_t limit)
2120 {
2121 memblock.current_limit = limit;
2122 }
2123
memblock_get_current_limit(void)2124 phys_addr_t __init_memblock memblock_get_current_limit(void)
2125 {
2126 return memblock.current_limit;
2127 }
2128
memblock_dump(struct memblock_type * type)2129 static void __init_memblock memblock_dump(struct memblock_type *type)
2130 {
2131 phys_addr_t base, end, size;
2132 enum memblock_flags flags;
2133 int idx;
2134 struct memblock_region *rgn;
2135
2136 pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt);
2137
2138 for_each_memblock_type(idx, type, rgn) {
2139 char nid_buf[32] = "";
2140
2141 base = rgn->base;
2142 size = rgn->size;
2143 end = base + size - 1;
2144 flags = rgn->flags;
2145 #ifdef CONFIG_NUMA
2146 if (numa_valid_node(memblock_get_region_node(rgn)))
2147 snprintf(nid_buf, sizeof(nid_buf), " on node %d",
2148 memblock_get_region_node(rgn));
2149 #endif
2150 pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n",
2151 type->name, idx, &base, &end, &size, nid_buf, flags);
2152 }
2153 }
2154
__memblock_dump_all(void)2155 static void __init_memblock __memblock_dump_all(void)
2156 {
2157 pr_info("MEMBLOCK configuration:\n");
2158 pr_info(" memory size = %pa reserved size = %pa\n",
2159 &memblock.memory.total_size,
2160 &memblock.reserved.total_size);
2161
2162 memblock_dump(&memblock.memory);
2163 memblock_dump(&memblock.reserved);
2164 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
2165 memblock_dump(&physmem);
2166 #endif
2167 }
2168
memblock_dump_all(void)2169 void __init_memblock memblock_dump_all(void)
2170 {
2171 if (memblock_debug)
2172 __memblock_dump_all();
2173 }
2174
memblock_allow_resize(void)2175 void __init memblock_allow_resize(void)
2176 {
2177 memblock_can_resize = 1;
2178 }
2179
early_memblock(char * p)2180 static int __init early_memblock(char *p)
2181 {
2182 if (p && strstr(p, "debug"))
2183 memblock_debug = 1;
2184 return 0;
2185 }
2186 early_param("memblock", early_memblock);
2187
free_memmap(unsigned long start_pfn,unsigned long end_pfn)2188 static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
2189 {
2190 struct page *start_pg, *end_pg;
2191 phys_addr_t pg, pgend;
2192
2193 /*
2194 * Convert start_pfn/end_pfn to a struct page pointer.
2195 */
2196 start_pg = pfn_to_page(start_pfn - 1) + 1;
2197 end_pg = pfn_to_page(end_pfn - 1) + 1;
2198
2199 /*
2200 * Convert to physical addresses, and round start upwards and end
2201 * downwards.
2202 */
2203 pg = PAGE_ALIGN(__pa(start_pg));
2204 pgend = PAGE_ALIGN_DOWN(__pa(end_pg));
2205
2206 /*
2207 * If there are free pages between these, free the section of the
2208 * memmap array.
2209 */
2210 if (pg < pgend)
2211 memblock_phys_free(pg, pgend - pg);
2212 }
2213
/*
 * The mem_map array can get very big. Free the unused area of the memory map.
 *
 * Walks the memory PFN ranges in order and frees the memmap that covers the
 * gaps between consecutive ranges, with range boundaries widened to
 * pageblock granularity.
 */
static void __init free_unused_memmap(void)
{
	unsigned long start, end, prev_end = 0;
	int i;

	/*
	 * Only done when CONFIG_HAVE_ARCH_PFN_VALID is enabled and
	 * CONFIG_SPARSEMEM_VMEMMAP is not.
	 */
	if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) ||
	    IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
		return;

	/*
	 * This relies on each bank being in address order.
	 * The banks are sorted previously in bootmem_init().
	 */
	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
#ifdef CONFIG_SPARSEMEM
		/*
		 * Take care not to free memmap entries that don't exist
		 * due to SPARSEMEM sections which aren't present.
		 */
		start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
#endif
		/*
		 * Align down here since many operations in VM subsystem
		 * presume that there are no holes in the memory map inside
		 * a pageblock
		 */
		start = pageblock_start_pfn(start);

		/*
		 * If we had a previous bank, and there is a space
		 * between the current bank and the previous, free it.
		 */
		if (prev_end && prev_end < start)
			free_memmap(prev_end, start);

		/*
		 * Align up here since many operations in VM subsystem
		 * presume that there are no holes in the memory map inside
		 * a pageblock
		 */
		prev_end = pageblock_align(end);
	}

#ifdef CONFIG_SPARSEMEM
	/*
	 * The last range did not end on a section boundary: free the memmap
	 * from its pageblock-aligned end up to the end of that section.
	 */
	if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) {
		prev_end = pageblock_align(end);
		free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION));
	}
#endif
}
2267
__free_pages_memory(unsigned long start,unsigned long end)2268 static void __init __free_pages_memory(unsigned long start, unsigned long end)
2269 {
2270 int order;
2271
2272 while (start < end) {
2273 /*
2274 * Free the pages in the largest chunks alignment allows.
2275 *
2276 * __ffs() behaviour is undefined for 0. start == 0 is
2277 * MAX_PAGE_ORDER-aligned, set order to MAX_PAGE_ORDER for
2278 * the case.
2279 */
2280 if (start)
2281 order = min_t(int, MAX_PAGE_ORDER, __ffs(start));
2282 else
2283 order = MAX_PAGE_ORDER;
2284
2285 while (start + (1UL << order) > end)
2286 order--;
2287
2288 memblock_free_pages(start, order);
2289
2290 start += (1UL << order);
2291 }
2292 }
2293
__free_memory_core(phys_addr_t start,phys_addr_t end)2294 static unsigned long __init __free_memory_core(phys_addr_t start,
2295 phys_addr_t end)
2296 {
2297 unsigned long start_pfn = PFN_UP(start);
2298 unsigned long end_pfn = PFN_DOWN(end);
2299
2300 if (!IS_ENABLED(CONFIG_HIGHMEM) && end_pfn > max_low_pfn)
2301 end_pfn = max_low_pfn;
2302
2303 if (start_pfn >= end_pfn)
2304 return 0;
2305
2306 __free_pages_memory(start_pfn, end_pfn);
2307
2308 return end_pfn - start_pfn;
2309 }
2310
2311 /*
2312 * Initialised pages do not have PageReserved set. This function is called
2313 * for each reserved range and marks the pages PageReserved.
2314 * When deferred initialization of struct pages is enabled it also ensures
2315 * that struct pages are properly initialised.
2316 */
memmap_init_reserved_range(phys_addr_t start,phys_addr_t end,int nid)2317 static void __init memmap_init_reserved_range(phys_addr_t start,
2318 phys_addr_t end, int nid)
2319 {
2320 unsigned long pfn;
2321
2322 for_each_valid_pfn(pfn, PFN_DOWN(start), PFN_UP(end)) {
2323 struct page *page = pfn_to_page(pfn);
2324
2325 init_deferred_page(pfn, nid);
2326
2327 /*
2328 * no need for atomic set_bit because the struct
2329 * page is not visible yet so nobody should
2330 * access it yet.
2331 */
2332 __SetPageReserved(page);
2333 }
2334 }
2335
/*
 * Prepare struct pages for memory that is not handed to the page allocator.
 *
 * First pass: walk memblock.memory, copy each region's node id onto the
 * overlapping part of memblock.reserved and mark the pages of nomap regions
 * PageReserved. Second pass: walk memblock.reserved and initialise the
 * struct pages of every region that is not flagged noinit.
 */
static void __init memmap_init_reserved_pages(void)
{
	struct memblock_region *region;
	phys_addr_t start, end;
	int nid;
	unsigned long max_reserved;

	/*
	 * set nid on all reserved pages and also treat struct
	 * pages for the NOMAP regions as PageReserved
	 */
repeat:
	/* remember the array capacity so a resize during the walk is noticed */
	max_reserved = memblock.reserved.max;
	for_each_mem_region(region) {
		nid = memblock_get_region_node(region);
		start = region->base;
		end = start + region->size;

		if (memblock_is_nomap(region))
			memmap_init_reserved_range(start, end, nid);

		memblock_set_node(start, region->size, &memblock.reserved, nid);
	}
	/*
	 * 'max' is changed means memblock.reserved has been doubled its
	 * array, which may result a new reserved region before current
	 * 'start'. Now we should repeat the procedure to set its node id.
	 */
	if (max_reserved != memblock.reserved.max)
		goto repeat;

	/*
	 * initialize struct pages for reserved regions that don't have
	 * the MEMBLOCK_RSRV_NOINIT flag set
	 */
	for_each_reserved_mem_region(region) {
		if (!memblock_is_reserved_noinit(region)) {
			nid = memblock_get_region_node(region);
			start = region->base;
			end = start + region->size;

			/* no valid node recorded: look it up from the PFN */
			if (!numa_valid_node(nid))
				nid = early_pfn_to_nid(PFN_DOWN(start));

			memmap_init_reserved_range(start, end, nid);
		}
	}
}
2384
free_low_memory_core_early(void)2385 static unsigned long __init free_low_memory_core_early(void)
2386 {
2387 unsigned long count = 0;
2388 phys_addr_t start, end;
2389 u64 i;
2390
2391 memblock_clear_hotplug(0, -1);
2392
2393 memmap_init_reserved_pages();
2394
2395 /*
2396 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
2397 * because in some case like Node0 doesn't have RAM installed
2398 * low ram will be on Node1
2399 */
2400 for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
2401 NULL)
2402 count += __free_memory_core(start, end);
2403
2404 return count;
2405 }
2406
2407 static int reset_managed_pages_done __initdata;
2408
reset_node_managed_pages(pg_data_t * pgdat)2409 static void __init reset_node_managed_pages(pg_data_t *pgdat)
2410 {
2411 struct zone *z;
2412
2413 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
2414 atomic_long_set(&z->managed_pages, 0);
2415 }
2416
reset_all_zones_managed_pages(void)2417 void __init reset_all_zones_managed_pages(void)
2418 {
2419 struct pglist_data *pgdat;
2420
2421 if (reset_managed_pages_done)
2422 return;
2423
2424 for_each_online_pgdat(pgdat)
2425 reset_node_managed_pages(pgdat);
2426
2427 reset_managed_pages_done = 1;
2428 }
2429
2430 /**
2431 * memblock_free_all - release free pages to the buddy allocator
2432 */
memblock_free_all(void)2433 void __init memblock_free_all(void)
2434 {
2435 unsigned long pages;
2436
2437 free_unused_memmap();
2438 reset_all_zones_managed_pages();
2439
2440 memblock_clear_kho_scratch_only();
2441 pages = free_low_memory_core_early();
2442 totalram_pages_add(pages);
2443 }
2444
/* Keep a table to reserve named memory */
/* capacity of reserved_mem_table[] */
#define RESERVE_MEM_MAX_ENTRIES		8
/* size of the name buffer, including the terminating NUL */
#define RESERVE_MEM_NAME_SIZE		16
struct reserve_mem_table {
	/* lookup name of the reservation */
	char			name[RESERVE_MEM_NAME_SIZE];
	/* physical start address of the reservation */
	phys_addr_t		start;
	/*
	 * size in bytes; reserve_mem_release_by_name() sets it to 0, and
	 * entries with a zero size are skipped by name lookups
	 */
	phys_addr_t		size;
};
static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES];
/* number of entries of reserved_mem_table[] in use */
static int reserved_mem_count;
/* held by the find, release and debugfs show paths while walking the table */
static DEFINE_MUTEX(reserve_mem_lock);
2456
2457 /* Add wildcard region with a lookup name */
reserved_mem_add(phys_addr_t start,phys_addr_t size,const char * name)2458 static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size,
2459 const char *name)
2460 {
2461 struct reserve_mem_table *map;
2462
2463 map = &reserved_mem_table[reserved_mem_count++];
2464 map->start = start;
2465 map->size = size;
2466 strscpy(map->name, name);
2467 }
2468
reserve_mem_find_by_name_nolock(const char * name)2469 static struct reserve_mem_table *reserve_mem_find_by_name_nolock(const char *name)
2470 {
2471 struct reserve_mem_table *map;
2472 int i;
2473
2474 for (i = 0; i < reserved_mem_count; i++) {
2475 map = &reserved_mem_table[i];
2476 if (!map->size)
2477 continue;
2478 if (strcmp(name, map->name) == 0)
2479 return map;
2480 }
2481 return NULL;
2482 }
2483
2484 /**
2485 * reserve_mem_find_by_name - Find reserved memory region with a given name
2486 * @name: The name that is attached to a reserved memory region
2487 * @start: If found, holds the start address
2488 * @size: If found, holds the size of the address.
2489 *
2490 * @start and @size are only updated if @name is found.
2491 *
2492 * Returns: 1 if found or 0 if not found.
2493 */
reserve_mem_find_by_name(const char * name,phys_addr_t * start,phys_addr_t * size)2494 int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
2495 {
2496 struct reserve_mem_table *map;
2497
2498 guard(mutex)(&reserve_mem_lock);
2499 map = reserve_mem_find_by_name_nolock(name);
2500 if (!map)
2501 return 0;
2502
2503 *start = map->start;
2504 *size = map->size;
2505 return 1;
2506 }
2507 EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);
2508
2509 /**
2510 * reserve_mem_release_by_name - Release reserved memory region with a given name
2511 * @name: The name that is attached to a reserved memory region
2512 *
2513 * Forcibly release the pages in the reserved memory region so that those memory
2514 * can be used as free memory. After released the reserved region size becomes 0.
2515 *
2516 * Returns: 1 if released or 0 if not found.
2517 */
reserve_mem_release_by_name(const char * name)2518 int reserve_mem_release_by_name(const char *name)
2519 {
2520 char buf[RESERVE_MEM_NAME_SIZE + 12];
2521 struct reserve_mem_table *map;
2522 void *start, *end;
2523
2524 guard(mutex)(&reserve_mem_lock);
2525 map = reserve_mem_find_by_name_nolock(name);
2526 if (!map)
2527 return 0;
2528
2529 start = phys_to_virt(map->start);
2530 end = start + map->size;
2531 snprintf(buf, sizeof(buf), "reserve_mem:%s", name);
2532 free_reserved_area(start, end, 0, buf);
2533 map->size = 0;
2534
2535 return 1;
2536 }
2537
2538 #ifdef CONFIG_KEXEC_HANDOVER
2539
reserved_mem_preserve(void)2540 static int __init reserved_mem_preserve(void)
2541 {
2542 unsigned int nr_preserved = 0;
2543 int err;
2544
2545 for (unsigned int i = 0; i < reserved_mem_count; i++, nr_preserved++) {
2546 struct reserve_mem_table *map = &reserved_mem_table[i];
2547 struct page *page = phys_to_page(map->start);
2548 unsigned int nr_pages = map->size >> PAGE_SHIFT;
2549
2550 err = kho_preserve_pages(page, nr_pages);
2551 if (err)
2552 goto err_unpreserve;
2553 }
2554
2555 return 0;
2556
2557 err_unpreserve:
2558 for (unsigned int i = 0; i < nr_preserved; i++) {
2559 struct reserve_mem_table *map = &reserved_mem_table[i];
2560 struct page *page = phys_to_page(map->start);
2561 unsigned int nr_pages = map->size >> PAGE_SHIFT;
2562
2563 kho_unpreserve_pages(page, nr_pages);
2564 }
2565
2566 return err;
2567 }
2568
/*
 * Build the FDT that describes the reserve_mem table and register it with
 * KHO.
 *
 * The FDT lives in one freshly allocated page that is itself preserved. It
 * has a root node carrying the memblock compatible string and one child node
 * per table entry, holding the entry's "start" and "size". After the FDT is
 * added as a KHO subtree, the reserved regions themselves are preserved.
 *
 * Returns 0 on success. On failure everything done so far is undone, the
 * failure is logged and a non-zero value is returned.
 */
static int __init prepare_kho_fdt(void)
{
	struct page *fdt_page;
	void *fdt;
	int err;

	fdt_page = alloc_page(GFP_KERNEL);
	if (!fdt_page) {
		err = -ENOMEM;
		goto err_report;
	}

	fdt = page_to_virt(fdt_page);
	err = kho_preserve_pages(fdt_page, 1);
	if (err)
		goto err_free_fdt;

	/*
	 * The libfdt return values are OR-ed together, so after this point
	 * err only tells success (0) from failure (non-zero); it is not a
	 * specific error code.
	 */
	err |= fdt_create(fdt, PAGE_SIZE);
	err |= fdt_finish_reservemap(fdt);
	err |= fdt_begin_node(fdt, "");
	err |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);

	/* one child node per table entry; stop at the first failure */
	for (unsigned int i = 0; !err && i < reserved_mem_count; i++) {
		struct reserve_mem_table *map = &reserved_mem_table[i];

		err |= fdt_begin_node(fdt, map->name);
		err |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
		err |= fdt_property(fdt, "start", &map->start, sizeof(map->start));
		err |= fdt_property(fdt, "size", &map->size, sizeof(map->size));
		err |= fdt_end_node(fdt);
	}
	err |= fdt_end_node(fdt);
	err |= fdt_finish(fdt);

	if (err)
		goto err_unpreserve_fdt;

	err = kho_add_subtree(MEMBLOCK_KHO_FDT, fdt, fdt_totalsize(fdt));
	if (err)
		goto err_unpreserve_fdt;

	err = reserved_mem_preserve();
	if (err)
		goto err_remove_subtree;

	return 0;

	/* unwind in reverse order of setup; each label falls through */
err_remove_subtree:
	kho_remove_subtree(fdt);
err_unpreserve_fdt:
	kho_unpreserve_pages(fdt_page, 1);
err_free_fdt:
	put_page(fdt_page);
err_report:
	pr_err("failed to prepare memblock FDT for KHO: %d\n", err);

	return err;
}
2627
reserve_mem_init(void)2628 static int __init reserve_mem_init(void)
2629 {
2630 int err;
2631
2632 if (!kho_is_enabled() || !reserved_mem_count)
2633 return 0;
2634
2635 err = prepare_kho_fdt();
2636 if (err)
2637 return err;
2638 return err;
2639 }
2640 late_initcall(reserve_mem_init);
2641
reserve_mem_kho_retrieve_fdt(void)2642 static void *__init reserve_mem_kho_retrieve_fdt(void)
2643 {
2644 phys_addr_t fdt_phys;
2645 static void *fdt;
2646 int err;
2647
2648 if (fdt)
2649 return fdt;
2650
2651 err = kho_retrieve_subtree(MEMBLOCK_KHO_FDT, &fdt_phys, NULL);
2652 if (err) {
2653 if (err != -ENOENT)
2654 pr_warn("failed to retrieve FDT '%s' from KHO: %d\n",
2655 MEMBLOCK_KHO_FDT, err);
2656 return NULL;
2657 }
2658
2659 fdt = phys_to_virt(fdt_phys);
2660
2661 err = fdt_node_check_compatible(fdt, 0, MEMBLOCK_KHO_NODE_COMPATIBLE);
2662 if (err) {
2663 pr_warn("FDT '%s' is incompatible with '%s': %d\n",
2664 MEMBLOCK_KHO_FDT, MEMBLOCK_KHO_NODE_COMPATIBLE, err);
2665 fdt = NULL;
2666 }
2667
2668 return fdt;
2669 }
2670
reserve_mem_kho_revive(const char * name,phys_addr_t size,phys_addr_t align)2671 static bool __init reserve_mem_kho_revive(const char *name, phys_addr_t size,
2672 phys_addr_t align)
2673 {
2674 int err, len_start, len_size, offset;
2675 const phys_addr_t *p_start, *p_size;
2676 const void *fdt;
2677
2678 fdt = reserve_mem_kho_retrieve_fdt();
2679 if (!fdt)
2680 return false;
2681
2682 offset = fdt_subnode_offset(fdt, 0, name);
2683 if (offset < 0) {
2684 pr_warn("FDT '%s' has no child '%s': %d\n",
2685 MEMBLOCK_KHO_FDT, name, offset);
2686 return false;
2687 }
2688 err = fdt_node_check_compatible(fdt, offset, RESERVE_MEM_KHO_NODE_COMPATIBLE);
2689 if (err) {
2690 pr_warn("Node '%s' is incompatible with '%s': %d\n",
2691 name, RESERVE_MEM_KHO_NODE_COMPATIBLE, err);
2692 return false;
2693 }
2694
2695 p_start = fdt_getprop(fdt, offset, "start", &len_start);
2696 p_size = fdt_getprop(fdt, offset, "size", &len_size);
2697 if (!p_start || len_start != sizeof(*p_start) || !p_size ||
2698 len_size != sizeof(*p_size)) {
2699 return false;
2700 }
2701
2702 if (*p_start & (align - 1)) {
2703 pr_warn("KHO reserve-mem '%s' has wrong alignment (0x%lx, 0x%lx)\n",
2704 name, (long)align, (long)*p_start);
2705 return false;
2706 }
2707
2708 if (*p_size != size) {
2709 pr_warn("KHO reserve-mem '%s' has wrong size (0x%lx != 0x%lx)\n",
2710 name, (long)*p_size, (long)size);
2711 return false;
2712 }
2713
2714 reserved_mem_add(*p_start, size, name);
2715 pr_info("Revived memory reservation '%s' from KHO\n", name);
2716
2717 return true;
2718 }
2719 #else
/* Without CONFIG_KEXEC_HANDOVER there is nothing to revive. */
static bool __init reserve_mem_kho_revive(const char *name, phys_addr_t size,
					  phys_addr_t align)
{
	return false;
}
2725 #endif /* CONFIG_KEXEC_HANDOVER */
2726
/*
 * Parse reserve_mem=nn:align:name
 *
 * nn is the size and align the alignment, both parsed with memparse(); name
 * is the lookup name of the reservation. The region is either revived from
 * KHO or freshly allocated from memblock, then recorded in the reserve_mem
 * table.
 *
 * Returns 1 on success, -EINVAL for an empty or malformed parameter, -EBUSY
 * when the table is full or the name is already in use, and -ENOMEM when the
 * allocation fails.
 */
static int __init reserve_mem(char *p)
{
	phys_addr_t start, size, align, tmp;
	char *name;
	char *oldp;
	int len;

	if (!p)
		goto err_param;

	/* Check if there's room for more reserved memory */
	if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES) {
		pr_err("reserve_mem: no more room for reserved memory\n");
		return -EBUSY;
	}

	/* size: must be non-zero and must have consumed some input */
	oldp = p;
	size = memparse(p, &p);
	if (!size || p == oldp)
		goto err_param;

	if (*p != ':')
		goto err_param;

	/* alignment: parsed from just after the first ':' */
	align = memparse(p+1, &p);
	if (*p != ':')
		goto err_param;

	/*
	 * memblock_phys_alloc() doesn't like a zero size align,
	 * but it is OK for this command to have it.
	 */
	if (align < SMP_CACHE_BYTES)
		align = SMP_CACHE_BYTES;

	/* the name is everything after the second ':' */
	name = p + 1;
	len = strlen(name);

	/* name needs to have length but not too big */
	if (!len || len >= RESERVE_MEM_NAME_SIZE)
		goto err_param;

	/* Make sure that name has text */
	for (p = name; *p; p++) {
		if (!isspace(*p))
			break;
	}
	/* reached the end without a non-space character */
	if (!*p)
		goto err_param;

	/* Make sure the name is not already used */
	if (reserve_mem_find_by_name(name, &start, &tmp)) {
		pr_err("reserve_mem: name \"%s\" was already used\n", name);
		return -EBUSY;
	}

	/* Pick previous allocations up from KHO if available */
	if (reserve_mem_kho_revive(name, size, align))
		return 1;

	/* TODO: Allocation must be outside of scratch region */
	start = memblock_phys_alloc(size, align);
	if (!start) {
		pr_err("reserve_mem: memblock allocation failed\n");
		return -ENOMEM;
	}

	reserved_mem_add(start, size, name);

	return 1;
err_param:
	pr_err("reserve_mem: empty or malformed parameter\n");
	return -EINVAL;
}
__setup("reserve_mem=", reserve_mem);
2805
2806 #ifdef CONFIG_DEBUG_FS
2807 #ifdef CONFIG_ARCH_KEEP_MEMBLOCK
/*
 * Printable names of the memblock region flags, indexed by the bit position
 * (ilog2) of each flag. Used by memblock_debug_show().
 */
static const char * const flagname[] = {
	[ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
	[ilog2(MEMBLOCK_MIRROR)] = "MIRROR",
	[ilog2(MEMBLOCK_NOMAP)] = "NOMAP",
	[ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG",
	[ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT",
	[ilog2(MEMBLOCK_RSRV_KERN)] = "RSV_KERN",
	[ilog2(MEMBLOCK_KHO_SCRATCH)] = "KHO_SCRATCH",
};
2817
memblock_debug_show(struct seq_file * m,void * private)2818 static int memblock_debug_show(struct seq_file *m, void *private)
2819 {
2820 struct memblock_type *type = m->private;
2821 struct memblock_region *reg;
2822 int i, j, nid;
2823 unsigned int count = ARRAY_SIZE(flagname);
2824 phys_addr_t end;
2825
2826 for (i = 0; i < type->cnt; i++) {
2827 reg = &type->regions[i];
2828 end = reg->base + reg->size - 1;
2829 nid = memblock_get_region_node(reg);
2830
2831 seq_printf(m, "%4d: ", i);
2832 seq_printf(m, "%pa..%pa ", ®->base, &end);
2833 if (numa_valid_node(nid))
2834 seq_printf(m, "%4d ", nid);
2835 else
2836 seq_printf(m, "%4c ", 'x');
2837 if (reg->flags) {
2838 for (j = 0; j < count; j++) {
2839 if (reg->flags & (1U << j)) {
2840 seq_printf(m, "%s\n", flagname[j]);
2841 break;
2842 }
2843 }
2844 if (j == count)
2845 seq_printf(m, "%s\n", "UNKNOWN");
2846 } else {
2847 seq_printf(m, "%s\n", "NONE");
2848 }
2849 }
2850 return 0;
2851 }
2852 DEFINE_SHOW_ATTRIBUTE(memblock_debug);
2853
memblock_debugfs_expose_arrays(struct dentry * root)2854 static inline void memblock_debugfs_expose_arrays(struct dentry *root)
2855 {
2856 debugfs_create_file("memory", 0444, root,
2857 &memblock.memory, &memblock_debug_fops);
2858 debugfs_create_file("reserved", 0444, root,
2859 &memblock.reserved, &memblock_debug_fops);
2860 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
2861 debugfs_create_file("physmem", 0444, root, &physmem,
2862 &memblock_debug_fops);
2863 #endif
2864 }
2865
2866 #else
2867
/* Without CONFIG_ARCH_KEEP_MEMBLOCK no array files are created. */
static inline void memblock_debugfs_expose_arrays(struct dentry *root) { }
2869
2870 #endif /* CONFIG_ARCH_KEEP_MEMBLOCK */
2871
memblock_reserve_mem_show(struct seq_file * m,void * private)2872 static int memblock_reserve_mem_show(struct seq_file *m, void *private)
2873 {
2874 struct reserve_mem_table *map;
2875 char txtsz[16];
2876
2877 guard(mutex)(&reserve_mem_lock);
2878 for (int i = 0; i < reserved_mem_count; i++) {
2879 map = &reserved_mem_table[i];
2880 if (!map->size)
2881 continue;
2882
2883 memset(txtsz, 0, sizeof(txtsz));
2884 string_get_size(map->size, 1, STRING_UNITS_2, txtsz, sizeof(txtsz));
2885 seq_printf(m, "%s\t\t(%s)\n", map->name, txtsz);
2886 }
2887
2888 return 0;
2889 }
2890 DEFINE_SHOW_ATTRIBUTE(memblock_reserve_mem);
2891
memblock_init_debugfs(void)2892 static int __init memblock_init_debugfs(void)
2893 {
2894 struct dentry *root;
2895
2896 if (!IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK) && !reserved_mem_count)
2897 return 0;
2898
2899 root = debugfs_create_dir("memblock", NULL);
2900
2901 if (reserved_mem_count)
2902 debugfs_create_file("reserve_mem_param", 0444, root, NULL,
2903 &memblock_reserve_mem_fops);
2904
2905 memblock_debugfs_expose_arrays(root);
2906 return 0;
2907 }
2908 __initcall(memblock_init_debugfs);
2909
2910 #endif /* CONFIG_DEBUG_FS */
2911