xref: /linux/mm/cma.c (revision e0c0ab04f6785abaa71b9b8dc252cb1a2072c225)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Contiguous Memory Allocator
4  *
5  * Copyright (c) 2010-2011 by Samsung Electronics.
6  * Copyright IBM Corporation, 2013
7  * Copyright LG Electronics Inc., 2014
8  * Written by:
9  *	Marek Szyprowski <m.szyprowski@samsung.com>
10  *	Michal Nazarewicz <mina86@mina86.com>
11  *	Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
12  *	Joonsoo Kim <iamjoonsoo.kim@lge.com>
13  */
14 
15 #define pr_fmt(fmt) "cma: " fmt
16 
17 #define CREATE_TRACE_POINTS
18 
19 #include <linux/memblock.h>
20 #include <linux/err.h>
21 #include <linux/list.h>
22 #include <linux/mm.h>
23 #include <linux/sizes.h>
24 #include <linux/slab.h>
25 #include <linux/log2.h>
26 #include <linux/cma.h>
27 #include <linux/highmem.h>
28 #include <linux/io.h>
29 #include <linux/kmemleak.h>
30 #include <trace/events/cma.h>
31 
32 #include "internal.h"
33 #include "cma.h"
34 
/*
 * Table of CMA areas and the number of slots currently in use.
 * cma_new_area() hands out slots in order and bumps cma_area_count;
 * cma_drop_area() decrements it again.
 */
struct cma cma_areas[MAX_CMA_AREAS];
unsigned int cma_area_count;
37 
/*
 * Forward declaration: the definition lives further down in this file, but
 * cma_declare_contiguous_multi() needs to call it before that point.
 */
static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
			phys_addr_t size, phys_addr_t limit,
			phys_addr_t alignment, unsigned int order_per_bit,
			bool fixed, const char *name, struct cma **res_cma,
			int nid);
43 
44 phys_addr_t cma_get_base(const struct cma *cma)
45 {
46 	WARN_ON_ONCE(cma->nranges != 1);
47 	return PFN_PHYS(cma->ranges[0].base_pfn);
48 }
49 
50 unsigned long cma_get_size(const struct cma *cma)
51 {
52 	return cma->count << PAGE_SHIFT;
53 }
54 
55 const char *cma_get_name(const struct cma *cma)
56 {
57 	return cma->name;
58 }
59 
60 static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
61 					     unsigned int align_order)
62 {
63 	if (align_order <= cma->order_per_bit)
64 		return 0;
65 	return (1UL << (align_order - cma->order_per_bit)) - 1;
66 }
67 
68 /*
69  * Find the offset of the base PFN from the specified align_order.
70  * The value returned is represented in order_per_bits.
71  */
72 static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
73 					       const struct cma_memrange *cmr,
74 					       unsigned int align_order)
75 {
76 	return (cmr->base_pfn & ((1UL << align_order) - 1))
77 		>> cma->order_per_bit;
78 }
79 
80 static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
81 					      unsigned long pages)
82 {
83 	return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
84 }
85 
86 static void cma_clear_bitmap(struct cma *cma, const struct cma_memrange *cmr,
87 			     unsigned long pfn, unsigned long count)
88 {
89 	unsigned long bitmap_no, bitmap_count;
90 	unsigned long flags;
91 
92 	bitmap_no = (pfn - cmr->base_pfn) >> cma->order_per_bit;
93 	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
94 
95 	spin_lock_irqsave(&cma->lock, flags);
96 	bitmap_clear(cmr->bitmap, bitmap_no, bitmap_count);
97 	cma->available_count += count;
98 	spin_unlock_irqrestore(&cma->lock, flags);
99 }
100 
101 /*
102  * Check if a CMA area contains no ranges that intersect with
103  * multiple zones. Store the result in the flags in case
104  * this gets called more than once.
105  */
106 bool cma_validate_zones(struct cma *cma)
107 {
108 	int r;
109 	unsigned long base_pfn;
110 	struct cma_memrange *cmr;
111 	bool valid_bit_set;
112 
113 	/*
114 	 * If already validated, return result of previous check.
115 	 * Either the valid or invalid bit will be set if this
116 	 * check has already been done. If neither is set, the
117 	 * check has not been performed yet.
118 	 */
119 	valid_bit_set = test_bit(CMA_ZONES_VALID, &cma->flags);
120 	if (valid_bit_set || test_bit(CMA_ZONES_INVALID, &cma->flags))
121 		return valid_bit_set;
122 
123 	for (r = 0; r < cma->nranges; r++) {
124 		cmr = &cma->ranges[r];
125 		base_pfn = cmr->base_pfn;
126 
127 		/*
128 		 * alloc_contig_range() requires the pfn range specified
129 		 * to be in the same zone. Simplify by forcing the entire
130 		 * CMA resv range to be in the same zone.
131 		 */
132 		WARN_ON_ONCE(!pfn_valid(base_pfn));
133 		if (pfn_range_intersects_zones(cma->nid, base_pfn, cmr->count)) {
134 			set_bit(CMA_ZONES_INVALID, &cma->flags);
135 			return false;
136 		}
137 	}
138 
139 	set_bit(CMA_ZONES_VALID, &cma->flags);
140 
141 	return true;
142 }
143 
144 static void __init cma_activate_area(struct cma *cma)
145 {
146 	unsigned long pfn, end_pfn, early_pfn[CMA_MAX_RANGES];
147 	int allocrange, r;
148 	struct cma_memrange *cmr;
149 	unsigned long bitmap_count, count;
150 
151 	for (allocrange = 0; allocrange < cma->nranges; allocrange++) {
152 		cmr = &cma->ranges[allocrange];
153 		early_pfn[allocrange] = cmr->early_pfn;
154 		cmr->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma, cmr),
155 					    GFP_KERNEL);
156 		if (!cmr->bitmap)
157 			goto cleanup;
158 	}
159 
160 	if (!cma_validate_zones(cma))
161 		goto cleanup;
162 
163 	for (r = 0; r < cma->nranges; r++) {
164 		cmr = &cma->ranges[r];
165 		if (early_pfn[r] != cmr->base_pfn) {
166 			count = early_pfn[r] - cmr->base_pfn;
167 			bitmap_count = cma_bitmap_pages_to_bits(cma, count);
168 			bitmap_set(cmr->bitmap, 0, bitmap_count);
169 		}
170 
171 		for (pfn = early_pfn[r]; pfn < cmr->base_pfn + cmr->count;
172 		     pfn += pageblock_nr_pages)
173 			init_cma_reserved_pageblock(pfn_to_page(pfn));
174 	}
175 
176 	spin_lock_init(&cma->lock);
177 
178 	mutex_init(&cma->alloc_mutex);
179 
180 #ifdef CONFIG_CMA_DEBUGFS
181 	INIT_HLIST_HEAD(&cma->mem_head);
182 	spin_lock_init(&cma->mem_head_lock);
183 #endif
184 	set_bit(CMA_ACTIVATED, &cma->flags);
185 
186 	return;
187 
188 cleanup:
189 	for (r = 0; r < allocrange; r++)
190 		bitmap_free(cma->ranges[r].bitmap);
191 
192 	/* Expose all pages to the buddy, they are useless for CMA. */
193 	if (!test_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags)) {
194 		for (r = 0; r < allocrange; r++) {
195 			cmr = &cma->ranges[r];
196 			end_pfn = cmr->base_pfn + cmr->count;
197 			for (pfn = early_pfn[r]; pfn < end_pfn; pfn++)
198 				free_reserved_page(pfn_to_page(pfn));
199 		}
200 	}
201 	totalcma_pages -= cma->count;
202 	cma->available_count = cma->count = 0;
203 	pr_err("CMA area %s could not be activated\n", cma->name);
204 }
205 
206 static int __init cma_init_reserved_areas(void)
207 {
208 	int i;
209 
210 	for (i = 0; i < cma_area_count; i++)
211 		cma_activate_area(&cma_areas[i]);
212 
213 	return 0;
214 }
215 core_initcall(cma_init_reserved_areas);
216 
217 void __init cma_reserve_pages_on_error(struct cma *cma)
218 {
219 	set_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags);
220 }
221 
222 static int __init cma_new_area(const char *name, phys_addr_t size,
223 			       unsigned int order_per_bit,
224 			       struct cma **res_cma)
225 {
226 	struct cma *cma;
227 
228 	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
229 		pr_err("Not enough slots for CMA reserved regions!\n");
230 		return -ENOSPC;
231 	}
232 
233 	/*
234 	 * Each reserved area must be initialised later, when more kernel
235 	 * subsystems (like slab allocator) are available.
236 	 */
237 	cma = &cma_areas[cma_area_count];
238 	cma_area_count++;
239 
240 	if (name)
241 		snprintf(cma->name, CMA_MAX_NAME, "%s", name);
242 	else
243 		snprintf(cma->name, CMA_MAX_NAME,  "cma%d\n", cma_area_count);
244 
245 	cma->available_count = cma->count = size >> PAGE_SHIFT;
246 	cma->order_per_bit = order_per_bit;
247 	*res_cma = cma;
248 	totalcma_pages += cma->count;
249 
250 	return 0;
251 }
252 
253 static void __init cma_drop_area(struct cma *cma)
254 {
255 	totalcma_pages -= cma->count;
256 	cma_area_count--;
257 }
258 
259 /**
260  * cma_init_reserved_mem() - create custom contiguous area from reserved memory
261  * @base: Base address of the reserved area
262  * @size: Size of the reserved area (in bytes),
263  * @order_per_bit: Order of pages represented by one bit on bitmap.
264  * @name: The name of the area. If this parameter is NULL, the name of
265  *        the area will be set to "cmaN", where N is a running counter of
266  *        used areas.
267  * @res_cma: Pointer to store the created cma region.
268  *
269  * This function creates custom contiguous area from already reserved memory.
270  */
271 int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
272 				 unsigned int order_per_bit,
273 				 const char *name,
274 				 struct cma **res_cma)
275 {
276 	struct cma *cma;
277 	int ret;
278 
279 	/* Sanity checks */
280 	if (!size || !memblock_is_region_reserved(base, size))
281 		return -EINVAL;
282 
283 	/*
284 	 * CMA uses CMA_MIN_ALIGNMENT_BYTES as alignment requirement which
285 	 * needs pageblock_order to be initialized. Let's enforce it.
286 	 */
287 	if (!pageblock_order) {
288 		pr_err("pageblock_order not yet initialized. Called during early boot?\n");
289 		return -EINVAL;
290 	}
291 
292 	/* ensure minimal alignment required by mm core */
293 	if (!IS_ALIGNED(base | size, CMA_MIN_ALIGNMENT_BYTES))
294 		return -EINVAL;
295 
296 	ret = cma_new_area(name, size, order_per_bit, &cma);
297 	if (ret != 0)
298 		return ret;
299 
300 	cma->ranges[0].base_pfn = PFN_DOWN(base);
301 	cma->ranges[0].early_pfn = PFN_DOWN(base);
302 	cma->ranges[0].count = cma->count;
303 	cma->nranges = 1;
304 	cma->nid = NUMA_NO_NODE;
305 
306 	*res_cma = cma;
307 
308 	return 0;
309 }
310 
/*
 * Structure used while walking physical memory ranges and finding out
 * which one(s) to use for a CMA area.
 */
struct cma_init_memrange {
	phys_addr_t base;	/* physical start address of the candidate range */
	phys_addr_t size;	/* usable size of the candidate range, in bytes */
	struct list_head list;	/* link in the sorted candidate lists */
};
320 
/*
 * Work array used during CMA initialization. It backs the list nodes that
 * cma_declare_contiguous_multi() sorts; being __initdata, it is only valid
 * during boot.
 */
static struct cma_init_memrange memranges[CMA_MAX_RANGES] __initdata;
325 
326 static bool __init revsizecmp(struct cma_init_memrange *mlp,
327 			      struct cma_init_memrange *mrp)
328 {
329 	return mlp->size > mrp->size;
330 }
331 
332 static bool __init basecmp(struct cma_init_memrange *mlp,
333 			   struct cma_init_memrange *mrp)
334 {
335 	return mlp->base < mrp->base;
336 }
337 
338 /*
339  * Helper function to create sorted lists.
340  */
341 static void __init list_insert_sorted(
342 	struct list_head *ranges,
343 	struct cma_init_memrange *mrp,
344 	bool (*cmp)(struct cma_init_memrange *lh, struct cma_init_memrange *rh))
345 {
346 	struct list_head *mp;
347 	struct cma_init_memrange *mlp;
348 
349 	if (list_empty(ranges))
350 		list_add(&mrp->list, ranges);
351 	else {
352 		list_for_each(mp, ranges) {
353 			mlp = list_entry(mp, struct cma_init_memrange, list);
354 			if (cmp(mlp, mrp))
355 				break;
356 		}
357 		__list_add(&mrp->list, mlp->list.prev, &mlp->list);
358 	}
359 }
360 
361 /*
362  * Create CMA areas with a total size of @total_size. A normal allocation
363  * for one area is tried first. If that fails, the biggest memblock
364  * ranges above 4G are selected, and allocated bottom up.
365  *
366  * The complexity here is not great, but this function will only be
367  * called during boot, and the lists operated on have fewer than
368  * CMA_MAX_RANGES elements (default value: 8).
369  */
370 int __init cma_declare_contiguous_multi(phys_addr_t total_size,
371 			phys_addr_t align, unsigned int order_per_bit,
372 			const char *name, struct cma **res_cma, int nid)
373 {
374 	phys_addr_t start = 0, end;
375 	phys_addr_t size, sizesum, sizeleft;
376 	struct cma_init_memrange *mrp, *mlp, *failed;
377 	struct cma_memrange *cmrp;
378 	LIST_HEAD(ranges);
379 	LIST_HEAD(final_ranges);
380 	struct list_head *mp, *next;
381 	int ret, nr = 1;
382 	u64 i;
383 	struct cma *cma;
384 
385 	/*
386 	 * First, try it the normal way, producing just one range.
387 	 */
388 	ret = __cma_declare_contiguous_nid(&start, total_size, 0, align,
389 			order_per_bit, false, name, res_cma, nid);
390 	if (ret != -ENOMEM)
391 		goto out;
392 
393 	/*
394 	 * Couldn't find one range that fits our needs, so try multiple
395 	 * ranges.
396 	 *
397 	 * No need to do the alignment checks here, the call to
398 	 * cma_declare_contiguous_nid above would have caught
399 	 * any issues. With the checks, we know that:
400 	 *
401 	 * - @align is a power of 2
402 	 * - @align is >= pageblock alignment
403 	 * - @size is aligned to @align and to @order_per_bit
404 	 *
405 	 * So, as long as we create ranges that have a base
406 	 * aligned to @align, and a size that is aligned to
407 	 * both @align and @order_to_bit, things will work out.
408 	 */
409 	nr = 0;
410 	sizesum = 0;
411 	failed = NULL;
412 
413 	ret = cma_new_area(name, total_size, order_per_bit, &cma);
414 	if (ret != 0)
415 		goto out;
416 
417 	align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES);
418 	/*
419 	 * Create a list of ranges above 4G, largest range first.
420 	 */
421 	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) {
422 		if (upper_32_bits(start) == 0)
423 			continue;
424 
425 		start = ALIGN(start, align);
426 		if (start >= end)
427 			continue;
428 
429 		end = ALIGN_DOWN(end, align);
430 		if (end <= start)
431 			continue;
432 
433 		size = end - start;
434 		size = ALIGN_DOWN(size, (PAGE_SIZE << order_per_bit));
435 		if (!size)
436 			continue;
437 		sizesum += size;
438 
439 		pr_debug("consider %016llx - %016llx\n", (u64)start, (u64)end);
440 
441 		/*
442 		 * If we don't yet have used the maximum number of
443 		 * areas, grab a new one.
444 		 *
445 		 * If we can't use anymore, see if this range is not
446 		 * smaller than the smallest one already recorded. If
447 		 * not, re-use the smallest element.
448 		 */
449 		if (nr < CMA_MAX_RANGES)
450 			mrp = &memranges[nr++];
451 		else {
452 			mrp = list_last_entry(&ranges,
453 					      struct cma_init_memrange, list);
454 			if (size < mrp->size)
455 				continue;
456 			list_del(&mrp->list);
457 			sizesum -= mrp->size;
458 			pr_debug("deleted %016llx - %016llx from the list\n",
459 				(u64)mrp->base, (u64)mrp->base + size);
460 		}
461 		mrp->base = start;
462 		mrp->size = size;
463 
464 		/*
465 		 * Now do a sorted insert.
466 		 */
467 		list_insert_sorted(&ranges, mrp, revsizecmp);
468 		pr_debug("added %016llx - %016llx to the list\n",
469 		    (u64)mrp->base, (u64)mrp->base + size);
470 		pr_debug("total size now %llu\n", (u64)sizesum);
471 	}
472 
473 	/*
474 	 * There is not enough room in the CMA_MAX_RANGES largest
475 	 * ranges, so bail out.
476 	 */
477 	if (sizesum < total_size) {
478 		cma_drop_area(cma);
479 		ret = -ENOMEM;
480 		goto out;
481 	}
482 
483 	/*
484 	 * Found ranges that provide enough combined space.
485 	 * Now, sorted them by address, smallest first, because we
486 	 * want to mimic a bottom-up memblock allocation.
487 	 */
488 	sizesum = 0;
489 	list_for_each_safe(mp, next, &ranges) {
490 		mlp = list_entry(mp, struct cma_init_memrange, list);
491 		list_del(mp);
492 		list_insert_sorted(&final_ranges, mlp, basecmp);
493 		sizesum += mlp->size;
494 		if (sizesum >= total_size)
495 			break;
496 	}
497 
498 	/*
499 	 * Walk the final list, and add a CMA range for
500 	 * each range, possibly not using the last one fully.
501 	 */
502 	nr = 0;
503 	sizeleft = total_size;
504 	list_for_each(mp, &final_ranges) {
505 		mlp = list_entry(mp, struct cma_init_memrange, list);
506 		size = min(sizeleft, mlp->size);
507 		if (memblock_reserve(mlp->base, size)) {
508 			/*
509 			 * Unexpected error. Could go on to
510 			 * the next one, but just abort to
511 			 * be safe.
512 			 */
513 			failed = mlp;
514 			break;
515 		}
516 
517 		pr_debug("created region %d: %016llx - %016llx\n",
518 		    nr, (u64)mlp->base, (u64)mlp->base + size);
519 		cmrp = &cma->ranges[nr++];
520 		cmrp->base_pfn = PHYS_PFN(mlp->base);
521 		cmrp->early_pfn = cmrp->base_pfn;
522 		cmrp->count = size >> PAGE_SHIFT;
523 
524 		sizeleft -= size;
525 		if (sizeleft == 0)
526 			break;
527 	}
528 
529 	if (failed) {
530 		list_for_each(mp, &final_ranges) {
531 			mlp = list_entry(mp, struct cma_init_memrange, list);
532 			if (mlp == failed)
533 				break;
534 			memblock_phys_free(mlp->base, mlp->size);
535 		}
536 		cma_drop_area(cma);
537 		ret = -ENOMEM;
538 		goto out;
539 	}
540 
541 	cma->nranges = nr;
542 	cma->nid = nid;
543 	*res_cma = cma;
544 
545 out:
546 	if (ret != 0)
547 		pr_err("Failed to reserve %lu MiB\n",
548 			(unsigned long)total_size / SZ_1M);
549 	else
550 		pr_info("Reserved %lu MiB in %d range%s\n",
551 			(unsigned long)total_size / SZ_1M, nr,
552 			nr > 1 ? "s" : "");
553 	return ret;
554 }
555 
556 /**
557  * cma_declare_contiguous_nid() - reserve custom contiguous area
558  * @base: Base address of the reserved area optional, use 0 for any
559  * @size: Size of the reserved area (in bytes),
560  * @limit: End address of the reserved memory (optional, 0 for any).
561  * @alignment: Alignment for the CMA area, should be power of 2 or zero
562  * @order_per_bit: Order of pages represented by one bit on bitmap.
563  * @fixed: hint about where to place the reserved area
564  * @name: The name of the area. See function cma_init_reserved_mem()
565  * @res_cma: Pointer to store the created cma region.
566  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
567  *
568  * This function reserves memory from early allocator. It should be
569  * called by arch specific code once the early allocator (memblock or bootmem)
570  * has been activated and all other subsystems have already allocated/reserved
571  * memory. This function allows to create custom reserved areas.
572  *
573  * If @fixed is true, reserve contiguous area at exactly @base.  If false,
574  * reserve in range from @base to @limit.
575  */
576 int __init cma_declare_contiguous_nid(phys_addr_t base,
577 			phys_addr_t size, phys_addr_t limit,
578 			phys_addr_t alignment, unsigned int order_per_bit,
579 			bool fixed, const char *name, struct cma **res_cma,
580 			int nid)
581 {
582 	int ret;
583 
584 	ret = __cma_declare_contiguous_nid(&base, size, limit, alignment,
585 			order_per_bit, fixed, name, res_cma, nid);
586 	if (ret != 0)
587 		pr_err("Failed to reserve %ld MiB\n",
588 				(unsigned long)size / SZ_1M);
589 	else
590 		pr_info("Reserved %ld MiB at %pa\n",
591 				(unsigned long)size / SZ_1M, &base);
592 
593 	return ret;
594 }
595 
/*
 * Worker behind cma_declare_contiguous_nid() and the single-range attempt
 * of cma_declare_contiguous_multi(). It does not log the final outcome.
 *
 * @basep is both input and output: it supplies the requested base and, on
 * success, receives the base address that was actually reserved.
 *
 * Returns 0 on success, or:
 *   -ENOSPC  no free slot in cma_areas[]
 *   -EINVAL  zero size, bad alignment, size not matching @order_per_bit,
 *            fixed region crossing the low/high memory boundary, or region
 *            exceeding @limit
 *   -EBUSY   fixed region already reserved or memblock_reserve() failed
 *   -ENOMEM  no suitable free memory found
 * or whatever cma_init_reserved_mem() returns (the reservation is released
 * again in that case).
 */
static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
			phys_addr_t size, phys_addr_t limit,
			phys_addr_t alignment, unsigned int order_per_bit,
			bool fixed, const char *name, struct cma **res_cma,
			int nid)
{
	phys_addr_t memblock_end = memblock_end_of_DRAM();
	phys_addr_t highmem_start, base = *basep;
	int ret;

	/*
	 * We can't use __pa(high_memory) directly, since high_memory
	 * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly)
	 * complain. Find the boundary by adding one to the last valid
	 * address.
	 */
	if (IS_ENABLED(CONFIG_HIGHMEM))
		highmem_start = __pa(high_memory - 1) + 1;
	else
		highmem_start = memblock_end_of_DRAM();
	pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n",
		__func__, &size, &base, &limit, &alignment);

	/* Fail early, before reserving anything, if no slot is left. */
	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
		pr_err("Not enough slots for CMA reserved regions!\n");
		return -ENOSPC;
	}

	if (!size)
		return -EINVAL;

	if (alignment && !is_power_of_2(alignment))
		return -EINVAL;

	if (!IS_ENABLED(CONFIG_NUMA))
		nid = NUMA_NO_NODE;

	/* Sanitise input arguments. */
	alignment = max_t(phys_addr_t, alignment, CMA_MIN_ALIGNMENT_BYTES);
	if (fixed && base & (alignment - 1)) {
		pr_err("Region at %pa must be aligned to %pa bytes\n",
			&base, &alignment);
		return -EINVAL;
	}
	base = ALIGN(base, alignment);
	size = ALIGN(size, alignment);
	limit &= ~(alignment - 1);

	/* A base of 0 means "anywhere", so it cannot be a fixed request. */
	if (!base)
		fixed = false;

	/* size should be aligned with order_per_bit */
	if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
		return -EINVAL;

	/*
	 * If allocating at a fixed base the request region must not cross the
	 * low/high memory boundary.
	 */
	if (fixed && base < highmem_start && base + size > highmem_start) {
		pr_err("Region at %pa defined on low/high memory boundary (%pa)\n",
			&base, &highmem_start);
		return -EINVAL;
	}

	/*
	 * If the limit is unspecified or above the memblock end, its effective
	 * value will be the memblock end. Set it explicitly to simplify further
	 * checks.
	 */
	if (limit == 0 || limit > memblock_end)
		limit = memblock_end;

	if (base + size > limit) {
		pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n",
			&size, &base, &limit);
		return -EINVAL;
	}

	/* Reserve memory */
	if (fixed) {
		if (memblock_is_region_reserved(base, size) ||
		    memblock_reserve(base, size) < 0) {
			return -EBUSY;
		}
	} else {
		/* addr stays 0 until one of the attempts below succeeds. */
		phys_addr_t addr = 0;

		/*
		 * If there is enough memory, try a bottom-up allocation first.
		 * It will place the new cma area close to the start of the node
		 * and guarantee that the compaction is moving pages out of the
		 * cma area and not into it.
		 * Avoid using first 4GB to not interfere with constrained zones
		 * like DMA/DMA32.
		 */
#ifdef CONFIG_PHYS_ADDR_T_64BIT
		if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) {
			memblock_set_bottom_up(true);
			addr = memblock_alloc_range_nid(size, alignment, SZ_4G,
							limit, nid, true);
			memblock_set_bottom_up(false);
		}
#endif

		/*
		 * All pages in the reserved area must come from the same zone.
		 * If the requested region crosses the low/high memory boundary,
		 * try allocating from high memory first and fall back to low
		 * memory in case of failure.
		 */
		if (!addr && base < highmem_start && limit > highmem_start) {
			addr = memblock_alloc_range_nid(size, alignment,
					highmem_start, limit, nid, true);
			/* Confine the fallback below to low memory. */
			limit = highmem_start;
		}

		if (!addr) {
			addr = memblock_alloc_range_nid(size, alignment, base,
					limit, nid, true);
			if (!addr)
				return -ENOMEM;
		}

		/*
		 * kmemleak scans/reads tracked objects for pointers to other
		 * objects but this address isn't mapped and accessible
		 */
		kmemleak_ignore_phys(addr);
		base = addr;
	}

	ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma);
	if (ret) {
		/* Registration failed: give the reservation back. */
		memblock_phys_free(base, size);
		return ret;
	}

	/* cma_init_reserved_mem() sets NUMA_NO_NODE; record the real node. */
	(*res_cma)->nid = nid;
	*basep = base;

	return 0;
}
739 
740 static void cma_debug_show_areas(struct cma *cma)
741 {
742 	unsigned long next_zero_bit, next_set_bit, nr_zero;
743 	unsigned long start;
744 	unsigned long nr_part;
745 	unsigned long nbits;
746 	int r;
747 	struct cma_memrange *cmr;
748 
749 	spin_lock_irq(&cma->lock);
750 	pr_info("number of available pages: ");
751 	for (r = 0; r < cma->nranges; r++) {
752 		cmr = &cma->ranges[r];
753 
754 		start = 0;
755 		nbits = cma_bitmap_maxno(cma, cmr);
756 
757 		pr_info("range %d: ", r);
758 		for (;;) {
759 			next_zero_bit = find_next_zero_bit(cmr->bitmap,
760 							   nbits, start);
761 			if (next_zero_bit >= nbits)
762 				break;
763 			next_set_bit = find_next_bit(cmr->bitmap, nbits,
764 						     next_zero_bit);
765 			nr_zero = next_set_bit - next_zero_bit;
766 			nr_part = nr_zero << cma->order_per_bit;
767 			pr_cont("%s%lu@%lu", start ? "+" : "", nr_part,
768 				next_zero_bit);
769 			start = next_zero_bit + nr_zero;
770 		}
771 		pr_info("\n");
772 	}
773 	pr_cont("=> %lu free of %lu total pages\n", cma->available_count,
774 			cma->count);
775 	spin_unlock_irq(&cma->lock);
776 }
777 
/*
 * Try to allocate @count contiguous pages, aligned to order @align, from
 * the single range @cmr of @cma.
 *
 * On return *@pagep is the first page of the allocation, or NULL.
 * The return value is 0 on success, -EBUSY when no suitable free bitmap
 * area could be found (or the request is larger than the range / the
 * remaining free pages), or the error returned by alloc_contig_range().
 *
 * Candidate areas are claimed in the bitmap under cma->lock; the actual
 * alloc_contig_range() call runs without the spinlock but under
 * cma->alloc_mutex. If it reports -EBUSY the claim is dropped and the search
 * resumes past the failed position.
 */
static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
				unsigned long count, unsigned int align,
				struct page **pagep, gfp_t gfp)
{
	unsigned long mask, offset;
	unsigned long pfn = -1;
	unsigned long start = 0;
	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
	int ret = -EBUSY;
	struct page *page = NULL;

	mask = cma_bitmap_aligned_mask(cma, align);
	offset = cma_bitmap_aligned_offset(cma, cmr, align);
	bitmap_maxno = cma_bitmap_maxno(cma, cmr);
	bitmap_count = cma_bitmap_pages_to_bits(cma, count);

	/* The request can never fit into this range. */
	if (bitmap_count > bitmap_maxno)
		goto out;

	for (;;) {
		spin_lock_irq(&cma->lock);
		/*
		 * If the request is larger than the available number
		 * of pages, stop right away.
		 */
		if (count > cma->available_count) {
			spin_unlock_irq(&cma->lock);
			break;
		}
		bitmap_no = bitmap_find_next_zero_area_off(cmr->bitmap,
				bitmap_maxno, start, bitmap_count, mask,
				offset);
		if (bitmap_no >= bitmap_maxno) {
			spin_unlock_irq(&cma->lock);
			break;
		}
		bitmap_set(cmr->bitmap, bitmap_no, bitmap_count);
		cma->available_count -= count;
		/*
		 * It's safe to drop the lock here. We've marked this region for
		 * our exclusive use. If the migration fails we will take the
		 * lock again and unmark it.
		 */
		spin_unlock_irq(&cma->lock);

		pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
		mutex_lock(&cma->alloc_mutex);
		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp);
		mutex_unlock(&cma->alloc_mutex);
		if (ret == 0) {
			page = pfn_to_page(pfn);
			break;
		}

		/* Allocation failed: release the bitmap claim made above. */
		cma_clear_bitmap(cma, cmr, pfn, count);
		/* Only -EBUSY is worth retrying at a different position. */
		if (ret != -EBUSY)
			break;

		pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n",
			 __func__, pfn, pfn_to_page(pfn));

		trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
					   count, align);
		/* try again with a bit different memory target */
		start = bitmap_no + mask + 1;
	}
out:
	*pagep = page;
	return ret;
}
848 
849 static struct page *__cma_alloc(struct cma *cma, unsigned long count,
850 		       unsigned int align, gfp_t gfp)
851 {
852 	struct page *page = NULL;
853 	int ret = -ENOMEM, r;
854 	unsigned long i;
855 	const char *name = cma ? cma->name : NULL;
856 
857 	trace_cma_alloc_start(name, count, align);
858 
859 	if (!cma || !cma->count)
860 		return page;
861 
862 	pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
863 		(void *)cma, cma->name, count, align);
864 
865 	if (!count)
866 		return page;
867 
868 	for (r = 0; r < cma->nranges; r++) {
869 		page = NULL;
870 
871 		ret = cma_range_alloc(cma, &cma->ranges[r], count, align,
872 				       &page, gfp);
873 		if (ret != -EBUSY || page)
874 			break;
875 	}
876 
877 	/*
878 	 * CMA can allocate multiple page blocks, which results in different
879 	 * blocks being marked with different tags. Reset the tags to ignore
880 	 * those page blocks.
881 	 */
882 	if (page) {
883 		for (i = 0; i < count; i++)
884 			page_kasan_tag_reset(nth_page(page, i));
885 	}
886 
887 	if (ret && !(gfp & __GFP_NOWARN)) {
888 		pr_err_ratelimited("%s: %s: alloc failed, req-size: %lu pages, ret: %d\n",
889 				   __func__, cma->name, count, ret);
890 		cma_debug_show_areas(cma);
891 	}
892 
893 	pr_debug("%s(): returned %p\n", __func__, page);
894 	trace_cma_alloc_finish(name, page ? page_to_pfn(page) : 0,
895 			       page, count, align, ret);
896 	if (page) {
897 		count_vm_event(CMA_ALLOC_SUCCESS);
898 		cma_sysfs_account_success_pages(cma, count);
899 	} else {
900 		count_vm_event(CMA_ALLOC_FAIL);
901 		cma_sysfs_account_fail_pages(cma, count);
902 	}
903 
904 	return page;
905 }
906 
907 /**
908  * cma_alloc() - allocate pages from contiguous area
909  * @cma:   Contiguous memory region for which the allocation is performed.
910  * @count: Requested number of pages.
911  * @align: Requested alignment of pages (in PAGE_SIZE order).
912  * @no_warn: Avoid printing message about failed allocation
913  *
914  * This function allocates part of contiguous memory on specific
915  * contiguous memory area.
916  */
917 struct page *cma_alloc(struct cma *cma, unsigned long count,
918 		       unsigned int align, bool no_warn)
919 {
920 	return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
921 }
922 
923 struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
924 {
925 	struct page *page;
926 
927 	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
928 		return NULL;
929 
930 	page = __cma_alloc(cma, 1 << order, order, gfp);
931 
932 	return page ? page_folio(page) : NULL;
933 }
934 
935 bool cma_pages_valid(struct cma *cma, const struct page *pages,
936 		     unsigned long count)
937 {
938 	unsigned long pfn, end;
939 	int r;
940 	struct cma_memrange *cmr;
941 	bool ret;
942 
943 	if (!cma || !pages || count > cma->count)
944 		return false;
945 
946 	pfn = page_to_pfn(pages);
947 	ret = false;
948 
949 	for (r = 0; r < cma->nranges; r++) {
950 		cmr = &cma->ranges[r];
951 		end = cmr->base_pfn + cmr->count;
952 		if (pfn >= cmr->base_pfn && pfn < end) {
953 			ret = pfn + count <= end;
954 			break;
955 		}
956 	}
957 
958 	if (!ret)
959 		pr_debug("%s(page %p, count %lu)\n",
960 				__func__, (void *)pages, count);
961 
962 	return ret;
963 }
964 
965 /**
966  * cma_release() - release allocated pages
967  * @cma:   Contiguous memory region for which the allocation is performed.
968  * @pages: Allocated pages.
969  * @count: Number of allocated pages.
970  *
971  * This function releases memory allocated by cma_alloc().
972  * It returns false when provided pages do not belong to contiguous area and
973  * true otherwise.
974  */
975 bool cma_release(struct cma *cma, const struct page *pages,
976 		 unsigned long count)
977 {
978 	struct cma_memrange *cmr;
979 	unsigned long pfn, end_pfn;
980 	int r;
981 
982 	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
983 
984 	if (!cma_pages_valid(cma, pages, count))
985 		return false;
986 
987 	pfn = page_to_pfn(pages);
988 	end_pfn = pfn + count;
989 
990 	for (r = 0; r < cma->nranges; r++) {
991 		cmr = &cma->ranges[r];
992 		if (pfn >= cmr->base_pfn &&
993 		    pfn < (cmr->base_pfn + cmr->count)) {
994 			VM_BUG_ON(end_pfn > cmr->base_pfn + cmr->count);
995 			break;
996 		}
997 	}
998 
999 	if (r == cma->nranges)
1000 		return false;
1001 
1002 	free_contig_range(pfn, count);
1003 	cma_clear_bitmap(cma, cmr, pfn, count);
1004 	cma_sysfs_account_release_pages(cma, count);
1005 	trace_cma_release(cma->name, pfn, pages, count);
1006 
1007 	return true;
1008 }
1009 
1010 bool cma_free_folio(struct cma *cma, const struct folio *folio)
1011 {
1012 	if (WARN_ON(!folio_test_large(folio)))
1013 		return false;
1014 
1015 	return cma_release(cma, &folio->page, folio_nr_pages(folio));
1016 }
1017 
1018 int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
1019 {
1020 	int i;
1021 
1022 	for (i = 0; i < cma_area_count; i++) {
1023 		int ret = it(&cma_areas[i], data);
1024 
1025 		if (ret)
1026 			return ret;
1027 	}
1028 
1029 	return 0;
1030 }
1031 
1032 bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end)
1033 {
1034 	int r;
1035 	struct cma_memrange *cmr;
1036 	unsigned long rstart, rend;
1037 
1038 	for (r = 0; r < cma->nranges; r++) {
1039 		cmr = &cma->ranges[r];
1040 
1041 		rstart = PFN_PHYS(cmr->base_pfn);
1042 		rend = PFN_PHYS(cmr->base_pfn + cmr->count);
1043 		if (end < rstart)
1044 			continue;
1045 		if (start >= rend)
1046 			continue;
1047 		return true;
1048 	}
1049 
1050 	return false;
1051 }
1052 
1053 /*
1054  * Very basic function to reserve memory from a CMA area that has not
1055  * yet been activated. This is expected to be called early, when the
1056  * system is single-threaded, so there is no locking. The alignment
1057  * checking is restrictive - only pageblock-aligned areas
1058  * (CMA_MIN_ALIGNMENT_BYTES) may be reserved through this function.
1059  * This keeps things simple, and is enough for the current use case.
1060  *
1061  * The CMA bitmaps have not yet been allocated, so just start
1062  * reserving from the bottom up, using a PFN to keep track
1063  * of what has been reserved. Unreserving is not possible.
1064  *
1065  * The caller is responsible for initializing the page structures
1066  * in the area properly, since this just points to memblock-allocated
1067  * memory. The caller should subsequently use init_cma_pageblock to
1068  * set the migrate type and CMA stats  the pageblocks that were reserved.
1069  *
1070  * If the CMA area fails to activate later, memory obtained through
1071  * this interface is not handed to the page allocator, this is
1072  * the responsibility of the caller (e.g. like normal memblock-allocated
1073  * memory).
1074  */
1075 void __init *cma_reserve_early(struct cma *cma, unsigned long size)
1076 {
1077 	int r;
1078 	struct cma_memrange *cmr;
1079 	unsigned long available;
1080 	void *ret = NULL;
1081 
1082 	if (!cma || !cma->count)
1083 		return NULL;
1084 	/*
1085 	 * Can only be called early in init.
1086 	 */
1087 	if (test_bit(CMA_ACTIVATED, &cma->flags))
1088 		return NULL;
1089 
1090 	if (!IS_ALIGNED(size, CMA_MIN_ALIGNMENT_BYTES))
1091 		return NULL;
1092 
1093 	if (!IS_ALIGNED(size, (PAGE_SIZE << cma->order_per_bit)))
1094 		return NULL;
1095 
1096 	size >>= PAGE_SHIFT;
1097 
1098 	if (size > cma->available_count)
1099 		return NULL;
1100 
1101 	for (r = 0; r < cma->nranges; r++) {
1102 		cmr = &cma->ranges[r];
1103 		available = cmr->count - (cmr->early_pfn - cmr->base_pfn);
1104 		if (size <= available) {
1105 			ret = phys_to_virt(PFN_PHYS(cmr->early_pfn));
1106 			cmr->early_pfn += size;
1107 			cma->available_count -= size;
1108 			return ret;
1109 		}
1110 	}
1111 
1112 	return ret;
1113 }
1114