xref: /linux/mm/cma.c (revision 7a9b709e7cc5ce1ffb84ce07bf6d157e1de758df)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Contiguous Memory Allocator
4  *
5  * Copyright (c) 2010-2011 by Samsung Electronics.
6  * Copyright IBM Corporation, 2013
7  * Copyright LG Electronics Inc., 2014
8  * Written by:
9  *	Marek Szyprowski <m.szyprowski@samsung.com>
10  *	Michal Nazarewicz <mina86@mina86.com>
11  *	Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
12  *	Joonsoo Kim <iamjoonsoo.kim@lge.com>
13  */
14 
15 #define pr_fmt(fmt) "cma: " fmt
16 
17 #define CREATE_TRACE_POINTS
18 
19 #include <linux/memblock.h>
20 #include <linux/err.h>
21 #include <linux/list.h>
22 #include <linux/mm.h>
23 #include <linux/sizes.h>
24 #include <linux/slab.h>
25 #include <linux/log2.h>
26 #include <linux/cma.h>
27 #include <linux/highmem.h>
28 #include <linux/io.h>
29 #include <linux/kmemleak.h>
30 #include <trace/events/cma.h>
31 
32 #include "internal.h"
33 #include "cma.h"
34 
35 struct cma cma_areas[MAX_CMA_AREAS];
36 unsigned int cma_area_count;
37 
38 static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
39 			phys_addr_t size, phys_addr_t limit,
40 			phys_addr_t alignment, unsigned int order_per_bit,
41 			bool fixed, const char *name, struct cma **res_cma,
42 			int nid);
43 
44 phys_addr_t cma_get_base(const struct cma *cma)
45 {
46 	WARN_ON_ONCE(cma->nranges != 1);
47 	return PFN_PHYS(cma->ranges[0].base_pfn);
48 }
49 
50 unsigned long cma_get_size(const struct cma *cma)
51 {
52 	return cma->count << PAGE_SHIFT;
53 }
54 
55 const char *cma_get_name(const struct cma *cma)
56 {
57 	return cma->name;
58 }
59 
60 static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
61 					     unsigned int align_order)
62 {
63 	if (align_order <= cma->order_per_bit)
64 		return 0;
65 	return (1UL << (align_order - cma->order_per_bit)) - 1;
66 }
67 
68 /*
69  * Find the offset of the base PFN from the specified align_order.
70  * The value returned is represented in order_per_bits.
71  */
72 static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
73 					       const struct cma_memrange *cmr,
74 					       unsigned int align_order)
75 {
76 	return (cmr->base_pfn & ((1UL << align_order) - 1))
77 		>> cma->order_per_bit;
78 }
79 
80 static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
81 					      unsigned long pages)
82 {
83 	return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
84 }
85 
86 static void cma_clear_bitmap(struct cma *cma, const struct cma_memrange *cmr,
87 			     unsigned long pfn, unsigned long count)
88 {
89 	unsigned long bitmap_no, bitmap_count;
90 	unsigned long flags;
91 
92 	bitmap_no = (pfn - cmr->base_pfn) >> cma->order_per_bit;
93 	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
94 
95 	spin_lock_irqsave(&cma->lock, flags);
96 	bitmap_clear(cmr->bitmap, bitmap_no, bitmap_count);
97 	cma->available_count += count;
98 	spin_unlock_irqrestore(&cma->lock, flags);
99 }
100 
101 /*
102  * Check if a CMA area contains no ranges that intersect with
103  * multiple zones. Store the result in the flags in case
104  * this gets called more than once.
105  */
106 bool cma_validate_zones(struct cma *cma)
107 {
108 	int r;
109 	unsigned long base_pfn;
110 	struct cma_memrange *cmr;
111 	bool valid_bit_set;
112 
113 	/*
114 	 * If already validated, return result of previous check.
115 	 * Either the valid or invalid bit will be set if this
116 	 * check has already been done. If neither is set, the
117 	 * check has not been performed yet.
118 	 */
119 	valid_bit_set = test_bit(CMA_ZONES_VALID, &cma->flags);
120 	if (valid_bit_set || test_bit(CMA_ZONES_INVALID, &cma->flags))
121 		return valid_bit_set;
122 
123 	for (r = 0; r < cma->nranges; r++) {
124 		cmr = &cma->ranges[r];
125 		base_pfn = cmr->base_pfn;
126 
127 		/*
128 		 * alloc_contig_range() requires the pfn range specified
129 		 * to be in the same zone. Simplify by forcing the entire
130 		 * CMA resv range to be in the same zone.
131 		 */
132 		WARN_ON_ONCE(!pfn_valid(base_pfn));
133 		if (pfn_range_intersects_zones(cma->nid, base_pfn, cmr->count)) {
134 			set_bit(CMA_ZONES_INVALID, &cma->flags);
135 			return false;
136 		}
137 	}
138 
139 	set_bit(CMA_ZONES_VALID, &cma->flags);
140 
141 	return true;
142 }
143 
144 static void __init cma_activate_area(struct cma *cma)
145 {
146 	unsigned long pfn, end_pfn;
147 	int allocrange, r;
148 	struct cma_memrange *cmr;
149 	unsigned long bitmap_count, count;
150 
151 	for (allocrange = 0; allocrange < cma->nranges; allocrange++) {
152 		cmr = &cma->ranges[allocrange];
153 		cmr->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma, cmr),
154 					    GFP_KERNEL);
155 		if (!cmr->bitmap)
156 			goto cleanup;
157 	}
158 
159 	if (!cma_validate_zones(cma))
160 		goto cleanup;
161 
162 	for (r = 0; r < cma->nranges; r++) {
163 		cmr = &cma->ranges[r];
164 		if (cmr->early_pfn != cmr->base_pfn) {
165 			count = cmr->early_pfn - cmr->base_pfn;
166 			bitmap_count = cma_bitmap_pages_to_bits(cma, count);
167 			bitmap_set(cmr->bitmap, 0, bitmap_count);
168 		}
169 
170 		for (pfn = cmr->early_pfn; pfn < cmr->base_pfn + cmr->count;
171 		     pfn += pageblock_nr_pages)
172 			init_cma_reserved_pageblock(pfn_to_page(pfn));
173 	}
174 
175 	spin_lock_init(&cma->lock);
176 
177 	mutex_init(&cma->alloc_mutex);
178 
179 #ifdef CONFIG_CMA_DEBUGFS
180 	INIT_HLIST_HEAD(&cma->mem_head);
181 	spin_lock_init(&cma->mem_head_lock);
182 #endif
183 	set_bit(CMA_ACTIVATED, &cma->flags);
184 
185 	return;
186 
187 cleanup:
188 	for (r = 0; r < allocrange; r++)
189 		bitmap_free(cma->ranges[r].bitmap);
190 
191 	/* Expose all pages to the buddy, they are useless for CMA. */
192 	if (!test_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags)) {
193 		for (r = 0; r < allocrange; r++) {
194 			cmr = &cma->ranges[r];
195 			end_pfn = cmr->base_pfn + cmr->count;
196 			for (pfn = cmr->early_pfn; pfn < end_pfn; pfn++)
197 				free_reserved_page(pfn_to_page(pfn));
198 		}
199 	}
200 	totalcma_pages -= cma->count;
201 	cma->available_count = cma->count = 0;
202 	pr_err("CMA area %s could not be activated\n", cma->name);
203 }
204 
205 static int __init cma_init_reserved_areas(void)
206 {
207 	int i;
208 
209 	for (i = 0; i < cma_area_count; i++)
210 		cma_activate_area(&cma_areas[i]);
211 
212 	return 0;
213 }
214 core_initcall(cma_init_reserved_areas);
215 
216 void __init cma_reserve_pages_on_error(struct cma *cma)
217 {
218 	set_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags);
219 }
220 
221 static int __init cma_new_area(const char *name, phys_addr_t size,
222 			       unsigned int order_per_bit,
223 			       struct cma **res_cma)
224 {
225 	struct cma *cma;
226 
227 	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
228 		pr_err("Not enough slots for CMA reserved regions!\n");
229 		return -ENOSPC;
230 	}
231 
232 	/*
233 	 * Each reserved area must be initialised later, when more kernel
234 	 * subsystems (like slab allocator) are available.
235 	 */
236 	cma = &cma_areas[cma_area_count];
237 	cma_area_count++;
238 
239 	if (name)
240 		snprintf(cma->name, CMA_MAX_NAME, "%s", name);
241 	else
242 		snprintf(cma->name, CMA_MAX_NAME,  "cma%d\n", cma_area_count);
243 
244 	cma->available_count = cma->count = size >> PAGE_SHIFT;
245 	cma->order_per_bit = order_per_bit;
246 	*res_cma = cma;
247 	totalcma_pages += cma->count;
248 
249 	return 0;
250 }
251 
252 static void __init cma_drop_area(struct cma *cma)
253 {
254 	totalcma_pages -= cma->count;
255 	cma_area_count--;
256 }
257 
258 /**
259  * cma_init_reserved_mem() - create custom contiguous area from reserved memory
260  * @base: Base address of the reserved area
261  * @size: Size of the reserved area (in bytes),
262  * @order_per_bit: Order of pages represented by one bit on bitmap.
263  * @name: The name of the area. If this parameter is NULL, the name of
264  *        the area will be set to "cmaN", where N is a running counter of
265  *        used areas.
266  * @res_cma: Pointer to store the created cma region.
267  *
268  * This function creates custom contiguous area from already reserved memory.
269  */
270 int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
271 				 unsigned int order_per_bit,
272 				 const char *name,
273 				 struct cma **res_cma)
274 {
275 	struct cma *cma;
276 	int ret;
277 
278 	/* Sanity checks */
279 	if (!size || !memblock_is_region_reserved(base, size))
280 		return -EINVAL;
281 
282 	/*
283 	 * CMA uses CMA_MIN_ALIGNMENT_BYTES as alignment requirement which
284 	 * needs pageblock_order to be initialized. Let's enforce it.
285 	 */
286 	if (!pageblock_order) {
287 		pr_err("pageblock_order not yet initialized. Called during early boot?\n");
288 		return -EINVAL;
289 	}
290 
291 	/* ensure minimal alignment required by mm core */
292 	if (!IS_ALIGNED(base | size, CMA_MIN_ALIGNMENT_BYTES))
293 		return -EINVAL;
294 
295 	ret = cma_new_area(name, size, order_per_bit, &cma);
296 	if (ret != 0)
297 		return ret;
298 
299 	cma->ranges[0].base_pfn = PFN_DOWN(base);
300 	cma->ranges[0].early_pfn = PFN_DOWN(base);
301 	cma->ranges[0].count = cma->count;
302 	cma->nranges = 1;
303 	cma->nid = NUMA_NO_NODE;
304 
305 	*res_cma = cma;
306 
307 	return 0;
308 }
309 
310 /*
311  * Structure used while walking physical memory ranges and finding out
312  * which one(s) to use for a CMA area.
313  */
314 struct cma_init_memrange {
315 	phys_addr_t base;
316 	phys_addr_t size;
317 	struct list_head list;
318 };
319 
320 /*
321  * Work array used during CMA initialization.
322  */
323 static struct cma_init_memrange memranges[CMA_MAX_RANGES] __initdata;
324 
325 static bool __init revsizecmp(struct cma_init_memrange *mlp,
326 			      struct cma_init_memrange *mrp)
327 {
328 	return mlp->size > mrp->size;
329 }
330 
331 static bool __init basecmp(struct cma_init_memrange *mlp,
332 			   struct cma_init_memrange *mrp)
333 {
334 	return mlp->base < mrp->base;
335 }
336 
337 /*
338  * Helper function to create sorted lists.
339  */
340 static void __init list_insert_sorted(
341 	struct list_head *ranges,
342 	struct cma_init_memrange *mrp,
343 	bool (*cmp)(struct cma_init_memrange *lh, struct cma_init_memrange *rh))
344 {
345 	struct list_head *mp;
346 	struct cma_init_memrange *mlp;
347 
348 	if (list_empty(ranges))
349 		list_add(&mrp->list, ranges);
350 	else {
351 		list_for_each(mp, ranges) {
352 			mlp = list_entry(mp, struct cma_init_memrange, list);
353 			if (cmp(mlp, mrp))
354 				break;
355 		}
356 		__list_add(&mrp->list, mlp->list.prev, &mlp->list);
357 	}
358 }
359 
360 /*
361  * Create CMA areas with a total size of @total_size. A normal allocation
362  * for one area is tried first. If that fails, the biggest memblock
363  * ranges above 4G are selected, and allocated bottom up.
364  *
365  * The complexity here is not great, but this function will only be
366  * called during boot, and the lists operated on have fewer than
367  * CMA_MAX_RANGES elements (default value: 8).
368  */
369 int __init cma_declare_contiguous_multi(phys_addr_t total_size,
370 			phys_addr_t align, unsigned int order_per_bit,
371 			const char *name, struct cma **res_cma, int nid)
372 {
373 	phys_addr_t start = 0, end;
374 	phys_addr_t size, sizesum, sizeleft;
375 	struct cma_init_memrange *mrp, *mlp, *failed;
376 	struct cma_memrange *cmrp;
377 	LIST_HEAD(ranges);
378 	LIST_HEAD(final_ranges);
379 	struct list_head *mp, *next;
380 	int ret, nr = 1;
381 	u64 i;
382 	struct cma *cma;
383 
384 	/*
385 	 * First, try it the normal way, producing just one range.
386 	 */
387 	ret = __cma_declare_contiguous_nid(&start, total_size, 0, align,
388 			order_per_bit, false, name, res_cma, nid);
389 	if (ret != -ENOMEM)
390 		goto out;
391 
392 	/*
393 	 * Couldn't find one range that fits our needs, so try multiple
394 	 * ranges.
395 	 *
396 	 * No need to do the alignment checks here, the call to
397 	 * cma_declare_contiguous_nid above would have caught
398 	 * any issues. With the checks, we know that:
399 	 *
400 	 * - @align is a power of 2
401 	 * - @align is >= pageblock alignment
402 	 * - @size is aligned to @align and to @order_per_bit
403 	 *
404 	 * So, as long as we create ranges that have a base
405 	 * aligned to @align, and a size that is aligned to
406 	 * both @align and @order_to_bit, things will work out.
407 	 */
408 	nr = 0;
409 	sizesum = 0;
410 	failed = NULL;
411 
412 	ret = cma_new_area(name, total_size, order_per_bit, &cma);
413 	if (ret != 0)
414 		goto out;
415 
416 	align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES);
417 	/*
418 	 * Create a list of ranges above 4G, largest range first.
419 	 */
420 	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) {
421 		if (upper_32_bits(start) == 0)
422 			continue;
423 
424 		start = ALIGN(start, align);
425 		if (start >= end)
426 			continue;
427 
428 		end = ALIGN_DOWN(end, align);
429 		if (end <= start)
430 			continue;
431 
432 		size = end - start;
433 		size = ALIGN_DOWN(size, (PAGE_SIZE << order_per_bit));
434 		if (!size)
435 			continue;
436 		sizesum += size;
437 
438 		pr_debug("consider %016llx - %016llx\n", (u64)start, (u64)end);
439 
440 		/*
441 		 * If we don't yet have used the maximum number of
442 		 * areas, grab a new one.
443 		 *
444 		 * If we can't use anymore, see if this range is not
445 		 * smaller than the smallest one already recorded. If
446 		 * not, re-use the smallest element.
447 		 */
448 		if (nr < CMA_MAX_RANGES)
449 			mrp = &memranges[nr++];
450 		else {
451 			mrp = list_last_entry(&ranges,
452 					      struct cma_init_memrange, list);
453 			if (size < mrp->size)
454 				continue;
455 			list_del(&mrp->list);
456 			sizesum -= mrp->size;
457 			pr_debug("deleted %016llx - %016llx from the list\n",
458 				(u64)mrp->base, (u64)mrp->base + size);
459 		}
460 		mrp->base = start;
461 		mrp->size = size;
462 
463 		/*
464 		 * Now do a sorted insert.
465 		 */
466 		list_insert_sorted(&ranges, mrp, revsizecmp);
467 		pr_debug("added %016llx - %016llx to the list\n",
468 		    (u64)mrp->base, (u64)mrp->base + size);
469 		pr_debug("total size now %llu\n", (u64)sizesum);
470 	}
471 
472 	/*
473 	 * There is not enough room in the CMA_MAX_RANGES largest
474 	 * ranges, so bail out.
475 	 */
476 	if (sizesum < total_size) {
477 		cma_drop_area(cma);
478 		ret = -ENOMEM;
479 		goto out;
480 	}
481 
482 	/*
483 	 * Found ranges that provide enough combined space.
484 	 * Now, sorted them by address, smallest first, because we
485 	 * want to mimic a bottom-up memblock allocation.
486 	 */
487 	sizesum = 0;
488 	list_for_each_safe(mp, next, &ranges) {
489 		mlp = list_entry(mp, struct cma_init_memrange, list);
490 		list_del(mp);
491 		list_insert_sorted(&final_ranges, mlp, basecmp);
492 		sizesum += mlp->size;
493 		if (sizesum >= total_size)
494 			break;
495 	}
496 
497 	/*
498 	 * Walk the final list, and add a CMA range for
499 	 * each range, possibly not using the last one fully.
500 	 */
501 	nr = 0;
502 	sizeleft = total_size;
503 	list_for_each(mp, &final_ranges) {
504 		mlp = list_entry(mp, struct cma_init_memrange, list);
505 		size = min(sizeleft, mlp->size);
506 		if (memblock_reserve(mlp->base, size)) {
507 			/*
508 			 * Unexpected error. Could go on to
509 			 * the next one, but just abort to
510 			 * be safe.
511 			 */
512 			failed = mlp;
513 			break;
514 		}
515 
516 		pr_debug("created region %d: %016llx - %016llx\n",
517 		    nr, (u64)mlp->base, (u64)mlp->base + size);
518 		cmrp = &cma->ranges[nr++];
519 		cmrp->base_pfn = PHYS_PFN(mlp->base);
520 		cmrp->early_pfn = cmrp->base_pfn;
521 		cmrp->count = size >> PAGE_SHIFT;
522 
523 		sizeleft -= size;
524 		if (sizeleft == 0)
525 			break;
526 	}
527 
528 	if (failed) {
529 		list_for_each(mp, &final_ranges) {
530 			mlp = list_entry(mp, struct cma_init_memrange, list);
531 			if (mlp == failed)
532 				break;
533 			memblock_phys_free(mlp->base, mlp->size);
534 		}
535 		cma_drop_area(cma);
536 		ret = -ENOMEM;
537 		goto out;
538 	}
539 
540 	cma->nranges = nr;
541 	cma->nid = nid;
542 	*res_cma = cma;
543 
544 out:
545 	if (ret != 0)
546 		pr_err("Failed to reserve %lu MiB\n",
547 			(unsigned long)total_size / SZ_1M);
548 	else
549 		pr_info("Reserved %lu MiB in %d range%s\n",
550 			(unsigned long)total_size / SZ_1M, nr,
551 			nr > 1 ? "s" : "");
552 	return ret;
553 }
554 
555 /**
556  * cma_declare_contiguous_nid() - reserve custom contiguous area
557  * @base: Base address of the reserved area optional, use 0 for any
558  * @size: Size of the reserved area (in bytes),
559  * @limit: End address of the reserved memory (optional, 0 for any).
560  * @alignment: Alignment for the CMA area, should be power of 2 or zero
561  * @order_per_bit: Order of pages represented by one bit on bitmap.
562  * @fixed: hint about where to place the reserved area
563  * @name: The name of the area. See function cma_init_reserved_mem()
564  * @res_cma: Pointer to store the created cma region.
565  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
566  *
567  * This function reserves memory from early allocator. It should be
568  * called by arch specific code once the early allocator (memblock or bootmem)
569  * has been activated and all other subsystems have already allocated/reserved
570  * memory. This function allows to create custom reserved areas.
571  *
572  * If @fixed is true, reserve contiguous area at exactly @base.  If false,
573  * reserve in range from @base to @limit.
574  */
575 int __init cma_declare_contiguous_nid(phys_addr_t base,
576 			phys_addr_t size, phys_addr_t limit,
577 			phys_addr_t alignment, unsigned int order_per_bit,
578 			bool fixed, const char *name, struct cma **res_cma,
579 			int nid)
580 {
581 	int ret;
582 
583 	ret = __cma_declare_contiguous_nid(&base, size, limit, alignment,
584 			order_per_bit, fixed, name, res_cma, nid);
585 	if (ret != 0)
586 		pr_err("Failed to reserve %ld MiB\n",
587 				(unsigned long)size / SZ_1M);
588 	else
589 		pr_info("Reserved %ld MiB at %pa\n",
590 				(unsigned long)size / SZ_1M, &base);
591 
592 	return ret;
593 }
594 
595 static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
596 			phys_addr_t size, phys_addr_t limit,
597 			phys_addr_t alignment, unsigned int order_per_bit,
598 			bool fixed, const char *name, struct cma **res_cma,
599 			int nid)
600 {
601 	phys_addr_t memblock_end = memblock_end_of_DRAM();
602 	phys_addr_t highmem_start, base = *basep;
603 	int ret;
604 
605 	/*
606 	 * We can't use __pa(high_memory) directly, since high_memory
607 	 * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly)
608 	 * complain. Find the boundary by adding one to the last valid
609 	 * address.
610 	 */
611 	highmem_start = __pa(high_memory - 1) + 1;
612 	pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n",
613 		__func__, &size, &base, &limit, &alignment);
614 
615 	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
616 		pr_err("Not enough slots for CMA reserved regions!\n");
617 		return -ENOSPC;
618 	}
619 
620 	if (!size)
621 		return -EINVAL;
622 
623 	if (alignment && !is_power_of_2(alignment))
624 		return -EINVAL;
625 
626 	if (!IS_ENABLED(CONFIG_NUMA))
627 		nid = NUMA_NO_NODE;
628 
629 	/* Sanitise input arguments. */
630 	alignment = max_t(phys_addr_t, alignment, CMA_MIN_ALIGNMENT_BYTES);
631 	if (fixed && base & (alignment - 1)) {
632 		pr_err("Region at %pa must be aligned to %pa bytes\n",
633 			&base, &alignment);
634 		return -EINVAL;
635 	}
636 	base = ALIGN(base, alignment);
637 	size = ALIGN(size, alignment);
638 	limit &= ~(alignment - 1);
639 
640 	if (!base)
641 		fixed = false;
642 
643 	/* size should be aligned with order_per_bit */
644 	if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
645 		return -EINVAL;
646 
647 	/*
648 	 * If allocating at a fixed base the request region must not cross the
649 	 * low/high memory boundary.
650 	 */
651 	if (fixed && base < highmem_start && base + size > highmem_start) {
652 		pr_err("Region at %pa defined on low/high memory boundary (%pa)\n",
653 			&base, &highmem_start);
654 		return -EINVAL;
655 	}
656 
657 	/*
658 	 * If the limit is unspecified or above the memblock end, its effective
659 	 * value will be the memblock end. Set it explicitly to simplify further
660 	 * checks.
661 	 */
662 	if (limit == 0 || limit > memblock_end)
663 		limit = memblock_end;
664 
665 	if (base + size > limit) {
666 		pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n",
667 			&size, &base, &limit);
668 		return -EINVAL;
669 	}
670 
671 	/* Reserve memory */
672 	if (fixed) {
673 		if (memblock_is_region_reserved(base, size) ||
674 		    memblock_reserve(base, size) < 0) {
675 			return -EBUSY;
676 		}
677 	} else {
678 		phys_addr_t addr = 0;
679 
680 		/*
681 		 * If there is enough memory, try a bottom-up allocation first.
682 		 * It will place the new cma area close to the start of the node
683 		 * and guarantee that the compaction is moving pages out of the
684 		 * cma area and not into it.
685 		 * Avoid using first 4GB to not interfere with constrained zones
686 		 * like DMA/DMA32.
687 		 */
688 #ifdef CONFIG_PHYS_ADDR_T_64BIT
689 		if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) {
690 			memblock_set_bottom_up(true);
691 			addr = memblock_alloc_range_nid(size, alignment, SZ_4G,
692 							limit, nid, true);
693 			memblock_set_bottom_up(false);
694 		}
695 #endif
696 
697 		/*
698 		 * All pages in the reserved area must come from the same zone.
699 		 * If the requested region crosses the low/high memory boundary,
700 		 * try allocating from high memory first and fall back to low
701 		 * memory in case of failure.
702 		 */
703 		if (!addr && base < highmem_start && limit > highmem_start) {
704 			addr = memblock_alloc_range_nid(size, alignment,
705 					highmem_start, limit, nid, true);
706 			limit = highmem_start;
707 		}
708 
709 		if (!addr) {
710 			addr = memblock_alloc_range_nid(size, alignment, base,
711 					limit, nid, true);
712 			if (!addr)
713 				return -ENOMEM;
714 		}
715 
716 		/*
717 		 * kmemleak scans/reads tracked objects for pointers to other
718 		 * objects but this address isn't mapped and accessible
719 		 */
720 		kmemleak_ignore_phys(addr);
721 		base = addr;
722 	}
723 
724 	ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma);
725 	if (ret) {
726 		memblock_phys_free(base, size);
727 		return ret;
728 	}
729 
730 	(*res_cma)->nid = nid;
731 	*basep = base;
732 
733 	return 0;
734 }
735 
736 static void cma_debug_show_areas(struct cma *cma)
737 {
738 	unsigned long next_zero_bit, next_set_bit, nr_zero;
739 	unsigned long start;
740 	unsigned long nr_part;
741 	unsigned long nbits;
742 	int r;
743 	struct cma_memrange *cmr;
744 
745 	spin_lock_irq(&cma->lock);
746 	pr_info("number of available pages: ");
747 	for (r = 0; r < cma->nranges; r++) {
748 		cmr = &cma->ranges[r];
749 
750 		start = 0;
751 		nbits = cma_bitmap_maxno(cma, cmr);
752 
753 		pr_info("range %d: ", r);
754 		for (;;) {
755 			next_zero_bit = find_next_zero_bit(cmr->bitmap,
756 							   nbits, start);
757 			if (next_zero_bit >= nbits)
758 				break;
759 			next_set_bit = find_next_bit(cmr->bitmap, nbits,
760 						     next_zero_bit);
761 			nr_zero = next_set_bit - next_zero_bit;
762 			nr_part = nr_zero << cma->order_per_bit;
763 			pr_cont("%s%lu@%lu", start ? "+" : "", nr_part,
764 				next_zero_bit);
765 			start = next_zero_bit + nr_zero;
766 		}
767 		pr_info("\n");
768 	}
769 	pr_cont("=> %lu free of %lu total pages\n", cma->available_count,
770 			cma->count);
771 	spin_unlock_irq(&cma->lock);
772 }
773 
774 static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
775 				unsigned long count, unsigned int align,
776 				struct page **pagep, gfp_t gfp)
777 {
778 	unsigned long mask, offset;
779 	unsigned long pfn = -1;
780 	unsigned long start = 0;
781 	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
782 	int ret = -EBUSY;
783 	struct page *page = NULL;
784 
785 	mask = cma_bitmap_aligned_mask(cma, align);
786 	offset = cma_bitmap_aligned_offset(cma, cmr, align);
787 	bitmap_maxno = cma_bitmap_maxno(cma, cmr);
788 	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
789 
790 	if (bitmap_count > bitmap_maxno)
791 		goto out;
792 
793 	for (;;) {
794 		spin_lock_irq(&cma->lock);
795 		/*
796 		 * If the request is larger than the available number
797 		 * of pages, stop right away.
798 		 */
799 		if (count > cma->available_count) {
800 			spin_unlock_irq(&cma->lock);
801 			break;
802 		}
803 		bitmap_no = bitmap_find_next_zero_area_off(cmr->bitmap,
804 				bitmap_maxno, start, bitmap_count, mask,
805 				offset);
806 		if (bitmap_no >= bitmap_maxno) {
807 			spin_unlock_irq(&cma->lock);
808 			break;
809 		}
810 		bitmap_set(cmr->bitmap, bitmap_no, bitmap_count);
811 		cma->available_count -= count;
812 		/*
813 		 * It's safe to drop the lock here. We've marked this region for
814 		 * our exclusive use. If the migration fails we will take the
815 		 * lock again and unmark it.
816 		 */
817 		spin_unlock_irq(&cma->lock);
818 
819 		pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
820 		mutex_lock(&cma->alloc_mutex);
821 		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp);
822 		mutex_unlock(&cma->alloc_mutex);
823 		if (ret == 0) {
824 			page = pfn_to_page(pfn);
825 			break;
826 		}
827 
828 		cma_clear_bitmap(cma, cmr, pfn, count);
829 		if (ret != -EBUSY)
830 			break;
831 
832 		pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n",
833 			 __func__, pfn, pfn_to_page(pfn));
834 
835 		trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
836 					   count, align);
837 		/* try again with a bit different memory target */
838 		start = bitmap_no + mask + 1;
839 	}
840 out:
841 	*pagep = page;
842 	return ret;
843 }
844 
845 static struct page *__cma_alloc(struct cma *cma, unsigned long count,
846 		       unsigned int align, gfp_t gfp)
847 {
848 	struct page *page = NULL;
849 	int ret = -ENOMEM, r;
850 	unsigned long i;
851 	const char *name = cma ? cma->name : NULL;
852 
853 	trace_cma_alloc_start(name, count, align);
854 
855 	if (!cma || !cma->count)
856 		return page;
857 
858 	pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
859 		(void *)cma, cma->name, count, align);
860 
861 	if (!count)
862 		return page;
863 
864 	for (r = 0; r < cma->nranges; r++) {
865 		page = NULL;
866 
867 		ret = cma_range_alloc(cma, &cma->ranges[r], count, align,
868 				       &page, gfp);
869 		if (ret != -EBUSY || page)
870 			break;
871 	}
872 
873 	/*
874 	 * CMA can allocate multiple page blocks, which results in different
875 	 * blocks being marked with different tags. Reset the tags to ignore
876 	 * those page blocks.
877 	 */
878 	if (page) {
879 		for (i = 0; i < count; i++)
880 			page_kasan_tag_reset(nth_page(page, i));
881 	}
882 
883 	if (ret && !(gfp & __GFP_NOWARN)) {
884 		pr_err_ratelimited("%s: %s: alloc failed, req-size: %lu pages, ret: %d\n",
885 				   __func__, cma->name, count, ret);
886 		cma_debug_show_areas(cma);
887 	}
888 
889 	pr_debug("%s(): returned %p\n", __func__, page);
890 	trace_cma_alloc_finish(name, page ? page_to_pfn(page) : 0,
891 			       page, count, align, ret);
892 	if (page) {
893 		count_vm_event(CMA_ALLOC_SUCCESS);
894 		cma_sysfs_account_success_pages(cma, count);
895 	} else {
896 		count_vm_event(CMA_ALLOC_FAIL);
897 		cma_sysfs_account_fail_pages(cma, count);
898 	}
899 
900 	return page;
901 }
902 
903 /**
904  * cma_alloc() - allocate pages from contiguous area
905  * @cma:   Contiguous memory region for which the allocation is performed.
906  * @count: Requested number of pages.
907  * @align: Requested alignment of pages (in PAGE_SIZE order).
908  * @no_warn: Avoid printing message about failed allocation
909  *
910  * This function allocates part of contiguous memory on specific
911  * contiguous memory area.
912  */
913 struct page *cma_alloc(struct cma *cma, unsigned long count,
914 		       unsigned int align, bool no_warn)
915 {
916 	return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
917 }
918 
919 struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
920 {
921 	struct page *page;
922 
923 	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
924 		return NULL;
925 
926 	page = __cma_alloc(cma, 1 << order, order, gfp);
927 
928 	return page ? page_folio(page) : NULL;
929 }
930 
931 bool cma_pages_valid(struct cma *cma, const struct page *pages,
932 		     unsigned long count)
933 {
934 	unsigned long pfn, end;
935 	int r;
936 	struct cma_memrange *cmr;
937 	bool ret;
938 
939 	if (!cma || !pages || count > cma->count)
940 		return false;
941 
942 	pfn = page_to_pfn(pages);
943 	ret = false;
944 
945 	for (r = 0; r < cma->nranges; r++) {
946 		cmr = &cma->ranges[r];
947 		end = cmr->base_pfn + cmr->count;
948 		if (pfn >= cmr->base_pfn && pfn < end) {
949 			ret = pfn + count <= end;
950 			break;
951 		}
952 	}
953 
954 	if (!ret)
955 		pr_debug("%s(page %p, count %lu)\n",
956 				__func__, (void *)pages, count);
957 
958 	return ret;
959 }
960 
961 /**
962  * cma_release() - release allocated pages
963  * @cma:   Contiguous memory region for which the allocation is performed.
964  * @pages: Allocated pages.
965  * @count: Number of allocated pages.
966  *
967  * This function releases memory allocated by cma_alloc().
968  * It returns false when provided pages do not belong to contiguous area and
969  * true otherwise.
970  */
971 bool cma_release(struct cma *cma, const struct page *pages,
972 		 unsigned long count)
973 {
974 	struct cma_memrange *cmr;
975 	unsigned long pfn, end_pfn;
976 	int r;
977 
978 	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
979 
980 	if (!cma_pages_valid(cma, pages, count))
981 		return false;
982 
983 	pfn = page_to_pfn(pages);
984 	end_pfn = pfn + count;
985 
986 	for (r = 0; r < cma->nranges; r++) {
987 		cmr = &cma->ranges[r];
988 		if (pfn >= cmr->base_pfn &&
989 		    pfn < (cmr->base_pfn + cmr->count)) {
990 			VM_BUG_ON(end_pfn > cmr->base_pfn + cmr->count);
991 			break;
992 		}
993 	}
994 
995 	if (r == cma->nranges)
996 		return false;
997 
998 	free_contig_range(pfn, count);
999 	cma_clear_bitmap(cma, cmr, pfn, count);
1000 	cma_sysfs_account_release_pages(cma, count);
1001 	trace_cma_release(cma->name, pfn, pages, count);
1002 
1003 	return true;
1004 }
1005 
1006 bool cma_free_folio(struct cma *cma, const struct folio *folio)
1007 {
1008 	if (WARN_ON(!folio_test_large(folio)))
1009 		return false;
1010 
1011 	return cma_release(cma, &folio->page, folio_nr_pages(folio));
1012 }
1013 
1014 int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
1015 {
1016 	int i;
1017 
1018 	for (i = 0; i < cma_area_count; i++) {
1019 		int ret = it(&cma_areas[i], data);
1020 
1021 		if (ret)
1022 			return ret;
1023 	}
1024 
1025 	return 0;
1026 }
1027 
1028 bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end)
1029 {
1030 	int r;
1031 	struct cma_memrange *cmr;
1032 	unsigned long rstart, rend;
1033 
1034 	for (r = 0; r < cma->nranges; r++) {
1035 		cmr = &cma->ranges[r];
1036 
1037 		rstart = PFN_PHYS(cmr->base_pfn);
1038 		rend = PFN_PHYS(cmr->base_pfn + cmr->count);
1039 		if (end < rstart)
1040 			continue;
1041 		if (start >= rend)
1042 			continue;
1043 		return true;
1044 	}
1045 
1046 	return false;
1047 }
1048 
1049 /*
1050  * Very basic function to reserve memory from a CMA area that has not
1051  * yet been activated. This is expected to be called early, when the
1052  * system is single-threaded, so there is no locking. The alignment
1053  * checking is restrictive - only pageblock-aligned areas
1054  * (CMA_MIN_ALIGNMENT_BYTES) may be reserved through this function.
1055  * This keeps things simple, and is enough for the current use case.
1056  *
1057  * The CMA bitmaps have not yet been allocated, so just start
1058  * reserving from the bottom up, using a PFN to keep track
1059  * of what has been reserved. Unreserving is not possible.
1060  *
1061  * The caller is responsible for initializing the page structures
1062  * in the area properly, since this just points to memblock-allocated
1063  * memory. The caller should subsequently use init_cma_pageblock to
1064  * set the migrate type and CMA stats  the pageblocks that were reserved.
1065  *
1066  * If the CMA area fails to activate later, memory obtained through
1067  * this interface is not handed to the page allocator, this is
1068  * the responsibility of the caller (e.g. like normal memblock-allocated
1069  * memory).
1070  */
1071 void __init *cma_reserve_early(struct cma *cma, unsigned long size)
1072 {
1073 	int r;
1074 	struct cma_memrange *cmr;
1075 	unsigned long available;
1076 	void *ret = NULL;
1077 
1078 	if (!cma || !cma->count)
1079 		return NULL;
1080 	/*
1081 	 * Can only be called early in init.
1082 	 */
1083 	if (test_bit(CMA_ACTIVATED, &cma->flags))
1084 		return NULL;
1085 
1086 	if (!IS_ALIGNED(size, CMA_MIN_ALIGNMENT_BYTES))
1087 		return NULL;
1088 
1089 	if (!IS_ALIGNED(size, (PAGE_SIZE << cma->order_per_bit)))
1090 		return NULL;
1091 
1092 	size >>= PAGE_SHIFT;
1093 
1094 	if (size > cma->available_count)
1095 		return NULL;
1096 
1097 	for (r = 0; r < cma->nranges; r++) {
1098 		cmr = &cma->ranges[r];
1099 		available = cmr->count - (cmr->early_pfn - cmr->base_pfn);
1100 		if (size <= available) {
1101 			ret = phys_to_virt(PFN_PHYS(cmr->early_pfn));
1102 			cmr->early_pfn += size;
1103 			cma->available_count -= size;
1104 			return ret;
1105 		}
1106 	}
1107 
1108 	return ret;
1109 }
1110