xref: /linux/tools/testing/selftests/bpf/libarena/src/buddy.bpf.c (revision 367e6e4a8173d47b4c57181cdd9dcbfc291755f0)
1 // SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
3 
4 #include <libarena/common.h>
5 #include <libarena/asan.h>
6 #include <libarena/buddy.h>
7 
8 /*
9  * Buddy allocator arena-based implementation.
10  *
11  * Memory is organized into chunks. These chunks
12  * cannot be coalesced or split. Allocating
13  * chunks allocates their memory eagerly.
14  *
15  * Internally, each chunk is organized into blocks.
16  * Blocks _can_ be coalesced/split, but only inside
17  * the chunk. Each block can be allocated or
18  * unallocated. If allocated, the entire block holds
19  * user data. If unallocated, the block is mostly
20  * invalid memory, with the exception of a header
21  * used for freelist tracking.
22  *
23  * The header is placed at an offset inside the block
24  * to prevent off-by-one errors from the previous block
25  * from trivially overwriting the header. Such an error
26  * is also not catchable by ASAN, since the header remains
27  * valid memory even after the block is freed. It is still
28  * theoretically possible for the header to be corrupted
29  * without being caught by ASAN, but harder.
30  *
31  * Since the allocator needs to track order information for
32  * both allocated and free blocks, and allocated blocks cannot
33  * store a header, the allocator also stores per-chunk order
34  * information in a reserved region at the beginning of the
35  * chunk. The header includes a bitmap with the order of blocks
36  * and their allocation state. It also includes the freelist
37  * heads for the allocation itself.
38  */
39 
40 
41 enum {
42 	BUDDY_POISONED = (s8)0xef,
43 
44 	/* Number of pages to be allocated per chunk. */
45 	BUDDY_CHUNK_PAGES	= BUDDY_CHUNK_BYTES / __PAGE_SIZE
46 };
47 
48 static inline int buddy_lock(struct buddy __arena *buddy)
49 {
50 	return arena_spin_lock(&buddy->lock);
51 }
52 
53 static inline void buddy_unlock(struct buddy __arena *buddy)
54 {
55 	arena_spin_unlock(&buddy->lock);
56 }
57 
58 /*
59  * Reserve part of the arena address space for the allocator. We use
60  * this to get aligned addresses for the chunks, since the arena
61  * page alloc kfuncs do not support aligning to a boundary (in this
62  * case 1 MiB, see buddy.h on how this is derived).
63  */
64 static int buddy_reserve_arena_vaddr(struct buddy __arena *buddy)
65 {
66 	buddy->vaddr = 0;
67 
68 	return bpf_arena_reserve_pages(&arena,
69 				       (void __arena *)BUDDY_VADDR_OFFSET,
70 				       BUDDY_VADDR_SIZE / __PAGE_SIZE);
71 }
72 
73 /*
74  * Free up any unused address space. Used only during teardown.
75  */
76 static void buddy_unreserve_arena_vaddr(struct buddy __arena *buddy)
77 {
78 	bpf_arena_free_pages(
79 		&arena, (void __arena *)(BUDDY_VADDR_OFFSET + buddy->vaddr),
80 		(BUDDY_VADDR_SIZE - buddy->vaddr) / __PAGE_SIZE);
81 
82 	buddy->vaddr = 0;
83 }
84 
85 /*
86  * Carve out part of the reserved address space and hand it over
87  * to the buddy allocator.
88  *
89  * We are assuming the buddy allocator is the only allocator in the
90  * system, so there is no race between this function reserving a
91  * page range and some other allocator actually making the BPF call
92  * to really create and reserve it.
93  *
94  * However, bump allocation must still be atomic because this function
95  * is called without the buddy lock from multiple threads concurrently.
96  */
97 __weak int buddy_alloc_arena_vaddr(struct buddy __arena *buddy, u64 *vaddrp)
98 {
99 	u64 vaddr, old, new;
100 
101 	if (!buddy || !vaddrp)
102 		return -EINVAL;
103 
104 	do {
105 		vaddr = buddy->vaddr;
106 		new = vaddr + BUDDY_CHUNK_BYTES;
107 
108 		if (new > BUDDY_VADDR_SIZE)
109 			return -EINVAL;
110 
111 		old = __sync_val_compare_and_swap(&buddy->vaddr, vaddr, new);
112 	} while (old != vaddr && can_loop);
113 
114 	if (old != vaddr)
115 		return -EINVAL;
116 
117 	*vaddrp = BUDDY_VADDR_OFFSET + vaddr;
118 
119 	return 0;
120 }
121 
122 static u64 arena_next_pow2(__u64 n)
123 {
124 	n--;
125 	n |= n >> 1;
126 	n |= n >> 2;
127 	n |= n >> 4;
128 	n |= n >> 8;
129 	n |= n >> 16;
130 	n |= n >> 32;
131 	n++;
132 
133 	return n;
134 }
135 
136 __weak
137 int idx_set_allocated(struct buddy_chunk __arena *chunk, u64 idx, bool allocated)
138 {
139 	bool already_allocated;
140 
141 	if (unlikely(idx >= BUDDY_CHUNK_ITEMS)) {
142 		arena_stderr("setting state of invalid idx (%ld, max %d)\n", idx,
143 			     BUDDY_CHUNK_ITEMS);
144 		return -EINVAL;
145 	}
146 
147 	already_allocated = chunk->allocated[idx / 8] & (1 << (idx % 8));
148 	if (unlikely(already_allocated == allocated)) {
149 		arena_stderr("Double %s of idx %ld for chunk %p",
150 				allocated ? "alloc" : "free",
151 				idx, chunk);
152 		return -EINVAL;
153 	}
154 
155 	if (allocated)
156 		chunk->allocated[idx / 8] |= 1 << (idx % 8);
157 	else
158 		chunk->allocated[idx / 8] &= ~(1 << (idx % 8));
159 
160 	return 0;
161 }
162 
163 static int idx_is_allocated(struct buddy_chunk __arena *chunk, u64 idx, bool *allocated)
164 {
165 	if (unlikely(idx >= BUDDY_CHUNK_ITEMS)) {
166 		arena_stderr("getting state of invalid idx (%llu, max %d)\n", idx,
167 			     BUDDY_CHUNK_ITEMS);
168 		return -EINVAL;
169 	}
170 
171 	*allocated = chunk->allocated[idx / 8] & (1 << (idx % 8));
172 	return 0;
173 }
174 
175 __weak
176 int idx_set_order(struct buddy_chunk __arena *chunk, u64 idx, u8 order)
177 {
178 	u8 prev_order;
179 
180 	if (unlikely(order >= BUDDY_CHUNK_NUM_ORDERS)) {
181 		arena_stderr("setting invalid order %u\n", order);
182 		return -EINVAL;
183 	}
184 
185 	if (unlikely(idx >= BUDDY_CHUNK_ITEMS)) {
186 		arena_stderr("setting order of invalid idx (%d, max %d)\n", idx,
187 			     BUDDY_CHUNK_ITEMS);
188 		return -EINVAL;
189 	}
190 
191 	/*
192 	 * We store two order instances per byte, one per nibble.
193 	 * Retain the existing nibble.
194 	 */
195 	prev_order = chunk->orders[idx / 2];
196 	if (idx & 0x1) {
197 		order &= 0xf;
198 		order |= (prev_order & 0xf0);
199 	} else {
200 		order <<= 4;
201 		order |= (prev_order & 0xf);
202 	}
203 
204 	chunk->orders[idx / 2] = order;
205 
206 	return 0;
207 }
208 
209 static u8 idx_get_order(struct buddy_chunk __arena *chunk, u64 idx)
210 {
211 	u8 result;
212 
213 	_Static_assert(BUDDY_CHUNK_NUM_ORDERS <= 16,
214 		       "order must fit in 4 bits");
215 
216 	if (unlikely(idx >= BUDDY_CHUNK_ITEMS)) {
217 		arena_stderr("getting order of invalid idx %u\n", idx);
218 		return BUDDY_CHUNK_NUM_ORDERS;
219 	}
220 
221 	result = chunk->orders[idx / 2];
222 
223 	return (idx & 0x1) ? (result & 0xf) : (result >> 4);
224 }
225 
226 static void __arena *idx_to_addr(struct buddy_chunk __arena *chunk, size_t idx)
227 {
228 	u64 address;
229 
230 	if (unlikely(idx >= BUDDY_CHUNK_ITEMS)) {
231 		arena_stderr("translating invalid idx %u\n", idx);
232 		return NULL;
233 	}
234 
235 	/*
236 	 * The data blocks start in the chunk after the metadata block.
237 	 * We find the actual address by indexing into the region at an
238 	 * BUDDY_MIN_ALLOC_BYTES granularity, the minimum allowed.
239 	 * The index number already accounts for the fact that the first
240 	 * blocks in the chunk are occupied by the metadata, so we do
241 	 * not need to offset it.
242 	 */
243 
244 	address = (u64)chunk + (idx * BUDDY_MIN_ALLOC_BYTES);
245 
246 	return (void __arena *)address;
247 }
248 
249 static struct buddy_header __arena *idx_to_header(struct buddy_chunk __arena *chunk, size_t idx)
250 {
251 	bool allocated;
252 	u64 address;
253 
254 	if (unlikely(idx_is_allocated(chunk, idx, &allocated))) {
255 		arena_stderr("accessing invalid idx 0x%lx\n", idx);
256 		return NULL;
257 	}
258 
259 	if (unlikely(allocated)) {
260 		arena_stderr("accessing allocated idx 0x%lx as header\n", idx);
261 		return NULL;
262 	}
263 
264 	address = (u64)idx_to_addr(chunk, idx);
265 	if (!address)
266 		return NULL;
267 
268 	/*
269 	 * Offset the header within the block. This avoids accidental overwrites
270 	 * to the header because of off-by-one errors when using adjacent blocks.
271 	 *
272 	 * The offset has been chosen as a compromise between ASAN effectiveness
273 	 * and allocator granularity:
274 	 * 1) ASAN dictates valid data runs are 8-byte aligned.
275 	 * 2) We want to keep a low minimum allocation size (currently 16).
276 	 *
277 	 * As a result, we have only two possible positions for the header: Bytes
278 	 * 0 and 8. Keeping the header in byte 0 means off-by-ones from the previous
279 	 * block touch the header, and, since the header must be accessible, ASAN
280 	 * will not trigger. Keeping the header on byte 8 means off-by-one errors from
281 	 * the previous block are caught by ASAN. Negative offsets are rarer, so
282 	 * while accesses into the block from the next block are possible, they are
283 	 * less probable.
284 	 */
285 
286 	return (struct buddy_header __arena *)(address + BUDDY_HEADER_OFF);
287 }
288 
289 static void header_add_freelist(struct buddy_chunk __arena *chunk, struct buddy_header __arena *header,
290 		u64 idx, u8 order)
291 {
292 	struct buddy_header __arena *tmp_header;
293 
294 	idx_set_order(chunk, idx, order);
295 
296 	header->next_index = chunk->freelists[order];
297 	header->prev_index = BUDDY_CHUNK_ITEMS;
298 
299 	if (header->next_index != BUDDY_CHUNK_ITEMS) {
300 		tmp_header = idx_to_header(chunk, header->next_index);
301 		tmp_header->prev_index = idx;
302 	}
303 
304 	chunk->freelists[order] = idx;
305 }
306 
307 static void header_remove_freelist(struct buddy_chunk __arena  *chunk,
308 				   struct buddy_header __arena *header, u8 order)
309 {
310 	struct buddy_header __arena *tmp_header;
311 
312 	if (header->prev_index != BUDDY_CHUNK_ITEMS) {
313 		tmp_header = idx_to_header(chunk, header->prev_index);
314 		tmp_header->next_index = header->next_index;
315 	}
316 
317 	if (header->next_index != BUDDY_CHUNK_ITEMS) {
318 		tmp_header = idx_to_header(chunk, header->next_index);
319 		tmp_header->prev_index = header->prev_index;
320 	}
321 
322 	/* Pop off the list head if necessary. */
323 	if (idx_to_header(chunk, chunk->freelists[order]) == header)
324 		chunk->freelists[order] = header->next_index;
325 
326 	header->prev_index = BUDDY_CHUNK_ITEMS;
327 	header->next_index = BUDDY_CHUNK_ITEMS;
328 }
329 
330 static u64 size_to_order(size_t size)
331 {
332 	u64 order;
333 
334 	/*
335 	 * Legal sizes are [1, 4GiB] (the biggest possible arena).
336 	 * Of course, sizes close to GiB are practically impossible
337 	 * to fulfill and allocation will fail, but that's taken care
338 	 * of by the caller.
339 	 */
340 
341 	if (unlikely(size == 0 || size > (1UL << 32))) {
342 		arena_stderr("illegal size request %lu\n", size);
343 		return 64;
344 	}
345 	/*
346 	 * To find the order of the allocation we find the first power of two
347 	 * >= the requested size, take the log2, then adjust it for the minimum
348 	 * allocation size by removing the minimum shift from it. Requests
349 	 * smaller than the minimum allocation size are rounded up.
350 	 */
351 	order = arena_fls(arena_next_pow2(size)) - 1;
352 	if (order < BUDDY_MIN_ALLOC_SHIFT)
353 		return 0;
354 
355 	return order - BUDDY_MIN_ALLOC_SHIFT;
356 }
357 
358 __weak
359 int add_leftovers_to_freelist(struct buddy_chunk __arena *chunk, u32 cur_idx,
360 		u64 min_order, u64 max_order)
361 {
362 	struct buddy_header __arena *header;
363 	u64 ord;
364 	u32 idx;
365 
366 	for (ord = min_order; ord < max_order && can_loop; ord++) {
367 		/* Mark the buddy as free and add it to the freelists. */
368 		idx = cur_idx + (1 << ord);
369 
370 		header = idx_to_header(chunk, idx);
371 		if (unlikely(!header)) {
372 			arena_stderr("idx %u has no header", idx);
373 			return -EINVAL;
374 		}
375 
376 		asan_unpoison(header, sizeof(*header));
377 
378 		header_add_freelist(chunk, header, idx, ord);
379 	}
380 
381 	return 0;
382 }
383 
384 static struct buddy_chunk __arena *buddy_chunk_get(struct buddy __arena *buddy)
385 {
386 	u64 order, ord, min_order, max_order;
387 	struct buddy_chunk __arena  *chunk;
388 	size_t left;
389 	int power2;
390 	u64 vaddr;
391 	u32 idx;
392 	int ret;
393 
394 	/*
395 	 * Step 1:  Allocate a properly aligned chunk, and
396 	 * prep it for insertion into the buddy allocator.
397 	 * We don't need the allocator lock until step 2.
398 	 */
399 
400 	ret = buddy_alloc_arena_vaddr(buddy, &vaddr);
401 	if (ret)
402 		return NULL;
403 
404 	/* Addresses must be aligned to the chunk boundary. */
405 	if (vaddr % BUDDY_CHUNK_BYTES)
406 		return NULL;
407 
408 	/* Unreserve the address space. */
409 	bpf_arena_free_pages(&arena, (void __arena *)vaddr,
410 			     BUDDY_CHUNK_PAGES);
411 
412 	chunk = bpf_arena_alloc_pages(&arena, (void __arena *)vaddr,
413 				      BUDDY_CHUNK_PAGES, NUMA_NO_NODE, 0);
414 	if (!chunk) {
415 		arena_stderr("[ALLOC FAILED]");
416 		return NULL;
417 	}
418 
419 	if (buddy_lock(buddy)) {
420 		/*
421 		 * We cannot reclaim the vaddr space, but that is ok - this
422 		 * operation should always succeed. The error path is to catch
423 		 * accidental deadlocks that will cause -ENOMEMs to the program as
424 		 * the allocator fails to refill itself, in which case vaddr usage
425 		 * is the least of our worries.
426 		 */
427 		bpf_arena_free_pages(&arena, (void __arena *)vaddr, BUDDY_CHUNK_PAGES);
428 		return NULL;
429 	}
430 
431 	asan_poison(chunk, BUDDY_POISONED, BUDDY_CHUNK_PAGES * __PAGE_SIZE);
432 
433 	/* Unpoison the chunk itself. */
434 	asan_unpoison(chunk, sizeof(*chunk));
435 
436 	/* Mark all freelists as empty. */
437 	for (ord = zero; ord < BUDDY_CHUNK_NUM_ORDERS && can_loop; ord++)
438 		chunk->freelists[ord] = BUDDY_CHUNK_ITEMS;
439 
440 	/*
441 	 * Initialize the chunk by carving out a page range to hold the metadata
442 	 * struct above, then dumping the rest of the pages into the allocator.
443 	 */
444 
445 	_Static_assert(BUDDY_CHUNK_PAGES * __PAGE_SIZE >=
446 			       BUDDY_MIN_ALLOC_BYTES *
447 				       BUDDY_CHUNK_ITEMS,
448 		       "chunk must fit within the allocation");
449 
450 	/*
451 	 * Step 2: Reserve a chunk for the chunk metadata, then breaks
452 	 * the rest of the full allocation into the different buckets.
453 	 * We allocating the memory by grabbing blocks of progressively
454 	 * smaller sizes from the allocator, which are guaranteed to be
455 	 * continuous.
456 	 *
457 	 * This operation also populates the allocator.
458 	 *
459 	 * Algorithm:
460 	 *
461 	 * - max_order: The last order allocation we made
462 	 * - left: How many bytes are left to allocate
463 	 * - cur_index: Current index into the top-level block we are
464 	 * allocating from.
465 	 *
466 	 * Step 3:
467 	 * - Find the largest power-of-2 allocation still smaller than left (infimum)
468 	 * - Reserve a chunk of that size, along with its buddy
469 	 * - For every order from [infimum + 1, last order), carve out a block
470 	 *   and put it into the allocator.
471 	 *
472 	 *  Example: Chunk size 0b1010000 (80 bytes)
473 	 *
474 	 *  Step 1:
475 	 *
476 	 *   idx  infimum                             1 << max_order
477 	 *   0        64        128                    1 << 20
478 	 *   |________|_________|______________________|
479 	 *
480 	 *   Blocks set aside:
481 	 *   	[0, 64)         - Completely allocated
482 	 *   	[64, 128)       - Will be further split in the next iteration
483 	 *
484 	 *   Blocks added to the allocator:
485 	 *   	[128, 256)
486 	 *   	[256, 512)
487 	 *   	...
488 	 *   	[1 << 18, 1 << 19)
489 	 *   	[1 << 19, 1 << 20)
490 	 *
491 	 *  Step 2:
492 	 *
493 	 *   idx  infimum			   idx + 1 << max_order
494 	 *   64	      80	96		   	64 + 1 << 6 = 128
495 	 *   |________|_________|______________________|
496 	 *
497 	 *   Blocks set aside:
498 	 *   	[64, 80)	- Completely allocated
499 	 *
500 	 *   Blocks added to the allocator:
501 	 *      [80, 96) - left == 0 so the buddy is unused and marked as freed
502 	 *   	[96, 128)
503 	 */
504 	 max_order = BUDDY_CHUNK_NUM_ORDERS;
505 	left = sizeof(*chunk);
506 	idx = 0;
507 	while (left && can_loop) {
508 		power2 = arena_fls(left) - 1;
509 		/*
510 		 * Note: The condition below only triggers to catch serious bugs
511 		 * early. There is no sane way to undo any block insertions from
512 		 * the allocated chunk, so just leak any leftover allocations,
513 		 * emit a diagnostic, unlock and exit.
514 		 *
515 		 */
516 		if (unlikely(power2 >= BUDDY_CHUNK_NUM_ORDERS)) {
517 			arena_stderr(
518 				"buddy chunk metadata require allocation of order %d\n",
519 				power2);
520 			arena_stderr(
521 				"chunk has size of 0x%lx bytes (left %lx bytes)\n",
522 				sizeof(*chunk), left);
523 			buddy_unlock(buddy);
524 
525 			return NULL;
526 		}
527 
528 		/* Round up allocations that are too small. */
529 
530 		left -= (power2 >= BUDDY_MIN_ALLOC_SHIFT) ? 1 << power2 : left;
531 		order = (power2 >= BUDDY_MIN_ALLOC_SHIFT) ? power2 - BUDDY_MIN_ALLOC_SHIFT : 0;
532 
533 		if (idx_set_allocated(chunk, idx, true)) {
534 			buddy_unlock(buddy);
535 			return NULL;
536 		}
537 
538 		/*
539 		 * Starting an order above the one we allocated, populate
540 		 * the allocator with free blocks. If this is the last
541 		 * allocation (left == 0), also mark the buddy as free.
542 		 *
543 		 * See comment above about error handling: The error path
544 		 * is only there as a way to mitigate deeply buggy allocator
545 		 * states by emitting a diagnostic in add_leftovers_to_freelist()
546 		 * and leaking any memory not added in the freelists.
547 		 */
548 		min_order = left ? order + 1 : order;
549 		if (add_leftovers_to_freelist(chunk, idx, min_order, max_order)) {
550 			buddy_unlock(buddy);
551 			return NULL;
552 		}
553 
554 		/* Adjust the index. */
555 		idx += 1 << order;
556 		max_order = order;
557 	}
558 
559 	buddy_unlock(buddy);
560 
561 	return chunk;
562 }
563 
564 __weak int buddy_init(struct buddy __arena *buddy)
565 {
566 	struct buddy_chunk __arena *chunk;
567 	int ret;
568 
569 	if (!asan_ready())
570 		return -EINVAL;
571 
572 	/* Reserve enough address space to ensure allocations are aligned. */
573 	ret = buddy_reserve_arena_vaddr(buddy);
574 	if (ret)
575 		return ret;
576 
577 	_Static_assert(BUDDY_CHUNK_PAGES > 0,
578 		       "chunk must use one or more pages");
579 
580 	chunk = buddy_chunk_get(buddy);
581 
582 	if (buddy_lock(buddy)) {
583 		bpf_arena_free_pages(&arena, chunk, BUDDY_CHUNK_PAGES);
584 		return -EINVAL;
585 	}
586 
587 	/* Chunk is already properly unpoisoned if allocated. */
588 	if (chunk)
589 		chunk->next = buddy->first_chunk;
590 
591 	/* Put the chunk at the beginning of the list. */
592 	buddy->first_chunk = chunk;
593 
594 	buddy_unlock(buddy);
595 
596 	return chunk ? 0 : -ENOMEM;
597 }
598 
599 /*
600  * Destroy the allocator. This does not check whether there are any allocations
601  * currently in use, so any pages being accessed will start taking arena faults.
602  * We do not take a lock because we are freeing arena pages, and nobody should
603  * be using the allocator at that point in the execution.
604  */
605 __weak int buddy_destroy(struct buddy __arena *buddy)
606 {
607 	struct buddy_chunk __arena *chunk, *next;
608 
609 	if (!buddy)
610 		return -EINVAL;
611 
612 	/*
613 	 * Traverse all buddy chunks and free them back to the arena
614 	 * with the same granularity they were allocated with.
615 	 */
616 	for (chunk = buddy->first_chunk; chunk && can_loop; chunk = next) {
617 		next = chunk->next;
618 
619 		/* Wholesale poison the entire block. */
620 		asan_poison(chunk, BUDDY_POISONED,
621 			    BUDDY_CHUNK_PAGES * __PAGE_SIZE);
622 		bpf_arena_free_pages(&arena, chunk, BUDDY_CHUNK_PAGES);
623 	}
624 
625 	/* Free up any part of the address space that did not get used. */
626 	buddy_unreserve_arena_vaddr(buddy);
627 
628 	/* Clear all fields. */
629 	buddy->first_chunk = NULL;
630 
631 	return 0;
632 }
633 
634 __weak u64 buddy_chunk_alloc(struct buddy_chunk __arena *chunk, int order_req)
635 {
636 	struct buddy_header __arena *header, *tmp_header, *next_header;
637 	u32 idx, tmpidx, retidx;
638 	u64 address;
639 	u64 order = 0;
640 	u64 i;
641 
642 	for (order = order_req; order < BUDDY_CHUNK_NUM_ORDERS && can_loop; order++) {
643 		if (chunk->freelists[order] != BUDDY_CHUNK_ITEMS)
644 			break;
645 	}
646 
647 	if (order >= BUDDY_CHUNK_NUM_ORDERS)
648 		return (u64)NULL;
649 
650 	retidx = chunk->freelists[order];
651 	header = idx_to_header(chunk, retidx);
652 	if (unlikely(!header))
653 		return (u64) NULL;
654 
655 	chunk->freelists[order] = header->next_index;
656 
657 	if (header->next_index != BUDDY_CHUNK_ITEMS) {
658 		next_header = idx_to_header(chunk, header->next_index);
659 		next_header->prev_index = BUDDY_CHUNK_ITEMS;
660 	}
661 
662 	header->prev_index = BUDDY_CHUNK_ITEMS;
663 	header->next_index = BUDDY_CHUNK_ITEMS;
664 	if (idx_set_order(chunk, retidx, order_req))
665 		return (u64)NULL;
666 
667 	if (idx_set_allocated(chunk, retidx, true))
668 		return (u64)NULL;
669 
670 	/*
671 	 * Do not unpoison the address yet, will be done by the caller
672 	 * because the caller has the exact allocation size requested.
673 	 */
674 	address = (u64)idx_to_addr(chunk, retidx);
675 	if (!address)
676 		return (u64)NULL;
677 
678 	/* If we allocated from a larger-order chunk, split the buddies. */
679 	for (i = order_req; i < order && can_loop; i++) {
680 		/*
681 		 * Flip the bit for the current order (the bit is guaranteed
682 		 * to be 0, so just add 1 << i).
683 		 */
684 		idx = retidx + (1 << i);
685 
686 		/* Add the buddy of the allocation to the free list. */
687 		header = idx_to_header(chunk, idx);
688 		/* Unpoison the buddy header */
689 		asan_unpoison(header, sizeof(*header));
690 
691 		if (idx_set_order(chunk, idx, i))
692 			return (u64)NULL;
693 
694 		/* Push the header to the beginning of the freelists list. */
695 		tmpidx = chunk->freelists[i];
696 
697 		header->prev_index = BUDDY_CHUNK_ITEMS;
698 		header->next_index = tmpidx;
699 
700 		if (tmpidx != BUDDY_CHUNK_ITEMS) {
701 			tmp_header = idx_to_header(chunk, tmpidx);
702 			tmp_header->prev_index = idx;
703 		}
704 
705 		chunk->freelists[i] = idx;
706 	}
707 
708 	return address;
709 }
710 
711 /* Scan the existing chunks for available memory. */
712 static u64 buddy_alloc_from_existing_chunks(struct buddy __arena *buddy, int order)
713 {
714 	struct buddy_chunk __arena *chunk;
715 	u64 address;
716 
717 	for (chunk = buddy->first_chunk; chunk != NULL && can_loop;
718 	     chunk = chunk->next) {
719 		address = buddy_chunk_alloc(chunk, order);
720 		if (address)
721 			return address;
722 	}
723 
724 	return (u64)NULL;
725 }
726 
727 /*
728  * Try an allocation from a newly allocated chunk. Also
729  * incorporate the chunk into the linked list.
730  */
731 static u64 buddy_alloc_from_new_chunk(struct buddy __arena *buddy, struct buddy_chunk __arena *chunk, int order)
732 {
733 	u64 address;
734 
735 	if (buddy_lock(buddy))
736 		return (u64)NULL;
737 
738 
739 	/*
740 	 * Add the chunk into the allocator and try
741 	 * to allocate specifically from that chunk.
742 	 */
743 	chunk->next = buddy->first_chunk;
744 	buddy->first_chunk = chunk;
745 
746 	address = buddy_chunk_alloc(buddy->first_chunk, order);
747 
748 	buddy_unlock(buddy);
749 
750 	return (u64)address;
751 }
752 __weak
753 void __arena *buddy_alloc(struct buddy __arena *buddy, size_t size)
754 {
755 	void __arena *address = NULL;
756 	struct buddy_chunk __arena *chunk;
757 	int order;
758 
759 	if (!buddy)
760 		return NULL;
761 
762 	order = size_to_order(size);
763 	if (order >= BUDDY_CHUNK_NUM_ORDERS || order < 0) {
764 		arena_stderr("invalid order %d (sz %lu)\n", order, size);
765 		return NULL;
766 	}
767 
768 	if (buddy_lock(buddy))
769 		return NULL;
770 
771 	address = (u8 __arena *)buddy_alloc_from_existing_chunks(buddy, order);
772 	buddy_unlock(buddy);
773 	if (address)
774 		goto done;
775 
776 	/* Get a new chunk. */
777 	chunk = buddy_chunk_get(buddy);
778 	if (chunk)
779 		address = (u8 __arena *)buddy_alloc_from_new_chunk(buddy, chunk, order);
780 
781 done:
782 	/* If we failed to allocate memory, return NULL. */
783 	if (!address)
784 		return NULL;
785 
786 	/*
787 	 * Unpoison exactly the amount of bytes requested. If the
788 	 * data is smaller than the header, we must poison any
789 	 * unused bytes that were part of the header.
790 	 */
791 	if (size < BUDDY_HEADER_OFF + sizeof(struct buddy_header __arena))
792 		asan_poison(address + BUDDY_HEADER_OFF, BUDDY_POISONED,
793 			    sizeof(struct buddy_header __arena));
794 
795 	asan_unpoison(address, size);
796 
797 	return address;
798 }
799 
800 static __always_inline int buddy_free_unlocked(struct buddy __arena *buddy, u64 addr)
801 {
802 	struct buddy_header __arena *header, *buddy_header;
803 	u64 idx, buddy_idx, tmp_idx;
804 	struct buddy_chunk __arena *chunk;
805 	bool allocated;
806 	u8 order;
807 	int ret;
808 
809 	if (!buddy)
810 		return -EINVAL;
811 
812 	if (addr & (BUDDY_MIN_ALLOC_BYTES - 1)) {
813 		arena_stderr("Freeing unaligned address %llx\n", addr);
814 		return -EINVAL;
815 	}
816 
817 	/* Get (chunk, idx) out of the address. */
818 	chunk = (void __arena *)(addr & ~BUDDY_CHUNK_OFFSET_MASK);
819 	idx = (addr & BUDDY_CHUNK_OFFSET_MASK) / BUDDY_MIN_ALLOC_BYTES;
820 
821 	/* Mark the block as unallocated so we can access the header. */
822 	ret = idx_set_allocated(chunk, idx, false);
823 	if (ret)
824 		return ret;
825 
826 	order  = idx_get_order(chunk, idx);
827 	header = idx_to_header(chunk, idx);
828 
829 	/* The header is in the block itself, keep it unpoisoned. */
830 	asan_poison((u8 __arena *)addr, BUDDY_POISONED,
831 		    BUDDY_MIN_ALLOC_BYTES << order);
832 	asan_unpoison(header, sizeof(*header));
833 
834 	/*
835 	 * Coalescing loop. Merge with free buddies of equal order.
836 	 * For every coalescing step, keep the left buddy and
837 	 * drop the right buddy's header.
838 	 */
839 	for (; order < BUDDY_CHUNK_NUM_ORDERS && can_loop; order++) {
840 		buddy_idx = idx ^ (1 << order);
841 
842 		/* Check if the buddy is actually free. */
843 		idx_is_allocated(chunk, buddy_idx, &allocated);
844 		if (allocated)
845 			break;
846 
847 		/*
848 		 * If buddy is not the same order as the chunk
849 		 * being freed, then we're done coalescing.
850 		 */
851 		if (idx_get_order(chunk, buddy_idx) != order)
852 			break;
853 
854 		buddy_header = idx_to_header(chunk, buddy_idx);
855 		header_remove_freelist(chunk, buddy_header, order);
856 
857 		/* Keep the left header out of the two buddies, drop the other one. */
858 		if (buddy_idx < idx) {
859 			tmp_idx = idx;
860 			idx = buddy_idx;
861 			buddy_idx = tmp_idx;
862 		}
863 
864 		/* Remove the buddy from the freelists so that we can merge it. */
865 		idx_set_order(chunk, buddy_idx, order);
866 
867 		buddy_header = idx_to_header(chunk, buddy_idx);
868 		asan_poison(buddy_header, BUDDY_POISONED,
869 			    sizeof(*buddy_header));
870 	}
871 
872 	/* Header properly freed but not in any freelists yet .*/
873 	idx_set_order(chunk, idx, order);
874 
875 	header = idx_to_header(chunk, idx);
876 	header_add_freelist(chunk, header, idx, order);
877 
878 	return 0;
879 }
880 
881 __weak int buddy_free(struct buddy __arena *buddy, void __arena *addr)
882 {
883 	int ret;
884 
885 	if (!buddy)
886 		return -EINVAL;
887 
888 	/* Freeing NULL is a valid no-op. */
889 	if (!addr)
890 		return 0;
891 
892 	ret = buddy_lock(buddy);
893 	if (ret)
894 		return ret;
895 
896 	ret = buddy_free_unlocked(buddy, (u64)addr);
897 
898 	buddy_unlock(buddy);
899 
900 	return ret;
901 }
902 
903 __weak char _license[] SEC("license") = "GPL";
904