xref: /linux/drivers/gpu/tests/gpu_buddy_test.c (revision 9e1e9d660255d7216067193d774f338d08d8528d)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2019 Intel Corporation
4  * Copyright © 2022 Maíra Canal <mairacanal@riseup.net>
5  */
6 
7 #include <kunit/test.h>
8 
9 #include <linux/prime_numbers.h>
10 #include <linux/sched/signal.h>
11 #include <linux/sizes.h>
12 
13 #include <linux/gpu_buddy.h>
14 
15 #include "gpu_random.h"
16 
17 static unsigned int random_seed;
18 
19 static inline u64 get_size(int order, u64 chunk_size)
20 {
21 	return (1 << order) * chunk_size;
22 }
23 
24 static void gpu_test_buddy_subtree_offset_alignment_stress(struct kunit *test)
25 {
26 	struct gpu_buddy_block *block;
27 	struct rb_node *node = NULL;
28 	const u64 mm_size = SZ_2M;
29 	const u64 alignments[] = {
30 		SZ_1M,
31 		SZ_512K,
32 		SZ_256K,
33 		SZ_128K,
34 		SZ_64K,
35 		SZ_32K,
36 		SZ_16K,
37 		SZ_8K,
38 	};
39 	struct list_head allocated[ARRAY_SIZE(alignments)];
40 	unsigned int i, max_subtree_align = 0;
41 	int ret, tree, order;
42 	struct gpu_buddy mm;
43 
44 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
45 			       "buddy_init failed\n");
46 
47 	for (i = 0; i < ARRAY_SIZE(allocated); i++)
48 		INIT_LIST_HEAD(&allocated[i]);
49 
50 	/*
51 	 * Exercise subtree_max_alignment tracking by allocating blocks with descending
52 	 * alignment constraints and freeing them in reverse order. This verifies that
53 	 * free-tree augmentation correctly propagates the maximum offset alignment
54 	 * present in each subtree at every stage.
55 	 */
56 
57 	for (i = 0; i < ARRAY_SIZE(alignments); i++) {
58 		struct gpu_buddy_block *root = NULL;
59 		unsigned int expected;
60 		u64 align;
61 
62 		align = alignments[i];
63 		expected = ilog2(align) - 1;
64 
65 		for (;;) {
66 			ret = gpu_buddy_alloc_blocks(&mm,
67 						     0, mm_size,
68 						     SZ_4K, align,
69 						     &allocated[i],
70 						     0);
71 			if (ret)
72 				break;
73 
74 			block = list_last_entry(&allocated[i],
75 						struct gpu_buddy_block,
76 						link);
77 			KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), align));
78 		}
79 
80 		for (order = mm.max_order; order >= 0 && !root; order--) {
81 			for (tree = 0; tree < 2; tree++) {
82 				node = mm.free_trees[tree][order].rb_node;
83 				if (node) {
84 					root = container_of(node,
85 							    struct gpu_buddy_block,
86 							    rb);
87 					break;
88 				}
89 			}
90 		}
91 
92 		KUNIT_ASSERT_NOT_NULL(test, root);
93 		KUNIT_EXPECT_EQ(test, root->subtree_max_alignment, expected);
94 	}
95 
96 	for (i = ARRAY_SIZE(alignments); i-- > 0; ) {
97 		gpu_buddy_free_list(&mm, &allocated[i], 0);
98 
99 		for (order = 0; order <= mm.max_order; order++) {
100 			for (tree = 0; tree < 2; tree++) {
101 				node = mm.free_trees[tree][order].rb_node;
102 				if (!node)
103 					continue;
104 
105 				block = container_of(node, struct gpu_buddy_block, rb);
106 				max_subtree_align = max(max_subtree_align,
107 							block->subtree_max_alignment);
108 			}
109 		}
110 
111 		KUNIT_EXPECT_GE(test, max_subtree_align, ilog2(alignments[i]));
112 	}
113 
114 	gpu_buddy_fini(&mm);
115 }
116 
117 static void gpu_test_buddy_offset_aligned_allocation(struct kunit *test)
118 {
119 	struct gpu_buddy_block *block, *tmp;
120 	int num_blocks, i, count = 0;
121 	LIST_HEAD(allocated);
122 	struct gpu_buddy mm;
123 	u64 mm_size = SZ_4M;
124 	LIST_HEAD(freed);
125 
126 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
127 			       "buddy_init failed\n");
128 
129 	num_blocks = mm_size / SZ_256K;
130 	/*
131 	 * Allocate multiple sizes under a fixed offset alignment.
132 	 * Ensures alignment handling is independent of allocation size and
133 	 * exercises subtree max-alignment pruning for small requests.
134 	 */
135 	for (i = 0; i < num_blocks; i++)
136 		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_256K,
137 								    &allocated, 0),
138 					"buddy_alloc hit an error size=%u\n", SZ_8K);
139 
140 	list_for_each_entry(block, &allocated, link) {
141 		/* Ensure the allocated block uses the expected 8 KB size */
142 		KUNIT_EXPECT_EQ(test, gpu_buddy_block_size(&mm, block), SZ_8K);
143 		/* Ensure the block starts at a 256 KB-aligned offset for proper alignment */
144 		KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), SZ_256K));
145 	}
146 	gpu_buddy_free_list(&mm, &allocated, 0);
147 
148 	for (i = 0; i < num_blocks; i++)
149 		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K,
150 								    &allocated, 0),
151 					"buddy_alloc hit an error size=%u\n", SZ_16K);
152 
153 	list_for_each_entry(block, &allocated, link) {
154 		/* Ensure the allocated block uses the expected 16 KB size */
155 		KUNIT_EXPECT_EQ(test, gpu_buddy_block_size(&mm, block), SZ_16K);
156 		/* Ensure the block starts at a 256 KB-aligned offset for proper alignment */
157 		KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), SZ_256K));
158 	}
159 
160 	/*
161 	 * Free alternating aligned blocks to introduce fragmentation.
162 	 * Ensures offset-aligned allocations remain valid after frees and
163 	 * verifies subtree max-alignment metadata is correctly maintained.
164 	 */
165 	list_for_each_entry_safe(block, tmp, &allocated, link) {
166 		if (count % 2 == 0)
167 			list_move_tail(&block->link, &freed);
168 		count++;
169 	}
170 	gpu_buddy_free_list(&mm, &freed, 0);
171 
172 	for (i = 0; i < num_blocks / 2; i++)
173 		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K,
174 								    &allocated, 0),
175 					"buddy_alloc hit an error size=%u\n", SZ_16K);
176 
177 	/*
178 	 * Allocate with offset alignment after all slots are used; must fail.
179 	 * Confirms that no aligned offsets remain.
180 	 */
181 	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K,
182 							   &allocated, 0),
183 			       "buddy_alloc hit an error size=%u\n", SZ_16K);
184 	gpu_buddy_free_list(&mm, &allocated, 0);
185 	gpu_buddy_fini(&mm);
186 }
187 
188 static void gpu_test_buddy_fragmentation_performance(struct kunit *test)
189 {
190 	struct gpu_buddy_block *block, *tmp;
191 	int num_blocks, i, ret, count = 0;
192 	LIST_HEAD(allocated_blocks);
193 	unsigned long elapsed_ms;
194 	LIST_HEAD(reverse_list);
195 	LIST_HEAD(test_blocks);
196 	LIST_HEAD(clear_list);
197 	LIST_HEAD(dirty_list);
198 	LIST_HEAD(free_list);
199 	struct gpu_buddy mm;
200 	u64 mm_size = SZ_4G;
201 	ktime_t start, end;
202 
203 	/*
204 	 * Allocation under severe fragmentation
205 	 *
206 	 * Create severe fragmentation by allocating the entire 4 GiB address space
207 	 * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern
208 	 * leaves many scattered holes. Split the allocations into two groups and
209 	 * return them with different flags to block coalescing, then repeatedly
210 	 * allocate and free 64 KiB blocks while timing the loop. This stresses how
211 	 * quickly the allocator can satisfy larger, aligned requests from a pool of
212 	 * highly fragmented space.
213 	 */
214 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
215 			       "buddy_init failed\n");
216 
217 	num_blocks = mm_size / SZ_64K;
218 
219 	start = ktime_get();
220 	/* Allocate with maximum fragmentation - 8K blocks with 64K alignment */
221 	for (i = 0; i < num_blocks; i++)
222 		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
223 								    &allocated_blocks, 0),
224 					"buddy_alloc hit an error size=%u\n", SZ_8K);
225 
226 	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
227 		if (count % 4 == 0 || count % 4 == 3)
228 			list_move_tail(&block->link, &clear_list);
229 		else
230 			list_move_tail(&block->link, &dirty_list);
231 		count++;
232 	}
233 
234 	/* Free with different flags to ensure no coalescing */
235 	gpu_buddy_free_list(&mm, &clear_list, GPU_BUDDY_CLEARED);
236 	gpu_buddy_free_list(&mm, &dirty_list, 0);
237 
238 	for (i = 0; i < num_blocks; i++)
239 		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
240 								    &test_blocks, 0),
241 					"buddy_alloc hit an error size=%u\n", SZ_64K);
242 	gpu_buddy_free_list(&mm, &test_blocks, 0);
243 
244 	end = ktime_get();
245 	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
246 
247 	kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms);
248 
249 	gpu_buddy_fini(&mm);
250 
251 	/*
252 	 * Reverse free order under fragmentation
253 	 *
254 	 * Construct a fragmented 4 GiB space by allocating every 8 KiB block with
255 	 * 64 KiB alignment, creating a dense scatter of small regions. Half of the
256 	 * blocks are selectively freed to form sparse gaps, while the remaining
257 	 * allocations are preserved, reordered in reverse, and released back with
258 	 * the cleared flag. This models a pathological reverse-ordered free pattern
259 	 * and measures how quickly the allocator can merge and reclaim space when
260 	 * deallocation occurs in the opposite order of allocation, exposing the
261 	 * cost difference between a linear freelist scan and an ordered tree lookup.
262 	 */
263 	ret = gpu_buddy_init(&mm, mm_size, SZ_4K);
264 	KUNIT_ASSERT_EQ(test, ret, 0);
265 
266 	start = ktime_get();
267 	/* Allocate maximum fragmentation */
268 	for (i = 0; i < num_blocks; i++)
269 		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
270 								    &allocated_blocks, 0),
271 					"buddy_alloc hit an error size=%u\n", SZ_8K);
272 
273 	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
274 		if (count % 2 == 0)
275 			list_move_tail(&block->link, &free_list);
276 		count++;
277 	}
278 	gpu_buddy_free_list(&mm, &free_list, GPU_BUDDY_CLEARED);
279 
280 	list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link)
281 		list_move(&block->link, &reverse_list);
282 	gpu_buddy_free_list(&mm, &reverse_list, GPU_BUDDY_CLEARED);
283 
284 	end = ktime_get();
285 	elapsed_ms = ktime_to_ms(ktime_sub(end, start));
286 
287 	kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms);
288 
289 	gpu_buddy_fini(&mm);
290 }
291 
292 static void gpu_test_buddy_alloc_range_bias(struct kunit *test)
293 {
294 	u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem;
295 	GPU_RND_STATE(prng, random_seed);
296 	unsigned int i, count, *order;
297 	struct gpu_buddy_block *block;
298 	unsigned long flags;
299 	struct gpu_buddy mm;
300 	LIST_HEAD(allocated);
301 
302 	bias_size = SZ_1M;
303 	ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size);
304 	ps = max(SZ_4K, ps);
305 	mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */
306 
307 	kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);
308 
309 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
310 			       "buddy_init failed\n");
311 
312 	count = mm_size / bias_size;
313 	order = gpu_random_order(count, &prng);
314 	KUNIT_EXPECT_TRUE(test, order);
315 
316 	/*
317 	 * Idea is to split the address space into uniform bias ranges, and then
318 	 * in some random order allocate within each bias, using various
319 	 * patterns within. This should detect if allocations leak out from a
320 	 * given bias, for example.
321 	 */
322 
323 	for (i = 0; i < count; i++) {
324 		LIST_HEAD(tmp);
325 		u32 size;
326 
327 		bias_start = order[i] * bias_size;
328 		bias_end = bias_start + bias_size;
329 		bias_rem = bias_size;
330 
331 		/* internal round_up too big */
332 		KUNIT_ASSERT_TRUE_MSG(test,
333 				      gpu_buddy_alloc_blocks(&mm, bias_start,
334 							     bias_end, bias_size + ps, bias_size,
335 							     &allocated,
336 							     GPU_BUDDY_RANGE_ALLOCATION),
337 				      "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
338 				      bias_start, bias_end, bias_size, bias_size);
339 
340 		/* size too big */
341 		KUNIT_ASSERT_TRUE_MSG(test,
342 				      gpu_buddy_alloc_blocks(&mm, bias_start,
343 							     bias_end, bias_size + ps, ps,
344 							     &allocated,
345 							     GPU_BUDDY_RANGE_ALLOCATION),
346 				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
347 				      bias_start, bias_end, bias_size + ps, ps);
348 
349 		/* bias range too small for size */
350 		KUNIT_ASSERT_TRUE_MSG(test,
351 				      gpu_buddy_alloc_blocks(&mm, bias_start + ps,
352 							     bias_end, bias_size, ps,
353 							     &allocated,
354 							     GPU_BUDDY_RANGE_ALLOCATION),
355 				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
356 				      bias_start + ps, bias_end, bias_size, ps);
357 
358 		/* bias misaligned */
359 		KUNIT_ASSERT_TRUE_MSG(test,
360 				      gpu_buddy_alloc_blocks(&mm, bias_start + ps,
361 							     bias_end - ps,
362 							     bias_size >> 1, bias_size >> 1,
363 							     &allocated,
364 							     GPU_BUDDY_RANGE_ALLOCATION),
365 				      "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n",
366 				      bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1);
367 
368 		/* single big page */
369 		KUNIT_ASSERT_FALSE_MSG(test,
370 				       gpu_buddy_alloc_blocks(&mm, bias_start,
371 							      bias_end, bias_size, bias_size,
372 							      &tmp,
373 							      GPU_BUDDY_RANGE_ALLOCATION),
374 				       "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n",
375 				       bias_start, bias_end, bias_size, bias_size);
376 		gpu_buddy_free_list(&mm, &tmp, 0);
377 
378 		/* single page with internal round_up */
379 		KUNIT_ASSERT_FALSE_MSG(test,
380 				       gpu_buddy_alloc_blocks(&mm, bias_start,
381 							      bias_end, ps, bias_size,
382 							      &tmp,
383 							      GPU_BUDDY_RANGE_ALLOCATION),
384 				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
385 				       bias_start, bias_end, ps, bias_size);
386 		gpu_buddy_free_list(&mm, &tmp, 0);
387 
388 		/* random size within */
389 		size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
390 		if (size)
391 			KUNIT_ASSERT_FALSE_MSG(test,
392 					       gpu_buddy_alloc_blocks(&mm, bias_start,
393 								      bias_end, size, ps,
394 								      &tmp,
395 								      GPU_BUDDY_RANGE_ALLOCATION),
396 					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
397 					       bias_start, bias_end, size, ps);
398 
399 		bias_rem -= size;
400 		/* too big for current avail */
401 		KUNIT_ASSERT_TRUE_MSG(test,
402 				      gpu_buddy_alloc_blocks(&mm, bias_start,
403 							     bias_end, bias_rem + ps, ps,
404 							     &allocated,
405 							     GPU_BUDDY_RANGE_ALLOCATION),
406 				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
407 				      bias_start, bias_end, bias_rem + ps, ps);
408 
409 		if (bias_rem) {
410 			/* random fill of the remainder */
411 			size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
412 			size = max(size, ps);
413 
414 			KUNIT_ASSERT_FALSE_MSG(test,
415 					       gpu_buddy_alloc_blocks(&mm, bias_start,
416 								      bias_end, size, ps,
417 								      &allocated,
418 								      GPU_BUDDY_RANGE_ALLOCATION),
419 					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
420 					       bias_start, bias_end, size, ps);
421 			/*
422 			 * Intentionally allow some space to be left
423 			 * unallocated, and ideally not always on the bias
424 			 * boundaries.
425 			 */
426 			gpu_buddy_free_list(&mm, &tmp, 0);
427 		} else {
428 			list_splice_tail(&tmp, &allocated);
429 		}
430 	}
431 
432 	kfree(order);
433 	gpu_buddy_free_list(&mm, &allocated, 0);
434 	gpu_buddy_fini(&mm);
435 
436 	/*
437 	 * Something more free-form. Idea is to pick a random starting bias
438 	 * range within the address space and then start filling it up. Also
439 	 * randomly grow the bias range in both directions as we go along. This
440 	 * should give us bias start/end which is not always uniform like above,
441 	 * and in some cases will require the allocator to jump over already
442 	 * allocated nodes in the middle of the address space.
443 	 */
444 
445 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
446 			       "buddy_init failed\n");
447 
448 	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
449 	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
450 	bias_end = max(bias_end, bias_start + ps);
451 	bias_rem = bias_end - bias_start;
452 
453 	do {
454 		u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
455 
456 		KUNIT_ASSERT_FALSE_MSG(test,
457 				       gpu_buddy_alloc_blocks(&mm, bias_start,
458 							      bias_end, size, ps,
459 							      &allocated,
460 							      GPU_BUDDY_RANGE_ALLOCATION),
461 				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
462 				       bias_start, bias_end, size, ps);
463 		bias_rem -= size;
464 
465 		/*
466 		 * Try to randomly grow the bias range in both directions, or
467 		 * only one, or perhaps don't grow at all.
468 		 */
469 		do {
470 			u32 old_bias_start = bias_start;
471 			u32 old_bias_end = bias_end;
472 
473 			if (bias_start)
474 				bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps);
475 			if (bias_end != mm_size)
476 				bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps);
477 
478 			bias_rem += old_bias_start - bias_start;
479 			bias_rem += bias_end - old_bias_end;
480 		} while (!bias_rem && (bias_start || bias_end != mm_size));
481 	} while (bias_rem);
482 
483 	KUNIT_ASSERT_EQ(test, bias_start, 0);
484 	KUNIT_ASSERT_EQ(test, bias_end, mm_size);
485 	KUNIT_ASSERT_TRUE_MSG(test,
486 			      gpu_buddy_alloc_blocks(&mm, bias_start, bias_end,
487 						     ps, ps,
488 						     &allocated,
489 						     GPU_BUDDY_RANGE_ALLOCATION),
490 			      "buddy_alloc passed with bias(%x-%x), size=%u\n",
491 			      bias_start, bias_end, ps);
492 
493 	gpu_buddy_free_list(&mm, &allocated, 0);
494 	gpu_buddy_fini(&mm);
495 
496 	/*
497 	 * Allocate cleared blocks in the bias range when the GPU buddy's clear avail is
498 	 * zero. This will validate the bias range allocation in scenarios like system boot
499 	 * when no cleared blocks are available and exercise the fallback path too. The resulting
500 	 * blocks should always be dirty.
501 	 */
502 
503 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
504 			       "buddy_init failed\n");
505 
506 	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
507 	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
508 	bias_end = max(bias_end, bias_start + ps);
509 	bias_rem = bias_end - bias_start;
510 
511 	flags = GPU_BUDDY_CLEAR_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION;
512 	size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
513 
514 	KUNIT_ASSERT_FALSE_MSG(test,
515 			       gpu_buddy_alloc_blocks(&mm, bias_start,
516 						      bias_end, size, ps,
517 						      &allocated,
518 						      flags),
519 			       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
520 			       bias_start, bias_end, size, ps);
521 
522 	list_for_each_entry(block, &allocated, link)
523 		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);
524 
525 	gpu_buddy_free_list(&mm, &allocated, 0);
526 	gpu_buddy_fini(&mm);
527 }
528 
/*
 * Exercise exact-range allocation (start/end window whose span equals the
 * requested size) across many shapes: the full mm, sub-ranges, minimum
 * chunk at various offsets, multi-root (non-power-of-two) layouts,
 * randomized contiguous slices, several negative cases (partially
 * occupied ranges, misaligned parameters, out-of-bounds end), free/
 * re-allocate cycles, and mixed power-of-two orders.
 */
static void gpu_test_buddy_alloc_range(struct kunit *test)
{
	GPU_RND_STATE(prng, random_seed);
	struct gpu_buddy_block *block;
	struct gpu_buddy mm;
	u32 mm_size, total;
	LIST_HEAD(blocks);
	LIST_HEAD(tmp);
	u32 ps = SZ_4K;
	int ret;

	mm_size = SZ_16M;

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
			       "buddy_init failed\n");

	/*
	 * Basic exact-range allocation.
	 * Allocate the entire mm as one exact range (start + size == end).
	 * This is the simplest case exercising __gpu_buddy_alloc_range.
	 */
	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &blocks, 0);
	KUNIT_ASSERT_EQ_MSG(test, ret, 0,
			    "exact-range alloc of full mm failed\n");

	/* Sum the returned blocks; they must exactly tile the mm */
	total = 0;
	list_for_each_entry(block, &blocks, link) {
		u64 offset = gpu_buddy_block_offset(block);
		u64 bsize = gpu_buddy_block_size(&mm, block);

		KUNIT_EXPECT_TRUE_MSG(test, offset + bsize <= (u64)mm_size,
				      "block [%llx, %llx) outside mm\n", offset, offset + bsize);
		total += (u32)bsize;
	}
	KUNIT_EXPECT_EQ(test, total, mm_size);
	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);

	/* Full mm should be exhausted */
	ret = gpu_buddy_alloc_blocks(&mm, 0, ps, ps, ps, &tmp, 0);
	KUNIT_EXPECT_NE_MSG(test, ret, 0, "alloc should fail when mm is full\n");

	gpu_buddy_free_list(&mm, &blocks, 0);
	KUNIT_EXPECT_EQ(test, mm.avail, (u64)mm_size);
	gpu_buddy_fini(&mm);

	/*
	 * Exact-range allocation of sub-ranges.
	 * Split the mm into four equal quarters and allocate each as an exact
	 * range. Validates splitting and non-overlapping exact allocations.
	 */
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
		u32 quarter = mm_size / 4;
		int i;

		for (i = 0; i < 4; i++) {
			u32 start = i * quarter;
			u32 end = start + quarter;

			ret = gpu_buddy_alloc_blocks(&mm, start, end, quarter, ps, &blocks, 0);
			KUNIT_ASSERT_EQ_MSG(test, ret, 0,
					    "exact-range alloc quarter %d [%x, %x) failed\n",
					    i, start, end);
		}
		KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
		gpu_buddy_free_list(&mm, &blocks, 0);
	}

	gpu_buddy_fini(&mm);

	/*
	 * Minimum chunk-size exact range at various offsets.
	 * Allocate single-page exact ranges at the start, middle and end.
	 */
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	ret = gpu_buddy_alloc_blocks(&mm, 0, ps, ps, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	ret = gpu_buddy_alloc_blocks(&mm, mm_size / 2, mm_size / 2 + ps, ps, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	ret = gpu_buddy_alloc_blocks(&mm, mm_size - ps, mm_size, ps, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Exactly three single-page blocks should have been handed out */
	total = 0;
	list_for_each_entry(block, &blocks, link)
		total += (u32)gpu_buddy_block_size(&mm, block);
	KUNIT_EXPECT_EQ(test, total, 3 * ps);

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Non power-of-two mm size (multiple roots).
	 * Exact-range allocations that span root boundaries must still work.
	 */
	mm_size = SZ_4M + SZ_2M + SZ_1M; /* 7 MiB, three roots */

	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
	KUNIT_EXPECT_GT(test, mm.n_roots, 1U);

	/* Allocate first 4M root exactly */
	ret = gpu_buddy_alloc_blocks(&mm, 0, SZ_4M, SZ_4M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate second root (4M-6M) exactly */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_2M, SZ_2M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate third root (6M-7M) exactly */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_4M + SZ_2M, mm_size, SZ_1M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
	gpu_buddy_free_list(&mm, &blocks, 0);

	/* Cross-root exact-range: the entire non-pot mm */
	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);
	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Randomized exact-range allocations.
	 * Divide the mm into N random-sized, contiguous, page-aligned slices
	 * and allocate each as an exact range in random order.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
#define N_RAND_RANGES 16
		u32 ranges[N_RAND_RANGES + 1]; /* boundaries */
		u32 order_arr[N_RAND_RANGES];
		u32 remaining = mm_size;
		int i;

		/* Build random boundaries; reserve at least ps per later slice */
		ranges[0] = 0;
		for (i = 0; i < N_RAND_RANGES - 1; i++) {
			u32 max_chunk = remaining - (N_RAND_RANGES - 1 - i) * ps;
			u32 sz = max(round_up(prandom_u32_state(&prng) % max_chunk, ps), ps);

			ranges[i + 1] = ranges[i] + sz;
			remaining -= sz;
		}
		ranges[N_RAND_RANGES] = mm_size;

		/* Create a random order (Fisher-Yates shuffle) */
		for (i = 0; i < N_RAND_RANGES; i++)
			order_arr[i] = i;
		for (i = N_RAND_RANGES - 1; i > 0; i--) {
			u32 j = prandom_u32_state(&prng) % (i + 1);
			u32 tmp_val = order_arr[i];

			order_arr[i] = order_arr[j];
			order_arr[j] = tmp_val;
		}

		for (i = 0; i < N_RAND_RANGES; i++) {
			u32 idx = order_arr[i];
			u32 start = ranges[idx];
			u32 end = ranges[idx + 1];
			u32 sz = end - start;

			ret = gpu_buddy_alloc_blocks(&mm, start, end, sz, ps, &blocks, 0);
			KUNIT_ASSERT_EQ_MSG(test, ret, 0,
					    "random exact-range [%x, %x) sz=%x failed\n",
					    start, end, sz);
		}

		KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
		gpu_buddy_free_list(&mm, &blocks, 0);
#undef N_RAND_RANGES
	}

	gpu_buddy_fini(&mm);

	/*
	 * Negative case - partially allocated range.
	 * Allocate the first half, then try to exact-range allocate the full
	 * mm. This must fail because the first half is already occupied.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size / 2, mm_size / 2, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &tmp, 0);
	KUNIT_EXPECT_NE_MSG(test, ret, 0,
			    "exact-range alloc should fail when range is partially used\n");

	/* Also try the already-occupied sub-range directly */
	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size / 2, mm_size / 2, ps, &tmp, 0);
	KUNIT_EXPECT_NE_MSG(test, ret, 0,
			    "double alloc of same exact range should fail\n");

	/* The free second half should still be allocatable */
	ret = gpu_buddy_alloc_blocks(&mm, mm_size / 2, mm_size, mm_size / 2, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Negative case - checkerboard partial allocation.
	 * Allocate every other page-sized chunk in a small mm, then try to
	 * exact-range allocate a range covering two pages (one allocated, one
	 * free). This must fail.
	 */
	mm_size = SZ_64K;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
		u32 off;

		for (off = 0; off < mm_size; off += 2 * ps) {
			ret = gpu_buddy_alloc_blocks(&mm, off, off + ps, ps, ps, &blocks, 0);
			KUNIT_ASSERT_EQ(test, ret, 0);
		}

		/* Try exact range over a pair [allocated, free] */
		ret = gpu_buddy_alloc_blocks(&mm, 0, 2 * ps, 2 * ps, ps, &tmp, 0);
		KUNIT_EXPECT_NE_MSG(test, ret, 0,
				    "exact-range over partially allocated pair should fail\n");

		/* The free pages individually should still work */
		ret = gpu_buddy_alloc_blocks(&mm, ps, 2 * ps, ps, ps, &blocks, 0);
		KUNIT_ASSERT_EQ(test, ret, 0);

		gpu_buddy_free_list(&mm, &blocks, 0);
	}

	gpu_buddy_fini(&mm);

	/* Negative case - misaligned start/end/size */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/* start not aligned to chunk_size */
	ret = gpu_buddy_alloc_blocks(&mm, ps / 2, ps / 2 + ps, ps, ps, &tmp, 0);
	KUNIT_EXPECT_NE(test, ret, 0);

	/* size not aligned */
	ret = gpu_buddy_alloc_blocks(&mm, 0, ps + 1, ps + 1, ps, &tmp, 0);
	KUNIT_EXPECT_NE(test, ret, 0);

	/* end exceeds mm size */
	ret = gpu_buddy_alloc_blocks(&mm, mm_size, mm_size + ps, ps, ps, &tmp, 0);
	KUNIT_EXPECT_NE(test, ret, 0);

	gpu_buddy_fini(&mm);

	/*
	 * Free and re-allocate the same exact range.
	 * This exercises merge-on-free followed by exact-range re-split.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
		int i;

		for (i = 0; i < 5; i++) {
			ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_2M,
						     SZ_2M, ps, &blocks, 0);
			KUNIT_ASSERT_EQ_MSG(test, ret, 0,
					    "re-alloc iteration %d failed\n", i);

			/* Every block must lie within [4M, 6M) and tile 2M */
			total = 0;
			list_for_each_entry(block, &blocks, link) {
				u64 offset = gpu_buddy_block_offset(block);
				u64 bsize = gpu_buddy_block_size(&mm, block);

				KUNIT_EXPECT_GE(test, offset, (u64)SZ_4M);
				KUNIT_EXPECT_LE(test, offset + bsize, (u64)(SZ_4M + SZ_2M));
				total += (u32)bsize;
			}
			KUNIT_EXPECT_EQ(test, total, SZ_2M);

			gpu_buddy_free_list(&mm, &blocks, 0);
		}

		KUNIT_EXPECT_EQ(test, mm.avail, (u64)mm_size);
	}

	gpu_buddy_fini(&mm);

	/*
	 * Various power-of-two exact ranges within a large mm.
	 * Allocate non-overlapping power-of-two exact ranges at their natural
	 * alignment, validating that the allocator handles different orders.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/* Allocate 4K at offset 0 */
	ret = gpu_buddy_alloc_blocks(&mm, 0, SZ_4K, SZ_4K, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate 64K at offset 64K */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_64K, SZ_64K + SZ_64K, SZ_64K, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate 1M at offset 1M */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_1M, SZ_1M + SZ_1M, SZ_1M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate 4M at offset 4M */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_4M, SZ_4M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	total = 0;
	list_for_each_entry(block, &blocks, link)
		total += (u32)gpu_buddy_block_size(&mm, block);
	KUNIT_EXPECT_EQ(test, total, SZ_4K + SZ_64K + SZ_1M + SZ_4M);

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}
854 
/*
 * gpu_test_buddy_alloc_clear() - exercise dirty/clear tracking in the buddy
 * allocator.
 *
 * Three phases:
 *  1. Seed a cleared-pages pool, alternate dirty vs clear-requested single-page
 *     allocations, and verify each block's clear status; then intentionally
 *     request more cleared pages than were freed as cleared.
 *  2. Fully fragment the mm, free half dirty/half clear, and verify that
 *     growing power-of-two allocations (which must force-merge) come back
 *     dirty.
 *  3. Use a non power-of-two mm so that fini() itself must force-merge
 *     across multiple roots.
 */
static void gpu_test_buddy_alloc_clear(struct kunit *test)
{
	unsigned long n_pages, total, i = 0;
	const unsigned long ps = SZ_4K;
	struct gpu_buddy_block *block;
	const int max_order = 12;
	LIST_HEAD(allocated);
	struct gpu_buddy mm;
	unsigned int order;
	u32 mm_size, size;
	LIST_HEAD(dirty);
	LIST_HEAD(clean);

	mm_size = SZ_4K << max_order;
	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	/*
	 * Idea is to allocate and free some random portion of the address space,
	 * returning those pages as non-dirty and randomly alternate between
	 * requesting dirty and non-dirty pages (not going over the limit
	 * we freed as non-dirty), putting that into two separate lists.
	 * Loop over both lists at the end checking that the dirty list
	 * is indeed all dirty pages and vice versa. Free it all again,
	 * keeping the dirty/clear status.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    5 * ps, ps, &allocated,
							    GPU_BUDDY_TOPDOWN_ALLOCATION),
				"buddy_alloc hit an error size=%lu\n", 5 * ps);
	/* Return 5 pages to the allocator marked as cleared */
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);

	n_pages = 10;
	do {
		unsigned long flags;
		struct list_head *list;
		int slot = i % 2;

		/* Even iterations ask for dirty pages, odd for cleared ones */
		if (slot == 0) {
			list = &dirty;
			flags = 0;
		} else {
			list = &clean;
			flags = GPU_BUDDY_CLEAR_ALLOCATION;
		}

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    ps, ps, list,
								    flags),
					"buddy_alloc hit an error size=%lu\n", ps);
	} while (++i < n_pages);

	/* Every block on the clean list must report clear... */
	list_for_each_entry(block, &clean, link)
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), true);

	/* ...and every block on the dirty list must not */
	list_for_each_entry(block, &dirty, link)
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);

	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);

	/*
	 * Trying to go over the clear limit for some allocation.
	 * The allocation should never fail with reasonable page-size.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    10 * ps, ps, &clean,
							    GPU_BUDDY_CLEAR_ALLOCATION),
				"buddy_alloc hit an error size=%lu\n", 10 * ps);

	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
	gpu_buddy_free_list(&mm, &dirty, 0);
	gpu_buddy_fini(&mm);

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/*
	 * Create a new mm. Intentionally fragment the address space by creating
	 * two alternating lists. Free both lists, one as dirty the other as clean.
	 * Try to allocate double the previous size with matching min_page_size. The
	 * allocation should never fail as it calls the force_merge. Also check that
	 * the page is always dirty after force_merge. Free the page as dirty, then
	 * repeat the whole thing, increment the order until we hit the max_order.
	 */

	i = 0;
	n_pages = mm_size / ps;
	do {
		struct list_head *list;
		int slot = i % 2;

		if (slot == 0)
			list = &dirty;
		else
			list = &clean;

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    ps, ps, list, 0),
					"buddy_alloc hit an error size=%lu\n", ps);
	} while (++i < n_pages);

	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
	gpu_buddy_free_list(&mm, &dirty, 0);

	order = 1;
	do {
		size = SZ_4K << order;

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    size, size, &allocated,
								    GPU_BUDDY_CLEAR_ALLOCATION),
					"buddy_alloc hit an error size=%u\n", size);
		total = 0;
		list_for_each_entry(block, &allocated, link) {
			/*
			 * A force-merged block mixes dirty and clear halves and
			 * so must be dirty; the final full-mm allocation is
			 * exempt from the check.
			 */
			if (size != mm_size)
				KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);
			total += gpu_buddy_block_size(&mm, block);
		}
		KUNIT_EXPECT_EQ(test, total, size);

		gpu_buddy_free_list(&mm, &allocated, 0);
	} while (++order <= max_order);

	gpu_buddy_fini(&mm);

	/*
	 * Create a new mm with a non power-of-two size. Allocate a random size from each
	 * root, free as cleared and then call fini. This will ensure the multi-root
	 * force merge during fini.
	 */
	mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2));

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
	/* Range-restricted allocation from the first (large) root */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
							    4 * ps, ps, &allocated,
							    GPU_BUDDY_RANGE_ALLOCATION),
				"buddy_alloc hit an error size=%lu\n", 4 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
							    2 * ps, ps, &allocated,
							    GPU_BUDDY_CLEAR_ALLOCATION),
				"buddy_alloc hit an error size=%lu\n", 2 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	/* Range-restricted allocation from the second (small) root */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size,
							    ps, ps, &allocated,
							    GPU_BUDDY_RANGE_ALLOCATION),
				"buddy_alloc hit an error size=%lu\n", ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	gpu_buddy_fini(&mm);
}
1006 
1007 static void gpu_test_buddy_alloc_contiguous(struct kunit *test)
1008 {
1009 	const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
1010 	unsigned long i, n_pages, total;
1011 	struct gpu_buddy_block *block;
1012 	struct gpu_buddy mm;
1013 	LIST_HEAD(left);
1014 	LIST_HEAD(middle);
1015 	LIST_HEAD(right);
1016 	LIST_HEAD(allocated);
1017 
1018 	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
1019 
1020 	/*
1021 	 * Idea is to fragment the address space by alternating block
1022 	 * allocations between three different lists; one for left, middle and
1023 	 * right. We can then free a list to simulate fragmentation. In
1024 	 * particular we want to exercise the GPU_BUDDY_CONTIGUOUS_ALLOCATION,
1025 	 * including the try_harder path.
1026 	 */
1027 
1028 	i = 0;
1029 	n_pages = mm_size / ps;
1030 	do {
1031 		struct list_head *list;
1032 		int slot = i % 3;
1033 
1034 		if (slot == 0)
1035 			list = &left;
1036 		else if (slot == 1)
1037 			list = &middle;
1038 		else
1039 			list = &right;
1040 		KUNIT_ASSERT_FALSE_MSG(test,
1041 				       gpu_buddy_alloc_blocks(&mm, 0, mm_size,
1042 							      ps, ps, list, 0),
1043 				       "buddy_alloc hit an error size=%lu\n",
1044 				       ps);
1045 	} while (++i < n_pages);
1046 
1047 	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
1048 							   3 * ps, ps, &allocated,
1049 							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
1050 			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
1051 
1052 	gpu_buddy_free_list(&mm, &middle, 0);
1053 	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
1054 							   3 * ps, ps, &allocated,
1055 							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
1056 			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
1057 	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
1058 							   2 * ps, ps, &allocated,
1059 							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
1060 			       "buddy_alloc didn't error size=%lu\n", 2 * ps);
1061 
1062 	gpu_buddy_free_list(&mm, &right, 0);
1063 	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
1064 							   3 * ps, ps, &allocated,
1065 							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
1066 			       "buddy_alloc didn't error size=%lu\n", 3 * ps);
1067 	/*
1068 	 * At this point we should have enough contiguous space for 2 blocks,
1069 	 * however they are never buddies (since we freed middle and right) so
1070 	 * will require the try_harder logic to find them.
1071 	 */
1072 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
1073 							    2 * ps, ps, &allocated,
1074 							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
1075 			       "buddy_alloc hit an error size=%lu\n", 2 * ps);
1076 
1077 	gpu_buddy_free_list(&mm, &left, 0);
1078 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
1079 							    3 * ps, ps, &allocated,
1080 							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
1081 			       "buddy_alloc hit an error size=%lu\n", 3 * ps);
1082 
1083 	total = 0;
1084 	list_for_each_entry(block, &allocated, link)
1085 		total += gpu_buddy_block_size(&mm, block);
1086 
1087 	KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
1088 
1089 	gpu_buddy_free_list(&mm, &allocated, 0);
1090 	gpu_buddy_fini(&mm);
1091 }
1092 
/*
 * gpu_test_buddy_alloc_pathological() - drive the allocator into a fully 50%
 * fragmented state and verify nothing larger than chunk_size is available.
 */
static void gpu_test_buddy_alloc_pathological(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block;
	const int max_order = 3;
	unsigned long flags = 0;
	int order, top;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	LIST_HEAD(holes);
	LIST_HEAD(tmp);

	/*
	 * Create a pot-sized mm, then allocate one of each possible
	 * order within. This should leave the mm with exactly one
	 * page left. Free the largest block, then whittle down again.
	 * Eventually we will have a fully 50% fragmented mm.
	 */

	mm_size = SZ_4K << max_order;
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	for (top = max_order; top; top--) {
		/* Make room by freeing the largest allocated block */
		block = list_first_entry_or_null(&blocks, typeof(*block), link);
		if (block) {
			list_del(&block->link);
			gpu_buddy_free_block(&mm, block);
		}

		/* Re-fill the freed space with one block of each order < top */
		for (order = top; order--;) {
			size = get_size(order, mm.chunk_size);
			KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start,
									    mm_size, size, size,
										&tmp, flags),
					"buddy_alloc hit -ENOMEM with order=%d, top=%d\n",
					order, top);

			block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
			KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

			list_move_tail(&block->link, &blocks);
		}

		/* There should be one final page for this sub-allocation */
		size = get_size(0, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
							   "buddy_alloc hit -ENOMEM for hole\n");

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &holes);

		/* The mm must now be full at this sub-allocation's top order */
		size = get_size(top, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
							  "buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!",
							  top, max_order);
	}

	/*
	 * Free the hole pages; the surviving 'blocks' prevent any merge above
	 * order 0 (50% fragmentation).
	 */
	gpu_buddy_free_list(&mm, &holes, 0);

	/* Nothing larger than blocks of chunk_size now available */
	for (order = 1; order <= max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
							  "buddy_alloc unexpectedly succeeded at order %d, it should be full!",
							  order);
	}

	/*
	 * NOTE(review): 'holes' appears to already be empty after the
	 * free_list above, making this splice a no-op — confirm against
	 * gpu_buddy_free_list() semantics.
	 */
	list_splice_tail(&holes, &blocks);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}
1173 
1174 static void gpu_test_buddy_alloc_pessimistic(struct kunit *test)
1175 {
1176 	u64 mm_size, size, start = 0;
1177 	struct gpu_buddy_block *block, *bn;
1178 	const unsigned int max_order = 16;
1179 	unsigned long flags = 0;
1180 	struct gpu_buddy mm;
1181 	unsigned int order;
1182 	LIST_HEAD(blocks);
1183 	LIST_HEAD(tmp);
1184 
1185 	/*
1186 	 * Create a pot-sized mm, then allocate one of each possible
1187 	 * order within. This should leave the mm with exactly one
1188 	 * page left.
1189 	 */
1190 
1191 	mm_size = SZ_4K << max_order;
1192 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
1193 			       "buddy_init failed\n");
1194 
1195 	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
1196 
1197 	for (order = 0; order < max_order; order++) {
1198 		size = get_size(order, mm.chunk_size);
1199 		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
1200 								    size, size, &tmp, flags),
1201 							   "buddy_alloc hit -ENOMEM with order=%d\n",
1202 							   order);
1203 
1204 		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
1205 		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
1206 
1207 		list_move_tail(&block->link, &blocks);
1208 	}
1209 
1210 	/* And now the last remaining block available */
1211 	size = get_size(0, mm.chunk_size);
1212 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
1213 							    size, size, &tmp, flags),
1214 						   "buddy_alloc hit -ENOMEM on final alloc\n");
1215 
1216 	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
1217 	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
1218 
1219 	list_move_tail(&block->link, &blocks);
1220 
1221 	/* Should be completely full! */
1222 	for (order = max_order; order--;) {
1223 		size = get_size(order, mm.chunk_size);
1224 		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
1225 								   size, size, &tmp, flags),
1226 							  "buddy_alloc unexpectedly succeeded, it should be full!");
1227 	}
1228 
1229 	block = list_last_entry(&blocks, typeof(*block), link);
1230 	list_del(&block->link);
1231 	gpu_buddy_free_block(&mm, block);
1232 
1233 	/* As we free in increasing size, we make available larger blocks */
1234 	order = 1;
1235 	list_for_each_entry_safe(block, bn, &blocks, link) {
1236 		list_del(&block->link);
1237 		gpu_buddy_free_block(&mm, block);
1238 
1239 		size = get_size(order, mm.chunk_size);
1240 		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
1241 								    size, size, &tmp, flags),
1242 							   "buddy_alloc hit -ENOMEM with order=%d\n",
1243 							   order);
1244 
1245 		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
1246 		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
1247 
1248 		list_del(&block->link);
1249 		gpu_buddy_free_block(&mm, block);
1250 		order++;
1251 	}
1252 
1253 	/* To confirm, now the whole mm should be available */
1254 	size = get_size(max_order, mm.chunk_size);
1255 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
1256 							    size, size, &tmp, flags),
1257 						   "buddy_alloc (realloc) hit -ENOMEM with order=%d\n",
1258 						   max_order);
1259 
1260 	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
1261 	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
1262 
1263 	list_del(&block->link);
1264 	gpu_buddy_free_block(&mm, block);
1265 	gpu_buddy_free_list(&mm, &blocks, 0);
1266 	gpu_buddy_fini(&mm);
1267 }
1268 
1269 static void gpu_test_buddy_alloc_optimistic(struct kunit *test)
1270 {
1271 	u64 mm_size, size, start = 0;
1272 	struct gpu_buddy_block *block;
1273 	unsigned long flags = 0;
1274 	const int max_order = 16;
1275 	struct gpu_buddy mm;
1276 	LIST_HEAD(blocks);
1277 	LIST_HEAD(tmp);
1278 	int order;
1279 
1280 	/*
1281 	 * Create a mm with one block of each order available, and
1282 	 * try to allocate them all.
1283 	 */
1284 
1285 	mm_size = SZ_4K * ((1 << (max_order + 1)) - 1);
1286 
1287 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
1288 			       "buddy_init failed\n");
1289 
1290 	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
1291 
1292 	for (order = 0; order <= max_order; order++) {
1293 		size = get_size(order, mm.chunk_size);
1294 		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
1295 								    size, size, &tmp, flags),
1296 							   "buddy_alloc hit -ENOMEM with order=%d\n",
1297 							   order);
1298 
1299 		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
1300 		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
1301 
1302 		list_move_tail(&block->link, &blocks);
1303 	}
1304 
1305 	/* Should be completely full! */
1306 	size = get_size(0, mm.chunk_size);
1307 	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
1308 							   size, size, &tmp, flags),
1309 						  "buddy_alloc unexpectedly succeeded, it should be full!");
1310 
1311 	gpu_buddy_free_list(&mm, &blocks, 0);
1312 	gpu_buddy_fini(&mm);
1313 }
1314 
1315 static void gpu_test_buddy_alloc_limit(struct kunit *test)
1316 {
1317 	u64 size = U64_MAX, start = 0;
1318 	struct gpu_buddy_block *block;
1319 	unsigned long flags = 0;
1320 	LIST_HEAD(allocated);
1321 	struct gpu_buddy mm;
1322 
1323 	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, size, SZ_4K));
1324 
1325 	KUNIT_EXPECT_EQ_MSG(test, mm.max_order, GPU_BUDDY_MAX_ORDER,
1326 			    "mm.max_order(%d) != %d\n", mm.max_order,
1327 						GPU_BUDDY_MAX_ORDER);
1328 
1329 	size = mm.chunk_size << mm.max_order;
1330 	KUNIT_EXPECT_FALSE(test, gpu_buddy_alloc_blocks(&mm, start, size, size,
1331 							mm.chunk_size, &allocated, flags));
1332 
1333 	block = list_first_entry_or_null(&allocated, struct gpu_buddy_block, link);
1334 	KUNIT_EXPECT_TRUE(test, block);
1335 
1336 	KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_order(block), mm.max_order,
1337 			    "block order(%d) != %d\n",
1338 						gpu_buddy_block_order(block), mm.max_order);
1339 
1340 	KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_size(&mm, block),
1341 			    BIT_ULL(mm.max_order) * mm.chunk_size,
1342 						"block size(%llu) != %llu\n",
1343 						gpu_buddy_block_size(&mm, block),
1344 						BIT_ULL(mm.max_order) * mm.chunk_size);
1345 
1346 	gpu_buddy_free_list(&mm, &allocated, 0);
1347 	gpu_buddy_fini(&mm);
1348 }
1349 
1350 static void gpu_test_buddy_alloc_exceeds_max_order(struct kunit *test)
1351 {
1352 	u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G;
1353 	struct gpu_buddy mm;
1354 	LIST_HEAD(blocks);
1355 	int err;
1356 
1357 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
1358 			       "buddy_init failed\n");
1359 
1360 	/* CONTIGUOUS allocation should succeed via try_harder fallback */
1361 	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, size,
1362 							    SZ_4K, &blocks,
1363 							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
1364 			       "buddy_alloc hit an error size=%llu\n", size);
1365 	gpu_buddy_free_list(&mm, &blocks, 0);
1366 
1367 	/* Non-CONTIGUOUS with large min_block_size should return -EINVAL */
1368 	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0);
1369 	KUNIT_EXPECT_EQ(test, err, -EINVAL);
1370 
1371 	/* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */
1372 	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks,
1373 				     GPU_BUDDY_RANGE_ALLOCATION);
1374 	KUNIT_EXPECT_EQ(test, err, -EINVAL);
1375 
1376 	/* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */
1377 	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks,
1378 				     GPU_BUDDY_CONTIGUOUS_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION);
1379 	KUNIT_EXPECT_EQ(test, err, -EINVAL);
1380 
1381 	gpu_buddy_fini(&mm);
1382 }
1383 
1384 static int gpu_buddy_suite_init(struct kunit_suite *suite)
1385 {
1386 	while (!random_seed)
1387 		random_seed = get_random_u32();
1388 
1389 	kunit_info(suite, "Testing GPU buddy manager, with random_seed=0x%x\n",
1390 		   random_seed);
1391 
1392 	return 0;
1393 }
1394 
/* Test registry; entries run in listing order, KUNIT_CASE_SLOW marks the
 * long-running fragmentation benchmark.
 */
static struct kunit_case gpu_buddy_tests[] = {
	KUNIT_CASE(gpu_test_buddy_alloc_limit),
	KUNIT_CASE(gpu_test_buddy_alloc_optimistic),
	KUNIT_CASE(gpu_test_buddy_alloc_pessimistic),
	KUNIT_CASE(gpu_test_buddy_alloc_pathological),
	KUNIT_CASE(gpu_test_buddy_alloc_contiguous),
	KUNIT_CASE(gpu_test_buddy_alloc_clear),
	KUNIT_CASE(gpu_test_buddy_alloc_range),
	KUNIT_CASE(gpu_test_buddy_alloc_range_bias),
	KUNIT_CASE_SLOW(gpu_test_buddy_fragmentation_performance),
	KUNIT_CASE(gpu_test_buddy_alloc_exceeds_max_order),
	KUNIT_CASE(gpu_test_buddy_offset_aligned_allocation),
	KUNIT_CASE(gpu_test_buddy_subtree_offset_alignment_stress),
	{}	/* sentinel */
};
1410 
/* Suite glue: gpu_buddy_suite_init() seeds random_seed once before any case */
static struct kunit_suite gpu_buddy_test_suite = {
	.name = "gpu_buddy",
	.suite_init = gpu_buddy_suite_init,
	.test_cases = gpu_buddy_tests,
};

kunit_test_suite(gpu_buddy_test_suite);

MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Kunit test for gpu_buddy functions");
MODULE_LICENSE("GPL");
1422