1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2019 Intel Corporation 4 * Copyright © 2022 Maíra Canal <mairacanal@riseup.net> 5 */ 6 7 #include <kunit/test.h> 8 9 #include <linux/prime_numbers.h> 10 #include <linux/sched/signal.h> 11 #include <linux/sizes.h> 12 13 #include <linux/gpu_buddy.h> 14 15 #include "gpu_random.h" 16 17 static unsigned int random_seed; 18 19 static inline u64 get_size(int order, u64 chunk_size) 20 { 21 return (1 << order) * chunk_size; 22 } 23 24 static void gpu_test_buddy_subtree_offset_alignment_stress(struct kunit *test) 25 { 26 struct gpu_buddy_block *block; 27 struct rb_node *node = NULL; 28 const u64 mm_size = SZ_2M; 29 const u64 alignments[] = { 30 SZ_1M, 31 SZ_512K, 32 SZ_256K, 33 SZ_128K, 34 SZ_64K, 35 SZ_32K, 36 SZ_16K, 37 SZ_8K, 38 }; 39 struct list_head allocated[ARRAY_SIZE(alignments)]; 40 unsigned int i, max_subtree_align = 0; 41 int ret, tree, order; 42 struct gpu_buddy mm; 43 44 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K), 45 "buddy_init failed\n"); 46 47 for (i = 0; i < ARRAY_SIZE(allocated); i++) 48 INIT_LIST_HEAD(&allocated[i]); 49 50 /* 51 * Exercise subtree_max_alignment tracking by allocating blocks with descending 52 * alignment constraints and freeing them in reverse order. This verifies that 53 * free-tree augmentation correctly propagates the maximum offset alignment 54 * present in each subtree at every stage. 
55 */ 56 57 for (i = 0; i < ARRAY_SIZE(alignments); i++) { 58 struct gpu_buddy_block *root = NULL; 59 unsigned int expected; 60 u64 align; 61 62 align = alignments[i]; 63 expected = ilog2(align) - 1; 64 65 for (;;) { 66 ret = gpu_buddy_alloc_blocks(&mm, 67 0, mm_size, 68 SZ_4K, align, 69 &allocated[i], 70 0); 71 if (ret) 72 break; 73 74 block = list_last_entry(&allocated[i], 75 struct gpu_buddy_block, 76 link); 77 KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), align)); 78 } 79 80 for (order = mm.max_order; order >= 0 && !root; order--) { 81 for (tree = 0; tree < 2; tree++) { 82 node = mm.free_trees[tree][order].rb_node; 83 if (node) { 84 root = container_of(node, 85 struct gpu_buddy_block, 86 rb); 87 break; 88 } 89 } 90 } 91 92 KUNIT_ASSERT_NOT_NULL(test, root); 93 KUNIT_EXPECT_EQ(test, root->subtree_max_alignment, expected); 94 } 95 96 for (i = ARRAY_SIZE(alignments); i-- > 0; ) { 97 gpu_buddy_free_list(&mm, &allocated[i], 0); 98 99 for (order = 0; order <= mm.max_order; order++) { 100 for (tree = 0; tree < 2; tree++) { 101 node = mm.free_trees[tree][order].rb_node; 102 if (!node) 103 continue; 104 105 block = container_of(node, struct gpu_buddy_block, rb); 106 max_subtree_align = max(max_subtree_align, 107 block->subtree_max_alignment); 108 } 109 } 110 111 KUNIT_EXPECT_GE(test, max_subtree_align, ilog2(alignments[i])); 112 } 113 114 gpu_buddy_fini(&mm); 115 } 116 117 static void gpu_test_buddy_offset_aligned_allocation(struct kunit *test) 118 { 119 struct gpu_buddy_block *block, *tmp; 120 int num_blocks, i, count = 0; 121 LIST_HEAD(allocated); 122 struct gpu_buddy mm; 123 u64 mm_size = SZ_4M; 124 LIST_HEAD(freed); 125 126 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K), 127 "buddy_init failed\n"); 128 129 num_blocks = mm_size / SZ_256K; 130 /* 131 * Allocate multiple sizes under a fixed offset alignment. 
132 * Ensures alignment handling is independent of allocation size and 133 * exercises subtree max-alignment pruning for small requests. 134 */ 135 for (i = 0; i < num_blocks; i++) 136 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_256K, 137 &allocated, 0), 138 "buddy_alloc hit an error size=%u\n", SZ_8K); 139 140 list_for_each_entry(block, &allocated, link) { 141 /* Ensure the allocated block uses the expected 8 KB size */ 142 KUNIT_EXPECT_EQ(test, gpu_buddy_block_size(&mm, block), SZ_8K); 143 /* Ensure the block starts at a 256 KB-aligned offset for proper alignment */ 144 KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), SZ_256K)); 145 } 146 gpu_buddy_free_list(&mm, &allocated, 0); 147 148 for (i = 0; i < num_blocks; i++) 149 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K, 150 &allocated, 0), 151 "buddy_alloc hit an error size=%u\n", SZ_16K); 152 153 list_for_each_entry(block, &allocated, link) { 154 /* Ensure the allocated block uses the expected 16 KB size */ 155 KUNIT_EXPECT_EQ(test, gpu_buddy_block_size(&mm, block), SZ_16K); 156 /* Ensure the block starts at a 256 KB-aligned offset for proper alignment */ 157 KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), SZ_256K)); 158 } 159 160 /* 161 * Free alternating aligned blocks to introduce fragmentation. 162 * Ensures offset-aligned allocations remain valid after frees and 163 * verifies subtree max-alignment metadata is correctly maintained. 
164 */ 165 list_for_each_entry_safe(block, tmp, &allocated, link) { 166 if (count % 2 == 0) 167 list_move_tail(&block->link, &freed); 168 count++; 169 } 170 gpu_buddy_free_list(&mm, &freed, 0); 171 172 for (i = 0; i < num_blocks / 2; i++) 173 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K, 174 &allocated, 0), 175 "buddy_alloc hit an error size=%u\n", SZ_16K); 176 177 /* 178 * Allocate with offset alignment after all slots are used; must fail. 179 * Confirms that no aligned offsets remain. 180 */ 181 KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K, 182 &allocated, 0), 183 "buddy_alloc hit an error size=%u\n", SZ_16K); 184 gpu_buddy_free_list(&mm, &allocated, 0); 185 gpu_buddy_fini(&mm); 186 } 187 188 static void gpu_test_buddy_fragmentation_performance(struct kunit *test) 189 { 190 struct gpu_buddy_block *block, *tmp; 191 int num_blocks, i, ret, count = 0; 192 LIST_HEAD(allocated_blocks); 193 unsigned long elapsed_ms; 194 LIST_HEAD(reverse_list); 195 LIST_HEAD(test_blocks); 196 LIST_HEAD(clear_list); 197 LIST_HEAD(dirty_list); 198 LIST_HEAD(free_list); 199 struct gpu_buddy mm; 200 u64 mm_size = SZ_4G; 201 ktime_t start, end; 202 203 /* 204 * Allocation under severe fragmentation 205 * 206 * Create severe fragmentation by allocating the entire 4 GiB address space 207 * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern 208 * leaves many scattered holes. Split the allocations into two groups and 209 * return them with different flags to block coalescing, then repeatedly 210 * allocate and free 64 KiB blocks while timing the loop. This stresses how 211 * quickly the allocator can satisfy larger, aligned requests from a pool of 212 * highly fragmented space. 
213 */ 214 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K), 215 "buddy_init failed\n"); 216 217 num_blocks = mm_size / SZ_64K; 218 219 start = ktime_get(); 220 /* Allocate with maximum fragmentation - 8K blocks with 64K alignment */ 221 for (i = 0; i < num_blocks; i++) 222 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, 223 &allocated_blocks, 0), 224 "buddy_alloc hit an error size=%u\n", SZ_8K); 225 226 list_for_each_entry_safe(block, tmp, &allocated_blocks, link) { 227 if (count % 4 == 0 || count % 4 == 3) 228 list_move_tail(&block->link, &clear_list); 229 else 230 list_move_tail(&block->link, &dirty_list); 231 count++; 232 } 233 234 /* Free with different flags to ensure no coalescing */ 235 gpu_buddy_free_list(&mm, &clear_list, GPU_BUDDY_CLEARED); 236 gpu_buddy_free_list(&mm, &dirty_list, 0); 237 238 for (i = 0; i < num_blocks; i++) 239 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K, 240 &test_blocks, 0), 241 "buddy_alloc hit an error size=%u\n", SZ_64K); 242 gpu_buddy_free_list(&mm, &test_blocks, 0); 243 244 end = ktime_get(); 245 elapsed_ms = ktime_to_ms(ktime_sub(end, start)); 246 247 kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms); 248 249 gpu_buddy_fini(&mm); 250 251 /* 252 * Reverse free order under fragmentation 253 * 254 * Construct a fragmented 4 GiB space by allocating every 8 KiB block with 255 * 64 KiB alignment, creating a dense scatter of small regions. Half of the 256 * blocks are selectively freed to form sparse gaps, while the remaining 257 * allocations are preserved, reordered in reverse, and released back with 258 * the cleared flag. This models a pathological reverse-ordered free pattern 259 * and measures how quickly the allocator can merge and reclaim space when 260 * deallocation occurs in the opposite order of allocation, exposing the 261 * cost difference between a linear freelist scan and an ordered tree lookup. 
262 */ 263 ret = gpu_buddy_init(&mm, mm_size, SZ_4K); 264 KUNIT_ASSERT_EQ(test, ret, 0); 265 266 start = ktime_get(); 267 /* Allocate maximum fragmentation */ 268 for (i = 0; i < num_blocks; i++) 269 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, 270 &allocated_blocks, 0), 271 "buddy_alloc hit an error size=%u\n", SZ_8K); 272 273 list_for_each_entry_safe(block, tmp, &allocated_blocks, link) { 274 if (count % 2 == 0) 275 list_move_tail(&block->link, &free_list); 276 count++; 277 } 278 gpu_buddy_free_list(&mm, &free_list, GPU_BUDDY_CLEARED); 279 280 list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link) 281 list_move(&block->link, &reverse_list); 282 gpu_buddy_free_list(&mm, &reverse_list, GPU_BUDDY_CLEARED); 283 284 end = ktime_get(); 285 elapsed_ms = ktime_to_ms(ktime_sub(end, start)); 286 287 kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms); 288 289 gpu_buddy_fini(&mm); 290 } 291 292 static void gpu_test_buddy_alloc_range_bias(struct kunit *test) 293 { 294 u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem; 295 GPU_RND_STATE(prng, random_seed); 296 unsigned int i, count, *order; 297 struct gpu_buddy_block *block; 298 unsigned long flags; 299 struct gpu_buddy mm; 300 LIST_HEAD(allocated); 301 302 bias_size = SZ_1M; 303 ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size); 304 ps = max(SZ_4K, ps); 305 mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */ 306 307 kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps); 308 309 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps), 310 "buddy_init failed\n"); 311 312 count = mm_size / bias_size; 313 order = gpu_random_order(count, &prng); 314 KUNIT_EXPECT_TRUE(test, order); 315 316 /* 317 * Idea is to split the address space into uniform bias ranges, and then 318 * in some random order allocate within each bias, using various 319 * patterns within. 
This should detect if allocations leak out from a 320 * given bias, for example. 321 */ 322 323 for (i = 0; i < count; i++) { 324 LIST_HEAD(tmp); 325 u32 size; 326 327 bias_start = order[i] * bias_size; 328 bias_end = bias_start + bias_size; 329 bias_rem = bias_size; 330 331 /* internal round_up too big */ 332 KUNIT_ASSERT_TRUE_MSG(test, 333 gpu_buddy_alloc_blocks(&mm, bias_start, 334 bias_end, bias_size + ps, bias_size, 335 &allocated, 336 GPU_BUDDY_RANGE_ALLOCATION), 337 "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", 338 bias_start, bias_end, bias_size, bias_size); 339 340 /* size too big */ 341 KUNIT_ASSERT_TRUE_MSG(test, 342 gpu_buddy_alloc_blocks(&mm, bias_start, 343 bias_end, bias_size + ps, ps, 344 &allocated, 345 GPU_BUDDY_RANGE_ALLOCATION), 346 "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", 347 bias_start, bias_end, bias_size + ps, ps); 348 349 /* bias range too small for size */ 350 KUNIT_ASSERT_TRUE_MSG(test, 351 gpu_buddy_alloc_blocks(&mm, bias_start + ps, 352 bias_end, bias_size, ps, 353 &allocated, 354 GPU_BUDDY_RANGE_ALLOCATION), 355 "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", 356 bias_start + ps, bias_end, bias_size, ps); 357 358 /* bias misaligned */ 359 KUNIT_ASSERT_TRUE_MSG(test, 360 gpu_buddy_alloc_blocks(&mm, bias_start + ps, 361 bias_end - ps, 362 bias_size >> 1, bias_size >> 1, 363 &allocated, 364 GPU_BUDDY_RANGE_ALLOCATION), 365 "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n", 366 bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1); 367 368 /* single big page */ 369 KUNIT_ASSERT_FALSE_MSG(test, 370 gpu_buddy_alloc_blocks(&mm, bias_start, 371 bias_end, bias_size, bias_size, 372 &tmp, 373 GPU_BUDDY_RANGE_ALLOCATION), 374 "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n", 375 bias_start, bias_end, bias_size, bias_size); 376 gpu_buddy_free_list(&mm, &tmp, 0); 377 378 /* single page with internal round_up */ 379 KUNIT_ASSERT_FALSE_MSG(test, 380 
gpu_buddy_alloc_blocks(&mm, bias_start, 381 bias_end, ps, bias_size, 382 &tmp, 383 GPU_BUDDY_RANGE_ALLOCATION), 384 "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", 385 bias_start, bias_end, ps, bias_size); 386 gpu_buddy_free_list(&mm, &tmp, 0); 387 388 /* random size within */ 389 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); 390 if (size) 391 KUNIT_ASSERT_FALSE_MSG(test, 392 gpu_buddy_alloc_blocks(&mm, bias_start, 393 bias_end, size, ps, 394 &tmp, 395 GPU_BUDDY_RANGE_ALLOCATION), 396 "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", 397 bias_start, bias_end, size, ps); 398 399 bias_rem -= size; 400 /* too big for current avail */ 401 KUNIT_ASSERT_TRUE_MSG(test, 402 gpu_buddy_alloc_blocks(&mm, bias_start, 403 bias_end, bias_rem + ps, ps, 404 &allocated, 405 GPU_BUDDY_RANGE_ALLOCATION), 406 "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", 407 bias_start, bias_end, bias_rem + ps, ps); 408 409 if (bias_rem) { 410 /* random fill of the remainder */ 411 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); 412 size = max(size, ps); 413 414 KUNIT_ASSERT_FALSE_MSG(test, 415 gpu_buddy_alloc_blocks(&mm, bias_start, 416 bias_end, size, ps, 417 &allocated, 418 GPU_BUDDY_RANGE_ALLOCATION), 419 "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", 420 bias_start, bias_end, size, ps); 421 /* 422 * Intentionally allow some space to be left 423 * unallocated, and ideally not always on the bias 424 * boundaries. 425 */ 426 gpu_buddy_free_list(&mm, &tmp, 0); 427 } else { 428 list_splice_tail(&tmp, &allocated); 429 } 430 } 431 432 kfree(order); 433 gpu_buddy_free_list(&mm, &allocated, 0); 434 gpu_buddy_fini(&mm); 435 436 /* 437 * Something more free-form. Idea is to pick a random starting bias 438 * range within the address space and then start filling it up. Also 439 * randomly grow the bias range in both directions as we go along. 
This 440 * should give us bias start/end which is not always uniform like above, 441 * and in some cases will require the allocator to jump over already 442 * allocated nodes in the middle of the address space. 443 */ 444 445 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps), 446 "buddy_init failed\n"); 447 448 bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); 449 bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps); 450 bias_end = max(bias_end, bias_start + ps); 451 bias_rem = bias_end - bias_start; 452 453 do { 454 u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); 455 456 KUNIT_ASSERT_FALSE_MSG(test, 457 gpu_buddy_alloc_blocks(&mm, bias_start, 458 bias_end, size, ps, 459 &allocated, 460 GPU_BUDDY_RANGE_ALLOCATION), 461 "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", 462 bias_start, bias_end, size, ps); 463 bias_rem -= size; 464 465 /* 466 * Try to randomly grow the bias range in both directions, or 467 * only one, or perhaps don't grow at all. 
468 */ 469 do { 470 u32 old_bias_start = bias_start; 471 u32 old_bias_end = bias_end; 472 473 if (bias_start) 474 bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps); 475 if (bias_end != mm_size) 476 bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps); 477 478 bias_rem += old_bias_start - bias_start; 479 bias_rem += bias_end - old_bias_end; 480 } while (!bias_rem && (bias_start || bias_end != mm_size)); 481 } while (bias_rem); 482 483 KUNIT_ASSERT_EQ(test, bias_start, 0); 484 KUNIT_ASSERT_EQ(test, bias_end, mm_size); 485 KUNIT_ASSERT_TRUE_MSG(test, 486 gpu_buddy_alloc_blocks(&mm, bias_start, bias_end, 487 ps, ps, 488 &allocated, 489 GPU_BUDDY_RANGE_ALLOCATION), 490 "buddy_alloc passed with bias(%x-%x), size=%u\n", 491 bias_start, bias_end, ps); 492 493 gpu_buddy_free_list(&mm, &allocated, 0); 494 gpu_buddy_fini(&mm); 495 496 /* 497 * Allocate cleared blocks in the bias range when the GPU buddy's clear avail is 498 * zero. This will validate the bias range allocation in scenarios like system boot 499 * when no cleared blocks are available and exercise the fallback path too. The resulting 500 * blocks should always be dirty. 
501 */ 502 503 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps), 504 "buddy_init failed\n"); 505 506 bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); 507 bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps); 508 bias_end = max(bias_end, bias_start + ps); 509 bias_rem = bias_end - bias_start; 510 511 flags = GPU_BUDDY_CLEAR_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION; 512 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); 513 514 KUNIT_ASSERT_FALSE_MSG(test, 515 gpu_buddy_alloc_blocks(&mm, bias_start, 516 bias_end, size, ps, 517 &allocated, 518 flags), 519 "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", 520 bias_start, bias_end, size, ps); 521 522 list_for_each_entry(block, &allocated, link) 523 KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false); 524 525 gpu_buddy_free_list(&mm, &allocated, 0); 526 gpu_buddy_fini(&mm); 527 } 528 529 static void gpu_test_buddy_alloc_range(struct kunit *test) 530 { 531 GPU_RND_STATE(prng, random_seed); 532 struct gpu_buddy_block *block; 533 struct gpu_buddy mm; 534 u32 mm_size, total; 535 LIST_HEAD(blocks); 536 LIST_HEAD(tmp); 537 u32 ps = SZ_4K; 538 int ret; 539 540 mm_size = SZ_16M; 541 542 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps), 543 "buddy_init failed\n"); 544 545 /* 546 * Basic exact-range allocation. 547 * Allocate the entire mm as one exact range (start + size == end). 548 * This is the simplest case exercising __gpu_buddy_alloc_range. 
549 */ 550 ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &blocks, 0); 551 KUNIT_ASSERT_EQ_MSG(test, ret, 0, 552 "exact-range alloc of full mm failed\n"); 553 554 total = 0; 555 list_for_each_entry(block, &blocks, link) { 556 u64 offset = gpu_buddy_block_offset(block); 557 u64 bsize = gpu_buddy_block_size(&mm, block); 558 559 KUNIT_EXPECT_TRUE_MSG(test, offset + bsize <= (u64)mm_size, 560 "block [%llx, %llx) outside mm\n", offset, offset + bsize); 561 total += (u32)bsize; 562 } 563 KUNIT_EXPECT_EQ(test, total, mm_size); 564 KUNIT_EXPECT_EQ(test, mm.avail, 0ULL); 565 566 /* Full mm should be exhausted */ 567 ret = gpu_buddy_alloc_blocks(&mm, 0, ps, ps, ps, &tmp, 0); 568 KUNIT_EXPECT_NE_MSG(test, ret, 0, "alloc should fail when mm is full\n"); 569 570 gpu_buddy_free_list(&mm, &blocks, 0); 571 KUNIT_EXPECT_EQ(test, mm.avail, (u64)mm_size); 572 gpu_buddy_fini(&mm); 573 574 /* 575 * Exact-range allocation of sub-ranges. 576 * Split the mm into four equal quarters and allocate each as an exact 577 * range. Validates splitting and non-overlapping exact allocations. 578 */ 579 KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); 580 581 { 582 u32 quarter = mm_size / 4; 583 int i; 584 585 for (i = 0; i < 4; i++) { 586 u32 start = i * quarter; 587 u32 end = start + quarter; 588 589 ret = gpu_buddy_alloc_blocks(&mm, start, end, quarter, ps, &blocks, 0); 590 KUNIT_ASSERT_EQ_MSG(test, ret, 0, 591 "exact-range alloc quarter %d [%x, %x) failed\n", 592 i, start, end); 593 } 594 KUNIT_EXPECT_EQ(test, mm.avail, 0ULL); 595 gpu_buddy_free_list(&mm, &blocks, 0); 596 } 597 598 gpu_buddy_fini(&mm); 599 600 /* 601 * Minimum chunk-size exact range at various offsets. 602 * Allocate single-page exact ranges at the start, middle and end. 
603 */ 604 KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); 605 606 ret = gpu_buddy_alloc_blocks(&mm, 0, ps, ps, ps, &blocks, 0); 607 KUNIT_ASSERT_EQ(test, ret, 0); 608 609 ret = gpu_buddy_alloc_blocks(&mm, mm_size / 2, mm_size / 2 + ps, ps, ps, &blocks, 0); 610 KUNIT_ASSERT_EQ(test, ret, 0); 611 612 ret = gpu_buddy_alloc_blocks(&mm, mm_size - ps, mm_size, ps, ps, &blocks, 0); 613 KUNIT_ASSERT_EQ(test, ret, 0); 614 615 total = 0; 616 list_for_each_entry(block, &blocks, link) 617 total += (u32)gpu_buddy_block_size(&mm, block); 618 KUNIT_EXPECT_EQ(test, total, 3 * ps); 619 620 gpu_buddy_free_list(&mm, &blocks, 0); 621 gpu_buddy_fini(&mm); 622 623 /* 624 * Non power-of-two mm size (multiple roots). 625 * Exact-range allocations that span root boundaries must still work. 626 */ 627 mm_size = SZ_4M + SZ_2M + SZ_1M; /* 7 MiB, three roots */ 628 629 KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); 630 KUNIT_EXPECT_GT(test, mm.n_roots, 1U); 631 632 /* Allocate first 4M root exactly */ 633 ret = gpu_buddy_alloc_blocks(&mm, 0, SZ_4M, SZ_4M, ps, &blocks, 0); 634 KUNIT_ASSERT_EQ(test, ret, 0); 635 636 /* Allocate second root (4M-6M) exactly */ 637 ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_2M, SZ_2M, ps, &blocks, 0); 638 KUNIT_ASSERT_EQ(test, ret, 0); 639 640 /* Allocate third root (6M-7M) exactly */ 641 ret = gpu_buddy_alloc_blocks(&mm, SZ_4M + SZ_2M, mm_size, SZ_1M, ps, &blocks, 0); 642 KUNIT_ASSERT_EQ(test, ret, 0); 643 644 KUNIT_EXPECT_EQ(test, mm.avail, 0ULL); 645 gpu_buddy_free_list(&mm, &blocks, 0); 646 647 /* Cross-root exact-range: the entire non-pot mm */ 648 ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &blocks, 0); 649 KUNIT_ASSERT_EQ(test, ret, 0); 650 KUNIT_EXPECT_EQ(test, mm.avail, 0ULL); 651 652 gpu_buddy_free_list(&mm, &blocks, 0); 653 gpu_buddy_fini(&mm); 654 655 /* 656 * Randomized exact-range allocations. 
657 * Divide the mm into N random-sized, contiguous, page-aligned slices 658 * and allocate each as an exact range in random order. 659 */ 660 mm_size = SZ_16M; 661 KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); 662 663 { 664 #define N_RAND_RANGES 16 665 u32 ranges[N_RAND_RANGES + 1]; /* boundaries */ 666 u32 order_arr[N_RAND_RANGES]; 667 u32 remaining = mm_size; 668 int i; 669 670 ranges[0] = 0; 671 for (i = 0; i < N_RAND_RANGES - 1; i++) { 672 u32 max_chunk = remaining - (N_RAND_RANGES - 1 - i) * ps; 673 u32 sz = max(round_up(prandom_u32_state(&prng) % max_chunk, ps), ps); 674 675 ranges[i + 1] = ranges[i] + sz; 676 remaining -= sz; 677 } 678 ranges[N_RAND_RANGES] = mm_size; 679 680 /* Create a random order */ 681 for (i = 0; i < N_RAND_RANGES; i++) 682 order_arr[i] = i; 683 for (i = N_RAND_RANGES - 1; i > 0; i--) { 684 u32 j = prandom_u32_state(&prng) % (i + 1); 685 u32 tmp_val = order_arr[i]; 686 687 order_arr[i] = order_arr[j]; 688 order_arr[j] = tmp_val; 689 } 690 691 for (i = 0; i < N_RAND_RANGES; i++) { 692 u32 idx = order_arr[i]; 693 u32 start = ranges[idx]; 694 u32 end = ranges[idx + 1]; 695 u32 sz = end - start; 696 697 ret = gpu_buddy_alloc_blocks(&mm, start, end, sz, ps, &blocks, 0); 698 KUNIT_ASSERT_EQ_MSG(test, ret, 0, 699 "random exact-range [%x, %x) sz=%x failed\n", 700 start, end, sz); 701 } 702 703 KUNIT_EXPECT_EQ(test, mm.avail, 0ULL); 704 gpu_buddy_free_list(&mm, &blocks, 0); 705 #undef N_RAND_RANGES 706 } 707 708 gpu_buddy_fini(&mm); 709 710 /* 711 * Negative case - partially allocated range. 712 * Allocate the first half, then try to exact-range allocate the full 713 * mm. This must fail because the first half is already occupied. 
714 */ 715 mm_size = SZ_16M; 716 KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); 717 718 ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size / 2, mm_size / 2, ps, &blocks, 0); 719 KUNIT_ASSERT_EQ(test, ret, 0); 720 721 ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &tmp, 0); 722 KUNIT_EXPECT_NE_MSG(test, ret, 0, 723 "exact-range alloc should fail when range is partially used\n"); 724 725 /* Also try the already-occupied sub-range directly */ 726 ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size / 2, mm_size / 2, ps, &tmp, 0); 727 KUNIT_EXPECT_NE_MSG(test, ret, 0, 728 "double alloc of same exact range should fail\n"); 729 730 /* The free second half should still be allocatable */ 731 ret = gpu_buddy_alloc_blocks(&mm, mm_size / 2, mm_size, mm_size / 2, ps, &blocks, 0); 732 KUNIT_ASSERT_EQ(test, ret, 0); 733 734 KUNIT_EXPECT_EQ(test, mm.avail, 0ULL); 735 gpu_buddy_free_list(&mm, &blocks, 0); 736 gpu_buddy_fini(&mm); 737 738 /* 739 * Negative case - checkerboard partial allocation. 740 * Allocate every other page-sized chunk in a small mm, then try to 741 * exact-range allocate a range covering two pages (one allocated, one 742 * free). This must fail. 
743 */ 744 mm_size = SZ_64K; 745 KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); 746 747 { 748 u32 off; 749 750 for (off = 0; off < mm_size; off += 2 * ps) { 751 ret = gpu_buddy_alloc_blocks(&mm, off, off + ps, ps, ps, &blocks, 0); 752 KUNIT_ASSERT_EQ(test, ret, 0); 753 } 754 755 /* Try exact range over a pair [allocated, free] */ 756 ret = gpu_buddy_alloc_blocks(&mm, 0, 2 * ps, 2 * ps, ps, &tmp, 0); 757 KUNIT_EXPECT_NE_MSG(test, ret, 0, 758 "exact-range over partially allocated pair should fail\n"); 759 760 /* The free pages individually should still work */ 761 ret = gpu_buddy_alloc_blocks(&mm, ps, 2 * ps, ps, ps, &blocks, 0); 762 KUNIT_ASSERT_EQ(test, ret, 0); 763 764 gpu_buddy_free_list(&mm, &blocks, 0); 765 } 766 767 gpu_buddy_fini(&mm); 768 769 /* Negative case - misaligned start/end/size */ 770 mm_size = SZ_16M; 771 KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); 772 773 /* start not aligned to chunk_size */ 774 ret = gpu_buddy_alloc_blocks(&mm, ps / 2, ps / 2 + ps, ps, ps, &tmp, 0); 775 KUNIT_EXPECT_NE(test, ret, 0); 776 777 /* size not aligned */ 778 ret = gpu_buddy_alloc_blocks(&mm, 0, ps + 1, ps + 1, ps, &tmp, 0); 779 KUNIT_EXPECT_NE(test, ret, 0); 780 781 /* end exceeds mm size */ 782 ret = gpu_buddy_alloc_blocks(&mm, mm_size, mm_size + ps, ps, ps, &tmp, 0); 783 KUNIT_EXPECT_NE(test, ret, 0); 784 785 gpu_buddy_fini(&mm); 786 787 /* 788 * Free and re-allocate the same exact range. 789 * This exercises merge-on-free followed by exact-range re-split. 
790 */ 791 mm_size = SZ_16M; 792 KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); 793 794 { 795 int i; 796 797 for (i = 0; i < 5; i++) { 798 ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_2M, 799 SZ_2M, ps, &blocks, 0); 800 KUNIT_ASSERT_EQ_MSG(test, ret, 0, 801 "re-alloc iteration %d failed\n", i); 802 803 total = 0; 804 list_for_each_entry(block, &blocks, link) { 805 u64 offset = gpu_buddy_block_offset(block); 806 u64 bsize = gpu_buddy_block_size(&mm, block); 807 808 KUNIT_EXPECT_GE(test, offset, (u64)SZ_4M); 809 KUNIT_EXPECT_LE(test, offset + bsize, (u64)(SZ_4M + SZ_2M)); 810 total += (u32)bsize; 811 } 812 KUNIT_EXPECT_EQ(test, total, SZ_2M); 813 814 gpu_buddy_free_list(&mm, &blocks, 0); 815 } 816 817 KUNIT_EXPECT_EQ(test, mm.avail, (u64)mm_size); 818 } 819 820 gpu_buddy_fini(&mm); 821 822 /* 823 * Various power-of-two exact ranges within a large mm. 824 * Allocate non-overlapping power-of-two exact ranges at their natural 825 * alignment, validating that the allocator handles different orders. 
826 */ 827 mm_size = SZ_16M; 828 KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); 829 830 /* Allocate 4K at offset 0 */ 831 ret = gpu_buddy_alloc_blocks(&mm, 0, SZ_4K, SZ_4K, ps, &blocks, 0); 832 KUNIT_ASSERT_EQ(test, ret, 0); 833 834 /* Allocate 64K at offset 64K */ 835 ret = gpu_buddy_alloc_blocks(&mm, SZ_64K, SZ_64K + SZ_64K, SZ_64K, ps, &blocks, 0); 836 KUNIT_ASSERT_EQ(test, ret, 0); 837 838 /* Allocate 1M at offset 1M */ 839 ret = gpu_buddy_alloc_blocks(&mm, SZ_1M, SZ_1M + SZ_1M, SZ_1M, ps, &blocks, 0); 840 KUNIT_ASSERT_EQ(test, ret, 0); 841 842 /* Allocate 4M at offset 4M */ 843 ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_4M, SZ_4M, ps, &blocks, 0); 844 KUNIT_ASSERT_EQ(test, ret, 0); 845 846 total = 0; 847 list_for_each_entry(block, &blocks, link) 848 total += (u32)gpu_buddy_block_size(&mm, block); 849 KUNIT_EXPECT_EQ(test, total, SZ_4K + SZ_64K + SZ_1M + SZ_4M); 850 851 gpu_buddy_free_list(&mm, &blocks, 0); 852 gpu_buddy_fini(&mm); 853 } 854 855 static void gpu_test_buddy_alloc_clear(struct kunit *test) 856 { 857 unsigned long n_pages, total, i = 0; 858 const unsigned long ps = SZ_4K; 859 struct gpu_buddy_block *block; 860 const int max_order = 12; 861 LIST_HEAD(allocated); 862 struct gpu_buddy mm; 863 unsigned int order; 864 u32 mm_size, size; 865 LIST_HEAD(dirty); 866 LIST_HEAD(clean); 867 868 mm_size = SZ_4K << max_order; 869 KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); 870 871 KUNIT_EXPECT_EQ(test, mm.max_order, max_order); 872 873 /* 874 * Idea is to allocate and free some random portion of the address space, 875 * returning those pages as non-dirty and randomly alternate between 876 * requesting dirty and non-dirty pages (not going over the limit 877 * we freed as non-dirty), putting that into two separate lists. 878 * Loop over both lists at the end checking that the dirty list 879 * is indeed all dirty pages and vice versa. Free it all again, 880 * keeping the dirty/clear status. 
881 */ 882 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 883 5 * ps, ps, &allocated, 884 GPU_BUDDY_TOPDOWN_ALLOCATION), 885 "buddy_alloc hit an error size=%lu\n", 5 * ps); 886 gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED); 887 888 n_pages = 10; 889 do { 890 unsigned long flags; 891 struct list_head *list; 892 int slot = i % 2; 893 894 if (slot == 0) { 895 list = &dirty; 896 flags = 0; 897 } else { 898 list = &clean; 899 flags = GPU_BUDDY_CLEAR_ALLOCATION; 900 } 901 902 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 903 ps, ps, list, 904 flags), 905 "buddy_alloc hit an error size=%lu\n", ps); 906 } while (++i < n_pages); 907 908 list_for_each_entry(block, &clean, link) 909 KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), true); 910 911 list_for_each_entry(block, &dirty, link) 912 KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false); 913 914 gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED); 915 916 /* 917 * Trying to go over the clear limit for some allocation. 918 * The allocation should never fail with reasonable page-size. 919 */ 920 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 921 10 * ps, ps, &clean, 922 GPU_BUDDY_CLEAR_ALLOCATION), 923 "buddy_alloc hit an error size=%lu\n", 10 * ps); 924 925 gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED); 926 gpu_buddy_free_list(&mm, &dirty, 0); 927 gpu_buddy_fini(&mm); 928 929 KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps)); 930 931 /* 932 * Create a new mm. Intentionally fragment the address space by creating 933 * two alternating lists. Free both lists, one as dirty the other as clean. 934 * Try to allocate double the previous size with matching min_page_size. The 935 * allocation should never fail as it calls the force_merge. Also check that 936 * the page is always dirty after force_merge. Free the page as dirty, then 937 * repeat the whole thing, increment the order until we hit the max_order. 
938 */ 939 940 i = 0; 941 n_pages = mm_size / ps; 942 do { 943 struct list_head *list; 944 int slot = i % 2; 945 946 if (slot == 0) 947 list = &dirty; 948 else 949 list = &clean; 950 951 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 952 ps, ps, list, 0), 953 "buddy_alloc hit an error size=%lu\n", ps); 954 } while (++i < n_pages); 955 956 gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED); 957 gpu_buddy_free_list(&mm, &dirty, 0); 958 959 order = 1; 960 do { 961 size = SZ_4K << order; 962 963 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, 964 size, size, &allocated, 965 GPU_BUDDY_CLEAR_ALLOCATION), 966 "buddy_alloc hit an error size=%u\n", size); 967 total = 0; 968 list_for_each_entry(block, &allocated, link) { 969 if (size != mm_size) 970 KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false); 971 total += gpu_buddy_block_size(&mm, block); 972 } 973 KUNIT_EXPECT_EQ(test, total, size); 974 975 gpu_buddy_free_list(&mm, &allocated, 0); 976 } while (++order <= max_order); 977 978 gpu_buddy_fini(&mm); 979 980 /* 981 * Create a new mm with a non power-of-two size. Allocate a random size from each 982 * root, free as cleared and then call fini. This will ensure the multi-root 983 * force merge during fini. 
	 */
	/* Two roots: a pot-sized SZ_4K << max_order plus a quarter-sized remainder. */
	mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2));

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
	/* Carve a few blocks out of the first (largest) root... */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
							    4 * ps, ps, &allocated,
							    GPU_BUDDY_RANGE_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 4 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
							    2 * ps, ps, &allocated,
							    GPU_BUDDY_CLEAR_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 2 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	/* ...and one page from the second root, then tear down via fini. */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size,
							    ps, ps, &allocated,
							    GPU_BUDDY_RANGE_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_contiguous(struct kunit *test)
{
	const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
	unsigned long i, n_pages, total;
	struct gpu_buddy_block *block;
	struct gpu_buddy mm;
	LIST_HEAD(left);
	LIST_HEAD(middle);
	LIST_HEAD(right);
	LIST_HEAD(allocated);

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/*
	 * Idea is to fragment the address space by alternating block
	 * allocations between three different lists; one for left, middle and
	 * right. We can then free a list to simulate fragmentation. In
	 * particular we want to exercise the GPU_BUDDY_CONTIGUOUS_ALLOCATION,
	 * including the try_harder path.
	 */

	/* Round-robin single pages into left/middle/right until the mm is full. */
	i = 0;
	n_pages = mm_size / ps;
	do {
		struct list_head *list;
		int slot = i % 3;

		if (slot == 0)
			list = &left;
		else if (slot == 1)
			list = &middle;
		else
			list = &right;
		KUNIT_ASSERT_FALSE_MSG(test,
				       gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							      ps, ps, list, 0),
				       "buddy_alloc hit an error size=%lu\n",
				       ps);
	} while (++i < n_pages);

	/* Fully allocated: any contiguous request must fail. */
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);

	/* Freeing 'middle' leaves only isolated free pages -- still no room. */
	gpu_buddy_free_list(&mm, &middle, 0);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   2 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 2 * ps);

	/* Freeing 'right' as well yields free pairs but no 3-page run. */
	gpu_buddy_free_list(&mm, &right, 0);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);
	/*
	 * At this point we should have enough contiguous space for 2 blocks,
	 * however they are never buddies (since we freed middle and right) so
	 * will require the try_harder logic to find them.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    2 * ps, ps, &allocated,
							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 2 * ps);

	/* With 'left' gone too, a 3-page contiguous run must be found. */
	gpu_buddy_free_list(&mm, &left, 0);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    3 * ps, ps, &allocated,
							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 3 * ps);

	/* The two successful contiguous allocations total 2 + 3 pages. */
	total = 0;
	list_for_each_entry(block, &allocated, link)
		total += gpu_buddy_block_size(&mm, block);

	KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_pathological(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block;
	const int max_order = 3;
	unsigned long flags = 0;
	int order, top;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);	/* one block of each order, largest first */
	LIST_HEAD(holes);	/* the single leftover page per iteration */
	LIST_HEAD(tmp);

	/*
	 * Create a pot-sized mm, then allocate one of each possible
	 * order within. This should leave the mm with exactly one
	 * page left. Free the largest block, then whittle down again.
	 * Eventually we will have a fully 50% fragmented mm.
	 */

	mm_size = SZ_4K << max_order;
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	for (top = max_order; top; top--) {
		/* Make room by freeing the largest allocated block */
		block = list_first_entry_or_null(&blocks, typeof(*block), link);
		if (block) {
			list_del(&block->link);
			gpu_buddy_free_block(&mm, block);
		}

		/* One allocation of each order below 'top', largest first. */
		for (order = top; order--;) {
			size = get_size(order, mm.chunk_size);
			KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start,
									    mm_size, size, size,
									    &tmp, flags),
					       "buddy_alloc hit -ENOMEM with order=%d, top=%d\n",
					       order, top);

			block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
			KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

			list_move_tail(&block->link, &blocks);
		}

		/* There should be one final page for this sub-allocation */
		size = get_size(0, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM for hole\n");

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &holes);

		/* The mm is now full, so an order-'top' request must fail. */
		size = get_size(top, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
				      "buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!",
				      top, max_order);
	}

	/* Release the single-page holes, leaving the mm 50% fragmented. */
	gpu_buddy_free_list(&mm, &holes, 0);

	/* Nothing larger than blocks of chunk_size now available */
	for (order = 1; order <= max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size,
								   &tmp, flags),
				      "buddy_alloc unexpectedly succeeded at order %d, it should be full!",
				      order);
	}

	/*
	 * NOTE(review): 'holes' was presumably emptied by the free_list call
	 * above, which would make this splice a no-op -- confirm against the
	 * gpu_buddy_free_list implementation.
	 */
	list_splice_tail(&holes, &blocks);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_pessimistic(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block, *bn;
	const unsigned int max_order = 16;
	unsigned long flags = 0;
	struct gpu_buddy mm;
	unsigned int order;
	LIST_HEAD(blocks);
	LIST_HEAD(tmp);

	/*
	 * Create a pot-sized mm, then allocate one of each possible
	 * order within. This should leave the mm with exactly one
	 * page left.
	 */

	mm_size = SZ_4K << max_order;
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	/* Smallest first: one block of every order except max_order. */
	for (order = 0; order < max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM with order=%d\n",
				       order);

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &blocks);
	}

	/* And now the last remaining block available */
	size = get_size(0, mm.chunk_size);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
							    size, size, &tmp, flags),
			       "buddy_alloc hit -ENOMEM on final alloc\n");

	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

	list_move_tail(&block->link, &blocks);

	/* Should be completely full!
	 */
	/* Every remaining order must fail -- the mm is completely full. */
	for (order = max_order; order--;) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
				      "buddy_alloc unexpectedly succeeded, it should be full!");
	}

	/* Drop the final order-0 page, which was appended to 'blocks' last. */
	block = list_last_entry(&blocks, typeof(*block), link);
	list_del(&block->link);
	gpu_buddy_free_block(&mm, block);

	/* As we free in increasing size, we make available larger blocks */
	order = 1;
	list_for_each_entry_safe(block, bn, &blocks, link) {
		list_del(&block->link);
		gpu_buddy_free_block(&mm, block);

		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM with order=%d\n",
				       order);

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		/* Give the probe block straight back so merging can continue. */
		list_del(&block->link);
		gpu_buddy_free_block(&mm, block);
		order++;
	}

	/* To confirm, now the whole mm should be available */
	size = get_size(max_order, mm.chunk_size);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
							    size, size, &tmp, flags),
			       "buddy_alloc (realloc) hit -ENOMEM with order=%d\n",
			       max_order);

	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

	list_del(&block->link);
	gpu_buddy_free_block(&mm, block);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_optimistic(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block;
	unsigned long flags = 0;
	const int max_order = 16;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	LIST_HEAD(tmp);
	int order;

	/*
	 * Create a mm with one block of each order available, and
	 * try to allocate them all.
	 */

	/* Sum of one block per order 0..max_order: (2^(max_order+1) - 1) pages. */
	mm_size = SZ_4K * ((1 << (max_order + 1)) - 1);

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	for (order = 0; order <= max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM with order=%d\n",
				       order);

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &blocks);
	}

	/* Should be completely full! */
	size = get_size(0, mm.chunk_size);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
							   size, size, &tmp, flags),
			      "buddy_alloc unexpectedly succeeded, it should be full!");

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_limit(struct kunit *test)
{
	u64 size = U64_MAX, start = 0;
	struct gpu_buddy_block *block;
	unsigned long flags = 0;
	LIST_HEAD(allocated);
	struct gpu_buddy mm;

	/* A U64_MAX-sized mm must clamp its max_order to GPU_BUDDY_MAX_ORDER. */
	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, size, SZ_4K));

	KUNIT_EXPECT_EQ_MSG(test, mm.max_order, GPU_BUDDY_MAX_ORDER,
			    "mm.max_order(%d) != %d\n", mm.max_order,
			    GPU_BUDDY_MAX_ORDER);

	/* A single allocation of the largest possible block must succeed. */
	size = mm.chunk_size << mm.max_order;
	KUNIT_EXPECT_FALSE(test, gpu_buddy_alloc_blocks(&mm, start, size, size,
							mm.chunk_size, &allocated, flags));

	block = list_first_entry_or_null(&allocated, struct gpu_buddy_block, link);
	KUNIT_EXPECT_TRUE(test, block);

	KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_order(block), mm.max_order,
			    "block order(%d) != %d\n",
			    gpu_buddy_block_order(block), mm.max_order);

	/* ...and its size must match order max_order exactly. */
	KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_size(&mm, block),
			    BIT_ULL(mm.max_order) * mm.chunk_size,
			    "block size(%llu) != %llu\n",
			    gpu_buddy_block_size(&mm, block),
			    BIT_ULL(mm.max_order) * mm.chunk_size);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

/*
 * Request a size larger than any single max_order block and check which
 * flag combinations are satisfied (multi-block) versus rejected with -EINVAL.
 */
static void gpu_test_buddy_alloc_exceeds_max_order(struct kunit *test)
{
	u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	int err;

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	/* CONTIGUOUS allocation should succeed via try_harder fallback */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, size,
							    SZ_4K, &blocks,
							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			       "buddy_alloc hit an error size=%llu\n", size);
	gpu_buddy_free_list(&mm, &blocks, 0);

	/* Non-CONTIGUOUS with large min_block_size should return -EINVAL */
	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0);
	KUNIT_EXPECT_EQ(test, err, -EINVAL);

	/* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */
	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks,
				     GPU_BUDDY_RANGE_ALLOCATION);
	KUNIT_EXPECT_EQ(test, err, -EINVAL);

	/* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */
	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks,
				     GPU_BUDDY_CONTIGUOUS_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION);
	KUNIT_EXPECT_EQ(test, err, -EINVAL);

	gpu_buddy_fini(&mm);
}

/* Pick a non-zero seed once and log it so failing runs can be reproduced. */
static int gpu_buddy_suite_init(struct kunit_suite *suite)
{
	while (!random_seed)
		random_seed = get_random_u32();

	kunit_info(suite, "Testing GPU buddy manager, with random_seed=0x%x\n",
		   random_seed);

	return 0;
}

static struct kunit_case gpu_buddy_tests[] = {
	KUNIT_CASE(gpu_test_buddy_alloc_limit),
	KUNIT_CASE(gpu_test_buddy_alloc_optimistic),
	KUNIT_CASE(gpu_test_buddy_alloc_pessimistic),
	KUNIT_CASE(gpu_test_buddy_alloc_pathological),
	KUNIT_CASE(gpu_test_buddy_alloc_contiguous),
	KUNIT_CASE(gpu_test_buddy_alloc_clear),
	KUNIT_CASE(gpu_test_buddy_alloc_range),
	KUNIT_CASE(gpu_test_buddy_alloc_range_bias),
	KUNIT_CASE_SLOW(gpu_test_buddy_fragmentation_performance),
	KUNIT_CASE(gpu_test_buddy_alloc_exceeds_max_order),
	KUNIT_CASE(gpu_test_buddy_offset_aligned_allocation),
	KUNIT_CASE(gpu_test_buddy_subtree_offset_alignment_stress),
	{}
};

static struct kunit_suite gpu_buddy_test_suite = {
	.name = "gpu_buddy",
	.suite_init = gpu_buddy_suite_init,
	.test_cases = gpu_buddy_tests,
};

kunit_test_suite(gpu_buddy_test_suite);

MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Kunit test for gpu_buddy functions");
MODULE_LICENSE("GPL");