1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2019 Intel Corporation 4 * Copyright © 2022 Maíra Canal <mairacanal@riseup.net> 5 */ 6 7 #include <kunit/test.h> 8 9 #include <linux/prime_numbers.h> 10 #include <linux/sched/signal.h> 11 #include <linux/sizes.h> 12 13 #include <linux/gpu_buddy.h> 14 15 #include "gpu_random.h" 16 17 static unsigned int random_seed; 18 19 static inline u64 get_size(int order, u64 chunk_size) 20 { 21 return (1 << order) * chunk_size; 22 } 23 24 static void gpu_test_buddy_subtree_offset_alignment_stress(struct kunit *test) 25 { 26 struct gpu_buddy_block *block; 27 struct rb_node *node = NULL; 28 const u64 mm_size = SZ_2M; 29 const u64 alignments[] = { 30 SZ_1M, 31 SZ_512K, 32 SZ_256K, 33 SZ_128K, 34 SZ_64K, 35 SZ_32K, 36 SZ_16K, 37 SZ_8K, 38 }; 39 struct list_head allocated[ARRAY_SIZE(alignments)]; 40 unsigned int i, max_subtree_align = 0; 41 int ret, tree, order; 42 struct gpu_buddy mm; 43 44 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K), 45 "buddy_init failed\n"); 46 47 for (i = 0; i < ARRAY_SIZE(allocated); i++) 48 INIT_LIST_HEAD(&allocated[i]); 49 50 /* 51 * Exercise subtree_max_alignment tracking by allocating blocks with descending 52 * alignment constraints and freeing them in reverse order. This verifies that 53 * free-tree augmentation correctly propagates the maximum offset alignment 54 * present in each subtree at every stage. 
	 */

	for (i = 0; i < ARRAY_SIZE(alignments); i++) {
		struct gpu_buddy_block *root = NULL;
		unsigned int expected;
		u64 align;

		align = alignments[i];
		/*
		 * Expected subtree_max_alignment encoding for this pass.
		 * NOTE(review): assumes subtree_max_alignment stores
		 * ilog2(alignment) - 1 — confirm against the gpu_buddy
		 * implementation.
		 */
		expected = ilog2(align) - 1;

		/* Fill the space with blocks at this alignment until -ENOSPC */
		for (;;) {
			ret = gpu_buddy_alloc_blocks(&mm,
						     0, mm_size,
						     SZ_4K, align,
						     &allocated[i],
						     0);
			if (ret)
				break;

			block = list_last_entry(&allocated[i],
						struct gpu_buddy_block,
						link);
			KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), align));
		}

		/* Find the root of the highest-order non-empty free tree */
		for (order = mm.max_order; order >= 0 && !root; order--) {
			for (tree = 0; tree < 2; tree++) {
				node = mm.free_trees[tree][order].rb_node;
				if (node) {
					root = container_of(node,
							    struct gpu_buddy_block,
							    rb);
					break;
				}
			}
		}

		KUNIT_ASSERT_NOT_NULL(test, root);
		KUNIT_EXPECT_EQ(test, root->subtree_max_alignment, expected);
	}

	/* Free in reverse order, re-checking the augmented maxima each step */
	for (i = ARRAY_SIZE(alignments); i-- > 0; ) {
		gpu_buddy_free_list(&mm, &allocated[i], 0);

		/* max_subtree_align accumulates monotonically across iterations */
		for (order = 0; order <= mm.max_order; order++) {
			for (tree = 0; tree < 2; tree++) {
				node = mm.free_trees[tree][order].rb_node;
				if (!node)
					continue;

				block = container_of(node, struct gpu_buddy_block, rb);
				max_subtree_align = max(max_subtree_align,
							block->subtree_max_alignment);
			}
		}

		KUNIT_EXPECT_GE(test, max_subtree_align, ilog2(alignments[i]));
	}

	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_offset_aligned_allocation(struct kunit *test)
{
	struct gpu_buddy_block *block, *tmp;
	int num_blocks, i, count = 0;
	LIST_HEAD(allocated);
	struct gpu_buddy mm;
	u64 mm_size = SZ_4M;
	LIST_HEAD(freed);

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	/* One allocation per 256K-aligned slot in the 4M space */
	num_blocks = mm_size / SZ_256K;
	/*
	 * Allocate multiple sizes under a fixed offset alignment.
	 * Ensures alignment handling is independent of allocation size and
	 * exercises subtree max-alignment pruning for small requests.
	 */
	/* Pass 1: 8K blocks, each forced to a 256K-aligned offset */
	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_256K,
								    &allocated, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_8K);

	list_for_each_entry(block, &allocated, link) {
		/* Ensure the allocated block uses the expected 8 KB size */
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_size(&mm, block), SZ_8K);
		/* Ensure the block starts at a 256 KB-aligned offset for proper alignment */
		KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), SZ_256K));
	}
	gpu_buddy_free_list(&mm, &allocated, 0);

	/* Pass 2: same slots, 16K blocks, same 256K alignment constraint */
	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K,
								    &allocated, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_16K);

	list_for_each_entry(block, &allocated, link) {
		/* Ensure the allocated block uses the expected 16 KB size */
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_size(&mm, block), SZ_16K);
		/* Ensure the block starts at a 256 KB-aligned offset for proper alignment */
		KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), SZ_256K));
	}

	/*
	 * Free alternating aligned blocks to introduce fragmentation.
	 * Ensures offset-aligned allocations remain valid after frees and
	 * verifies subtree max-alignment metadata is correctly maintained.
164 */ 165 list_for_each_entry_safe(block, tmp, &allocated, link) { 166 if (count % 2 == 0) 167 list_move_tail(&block->link, &freed); 168 count++; 169 } 170 gpu_buddy_free_list(&mm, &freed, 0); 171 172 for (i = 0; i < num_blocks / 2; i++) 173 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K, 174 &allocated, 0), 175 "buddy_alloc hit an error size=%u\n", SZ_16K); 176 177 /* 178 * Allocate with offset alignment after all slots are used; must fail. 179 * Confirms that no aligned offsets remain. 180 */ 181 KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K, 182 &allocated, 0), 183 "buddy_alloc hit an error size=%u\n", SZ_16K); 184 gpu_buddy_free_list(&mm, &allocated, 0); 185 gpu_buddy_fini(&mm); 186 } 187 188 static void gpu_test_buddy_fragmentation_performance(struct kunit *test) 189 { 190 struct gpu_buddy_block *block, *tmp; 191 int num_blocks, i, ret, count = 0; 192 LIST_HEAD(allocated_blocks); 193 unsigned long elapsed_ms; 194 LIST_HEAD(reverse_list); 195 LIST_HEAD(test_blocks); 196 LIST_HEAD(clear_list); 197 LIST_HEAD(dirty_list); 198 LIST_HEAD(free_list); 199 struct gpu_buddy mm; 200 u64 mm_size = SZ_4G; 201 ktime_t start, end; 202 203 /* 204 * Allocation under severe fragmentation 205 * 206 * Create severe fragmentation by allocating the entire 4 GiB address space 207 * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern 208 * leaves many scattered holes. Split the allocations into two groups and 209 * return them with different flags to block coalescing, then repeatedly 210 * allocate and free 64 KiB blocks while timing the loop. This stresses how 211 * quickly the allocator can satisfy larger, aligned requests from a pool of 212 * highly fragmented space. 
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	num_blocks = mm_size / SZ_64K;

	start = ktime_get();
	/* Allocate with maximum fragmentation - 8K blocks with 64K alignment */
	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
								    &allocated_blocks, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_8K);

	/* Interleave the two groups (indices 0,3 vs 1,2 mod 4) to defeat merging */
	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
		if (count % 4 == 0 || count % 4 == 3)
			list_move_tail(&block->link, &clear_list);
		else
			list_move_tail(&block->link, &dirty_list);
		count++;
	}

	/* Free with different flags to ensure no coalescing */
	gpu_buddy_free_list(&mm, &clear_list, GPU_BUDDY_CLEARED);
	gpu_buddy_free_list(&mm, &dirty_list, 0);

	/* Timed phase: satisfy 64K aligned requests from the fragmented pool */
	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
								    &test_blocks, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_64K);
	gpu_buddy_free_list(&mm, &test_blocks, 0);

	end = ktime_get();
	elapsed_ms = ktime_to_ms(ktime_sub(end, start));

	kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms);

	gpu_buddy_fini(&mm);

	/*
	 * Reverse free order under fragmentation
	 *
	 * Construct a fragmented 4 GiB space by allocating every 8 KiB block with
	 * 64 KiB alignment, creating a dense scatter of small regions. Half of the
	 * blocks are selectively freed to form sparse gaps, while the remaining
	 * allocations are preserved, reordered in reverse, and released back with
	 * the cleared flag. This models a pathological reverse-ordered free pattern
	 * and measures how quickly the allocator can merge and reclaim space when
	 * deallocation occurs in the opposite order of allocation, exposing the
	 * cost difference between a linear freelist scan and an ordered tree lookup.
262 */ 263 ret = gpu_buddy_init(&mm, mm_size, SZ_4K); 264 KUNIT_ASSERT_EQ(test, ret, 0); 265 266 start = ktime_get(); 267 /* Allocate maximum fragmentation */ 268 for (i = 0; i < num_blocks; i++) 269 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K, 270 &allocated_blocks, 0), 271 "buddy_alloc hit an error size=%u\n", SZ_8K); 272 273 list_for_each_entry_safe(block, tmp, &allocated_blocks, link) { 274 if (count % 2 == 0) 275 list_move_tail(&block->link, &free_list); 276 count++; 277 } 278 gpu_buddy_free_list(&mm, &free_list, GPU_BUDDY_CLEARED); 279 280 list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link) 281 list_move(&block->link, &reverse_list); 282 gpu_buddy_free_list(&mm, &reverse_list, GPU_BUDDY_CLEARED); 283 284 end = ktime_get(); 285 elapsed_ms = ktime_to_ms(ktime_sub(end, start)); 286 287 kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms); 288 289 gpu_buddy_fini(&mm); 290 } 291 292 static void gpu_test_buddy_alloc_range_bias(struct kunit *test) 293 { 294 u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem; 295 GPU_RND_STATE(prng, random_seed); 296 unsigned int i, count, *order; 297 struct gpu_buddy_block *block; 298 unsigned long flags; 299 struct gpu_buddy mm; 300 LIST_HEAD(allocated); 301 302 bias_size = SZ_1M; 303 ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size); 304 ps = max(SZ_4K, ps); 305 mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */ 306 307 kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps); 308 309 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps), 310 "buddy_init failed\n"); 311 312 count = mm_size / bias_size; 313 order = gpu_random_order(count, &prng); 314 KUNIT_EXPECT_TRUE(test, order); 315 316 /* 317 * Idea is to split the address space into uniform bias ranges, and then 318 * in some random order allocate within each bias, using various 319 * patterns within. 
This should detect if allocations leak out from a 320 * given bias, for example. 321 */ 322 323 for (i = 0; i < count; i++) { 324 LIST_HEAD(tmp); 325 u32 size; 326 327 bias_start = order[i] * bias_size; 328 bias_end = bias_start + bias_size; 329 bias_rem = bias_size; 330 331 /* internal round_up too big */ 332 KUNIT_ASSERT_TRUE_MSG(test, 333 gpu_buddy_alloc_blocks(&mm, bias_start, 334 bias_end, bias_size + ps, bias_size, 335 &allocated, 336 GPU_BUDDY_RANGE_ALLOCATION), 337 "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", 338 bias_start, bias_end, bias_size, bias_size); 339 340 /* size too big */ 341 KUNIT_ASSERT_TRUE_MSG(test, 342 gpu_buddy_alloc_blocks(&mm, bias_start, 343 bias_end, bias_size + ps, ps, 344 &allocated, 345 GPU_BUDDY_RANGE_ALLOCATION), 346 "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", 347 bias_start, bias_end, bias_size + ps, ps); 348 349 /* bias range too small for size */ 350 KUNIT_ASSERT_TRUE_MSG(test, 351 gpu_buddy_alloc_blocks(&mm, bias_start + ps, 352 bias_end, bias_size, ps, 353 &allocated, 354 GPU_BUDDY_RANGE_ALLOCATION), 355 "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", 356 bias_start + ps, bias_end, bias_size, ps); 357 358 /* bias misaligned */ 359 KUNIT_ASSERT_TRUE_MSG(test, 360 gpu_buddy_alloc_blocks(&mm, bias_start + ps, 361 bias_end - ps, 362 bias_size >> 1, bias_size >> 1, 363 &allocated, 364 GPU_BUDDY_RANGE_ALLOCATION), 365 "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n", 366 bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1); 367 368 /* single big page */ 369 KUNIT_ASSERT_FALSE_MSG(test, 370 gpu_buddy_alloc_blocks(&mm, bias_start, 371 bias_end, bias_size, bias_size, 372 &tmp, 373 GPU_BUDDY_RANGE_ALLOCATION), 374 "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n", 375 bias_start, bias_end, bias_size, bias_size); 376 gpu_buddy_free_list(&mm, &tmp, 0); 377 378 /* single page with internal round_up */ 379 KUNIT_ASSERT_FALSE_MSG(test, 380 
gpu_buddy_alloc_blocks(&mm, bias_start, 381 bias_end, ps, bias_size, 382 &tmp, 383 GPU_BUDDY_RANGE_ALLOCATION), 384 "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", 385 bias_start, bias_end, ps, bias_size); 386 gpu_buddy_free_list(&mm, &tmp, 0); 387 388 /* random size within */ 389 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); 390 if (size) 391 KUNIT_ASSERT_FALSE_MSG(test, 392 gpu_buddy_alloc_blocks(&mm, bias_start, 393 bias_end, size, ps, 394 &tmp, 395 GPU_BUDDY_RANGE_ALLOCATION), 396 "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", 397 bias_start, bias_end, size, ps); 398 399 bias_rem -= size; 400 /* too big for current avail */ 401 KUNIT_ASSERT_TRUE_MSG(test, 402 gpu_buddy_alloc_blocks(&mm, bias_start, 403 bias_end, bias_rem + ps, ps, 404 &allocated, 405 GPU_BUDDY_RANGE_ALLOCATION), 406 "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", 407 bias_start, bias_end, bias_rem + ps, ps); 408 409 if (bias_rem) { 410 /* random fill of the remainder */ 411 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); 412 size = max(size, ps); 413 414 KUNIT_ASSERT_FALSE_MSG(test, 415 gpu_buddy_alloc_blocks(&mm, bias_start, 416 bias_end, size, ps, 417 &allocated, 418 GPU_BUDDY_RANGE_ALLOCATION), 419 "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", 420 bias_start, bias_end, size, ps); 421 /* 422 * Intentionally allow some space to be left 423 * unallocated, and ideally not always on the bias 424 * boundaries. 425 */ 426 gpu_buddy_free_list(&mm, &tmp, 0); 427 } else { 428 list_splice_tail(&tmp, &allocated); 429 } 430 } 431 432 kfree(order); 433 gpu_buddy_free_list(&mm, &allocated, 0); 434 gpu_buddy_fini(&mm); 435 436 /* 437 * Something more free-form. Idea is to pick a random starting bias 438 * range within the address space and then start filling it up. Also 439 * randomly grow the bias range in both directions as we go along. 
	 * This
	 * should give us bias start/end which is not always uniform like above,
	 * and in some cases will require the allocator to jump over already
	 * allocated nodes in the middle of the address space.
	 */

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
			       "buddy_init failed\n");

	/* Pick a random ps-aligned [bias_start, bias_end) of at least one page */
	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
	bias_end = max(bias_end, bias_start + ps);
	bias_rem = bias_end - bias_start;

	do {
		u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);

		KUNIT_ASSERT_FALSE_MSG(test,
				       gpu_buddy_alloc_blocks(&mm, bias_start,
							      bias_end, size, ps,
							      &allocated,
							      GPU_BUDDY_RANGE_ALLOCATION),
				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
				       bias_start, bias_end, size, ps);
		bias_rem -= size;

		/*
		 * Try to randomly grow the bias range in both directions, or
		 * only one, or perhaps don't grow at all.
		 */
		do {
			u32 old_bias_start = bias_start;
			u32 old_bias_end = bias_end;

			/* bias_start is ps-aligned, so round_up() cannot underflow it */
			if (bias_start)
				bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps);
			if (bias_end != mm_size)
				bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps);

			bias_rem += old_bias_start - bias_start;
			bias_rem += bias_end - old_bias_end;
			/* Keep growing until there is room again, or the whole space is covered */
		} while (!bias_rem && (bias_start || bias_end != mm_size));
	} while (bias_rem);

	/* The loop above only terminates once the bias covers the full space */
	KUNIT_ASSERT_EQ(test, bias_start, 0);
	KUNIT_ASSERT_EQ(test, bias_end, mm_size);
	KUNIT_ASSERT_TRUE_MSG(test,
			      gpu_buddy_alloc_blocks(&mm, bias_start, bias_end,
						     ps, ps,
						     &allocated,
						     GPU_BUDDY_RANGE_ALLOCATION),
			      "buddy_alloc passed with bias(%x-%x), size=%u\n",
			      bias_start, bias_end, ps);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Allocate cleared blocks in the bias range when the GPU buddy's clear avail is
	 * zero. This will validate the bias range allocation in scenarios like system boot
	 * when no cleared blocks are available and exercise the fallback path too. The resulting
	 * blocks should always be dirty.
	 */

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
			       "buddy_init failed\n");

	/* Fresh mm: no block has ever been freed as cleared, so clear avail is 0 */
	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
	bias_end = max(bias_end, bias_start + ps);
	bias_rem = bias_end - bias_start;

	flags = GPU_BUDDY_CLEAR_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION;
	size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);

	KUNIT_ASSERT_FALSE_MSG(test,
			       gpu_buddy_alloc_blocks(&mm, bias_start,
						      bias_end, size, ps,
						      &allocated,
						      flags),
			       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
			       bias_start, bias_end, size, ps);

	/* The fallback must hand back dirty blocks — nothing cleared exists */
	list_for_each_entry(block, &allocated, link)
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_clear(struct kunit *test)
{
	unsigned long n_pages, total, i = 0;
	const unsigned long ps = SZ_4K;
	struct gpu_buddy_block *block;
	const int max_order = 12;
	LIST_HEAD(allocated);
	struct gpu_buddy mm;
	unsigned int order;
	u32 mm_size, size;
	LIST_HEAD(dirty);
	LIST_HEAD(clean);

	mm_size = SZ_4K << max_order;
	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	/*
	 * Idea is to allocate and free some random portion of the address space,
	 * returning those pages as non-dirty and randomly alternate between
	 * requesting dirty and non-dirty pages (not going over the limit
	 * we freed as non-dirty), putting that into two separate lists.
	 * Loop over both lists at the end checking that the dirty list
	 * is indeed all dirty pages and vice versa. Free it all again,
	 * keeping the dirty/clear status.
	 */
	/* Seed the cleared pool: allocate 5 pages and return them as cleared */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    5 * ps, ps, &allocated,
							    GPU_BUDDY_TOPDOWN_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 5 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);

	/* 10 single-page allocations: 5 dirty + 5 clear, within the 5 freed above */
	n_pages = 10;
	do {
		unsigned long flags;
		struct list_head *list;
		int slot = i % 2;

		if (slot == 0) {
			list = &dirty;
			flags = 0;
		} else {
			list = &clean;
			flags = GPU_BUDDY_CLEAR_ALLOCATION;
		}

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    ps, ps, list,
								    flags),
				       "buddy_alloc hit an error size=%lu\n", ps);
	} while (++i < n_pages);

	list_for_each_entry(block, &clean, link)
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), true);

	list_for_each_entry(block, &dirty, link)
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);

	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);

	/*
	 * Trying to go over the clear limit for some allocation.
	 * The allocation should never fail with reasonable page-size.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    10 * ps, ps, &clean,
							    GPU_BUDDY_CLEAR_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 10 * ps);

	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
	gpu_buddy_free_list(&mm, &dirty, 0);
	gpu_buddy_fini(&mm);

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/*
	 * Create a new mm. Intentionally fragment the address space by creating
	 * two alternating lists. Free both lists, one as dirty the other as clean.
	 * Try to allocate double the previous size with matching min_page_size. The
	 * allocation should never fail as it calls the force_merge. Also check that
	 * the page is always dirty after force_merge. Free the page as dirty, then
	 * repeat the whole thing, increment the order until we hit the max_order.
	 */

	i = 0;
	n_pages = mm_size / ps;
	/* Fill the whole space, alternating pages between the two lists */
	do {
		struct list_head *list;
		int slot = i % 2;

		if (slot == 0)
			list = &dirty;
		else
			list = &clean;

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    ps, ps, list, 0),
				       "buddy_alloc hit an error size=%lu\n", ps);
	} while (++i < n_pages);

	/* Adjacent buddies now carry opposite clear state, blocking normal merges */
	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
	gpu_buddy_free_list(&mm, &dirty, 0);

	order = 1;
	do {
		size = SZ_4K << order;

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    size, size, &allocated,
								    GPU_BUDDY_CLEAR_ALLOCATION),
				       "buddy_alloc hit an error size=%u\n", size);
		total = 0;
		list_for_each_entry(block, &allocated, link) {
			/* force_merged blocks must come back dirty */
			if (size != mm_size)
				KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);
			total += gpu_buddy_block_size(&mm, block);
		}
		KUNIT_EXPECT_EQ(test, total, size);

		gpu_buddy_free_list(&mm, &allocated, 0);
	} while (++order <= max_order);

	gpu_buddy_fini(&mm);

	/*
	 * Create a new mm with a non power-of-two size. Allocate a random size from each
	 * root, free as cleared and then call fini. This will ensure the multi-root
	 * force merge during fini.
	 */
	/* Non-pot size: a max_order root plus a smaller (max_order - 2) root */
	mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2));

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
	/* Two allocations from the first root... */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
							    4 * ps, ps, &allocated,
							    GPU_BUDDY_RANGE_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 4 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
							    2 * ps, ps, &allocated,
							    GPU_BUDDY_CLEAR_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 2 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	/* ...and one from the second root, all returned as cleared before fini */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size,
							    ps, ps, &allocated,
							    GPU_BUDDY_RANGE_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_contiguous(struct kunit *test)
{
	const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
	unsigned long i, n_pages, total;
	struct gpu_buddy_block *block;
	struct gpu_buddy mm;
	LIST_HEAD(left);
	LIST_HEAD(middle);
	LIST_HEAD(right);
	LIST_HEAD(allocated);

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/*
	 * Idea is to fragment the address space by alternating block
	 * allocations between three different lists; one for left, middle and
	 * right. We can then free a list to simulate fragmentation. In
	 * particular we want to exercise the GPU_BUDDY_CONTIGUOUS_ALLOCATION,
	 * including the try_harder path.
	 */

	i = 0;
	n_pages = mm_size / ps;
	/* Round-robin every page across left/middle/right */
	do {
		struct list_head *list;
		int slot = i % 3;

		if (slot == 0)
			list = &left;
		else if (slot == 1)
			list = &middle;
		else
			list = &right;
		KUNIT_ASSERT_FALSE_MSG(test,
				       gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							      ps, ps, list, 0),
				       "buddy_alloc hit an error size=%lu\n",
				       ps);
	} while (++i < n_pages);

	/* Fully allocated: any contiguous request must fail */
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);

	/* Freeing only middle leaves single-page holes — still no 2- or 3-page run */
	gpu_buddy_free_list(&mm, &middle, 0);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   2 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 2 * ps);

	/* middle+right freed gives 2-page holes, still not 3 contiguous pages */
	gpu_buddy_free_list(&mm, &right, 0);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);
	/*
	 * At this point we should have enough contiguous space for 2 blocks,
	 * however they are never buddies (since we freed middle and right) so
	 * will require the try_harder logic to find them.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    2 * ps, ps, &allocated,
							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 2 * ps);

	/* With left freed too, a 3-page contiguous run must now exist */
	gpu_buddy_free_list(&mm, &left, 0);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    3 * ps, ps, &allocated,
							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 3 * ps);

	/* Exactly the 2-page and 3-page contiguous allocations should be held */
	total = 0;
	list_for_each_entry(block, &allocated, link)
		total += gpu_buddy_block_size(&mm, block);

	KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_pathological(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block;
	const int max_order = 3;
	unsigned long flags = 0;
	int order, top;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	LIST_HEAD(holes);
	LIST_HEAD(tmp);

	/*
	 * Create a pot-sized mm, then allocate one of each possible
	 * order within. This should leave the mm with exactly one
	 * page left. Free the largest block, then whittle down again.
	 * Eventually we will have a fully 50% fragmented mm.
	 */

	mm_size = SZ_4K << max_order;
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	for (top = max_order; top; top--) {
		/* Make room by freeing the largest allocated block */
		block = list_first_entry_or_null(&blocks, typeof(*block), link);
		if (block) {
			list_del(&block->link);
			gpu_buddy_free_block(&mm, block);
		}

		/* Allocate one block of each order below top, largest first */
		for (order = top; order--;) {
			size = get_size(order, mm.chunk_size);
			KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start,
									    mm_size, size, size,
									    &tmp, flags),
					       "buddy_alloc hit -ENOMEM with order=%d, top=%d\n",
					       order, top);

			block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
			KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

			list_move_tail(&block->link, &blocks);
		}

		/* There should be one final page for this sub-allocation */
		size = get_size(0, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM for hole\n");

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &holes);

		/* A top-order block can no longer fit */
		size = get_size(top, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
				      "buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!",
				      top, max_order);
	}

	/* The hole pages alone cannot merge into anything larger */
	gpu_buddy_free_list(&mm, &holes, 0);

	/* Nothing larger than blocks of chunk_size now available */
	for (order = 1; order <= max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
				      "buddy_alloc unexpectedly succeeded at order %d, it should be full!",
				      order);
	}

	list_splice_tail(&holes, &blocks);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_pessimistic(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block, *bn;
	const unsigned int max_order = 16;
	unsigned long flags = 0;
	struct gpu_buddy mm;
	unsigned int order;
	LIST_HEAD(blocks);
	LIST_HEAD(tmp);

	/*
	 * Create a pot-sized mm, then allocate one of each possible
	 * order within. This should leave the mm with exactly one
	 * page left.
	 */

	mm_size = SZ_4K << max_order;
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	/* One block of each order 0..max_order-1; sizes sum to mm_size - one page */
	for (order = 0; order < max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM with order=%d\n",
				       order);

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &blocks);
	}

	/* And now the last remaining block available */
	size = get_size(0, mm.chunk_size);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
							    size, size, &tmp, flags),
			       "buddy_alloc hit -ENOMEM on final alloc\n");

	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

	list_move_tail(&block->link, &blocks);

	/* Should be completely full!
	 */
	for (order = max_order; order--;) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
				      "buddy_alloc unexpectedly succeeded, it should be full!");
	}

	/* Release the final order-0 page allocated above */
	block = list_last_entry(&blocks, typeof(*block), link);
	list_del(&block->link);
	gpu_buddy_free_block(&mm, block);

	/* As we free in increasing size, we make available larger blocks */
	order = 1;
	list_for_each_entry_safe(block, bn, &blocks, link) {
		list_del(&block->link);
		gpu_buddy_free_block(&mm, block);

		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM with order=%d\n",
				       order);

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		/* Return it immediately so the next (larger) order can merge */
		list_del(&block->link);
		gpu_buddy_free_block(&mm, block);
		order++;
	}

	/* To confirm, now the whole mm should be available */
	size = get_size(max_order, mm.chunk_size);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
							    size, size, &tmp, flags),
			       "buddy_alloc (realloc) hit -ENOMEM with order=%d\n",
			       max_order);

	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

	list_del(&block->link);
	gpu_buddy_free_block(&mm, block);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_optimistic(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block;
	unsigned long flags = 0;
	const int max_order = 16;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	LIST_HEAD(tmp);
	int order;

	/*
	 * Create a mm with one block of each order available,
and 956 * try to allocate them all. 957 */ 958 959 mm_size = SZ_4K * ((1 << (max_order + 1)) - 1); 960 961 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K), 962 "buddy_init failed\n"); 963 964 KUNIT_EXPECT_EQ(test, mm.max_order, max_order); 965 966 for (order = 0; order <= max_order; order++) { 967 size = get_size(order, mm.chunk_size); 968 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, 969 size, size, &tmp, flags), 970 "buddy_alloc hit -ENOMEM with order=%d\n", 971 order); 972 973 block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link); 974 KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n"); 975 976 list_move_tail(&block->link, &blocks); 977 } 978 979 /* Should be completely full! */ 980 size = get_size(0, mm.chunk_size); 981 KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size, 982 size, size, &tmp, flags), 983 "buddy_alloc unexpectedly succeeded, it should be full!"); 984 985 gpu_buddy_free_list(&mm, &blocks, 0); 986 gpu_buddy_fini(&mm); 987 } 988 989 static void gpu_test_buddy_alloc_limit(struct kunit *test) 990 { 991 u64 size = U64_MAX, start = 0; 992 struct gpu_buddy_block *block; 993 unsigned long flags = 0; 994 LIST_HEAD(allocated); 995 struct gpu_buddy mm; 996 997 KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, size, SZ_4K)); 998 999 KUNIT_EXPECT_EQ_MSG(test, mm.max_order, GPU_BUDDY_MAX_ORDER, 1000 "mm.max_order(%d) != %d\n", mm.max_order, 1001 GPU_BUDDY_MAX_ORDER); 1002 1003 size = mm.chunk_size << mm.max_order; 1004 KUNIT_EXPECT_FALSE(test, gpu_buddy_alloc_blocks(&mm, start, size, size, 1005 mm.chunk_size, &allocated, flags)); 1006 1007 block = list_first_entry_or_null(&allocated, struct gpu_buddy_block, link); 1008 KUNIT_EXPECT_TRUE(test, block); 1009 1010 KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_order(block), mm.max_order, 1011 "block order(%d) != %d\n", 1012 gpu_buddy_block_order(block), mm.max_order); 1013 1014 KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_size(&mm, 
block), 1015 BIT_ULL(mm.max_order) * mm.chunk_size, 1016 "block size(%llu) != %llu\n", 1017 gpu_buddy_block_size(&mm, block), 1018 BIT_ULL(mm.max_order) * mm.chunk_size); 1019 1020 gpu_buddy_free_list(&mm, &allocated, 0); 1021 gpu_buddy_fini(&mm); 1022 } 1023 1024 static void gpu_test_buddy_alloc_exceeds_max_order(struct kunit *test) 1025 { 1026 u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G; 1027 struct gpu_buddy mm; 1028 LIST_HEAD(blocks); 1029 int err; 1030 1031 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K), 1032 "buddy_init failed\n"); 1033 1034 /* CONTIGUOUS allocation should succeed via try_harder fallback */ 1035 KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, 1036 SZ_4K, &blocks, 1037 GPU_BUDDY_CONTIGUOUS_ALLOCATION), 1038 "buddy_alloc hit an error size=%llu\n", size); 1039 gpu_buddy_free_list(&mm, &blocks, 0); 1040 1041 /* Non-CONTIGUOUS with large min_block_size should return -EINVAL */ 1042 err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0); 1043 KUNIT_EXPECT_EQ(test, err, -EINVAL); 1044 1045 /* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */ 1046 err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 1047 GPU_BUDDY_RANGE_ALLOCATION); 1048 KUNIT_EXPECT_EQ(test, err, -EINVAL); 1049 1050 /* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */ 1051 err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks, 1052 GPU_BUDDY_CONTIGUOUS_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION); 1053 KUNIT_EXPECT_EQ(test, err, -EINVAL); 1054 1055 gpu_buddy_fini(&mm); 1056 } 1057 1058 static int gpu_buddy_suite_init(struct kunit_suite *suite) 1059 { 1060 while (!random_seed) 1061 random_seed = get_random_u32(); 1062 1063 kunit_info(suite, "Testing GPU buddy manager, with random_seed=0x%x\n", 1064 random_seed); 1065 1066 return 0; 1067 } 1068 1069 static struct kunit_case gpu_buddy_tests[] = { 1070 
KUNIT_CASE(gpu_test_buddy_alloc_limit), 1071 KUNIT_CASE(gpu_test_buddy_alloc_optimistic), 1072 KUNIT_CASE(gpu_test_buddy_alloc_pessimistic), 1073 KUNIT_CASE(gpu_test_buddy_alloc_pathological), 1074 KUNIT_CASE(gpu_test_buddy_alloc_contiguous), 1075 KUNIT_CASE(gpu_test_buddy_alloc_clear), 1076 KUNIT_CASE(gpu_test_buddy_alloc_range_bias), 1077 KUNIT_CASE_SLOW(gpu_test_buddy_fragmentation_performance), 1078 KUNIT_CASE(gpu_test_buddy_alloc_exceeds_max_order), 1079 KUNIT_CASE(gpu_test_buddy_offset_aligned_allocation), 1080 KUNIT_CASE(gpu_test_buddy_subtree_offset_alignment_stress), 1081 {} 1082 }; 1083 1084 static struct kunit_suite gpu_buddy_test_suite = { 1085 .name = "gpu_buddy", 1086 .suite_init = gpu_buddy_suite_init, 1087 .test_cases = gpu_buddy_tests, 1088 }; 1089 1090 kunit_test_suite(gpu_buddy_test_suite); 1091 1092 MODULE_AUTHOR("Intel Corporation"); 1093 MODULE_DESCRIPTION("Kunit test for gpu_buddy functions"); 1094 MODULE_LICENSE("GPL"); 1095