// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <linux/bug.h>
#include <linux/export.h>
#include <linux/kmemleak.h>
#include <linux/module.h>
#include <linux/sizes.h>

#include <linux/gpu_buddy.h>

/**
 * gpu_buddy_assert - assert a condition in the buddy allocator
 * @condition: condition expected to be true
 *
 * When CONFIG_KUNIT is enabled, evaluates @condition and, if false, triggers
 * a WARN_ON() and also calls kunit_fail_current_test() so that any running
 * kunit test is properly marked as failed. The stringified condition is
 * included in the failure message for easy identification.
 *
 * When CONFIG_KUNIT is not enabled, this reduces to WARN_ON() so production
 * builds retain the same warning semantics as before.
 */
#if IS_ENABLED(CONFIG_KUNIT)
#include <kunit/test-bug.h>
#define gpu_buddy_assert(condition) do { \
	if (WARN_ON(!(condition))) \
		kunit_fail_current_test("gpu_buddy_assert(" #condition ")"); \
} while (0)
#else
#define gpu_buddy_assert(condition) WARN_ON(!(condition))
#endif

static struct kmem_cache *slab_blocks;

static unsigned int
gpu_buddy_block_state(struct gpu_buddy_block *block)
{
	return block->header & GPU_BUDDY_HEADER_STATE;
}

static bool
gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
}

static bool
gpu_buddy_block_is_split(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
}

static unsigned int gpu_buddy_block_offset_alignment(struct gpu_buddy_block *block)
{
	u64 offset = gpu_buddy_block_offset(block);

	if (!offset)
		/*
		 * __ffs64(0) is undefined; offset 0 is maximally aligned, so
		 * return a value greater than any possible alignment.
		 */
		return 64 + 1;

	return __ffs64(offset);
}
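
/*
 * Worked example (illustrative): a block at offset 0x30000 has
 * __ffs64(0x30000) == 16, i.e. it is aligned to 2^16 (64 KiB) but not to
 * 2^17. The augmented rbtree declared below caches the maximum such
 * alignment per subtree, so aligned lookups can skip entire subtrees that
 * cannot satisfy a request.
 */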

RB_DECLARE_CALLBACKS_MAX(static, gpu_buddy_augment_cb,
			 struct gpu_buddy_block, rb,
			 unsigned int, subtree_max_alignment,
			 gpu_buddy_block_offset_alignment);

static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
					       struct gpu_buddy_block *parent,
					       unsigned int order,
					       u64 offset)
{
	struct gpu_buddy_block *block;

	BUG_ON(order > GPU_BUDDY_MAX_ORDER);

	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
	if (!block)
		return NULL;

	block->header = offset;
	block->header |= order;
	block->parent = parent;

	RB_CLEAR_NODE(&block->rb);

	BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
	return block;
}

static void gpu_block_free(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	kmem_cache_free(slab_blocks, block);
}

static enum gpu_buddy_free_tree
get_block_tree(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_is_clear(block) ?
	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
}

static struct gpu_buddy_block *
rbtree_get_free_block(const struct rb_node *node)
{
	return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
}

static struct gpu_buddy_block *
rbtree_last_free_block(struct rb_root *root)
{
	return rbtree_get_free_block(rb_last(root));
}

static bool rbtree_is_empty(struct rb_root *root)
{
	return RB_EMPTY_ROOT(root);
}

static void rbtree_insert(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block,
			  enum gpu_buddy_free_tree tree)
{
	struct rb_node **link, *parent = NULL;
	unsigned int block_alignment, order;
	struct gpu_buddy_block *node;
	struct rb_root *root;

	order = gpu_buddy_block_order(block);
	block_alignment = gpu_buddy_block_offset_alignment(block);

	root = &mm->free_trees[tree][order];
	link = &root->rb_node;

	while (*link) {
		parent = *link;
		node = rbtree_get_free_block(parent);
		/*
		 * Manual augmentation update during insertion traversal.
		 * Required because rb_insert_augmented() only calls the
		 * rotate callback during rotations. This ensures all
		 * ancestors on the insertion path have correct
		 * subtree_max_alignment values.
		 */
		if (node->subtree_max_alignment < block_alignment)
			node->subtree_max_alignment = block_alignment;

		if (gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node))
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	block->subtree_max_alignment = block_alignment;
	rb_link_node(&block->rb, parent, link);
	rb_insert_augmented(&block->rb, root, &gpu_buddy_augment_cb);
}

static void rbtree_remove(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block)
{
	unsigned int order = gpu_buddy_block_order(block);
	enum gpu_buddy_free_tree tree;
	struct rb_root *root;

	tree = get_block_tree(block);
	root = &mm->free_trees[tree][order];

	rb_erase_augmented(&block->rb, root, &gpu_buddy_augment_cb);
	RB_CLEAR_NODE(&block->rb);
}

static void clear_reset(struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_CLEAR;
}

static void mark_cleared(struct gpu_buddy_block *block)
{
	block->header |= GPU_BUDDY_HEADER_CLEAR;
}

static void mark_allocated(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_ALLOCATED;

	rbtree_remove(mm, block);
}

static void mark_free(struct gpu_buddy *mm,
		      struct gpu_buddy_block *block)
{
	enum gpu_buddy_free_tree tree;

	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_FREE;

	tree = get_block_tree(block);
	rbtree_insert(mm, block, tree);
}

static void mark_split(struct gpu_buddy *mm,
		       struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_SPLIT;

	rbtree_remove(mm, block);
}

static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
{
	return s1 <= e2 && e1 >= s2;
}

static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
{
	return s1 <= s2 && e1 >= e2;
}

static struct gpu_buddy_block *
__get_buddy(struct gpu_buddy_block *block)
{
	struct gpu_buddy_block *parent;

	parent = block->parent;
	if (!parent)
		return NULL;

	if (parent->left == block)
		return parent->right;

	return parent->left;
}
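
/*
 * Free one block and greedily merge upwards. A sketch of a single merge
 * step, assuming 4 KiB chunks: freeing the order-0 block at 0x1000 while
 * its buddy at 0x0000 is already free collapses both into a free order-1
 * parent at 0x0000. Without @force_merge, blocks whose clear states differ
 * are never merged, so cleared memory is not silently absorbed into a
 * dirty parent.
 */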
static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
				     struct gpu_buddy_block *block,
				     bool force_merge)
{
	struct gpu_buddy_block *parent;
	unsigned int order;

	while ((parent = block->parent)) {
		struct gpu_buddy_block *buddy;

		buddy = __get_buddy(block);

		if (!gpu_buddy_block_is_free(buddy))
			break;

		if (!force_merge) {
			/*
			 * Check the clear state of the block and its buddy,
			 * and stop merging if their states differ.
			 */
			if (gpu_buddy_block_is_clear(block) !=
			    gpu_buddy_block_is_clear(buddy))
				break;

			if (gpu_buddy_block_is_clear(block))
				mark_cleared(parent);
		}

		rbtree_remove(mm, buddy);
		if (force_merge && gpu_buddy_block_is_clear(buddy))
			mm->clear_avail -= gpu_buddy_block_size(mm, buddy);

		gpu_block_free(mm, block);
		gpu_block_free(mm, buddy);

		block = parent;
	}

	order = gpu_buddy_block_order(block);
	mark_free(mm, block);

	return order;
}

static int __force_merge(struct gpu_buddy *mm,
			 u64 start,
			 u64 end,
			 unsigned int min_order)
{
	unsigned int tree, order;
	int i;

	if (!min_order)
		return -ENOMEM;

	if (min_order > mm->max_order)
		return -EINVAL;

	for_each_free_tree(tree) {
		for (i = min_order - 1; i >= 0; i--) {
			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);

			while (iter) {
				struct gpu_buddy_block *block, *buddy;
				u64 block_start, block_end;

				block = rbtree_get_free_block(iter);
				iter = rb_prev(iter);

				if (!block || !block->parent)
					continue;

				block_start = gpu_buddy_block_offset(block);
				block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

				if (!contains(start, end, block_start, block_end))
					continue;

				buddy = __get_buddy(block);
				if (!gpu_buddy_block_is_free(buddy))
					continue;

				gpu_buddy_assert(gpu_buddy_block_is_clear(block) !=
						 gpu_buddy_block_is_clear(buddy));

				/*
				 * Advance to the next node when the current node
				 * is the buddy, as freeing the block will also
				 * remove its buddy from the tree.
				 */
				if (iter == &buddy->rb)
					iter = rb_prev(iter);

				rbtree_remove(mm, block);
				if (gpu_buddy_block_is_clear(block))
					mm->clear_avail -= gpu_buddy_block_size(mm, block);

				order = __gpu_buddy_free(mm, block, true);
				if (order >= min_order)
					return 0;
			}
		}
	}

	return -ENOMEM;
}

/**
 * gpu_buddy_init - init memory manager
 *
 * @mm: GPU buddy manager to initialize
 * @size: size in bytes to manage
 * @chunk_size: minimum page size in bytes for our allocations
 *
 * Initializes the memory manager and its resources.
 *
 * Returns:
 * 0 on success, error code on failure.
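 *
 * Example (an illustrative sketch; the 64 MiB pool and 4 KiB chunk size
 * are arbitrary values):
 *
 *	struct gpu_buddy mm;
 *	int err;
 *
 *	err = gpu_buddy_init(&mm, SZ_64M, SZ_4K);
 *	if (err)
 *		return err;
 *	...
 *	gpu_buddy_fini(&mm);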
 */
int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
{
	unsigned int i, j, root_count = 0;
	u64 offset = 0;

	if (size < chunk_size)
		return -EINVAL;

	if (chunk_size < SZ_4K)
		return -EINVAL;

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	size = round_down(size, chunk_size);

	mm->size = size;
	mm->avail = size;
	mm->clear_avail = 0;
	mm->chunk_size = chunk_size;
	mm->max_order = ilog2(size) - ilog2(chunk_size);

	BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);

	mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
				       sizeof(*mm->free_trees),
				       GFP_KERNEL);
	if (!mm->free_trees)
		return -ENOMEM;

	for_each_free_tree(i) {
		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
						  sizeof(struct rb_root),
						  GFP_KERNEL);
		if (!mm->free_trees[i])
			goto out_free_tree;

		for (j = 0; j <= mm->max_order; ++j)
			mm->free_trees[i][j] = RB_ROOT;
	}

	mm->n_roots = hweight64(size);

	mm->roots = kmalloc_array(mm->n_roots,
				  sizeof(struct gpu_buddy_block *),
				  GFP_KERNEL);
	if (!mm->roots)
		goto out_free_tree;

	/*
	 * Split into power-of-two blocks, in case we are given a size that is
	 * not itself a power-of-two.
	 */
	do {
		struct gpu_buddy_block *root;
		unsigned int order;
		u64 root_size;

		order = ilog2(size) - ilog2(chunk_size);
		root_size = chunk_size << order;

		root = gpu_block_alloc(mm, NULL, order, offset);
		if (!root)
			goto out_free_roots;

		mark_free(mm, root);

		BUG_ON(root_count > mm->max_order);
		BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);

		mm->roots[root_count] = root;

		offset += root_size;
		size -= root_size;
		root_count++;
	} while (size);

	return 0;

out_free_roots:
	while (root_count--)
		gpu_block_free(mm, mm->roots[root_count]);
	kfree(mm->roots);
out_free_tree:
	while (i--)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	return -ENOMEM;
}
EXPORT_SYMBOL(gpu_buddy_init);

/**
 * gpu_buddy_fini - tear down the memory manager
 *
 * @mm: GPU buddy manager to free
 *
 * Clean up memory manager resources and the freetrees.
 */
void gpu_buddy_fini(struct gpu_buddy *mm)
{
	u64 root_size, size, start;
	unsigned int order;
	int i;

	size = mm->size;

	for (i = 0; i < mm->n_roots; ++i) {
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		__force_merge(mm, start, start + size, order);

		gpu_buddy_assert(gpu_buddy_block_is_free(mm->roots[i]));

		gpu_block_free(mm, mm->roots[i]);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	gpu_buddy_assert(mm->avail == mm->size);

	for_each_free_tree(i)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	kfree(mm->roots);
}
EXPORT_SYMBOL(gpu_buddy_fini);
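
/*
 * Illustrative split step (assuming 4 KiB chunks): splitting a free order-4
 * block at offset 0x10000 yields two free order-3 halves at 0x10000 and
 * 0x18000 (0x10000 + (SZ_4K << 3)). A cleared parent hands its clear state
 * down to both halves before being marked split.
 */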
static int split_block(struct gpu_buddy *mm,
		       struct gpu_buddy_block *block)
{
	unsigned int block_order = gpu_buddy_block_order(block) - 1;
	u64 offset = gpu_buddy_block_offset(block);

	BUG_ON(!gpu_buddy_block_is_free(block));
	BUG_ON(!gpu_buddy_block_order(block));

	block->left = gpu_block_alloc(mm, block, block_order, offset);
	if (!block->left)
		return -ENOMEM;

	block->right = gpu_block_alloc(mm, block, block_order,
				       offset + (mm->chunk_size << block_order));
	if (!block->right) {
		gpu_block_free(mm, block->left);
		return -ENOMEM;
	}

	mark_split(mm, block);

	if (gpu_buddy_block_is_clear(block)) {
		mark_cleared(block->left);
		mark_cleared(block->right);
		clear_reset(block);
	}

	mark_free(mm, block->left);
	mark_free(mm, block->right);

	return 0;
}

/**
 * gpu_buddy_reset_clear - reset blocks clear state
 *
 * @mm: GPU buddy manager
 * @is_clear: blocks clear state
 *
 * Reset the clear state of each block in the freetree based on the
 * @is_clear value.
 */
void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
{
	enum gpu_buddy_free_tree src_tree, dst_tree;
	u64 root_size, size, start;
	unsigned int order;
	int i;

	size = mm->size;
	for (i = 0; i < mm->n_roots; ++i) {
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		__force_merge(mm, start, start + size, order);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
	dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	for (i = 0; i <= mm->max_order; ++i) {
		struct rb_root *root = &mm->free_trees[src_tree][i];
		struct gpu_buddy_block *block, *tmp;

		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
			rbtree_remove(mm, block);
			if (is_clear) {
				mark_cleared(block);
				mm->clear_avail += gpu_buddy_block_size(mm, block);
			} else {
				clear_reset(block);
				mm->clear_avail -= gpu_buddy_block_size(mm, block);
			}

			rbtree_insert(mm, block, dst_tree);
		}
	}
}
EXPORT_SYMBOL(gpu_buddy_reset_clear);

/**
 * gpu_buddy_free_block - free a block
 *
 * @mm: GPU buddy manager
 * @block: block to be freed
 */
void gpu_buddy_free_block(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block)
{
	BUG_ON(!gpu_buddy_block_is_allocated(block));
	mm->avail += gpu_buddy_block_size(mm, block);
	if (gpu_buddy_block_is_clear(block))
		mm->clear_avail += gpu_buddy_block_size(mm, block);

	__gpu_buddy_free(mm, block, false);
}
EXPORT_SYMBOL(gpu_buddy_free_block);
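
/*
 * Usage sketch (illustrative; @mm and @block are caller-owned): a driver
 * returns a single block once the hardware is done with it:
 *
 *	gpu_buddy_free_block(&mm, block);
 *
 * Callers typically free whole allocations at once with
 * gpu_buddy_free_list() below rather than dropping blocks one at a time.
 */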
static void __gpu_buddy_free_list(struct gpu_buddy *mm,
				  struct list_head *objects,
				  bool mark_clear,
				  bool mark_dirty)
{
	struct gpu_buddy_block *block, *on;

	gpu_buddy_assert(!(mark_dirty && mark_clear));

	list_for_each_entry_safe(block, on, objects, link) {
		if (mark_clear)
			mark_cleared(block);
		else if (mark_dirty)
			clear_reset(block);
		gpu_buddy_free_block(mm, block);
		cond_resched();
	}
	INIT_LIST_HEAD(objects);
}

static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
					 struct list_head *objects)
{
	/*
	 * Don't touch the clear/dirty bit, since allocation is still internal
	 * at this point. For example we might have just failed part of the
	 * allocation.
	 */
	__gpu_buddy_free_list(mm, objects, false, false);
}

/**
 * gpu_buddy_free_list - free blocks
 *
 * @mm: GPU buddy manager
 * @objects: input list head to free blocks
 * @flags: optional flags like GPU_BUDDY_CLEARED
 */
void gpu_buddy_free_list(struct gpu_buddy *mm,
			 struct list_head *objects,
			 unsigned int flags)
{
	bool mark_clear = flags & GPU_BUDDY_CLEARED;

	__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
}
EXPORT_SYMBOL(gpu_buddy_free_list);

static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
{
	bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;

	return needs_clear != gpu_buddy_block_is_clear(block);
}
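
/*
 * Range-biased allocation: a depth-first walk from the roots that only
 * descends into blocks overlapping [start, end), splitting until a free
 * block of the requested order inside the range is found. The first pass
 * skips blocks whose clear state does not match @flags; the @fallback pass
 * in __gpu_buddy_alloc_range_bias() below accepts either state.
 */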
static struct gpu_buddy_block *
__alloc_range_bias(struct gpu_buddy *mm,
		   u64 start, u64 end,
		   unsigned int order,
		   unsigned long flags,
		   bool fallback)
{
	u64 req_size = mm->chunk_size << order;
	struct gpu_buddy_block *block;
	struct gpu_buddy_block *buddy;
	LIST_HEAD(dfs);
	int err;
	int i;

	end = end - 1;

	for (i = 0; i < mm->n_roots; ++i)
		list_add_tail(&mm->roots[i]->tmp_link, &dfs);

	do {
		u64 block_start;
		u64 block_end;

		block = list_first_entry_or_null(&dfs,
						 struct gpu_buddy_block,
						 tmp_link);
		if (!block)
			break;

		list_del(&block->tmp_link);

		if (gpu_buddy_block_order(block) < order)
			continue;

		block_start = gpu_buddy_block_offset(block);
		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

		if (!overlaps(start, end, block_start, block_end))
			continue;

		if (gpu_buddy_block_is_allocated(block))
			continue;

		if (block_start < start || block_end > end) {
			u64 adjusted_start = max(block_start, start);
			u64 adjusted_end = min(block_end, end);

			if (round_down(adjusted_end + 1, req_size) <=
			    round_up(adjusted_start, req_size))
				continue;
		}

		if (!fallback && block_incompatible(block, flags))
			continue;

		if (contains(start, end, block_start, block_end) &&
		    order == gpu_buddy_block_order(block)) {
			/*
			 * Find the free block within the range.
			 */
			if (gpu_buddy_block_is_free(block))
				return block;

			continue;
		}

		if (!gpu_buddy_block_is_split(block)) {
			err = split_block(mm, block);
			if (unlikely(err))
				goto err_undo;
		}

		list_add(&block->right->tmp_link, &dfs);
		list_add(&block->left->tmp_link, &dfs);
	} while (1);

	return ERR_PTR(-ENOSPC);

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}

static struct gpu_buddy_block *
__gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
			     u64 start, u64 end,
			     unsigned int order,
			     unsigned long flags)
{
	struct gpu_buddy_block *block;
	bool fallback = false;

	block = __alloc_range_bias(mm, start, end, order,
				   flags, fallback);
	if (IS_ERR(block))
		return __alloc_range_bias(mm, start, end, order,
					  flags, !fallback);

	return block;
}

static struct gpu_buddy_block *
get_maxblock(struct gpu_buddy *mm,
	     unsigned int order,
	     enum gpu_buddy_free_tree tree)
{
	struct gpu_buddy_block *max_block = NULL, *block = NULL;
	struct rb_root *root;
	unsigned int i;

	for (i = order; i <= mm->max_order; ++i) {
		root = &mm->free_trees[tree][i];
		block = rbtree_last_free_block(root);
		if (!block)
			continue;

		if (!max_block) {
			max_block = block;
			continue;
		}

		if (gpu_buddy_block_offset(block) >
		    gpu_buddy_block_offset(max_block)) {
			max_block = block;
		}
	}

	return max_block;
}
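
/*
 * Note (illustrative): for GPU_BUDDY_TOPDOWN_ALLOCATION, get_maxblock()
 * above scans every order >= the requested one and picks the free block
 * with the highest offset, biasing placements towards the top of the
 * address space. The default path below simply takes the rightmost free
 * block of the smallest sufficient order.
 */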
static struct gpu_buddy_block *
alloc_from_freetree(struct gpu_buddy *mm,
		    unsigned int order,
		    unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	struct rb_root *root;
	enum gpu_buddy_free_tree tree;
	unsigned int tmp;
	int err;

	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
		block = get_maxblock(mm, order, tree);
		if (block)
			/* Store the obtained block order */
			tmp = gpu_buddy_block_order(block);
	} else {
		for (tmp = order; tmp <= mm->max_order; ++tmp) {
			/* Get RB tree root for this order and tree */
			root = &mm->free_trees[tree][tmp];
			block = rbtree_last_free_block(root);
			if (block)
				break;
		}
	}

	if (!block) {
		/* Try allocating from the other tree */
		tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
		       GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;

		for (tmp = order; tmp <= mm->max_order; ++tmp) {
			root = &mm->free_trees[tree][tmp];
			block = rbtree_last_free_block(root);
			if (block)
				break;
		}

		if (!block)
			return ERR_PTR(-ENOSPC);
	}

	BUG_ON(!gpu_buddy_block_is_free(block));

	while (tmp != order) {
		err = split_block(mm, block);
		if (unlikely(err))
			goto err_undo;

		block = block->right;
		tmp--;
	}
	return block;

err_undo:
	if (tmp != order)
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}

static bool
gpu_buddy_can_offset_align(u64 size, u64 min_block_size)
{
	return size < min_block_size && is_power_of_2(size);
}

static bool gpu_buddy_subtree_can_satisfy(struct rb_node *node,
					  unsigned int alignment)
{
	struct gpu_buddy_block *block;

	block = rbtree_get_free_block(node);
	return block->subtree_max_alignment >= alignment;
}
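
/*
 * Search sketch: each per-order free tree is keyed by offset, and every
 * node caches the best offset alignment in its subtree
 * (subtree_max_alignment). The walk below prefers the right (higher
 * offset) subtree, then the current node, then the left subtree, pruning
 * any subtree whose cached maximum cannot satisfy @alignment.
 */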
static struct gpu_buddy_block *
gpu_buddy_find_block_aligned(struct gpu_buddy *mm,
			     enum gpu_buddy_free_tree tree,
			     unsigned int order,
			     unsigned int alignment,
			     unsigned long flags)
{
	struct rb_root *root = &mm->free_trees[tree][order];
	struct rb_node *rb = root->rb_node;

	while (rb) {
		struct gpu_buddy_block *block = rbtree_get_free_block(rb);
		struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;

		if (right_node) {
			if (gpu_buddy_subtree_can_satisfy(right_node, alignment)) {
				rb = right_node;
				continue;
			}
		}

		if (gpu_buddy_block_offset_alignment(block) >= alignment)
			return block;

		if (left_node) {
			if (gpu_buddy_subtree_can_satisfy(left_node, alignment)) {
				rb = left_node;
				continue;
			}
		}

		break;
	}

	return NULL;
}

static struct gpu_buddy_block *
gpu_buddy_offset_aligned_allocation(struct gpu_buddy *mm,
				    u64 size,
				    u64 min_block_size,
				    unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	unsigned int order, tmp, alignment;
	struct gpu_buddy_block *buddy;
	enum gpu_buddy_free_tree tree;
	unsigned long pages;
	int err;

	alignment = ilog2(min_block_size);
	pages = size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;

	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	for (tmp = order; tmp <= mm->max_order; ++tmp) {
		block = gpu_buddy_find_block_aligned(mm, tree, tmp,
						     alignment, flags);
		if (!block) {
			tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
			       GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
			block = gpu_buddy_find_block_aligned(mm, tree, tmp,
							     alignment, flags);
		}

		if (block)
			break;
	}

	if (!block)
		return ERR_PTR(-ENOSPC);

	while (gpu_buddy_block_order(block) > order) {
		struct gpu_buddy_block *left, *right;

		err = split_block(mm, block);
		if (unlikely(err))
			goto err_undo;

		left = block->left;
		right = block->right;

		if (gpu_buddy_block_offset_alignment(right) >= alignment)
			block = right;
		else
			block = left;
	}

	return block;

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}
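
/*
 * Allocate every free chunk inside [start, start + size) from a prepared
 * DFS stack, splitting larger blocks as needed. On -ENOSPC, partial
 * progress is reported through @total_allocated_on_err so callers such as
 * __alloc_contig_try_harder() can extend the result leftwards instead of
 * starting over.
 */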
static int __alloc_range(struct gpu_buddy *mm,
			 struct list_head *dfs,
			 u64 start, u64 size,
			 struct list_head *blocks,
			 u64 *total_allocated_on_err)
{
	struct gpu_buddy_block *block;
	struct gpu_buddy_block *buddy;
	u64 total_allocated = 0;
	LIST_HEAD(allocated);
	u64 end;
	int err;

	end = start + size - 1;

	do {
		u64 block_start;
		u64 block_end;

		block = list_first_entry_or_null(dfs,
						 struct gpu_buddy_block,
						 tmp_link);
		if (!block)
			break;

		list_del(&block->tmp_link);

		block_start = gpu_buddy_block_offset(block);
		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

		if (!overlaps(start, end, block_start, block_end))
			continue;

		if (gpu_buddy_block_is_allocated(block)) {
			err = -ENOSPC;
			goto err_free;
		}

		if (contains(start, end, block_start, block_end)) {
			if (gpu_buddy_block_is_free(block)) {
				mark_allocated(mm, block);
				total_allocated += gpu_buddy_block_size(mm, block);
				mm->avail -= gpu_buddy_block_size(mm, block);
				if (gpu_buddy_block_is_clear(block))
					mm->clear_avail -= gpu_buddy_block_size(mm, block);
				list_add_tail(&block->link, &allocated);
				continue;
			} else if (!mm->clear_avail) {
				err = -ENOSPC;
				goto err_free;
			}
		}

		if (!gpu_buddy_block_is_split(block)) {
			err = split_block(mm, block);
			if (unlikely(err))
				goto err_undo;
		}

		list_add(&block->right->tmp_link, dfs);
		list_add(&block->left->tmp_link, dfs);
	} while (1);

	if (total_allocated < size) {
		err = -ENOSPC;
		goto err_free;
	}

	list_splice_tail(&allocated, blocks);

	return 0;

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);

err_free:
	if (err == -ENOSPC && total_allocated_on_err) {
		list_splice_tail(&allocated, blocks);
		*total_allocated_on_err = total_allocated;
	} else {
		gpu_buddy_free_list_internal(mm, &allocated);
	}

	return err;
}

static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
				   u64 start,
				   u64 size,
				   u64 *total_allocated_on_err,
				   struct list_head *blocks)
{
	LIST_HEAD(dfs);
	int i;

	for (i = 0; i < mm->n_roots; ++i)
		list_add_tail(&mm->roots[i]->tmp_link, &dfs);

	return __alloc_range(mm, &dfs, start, size,
			     blocks, total_allocated_on_err);
}

static int __alloc_contig_try_harder(struct gpu_buddy *mm,
				     u64 size,
				     u64 min_block_size,
				     struct list_head *blocks)
{
	u64 rhs_offset, lhs_offset, lhs_size, filled;
	struct gpu_buddy_block *block;
	unsigned int tree, order;
	LIST_HEAD(blocks_lhs);
	unsigned long pages;
	u64 modify_size;
	int err;

	modify_size = rounddown_pow_of_two(size);
	pages = modify_size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;
	if (order == 0)
		return -ENOSPC;

	for_each_free_tree(tree) {
		struct rb_root *root;
		struct rb_node *iter;

		root = &mm->free_trees[tree][order];
		if (rbtree_is_empty(root))
			continue;

		iter = rb_last(root);
		while (iter) {
			block = rbtree_get_free_block(iter);

			/* Allocate blocks traversing RHS */
			rhs_offset = gpu_buddy_block_offset(block);
			err = __gpu_buddy_alloc_range(mm, rhs_offset, size,
						      &filled, blocks);
			if (!err || err != -ENOSPC)
				return err;

			lhs_size = max((size - filled), min_block_size);
			if (!IS_ALIGNED(lhs_size, min_block_size))
				lhs_size = round_up(lhs_size, min_block_size);

			/* Allocate blocks traversing LHS */
			lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
			err = __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
						      NULL, &blocks_lhs);
			if (!err) {
				list_splice(&blocks_lhs, blocks);
				return 0;
			} else if (err != -ENOSPC) {
				gpu_buddy_free_list_internal(mm, blocks);
				return err;
			}
			/* Free blocks for the next iteration */
			gpu_buddy_free_list_internal(mm, blocks);

			iter = rb_prev(iter);
		}
	}

	return -ENOSPC;
}

/**
 * gpu_buddy_block_trim - free unused pages
 *
 * @mm: GPU buddy manager
 * @start: start address to begin the trimming.
 * @new_size: original size requested
 * @blocks: Input and output list of allocated blocks.
 *          MUST contain a single block as input to be trimmed.
 *          On success will contain the newly allocated blocks
 *          making up the @new_size. Blocks always appear in
 *          ascending order
 *
 * For contiguous allocations, the size is rounded up to the nearest
 * power-of-two value; drivers consume the *actual* size, so the remaining
 * portions are unused and can optionally be freed with this function.
 *
 * Returns:
 * 0 on success, error code on failure.
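 *
 * Example (illustrative, assuming a 4 KiB chunk size): a contiguous
 * 300 KiB request is rounded up to a single 512 KiB block; trimming it
 * back returns the unused 212 KiB tail to the freetree:
 *
 *	err = gpu_buddy_block_trim(mm, NULL, SZ_4K * 75, &blocks);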
 */
int gpu_buddy_block_trim(struct gpu_buddy *mm,
			 u64 *start,
			 u64 new_size,
			 struct list_head *blocks)
{
	struct gpu_buddy_block *parent;
	struct gpu_buddy_block *block;
	u64 block_start, block_end;
	LIST_HEAD(dfs);
	u64 new_start;
	int err;

	if (!list_is_singular(blocks))
		return -EINVAL;

	block = list_first_entry(blocks,
				 struct gpu_buddy_block,
				 link);

	block_start = gpu_buddy_block_offset(block);
	block_end = block_start + gpu_buddy_block_size(mm, block);

	if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
		return -EINVAL;

	if (new_size > gpu_buddy_block_size(mm, block))
		return -EINVAL;

	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
		return -EINVAL;

	if (new_size == gpu_buddy_block_size(mm, block))
		return 0;

	new_start = block_start;
	if (start) {
		new_start = *start;

		if (new_start < block_start)
			return -EINVAL;

		if (!IS_ALIGNED(new_start, mm->chunk_size))
			return -EINVAL;

		if (range_overflows(new_start, new_size, block_end))
			return -EINVAL;
	}

	list_del(&block->link);
	mark_free(mm, block);
	mm->avail += gpu_buddy_block_size(mm, block);
	if (gpu_buddy_block_is_clear(block))
		mm->clear_avail += gpu_buddy_block_size(mm, block);

	/* Prevent recursively freeing this node */
	parent = block->parent;
	block->parent = NULL;

	list_add(&block->tmp_link, &dfs);
	err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
	if (err) {
		mark_allocated(mm, block);
		mm->avail -= gpu_buddy_block_size(mm, block);
		if (gpu_buddy_block_is_clear(block))
			mm->clear_avail -= gpu_buddy_block_size(mm, block);
		list_add(&block->link, blocks);
	}

	block->parent = parent;
	return err;
}
EXPORT_SYMBOL(gpu_buddy_block_trim);

static struct gpu_buddy_block *
__gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
			 u64 start, u64 end,
			 u64 size, u64 min_block_size,
			 unsigned int order,
			 unsigned long flags)
{
	if (flags & GPU_BUDDY_RANGE_ALLOCATION)
		/* Allocate traversing within the range */
		return __gpu_buddy_alloc_range_bias(mm, start, end,
						    order, flags);
	else if (size < min_block_size)
		/* Allocate from an offset-aligned region without size rounding */
		return gpu_buddy_offset_aligned_allocation(mm, size,
							   min_block_size,
							   flags);
	else
		/* Allocate from freetree */
		return alloc_from_freetree(mm, order, flags);
}

/**
 * gpu_buddy_alloc_blocks - allocate power-of-two blocks
 *
 * @mm: GPU buddy manager to allocate from
 * @start: start of the allowed range for this block
 * @end: end of the allowed range for this block
 * @size: size of the allocation in bytes
 * @min_block_size: alignment of the allocation
 * @blocks: output list head to add allocated blocks
 * @flags: GPU_BUDDY_*_ALLOCATION flags
 *
 * alloc_range_bias() is called when there are range limitations; it
 * traverses the tree and returns the desired block.
 *
 * alloc_from_freetree() is called when *no* range restrictions are
 * enforced; it picks a block from the freetree.
 *
 * Returns:
 * 0 on success, error code on failure.
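 *
 * Example (an illustrative sketch; the flags and sizes are arbitrary):
 *
 *	LIST_HEAD(blocks);
 *
 *	err = gpu_buddy_alloc_blocks(&mm, 0, mm.size, SZ_8M, SZ_4K,
 *				     &blocks, GPU_BUDDY_CLEAR_ALLOCATION);
 *	if (err)
 *		return err;
 *	...
 *	gpu_buddy_free_list(&mm, &blocks, 0);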
 */
int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
			   u64 start, u64 end, u64 size,
			   u64 min_block_size,
			   struct list_head *blocks,
			   unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	u64 original_size, original_min_size;
	unsigned int min_order, order;
	LIST_HEAD(allocated);
	unsigned long pages;
	int err;

	if (size < mm->chunk_size)
		return -EINVAL;

	if (min_block_size < mm->chunk_size)
		return -EINVAL;

	if (!is_power_of_2(min_block_size))
		return -EINVAL;

	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
		return -EINVAL;

	if (end > mm->size)
		return -EINVAL;

	if (range_overflows(start, size, mm->size))
		return -EINVAL;

	/* Actual range allocation */
	if (start + size == end) {
		if (!IS_ALIGNED(start | end, min_block_size))
			return -EINVAL;

		return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
	}

	original_size = size;
	original_min_size = min_block_size;

	/* Round up the size to a power of two */
	if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
		size = roundup_pow_of_two(size);
		min_block_size = size;
	/*
	 * Normalize the requested size to min_block_size for regular
	 * allocations. Offset-aligned allocations intentionally skip size
	 * rounding.
	 */
	} else if (!gpu_buddy_can_offset_align(size, min_block_size)) {
		size = round_up(size, min_block_size);
	}

	pages = size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;
	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);

	if (order > mm->max_order || size > mm->size) {
		if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
		    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
			return __alloc_contig_try_harder(mm, original_size,
							 original_min_size, blocks);

		return -EINVAL;
	}
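
	/*
	 * Worked example (illustrative, 4 KiB chunks): a 192 KiB request
	 * gives pages = 48, so the loop below first tries
	 * order = fls(48) - 1 = 5 (a 128 KiB block), then re-clamps to the
	 * remaining 16 pages (order 4, 64 KiB) on the next pass, yielding
	 * two blocks in descending size order.
	 */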
	do {
		order = min(order, (unsigned int)fls(pages) - 1);
		BUG_ON(order > mm->max_order);
		/*
		 * Regular allocations must not allocate blocks smaller than
		 * min_block_size. Offset-aligned allocations deliberately
		 * bypass this constraint.
		 */
		BUG_ON(size >= min_block_size && order < min_order);

		do {
			unsigned int fallback_order;

			block = __gpu_buddy_alloc_blocks(mm, start,
							 end,
							 size,
							 min_block_size,
							 order,
							 flags);
			if (!IS_ERR(block))
				break;

			if (size < min_block_size) {
				fallback_order = order;
			} else if (order == min_order) {
				fallback_order = min_order;
			} else {
				order--;
				continue;
			}

			/* Try allocation through force merge method */
			if (mm->clear_avail &&
			    !__force_merge(mm, start, end, fallback_order)) {
				block = __gpu_buddy_alloc_blocks(mm, start,
								 end,
								 size,
								 min_block_size,
								 fallback_order,
								 flags);
				if (!IS_ERR(block)) {
					order = fallback_order;
					break;
				}
			}

			/*
			 * Try contiguous block allocation through
			 * try harder method.
			 */
			if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
			    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
				return __alloc_contig_try_harder(mm,
								 original_size,
								 original_min_size,
								 blocks);
			err = -ENOSPC;
			goto err_free;
		} while (1);

		mark_allocated(mm, block);
		mm->avail -= gpu_buddy_block_size(mm, block);
		if (gpu_buddy_block_is_clear(block))
			mm->clear_avail -= gpu_buddy_block_size(mm, block);
		kmemleak_update_trace(block);
		list_add_tail(&block->link, &allocated);

		pages -= BIT(order);

		if (!pages)
			break;
	} while (1);

	/* Trim the allocated block to the required size */
	if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
	    original_size != size) {
		struct list_head *trim_list;
		LIST_HEAD(temp);
		u64 trim_size;

		trim_list = &allocated;
		trim_size = original_size;

		if (!list_is_singular(&allocated)) {
			block = list_last_entry(&allocated, typeof(*block), link);
			list_move(&block->link, &temp);
			trim_list = &temp;
			trim_size = gpu_buddy_block_size(mm, block) -
				    (size - original_size);
		}

		gpu_buddy_block_trim(mm,
				     NULL,
				     trim_size,
				     trim_list);

		if (!list_empty(&temp))
			list_splice_tail(trim_list, &allocated);
	}

	list_splice_tail(&allocated, blocks);
	return 0;

err_free:
	gpu_buddy_free_list_internal(mm, &allocated);
	return err;
}
EXPORT_SYMBOL(gpu_buddy_alloc_blocks);

/**
 * gpu_buddy_block_print - print block information
 *
 * @mm: GPU buddy manager
 * @block: GPU buddy block
 */
void gpu_buddy_block_print(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	u64 start = gpu_buddy_block_offset(block);
	u64 size = gpu_buddy_block_size(mm, block);

	pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
}
EXPORT_SYMBOL(gpu_buddy_block_print);

/**
 * gpu_buddy_print - print allocator state
 *
 * @mm: GPU buddy manager
 */
void gpu_buddy_print(struct gpu_buddy *mm)
{
	int order;

	pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
		mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);

	for (order = mm->max_order; order >= 0; order--) {
		struct gpu_buddy_block *block, *tmp;
		struct rb_root *root;
		u64 count = 0, free;
		unsigned int tree;

		for_each_free_tree(tree) {
			root = &mm->free_trees[tree][order];

			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
				BUG_ON(!gpu_buddy_block_is_free(block));
				count++;
			}
		}

		free = count * (mm->chunk_size << order);
		if (free < SZ_1M)
			pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
				order, free >> 10, count);
		else
			pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
				order, free >> 20, count);
	}
}
EXPORT_SYMBOL(gpu_buddy_print);

static void gpu_buddy_module_exit(void)
{
	kmem_cache_destroy(slab_blocks);
}

static int __init gpu_buddy_module_init(void)
{
	slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
	if (!slab_blocks)
		return -ENOMEM;

	return 0;
}

module_init(gpu_buddy_module_init);
module_exit(gpu_buddy_module_exit);

MODULE_DESCRIPTION("GPU Buddy Allocator");
MODULE_LICENSE("Dual MIT/GPL");