14a9671a0SJoel Fernandes // SPDX-License-Identifier: MIT 24a9671a0SJoel Fernandes /* 34a9671a0SJoel Fernandes * Copyright © 2021 Intel Corporation 44a9671a0SJoel Fernandes */ 54a9671a0SJoel Fernandes 64a9671a0SJoel Fernandes #include <kunit/test-bug.h> 74a9671a0SJoel Fernandes 84a9671a0SJoel Fernandes #include <linux/export.h> 94a9671a0SJoel Fernandes #include <linux/kmemleak.h> 104a9671a0SJoel Fernandes #include <linux/module.h> 114a9671a0SJoel Fernandes #include <linux/sizes.h> 124a9671a0SJoel Fernandes 134a9671a0SJoel Fernandes #include <linux/gpu_buddy.h> 144a9671a0SJoel Fernandes 154a9671a0SJoel Fernandes static struct kmem_cache *slab_blocks; 164a9671a0SJoel Fernandes 17*df8c7892SSanjay Yadav static unsigned int 18*df8c7892SSanjay Yadav gpu_buddy_block_state(struct gpu_buddy_block *block) 19*df8c7892SSanjay Yadav { 20*df8c7892SSanjay Yadav return block->header & GPU_BUDDY_HEADER_STATE; 21*df8c7892SSanjay Yadav } 22*df8c7892SSanjay Yadav 23*df8c7892SSanjay Yadav static bool 24*df8c7892SSanjay Yadav gpu_buddy_block_is_allocated(struct gpu_buddy_block *block) 25*df8c7892SSanjay Yadav { 26*df8c7892SSanjay Yadav return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED; 27*df8c7892SSanjay Yadav } 28*df8c7892SSanjay Yadav 29*df8c7892SSanjay Yadav static bool 30*df8c7892SSanjay Yadav gpu_buddy_block_is_split(struct gpu_buddy_block *block) 31*df8c7892SSanjay Yadav { 32*df8c7892SSanjay Yadav return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT; 33*df8c7892SSanjay Yadav } 34*df8c7892SSanjay Yadav 35ba110db8SJoel Fernandes static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm, 36ba110db8SJoel Fernandes struct gpu_buddy_block *parent, 374a9671a0SJoel Fernandes unsigned int order, 384a9671a0SJoel Fernandes u64 offset) 394a9671a0SJoel Fernandes { 40ba110db8SJoel Fernandes struct gpu_buddy_block *block; 414a9671a0SJoel Fernandes 42ba110db8SJoel Fernandes BUG_ON(order > GPU_BUDDY_MAX_ORDER); 434a9671a0SJoel Fernandes 444a9671a0SJoel Fernandes block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL); 454a9671a0SJoel Fernandes if (!block) 464a9671a0SJoel Fernandes return NULL; 474a9671a0SJoel Fernandes 484a9671a0SJoel Fernandes block->header = offset; 494a9671a0SJoel Fernandes block->header |= order; 504a9671a0SJoel Fernandes block->parent = parent; 514a9671a0SJoel Fernandes 524a9671a0SJoel Fernandes RB_CLEAR_NODE(&block->rb); 534a9671a0SJoel Fernandes 54ba110db8SJoel Fernandes BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED); 554a9671a0SJoel Fernandes return block; 564a9671a0SJoel Fernandes } 574a9671a0SJoel Fernandes 58ba110db8SJoel Fernandes static void gpu_block_free(struct gpu_buddy *mm, 59ba110db8SJoel Fernandes struct gpu_buddy_block *block) 604a9671a0SJoel Fernandes { 614a9671a0SJoel Fernandes kmem_cache_free(slab_blocks, block); 624a9671a0SJoel Fernandes } 634a9671a0SJoel Fernandes 64ba110db8SJoel Fernandes static enum gpu_buddy_free_tree 65ba110db8SJoel Fernandes get_block_tree(struct gpu_buddy_block *block) 664a9671a0SJoel Fernandes { 67ba110db8SJoel Fernandes return gpu_buddy_block_is_clear(block) ? 68ba110db8SJoel Fernandes GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE; 694a9671a0SJoel Fernandes } 704a9671a0SJoel Fernandes 71ba110db8SJoel Fernandes static struct gpu_buddy_block * 724a9671a0SJoel Fernandes rbtree_get_free_block(const struct rb_node *node) 734a9671a0SJoel Fernandes { 74ba110db8SJoel Fernandes return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL; 754a9671a0SJoel Fernandes } 764a9671a0SJoel Fernandes 77ba110db8SJoel Fernandes static struct gpu_buddy_block * 784a9671a0SJoel Fernandes rbtree_last_free_block(struct rb_root *root) 794a9671a0SJoel Fernandes { 804a9671a0SJoel Fernandes return rbtree_get_free_block(rb_last(root)); 814a9671a0SJoel Fernandes } 824a9671a0SJoel Fernandes 834a9671a0SJoel Fernandes static bool rbtree_is_empty(struct rb_root *root) 844a9671a0SJoel Fernandes { 854a9671a0SJoel Fernandes return RB_EMPTY_ROOT(root); 864a9671a0SJoel Fernandes } 874a9671a0SJoel Fernandes 88ba110db8SJoel Fernandes static bool gpu_buddy_block_offset_less(const struct gpu_buddy_block *block, 89ba110db8SJoel Fernandes const struct gpu_buddy_block *node) 904a9671a0SJoel Fernandes { 91ba110db8SJoel Fernandes return gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node); 924a9671a0SJoel Fernandes } 934a9671a0SJoel Fernandes 944a9671a0SJoel Fernandes static bool rbtree_block_offset_less(struct rb_node *block, 954a9671a0SJoel Fernandes const struct rb_node *node) 964a9671a0SJoel Fernandes { 97ba110db8SJoel Fernandes return gpu_buddy_block_offset_less(rbtree_get_free_block(block), 984a9671a0SJoel Fernandes rbtree_get_free_block(node)); 994a9671a0SJoel Fernandes } 1004a9671a0SJoel Fernandes 101ba110db8SJoel Fernandes static void rbtree_insert(struct gpu_buddy *mm, 102ba110db8SJoel Fernandes struct gpu_buddy_block *block, 103ba110db8SJoel Fernandes enum gpu_buddy_free_tree tree) 1044a9671a0SJoel Fernandes { 1054a9671a0SJoel Fernandes rb_add(&block->rb, 106ba110db8SJoel Fernandes &mm->free_trees[tree][gpu_buddy_block_order(block)], 1074a9671a0SJoel Fernandes rbtree_block_offset_less); 1084a9671a0SJoel Fernandes } 1094a9671a0SJoel Fernandes 110ba110db8SJoel Fernandes static void rbtree_remove(struct gpu_buddy *mm, 111ba110db8SJoel Fernandes struct gpu_buddy_block *block) 1124a9671a0SJoel Fernandes { 113ba110db8SJoel Fernandes unsigned int order = gpu_buddy_block_order(block); 114ba110db8SJoel Fernandes enum gpu_buddy_free_tree tree; 1154a9671a0SJoel Fernandes struct rb_root *root; 1164a9671a0SJoel Fernandes 1174a9671a0SJoel Fernandes tree = get_block_tree(block); 1184a9671a0SJoel Fernandes root = &mm->free_trees[tree][order]; 1194a9671a0SJoel Fernandes 1204a9671a0SJoel Fernandes rb_erase(&block->rb, root); 1214a9671a0SJoel Fernandes RB_CLEAR_NODE(&block->rb); 1224a9671a0SJoel Fernandes } 1234a9671a0SJoel Fernandes 124ba110db8SJoel Fernandes static void clear_reset(struct gpu_buddy_block *block) 1254a9671a0SJoel Fernandes { 126ba110db8SJoel Fernandes block->header &= ~GPU_BUDDY_HEADER_CLEAR; 1274a9671a0SJoel Fernandes } 1284a9671a0SJoel Fernandes 129ba110db8SJoel Fernandes static void mark_cleared(struct gpu_buddy_block *block) 1304a9671a0SJoel Fernandes { 131ba110db8SJoel Fernandes block->header |= GPU_BUDDY_HEADER_CLEAR; 1324a9671a0SJoel Fernandes } 1334a9671a0SJoel Fernandes 134ba110db8SJoel Fernandes static void mark_allocated(struct gpu_buddy *mm, 135ba110db8SJoel Fernandes struct gpu_buddy_block *block) 1364a9671a0SJoel Fernandes { 137ba110db8SJoel Fernandes block->header &= ~GPU_BUDDY_HEADER_STATE; 138ba110db8SJoel Fernandes block->header |= GPU_BUDDY_ALLOCATED; 1394a9671a0SJoel Fernandes 1404a9671a0SJoel Fernandes rbtree_remove(mm, block); 1414a9671a0SJoel Fernandes } 1424a9671a0SJoel Fernandes 143ba110db8SJoel Fernandes static void mark_free(struct gpu_buddy *mm, 144ba110db8SJoel Fernandes struct gpu_buddy_block *block) 1454a9671a0SJoel Fernandes { 146ba110db8SJoel Fernandes enum gpu_buddy_free_tree tree; 1474a9671a0SJoel Fernandes 148ba110db8SJoel Fernandes block->header &= ~GPU_BUDDY_HEADER_STATE; 149ba110db8SJoel Fernandes block->header |= GPU_BUDDY_FREE; 1504a9671a0SJoel Fernandes 1514a9671a0SJoel Fernandes tree = get_block_tree(block); 1524a9671a0SJoel Fernandes rbtree_insert(mm, block, tree); 1534a9671a0SJoel Fernandes } 1544a9671a0SJoel Fernandes 155ba110db8SJoel Fernandes static void mark_split(struct gpu_buddy *mm, 156ba110db8SJoel Fernandes struct gpu_buddy_block *block) 1574a9671a0SJoel Fernandes { 158ba110db8SJoel Fernandes block->header &= ~GPU_BUDDY_HEADER_STATE; 159ba110db8SJoel Fernandes block->header |= GPU_BUDDY_SPLIT; 1604a9671a0SJoel Fernandes 1614a9671a0SJoel Fernandes rbtree_remove(mm, block); 1624a9671a0SJoel Fernandes } 1634a9671a0SJoel Fernandes 1644a9671a0SJoel Fernandes static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) 1654a9671a0SJoel Fernandes { 1664a9671a0SJoel Fernandes return s1 <= e2 && e1 >= s2; 1674a9671a0SJoel Fernandes } 1684a9671a0SJoel Fernandes 1694a9671a0SJoel Fernandes static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) 1704a9671a0SJoel Fernandes { 1714a9671a0SJoel Fernandes return s1 <= s2 && e1 >= e2; 1724a9671a0SJoel Fernandes } 1734a9671a0SJoel Fernandes 174ba110db8SJoel Fernandes static struct gpu_buddy_block * 175ba110db8SJoel Fernandes __get_buddy(struct gpu_buddy_block *block) 1764a9671a0SJoel Fernandes { 177ba110db8SJoel Fernandes struct gpu_buddy_block *parent; 1784a9671a0SJoel Fernandes 1794a9671a0SJoel Fernandes parent = block->parent; 1804a9671a0SJoel Fernandes if (!parent) 1814a9671a0SJoel Fernandes return NULL; 1824a9671a0SJoel Fernandes 1834a9671a0SJoel Fernandes if (parent->left == block) 1844a9671a0SJoel Fernandes return parent->right; 1854a9671a0SJoel Fernandes 1864a9671a0SJoel Fernandes return parent->left; 1874a9671a0SJoel Fernandes } 1884a9671a0SJoel Fernandes 189ba110db8SJoel Fernandes static unsigned int __gpu_buddy_free(struct gpu_buddy *mm, 190ba110db8SJoel Fernandes struct gpu_buddy_block *block, 1914a9671a0SJoel Fernandes bool force_merge) 1924a9671a0SJoel Fernandes { 193ba110db8SJoel Fernandes struct gpu_buddy_block *parent; 1944a9671a0SJoel Fernandes unsigned int order; 1954a9671a0SJoel Fernandes 1964a9671a0SJoel Fernandes while ((parent = block->parent)) { 197ba110db8SJoel Fernandes struct gpu_buddy_block *buddy; 1984a9671a0SJoel Fernandes 1994a9671a0SJoel Fernandes buddy = __get_buddy(block); 2004a9671a0SJoel Fernandes 201ba110db8SJoel Fernandes if (!gpu_buddy_block_is_free(buddy)) 2024a9671a0SJoel Fernandes break; 2034a9671a0SJoel Fernandes 2044a9671a0SJoel Fernandes if (!force_merge) { 2054a9671a0SJoel Fernandes /* 2064a9671a0SJoel Fernandes * Check the block and its buddy clear state and exit 2074a9671a0SJoel Fernandes * the loop if they both have the dissimilar state. 2084a9671a0SJoel Fernandes */ 209ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block) != 210ba110db8SJoel Fernandes gpu_buddy_block_is_clear(buddy)) 2114a9671a0SJoel Fernandes break; 2124a9671a0SJoel Fernandes 213ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) 2144a9671a0SJoel Fernandes mark_cleared(parent); 2154a9671a0SJoel Fernandes } 2164a9671a0SJoel Fernandes 2174a9671a0SJoel Fernandes rbtree_remove(mm, buddy); 218ba110db8SJoel Fernandes if (force_merge && gpu_buddy_block_is_clear(buddy)) 219ba110db8SJoel Fernandes mm->clear_avail -= gpu_buddy_block_size(mm, buddy); 2204a9671a0SJoel Fernandes 221ba110db8SJoel Fernandes gpu_block_free(mm, block); 222ba110db8SJoel Fernandes gpu_block_free(mm, buddy); 2234a9671a0SJoel Fernandes 2244a9671a0SJoel Fernandes block = parent; 2254a9671a0SJoel Fernandes } 2264a9671a0SJoel Fernandes 227ba110db8SJoel Fernandes order = gpu_buddy_block_order(block); 2284a9671a0SJoel Fernandes mark_free(mm, block); 2294a9671a0SJoel Fernandes 2304a9671a0SJoel Fernandes return order; 2314a9671a0SJoel Fernandes } 2324a9671a0SJoel Fernandes 233ba110db8SJoel Fernandes static int __force_merge(struct gpu_buddy *mm, 2344a9671a0SJoel Fernandes u64 start, 2354a9671a0SJoel Fernandes u64 end, 2364a9671a0SJoel Fernandes unsigned int min_order) 2374a9671a0SJoel Fernandes { 2384a9671a0SJoel Fernandes unsigned int tree, order; 2394a9671a0SJoel Fernandes int i; 2404a9671a0SJoel Fernandes 2414a9671a0SJoel Fernandes if (!min_order) 2424a9671a0SJoel Fernandes return -ENOMEM; 2434a9671a0SJoel Fernandes 2444a9671a0SJoel Fernandes if (min_order > mm->max_order) 2454a9671a0SJoel Fernandes return -EINVAL; 2464a9671a0SJoel Fernandes 2474a9671a0SJoel Fernandes for_each_free_tree(tree) { 2484a9671a0SJoel Fernandes for (i = min_order - 1; i >= 0; i--) { 2494a9671a0SJoel Fernandes struct rb_node *iter = rb_last(&mm->free_trees[tree][i]); 2504a9671a0SJoel Fernandes 2514a9671a0SJoel Fernandes while (iter) { 252ba110db8SJoel Fernandes struct gpu_buddy_block *block, *buddy; 2534a9671a0SJoel Fernandes u64 block_start, block_end; 2544a9671a0SJoel Fernandes 2554a9671a0SJoel Fernandes block = rbtree_get_free_block(iter); 2564a9671a0SJoel Fernandes iter = rb_prev(iter); 2574a9671a0SJoel Fernandes 2584a9671a0SJoel Fernandes if (!block || !block->parent) 2594a9671a0SJoel Fernandes continue; 2604a9671a0SJoel Fernandes 261ba110db8SJoel Fernandes block_start = gpu_buddy_block_offset(block); 262ba110db8SJoel Fernandes block_end = block_start + gpu_buddy_block_size(mm, block) - 1; 2634a9671a0SJoel Fernandes 2644a9671a0SJoel Fernandes if (!contains(start, end, block_start, block_end)) 2654a9671a0SJoel Fernandes continue; 2664a9671a0SJoel Fernandes 2674a9671a0SJoel Fernandes buddy = __get_buddy(block); 268ba110db8SJoel Fernandes if (!gpu_buddy_block_is_free(buddy)) 2694a9671a0SJoel Fernandes continue; 2704a9671a0SJoel Fernandes 271ba110db8SJoel Fernandes WARN_ON(gpu_buddy_block_is_clear(block) == 272ba110db8SJoel Fernandes gpu_buddy_block_is_clear(buddy)); 2734a9671a0SJoel Fernandes 2744a9671a0SJoel Fernandes /* 2754a9671a0SJoel Fernandes * Advance to the next node when the current node is the buddy, 2764a9671a0SJoel Fernandes * as freeing the block will also remove its buddy from the tree. 2774a9671a0SJoel Fernandes */ 2784a9671a0SJoel Fernandes if (iter == &buddy->rb) 2794a9671a0SJoel Fernandes iter = rb_prev(iter); 2804a9671a0SJoel Fernandes 2814a9671a0SJoel Fernandes rbtree_remove(mm, block); 282ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) 283ba110db8SJoel Fernandes mm->clear_avail -= gpu_buddy_block_size(mm, block); 2844a9671a0SJoel Fernandes 285ba110db8SJoel Fernandes order = __gpu_buddy_free(mm, block, true); 2864a9671a0SJoel Fernandes if (order >= min_order) 2874a9671a0SJoel Fernandes return 0; 2884a9671a0SJoel Fernandes } 2894a9671a0SJoel Fernandes } 2904a9671a0SJoel Fernandes } 2914a9671a0SJoel Fernandes 2924a9671a0SJoel Fernandes return -ENOMEM; 2934a9671a0SJoel Fernandes } 2944a9671a0SJoel Fernandes 2954a9671a0SJoel Fernandes /** 296ba110db8SJoel Fernandes * gpu_buddy_init - init memory manager 2974a9671a0SJoel Fernandes * 298ba110db8SJoel Fernandes * @mm: GPU buddy manager to initialize 2994a9671a0SJoel Fernandes * @size: size in bytes to manage 3004a9671a0SJoel Fernandes * @chunk_size: minimum page size in bytes for our allocations 3014a9671a0SJoel Fernandes * 3024a9671a0SJoel Fernandes * Initializes the memory manager and its resources. 3034a9671a0SJoel Fernandes * 3044a9671a0SJoel Fernandes * Returns: 3054a9671a0SJoel Fernandes * 0 on success, error code on failure. 3064a9671a0SJoel Fernandes */ 307ba110db8SJoel Fernandes int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size) 3084a9671a0SJoel Fernandes { 3094a9671a0SJoel Fernandes unsigned int i, j, root_count = 0; 3104a9671a0SJoel Fernandes u64 offset = 0; 3114a9671a0SJoel Fernandes 3124a9671a0SJoel Fernandes if (size < chunk_size) 3134a9671a0SJoel Fernandes return -EINVAL; 3144a9671a0SJoel Fernandes 3154a9671a0SJoel Fernandes if (chunk_size < SZ_4K) 3164a9671a0SJoel Fernandes return -EINVAL; 3174a9671a0SJoel Fernandes 3184a9671a0SJoel Fernandes if (!is_power_of_2(chunk_size)) 3194a9671a0SJoel Fernandes return -EINVAL; 3204a9671a0SJoel Fernandes 3214a9671a0SJoel Fernandes size = round_down(size, chunk_size); 3224a9671a0SJoel Fernandes 3234a9671a0SJoel Fernandes mm->size = size; 3244a9671a0SJoel Fernandes mm->avail = size; 3254a9671a0SJoel Fernandes mm->clear_avail = 0; 3264a9671a0SJoel Fernandes mm->chunk_size = chunk_size; 3274a9671a0SJoel Fernandes mm->max_order = ilog2(size) - ilog2(chunk_size); 3284a9671a0SJoel Fernandes 329ba110db8SJoel Fernandes BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER); 3304a9671a0SJoel Fernandes 331ba110db8SJoel Fernandes mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES, 3324a9671a0SJoel Fernandes sizeof(*mm->free_trees), 3334a9671a0SJoel Fernandes GFP_KERNEL); 3344a9671a0SJoel Fernandes if (!mm->free_trees) 3354a9671a0SJoel Fernandes return -ENOMEM; 3364a9671a0SJoel Fernandes 3374a9671a0SJoel Fernandes for_each_free_tree(i) { 3384a9671a0SJoel Fernandes mm->free_trees[i] = kmalloc_array(mm->max_order + 1, 3394a9671a0SJoel Fernandes sizeof(struct rb_root), 3404a9671a0SJoel Fernandes GFP_KERNEL); 3414a9671a0SJoel Fernandes if (!mm->free_trees[i]) 3424a9671a0SJoel Fernandes goto out_free_tree; 3434a9671a0SJoel Fernandes 3444a9671a0SJoel Fernandes for (j = 0; j <= mm->max_order; ++j) 3454a9671a0SJoel Fernandes mm->free_trees[i][j] = RB_ROOT; 3464a9671a0SJoel Fernandes } 3474a9671a0SJoel Fernandes 3484a9671a0SJoel Fernandes mm->n_roots = hweight64(size); 3494a9671a0SJoel Fernandes 3504a9671a0SJoel Fernandes mm->roots = kmalloc_array(mm->n_roots, 351ba110db8SJoel Fernandes sizeof(struct gpu_buddy_block *), 3524a9671a0SJoel Fernandes GFP_KERNEL); 3534a9671a0SJoel Fernandes if (!mm->roots) 3544a9671a0SJoel Fernandes goto out_free_tree; 3554a9671a0SJoel Fernandes 3564a9671a0SJoel Fernandes /* 3574a9671a0SJoel Fernandes * Split into power-of-two blocks, in case we are given a size that is 3584a9671a0SJoel Fernandes * not itself a power-of-two. 3594a9671a0SJoel Fernandes */ 3604a9671a0SJoel Fernandes do { 361ba110db8SJoel Fernandes struct gpu_buddy_block *root; 3624a9671a0SJoel Fernandes unsigned int order; 3634a9671a0SJoel Fernandes u64 root_size; 3644a9671a0SJoel Fernandes 3654a9671a0SJoel Fernandes order = ilog2(size) - ilog2(chunk_size); 3664a9671a0SJoel Fernandes root_size = chunk_size << order; 3674a9671a0SJoel Fernandes 368ba110db8SJoel Fernandes root = gpu_block_alloc(mm, NULL, order, offset); 3694a9671a0SJoel Fernandes if (!root) 3704a9671a0SJoel Fernandes goto out_free_roots; 3714a9671a0SJoel Fernandes 3724a9671a0SJoel Fernandes mark_free(mm, root); 3734a9671a0SJoel Fernandes 3744a9671a0SJoel Fernandes BUG_ON(root_count > mm->max_order); 375ba110db8SJoel Fernandes BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size); 3764a9671a0SJoel Fernandes 3774a9671a0SJoel Fernandes mm->roots[root_count] = root; 3784a9671a0SJoel Fernandes 3794a9671a0SJoel Fernandes offset += root_size; 3804a9671a0SJoel Fernandes size -= root_size; 3814a9671a0SJoel Fernandes root_count++; 3824a9671a0SJoel Fernandes } while (size); 3834a9671a0SJoel Fernandes 3844a9671a0SJoel Fernandes return 0; 3854a9671a0SJoel Fernandes 3864a9671a0SJoel Fernandes out_free_roots: 3874a9671a0SJoel Fernandes while (root_count--) 388ba110db8SJoel Fernandes gpu_block_free(mm, mm->roots[root_count]); 3894a9671a0SJoel Fernandes kfree(mm->roots); 3904a9671a0SJoel Fernandes out_free_tree: 3914a9671a0SJoel Fernandes while (i--) 3924a9671a0SJoel Fernandes kfree(mm->free_trees[i]); 3934a9671a0SJoel Fernandes kfree(mm->free_trees); 3944a9671a0SJoel Fernandes return -ENOMEM; 3954a9671a0SJoel Fernandes } 396ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_init); 3974a9671a0SJoel Fernandes 3984a9671a0SJoel Fernandes /** 399ba110db8SJoel Fernandes * gpu_buddy_fini - tear down the memory manager 4004a9671a0SJoel Fernandes * 401ba110db8SJoel Fernandes * @mm: GPU buddy manager to free 4024a9671a0SJoel Fernandes * 4034a9671a0SJoel Fernandes * Cleanup memory manager resources and the freetree 4044a9671a0SJoel Fernandes */ 405ba110db8SJoel Fernandes void gpu_buddy_fini(struct gpu_buddy *mm) 4064a9671a0SJoel Fernandes { 4074a9671a0SJoel Fernandes u64 root_size, size, start; 4084a9671a0SJoel Fernandes unsigned int order; 4094a9671a0SJoel Fernandes int i; 4104a9671a0SJoel Fernandes 4114a9671a0SJoel Fernandes size = mm->size; 4124a9671a0SJoel Fernandes 4134a9671a0SJoel Fernandes for (i = 0; i < mm->n_roots; ++i) { 4144a9671a0SJoel Fernandes order = ilog2(size) - ilog2(mm->chunk_size); 415ba110db8SJoel Fernandes start = gpu_buddy_block_offset(mm->roots[i]); 4164a9671a0SJoel Fernandes __force_merge(mm, start, start + size, order); 4174a9671a0SJoel Fernandes 418ba110db8SJoel Fernandes if (WARN_ON(!gpu_buddy_block_is_free(mm->roots[i]))) 4194a9671a0SJoel Fernandes kunit_fail_current_test("buddy_fini() root"); 4204a9671a0SJoel Fernandes 421ba110db8SJoel Fernandes gpu_block_free(mm, mm->roots[i]); 4224a9671a0SJoel Fernandes 4234a9671a0SJoel Fernandes root_size = mm->chunk_size << order; 4244a9671a0SJoel Fernandes size -= root_size; 4254a9671a0SJoel Fernandes } 4264a9671a0SJoel Fernandes 4274a9671a0SJoel Fernandes WARN_ON(mm->avail != mm->size); 4284a9671a0SJoel Fernandes 4294a9671a0SJoel Fernandes for_each_free_tree(i) 4304a9671a0SJoel Fernandes kfree(mm->free_trees[i]); 4314a9671a0SJoel Fernandes kfree(mm->free_trees); 4324a9671a0SJoel Fernandes kfree(mm->roots); 4334a9671a0SJoel Fernandes } 434ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_fini); 4354a9671a0SJoel Fernandes 436ba110db8SJoel Fernandes static int split_block(struct gpu_buddy *mm, 437ba110db8SJoel Fernandes struct gpu_buddy_block *block) 4384a9671a0SJoel Fernandes { 439ba110db8SJoel Fernandes unsigned int block_order = gpu_buddy_block_order(block) - 1; 440ba110db8SJoel Fernandes u64 offset = gpu_buddy_block_offset(block); 4414a9671a0SJoel Fernandes 442ba110db8SJoel Fernandes BUG_ON(!gpu_buddy_block_is_free(block)); 443ba110db8SJoel Fernandes BUG_ON(!gpu_buddy_block_order(block)); 4444a9671a0SJoel Fernandes 445ba110db8SJoel Fernandes block->left = gpu_block_alloc(mm, block, block_order, offset); 4464a9671a0SJoel Fernandes if (!block->left) 4474a9671a0SJoel Fernandes return -ENOMEM; 4484a9671a0SJoel Fernandes 449ba110db8SJoel Fernandes block->right = gpu_block_alloc(mm, block, block_order, 4504a9671a0SJoel Fernandes offset + (mm->chunk_size << block_order)); 4514a9671a0SJoel Fernandes if (!block->right) { 452ba110db8SJoel Fernandes gpu_block_free(mm, block->left); 4534a9671a0SJoel Fernandes return -ENOMEM; 4544a9671a0SJoel Fernandes } 4554a9671a0SJoel Fernandes 4564a9671a0SJoel Fernandes mark_split(mm, block); 4574a9671a0SJoel Fernandes 458ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) { 4594a9671a0SJoel Fernandes mark_cleared(block->left); 4604a9671a0SJoel Fernandes mark_cleared(block->right); 4614a9671a0SJoel Fernandes clear_reset(block); 4624a9671a0SJoel Fernandes } 4634a9671a0SJoel Fernandes 4644a9671a0SJoel Fernandes mark_free(mm, block->left); 4654a9671a0SJoel Fernandes mark_free(mm, block->right); 4664a9671a0SJoel Fernandes 4674a9671a0SJoel Fernandes return 0; 4684a9671a0SJoel Fernandes } 4694a9671a0SJoel Fernandes 4704a9671a0SJoel Fernandes /** 471ba110db8SJoel Fernandes * gpu_buddy_reset_clear - reset blocks clear state 4724a9671a0SJoel Fernandes * 473ba110db8SJoel Fernandes * @mm: GPU buddy manager 4744a9671a0SJoel Fernandes * @is_clear: blocks clear state 4754a9671a0SJoel Fernandes * 4764a9671a0SJoel Fernandes * Reset the clear state based on @is_clear value for each block 4774a9671a0SJoel Fernandes * in the freetree. 4784a9671a0SJoel Fernandes */ 479ba110db8SJoel Fernandes void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear) 4804a9671a0SJoel Fernandes { 481ba110db8SJoel Fernandes enum gpu_buddy_free_tree src_tree, dst_tree; 4824a9671a0SJoel Fernandes u64 root_size, size, start; 4834a9671a0SJoel Fernandes unsigned int order; 4844a9671a0SJoel Fernandes int i; 4854a9671a0SJoel Fernandes 4864a9671a0SJoel Fernandes size = mm->size; 4874a9671a0SJoel Fernandes for (i = 0; i < mm->n_roots; ++i) { 4884a9671a0SJoel Fernandes order = ilog2(size) - ilog2(mm->chunk_size); 489ba110db8SJoel Fernandes start = gpu_buddy_block_offset(mm->roots[i]); 4904a9671a0SJoel Fernandes __force_merge(mm, start, start + size, order); 4914a9671a0SJoel Fernandes 4924a9671a0SJoel Fernandes root_size = mm->chunk_size << order; 4934a9671a0SJoel Fernandes size -= root_size; 4944a9671a0SJoel Fernandes } 4954a9671a0SJoel Fernandes 496ba110db8SJoel Fernandes src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE; 497ba110db8SJoel Fernandes dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE; 4984a9671a0SJoel Fernandes 4994a9671a0SJoel Fernandes for (i = 0; i <= mm->max_order; ++i) { 5004a9671a0SJoel Fernandes struct rb_root *root = &mm->free_trees[src_tree][i]; 501ba110db8SJoel Fernandes struct gpu_buddy_block *block, *tmp; 5024a9671a0SJoel Fernandes 5034a9671a0SJoel Fernandes rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) { 5044a9671a0SJoel Fernandes rbtree_remove(mm, block); 5054a9671a0SJoel Fernandes if (is_clear) { 5064a9671a0SJoel Fernandes mark_cleared(block); 507ba110db8SJoel Fernandes mm->clear_avail += gpu_buddy_block_size(mm, block); 5084a9671a0SJoel Fernandes } else { 5094a9671a0SJoel Fernandes clear_reset(block); 510ba110db8SJoel Fernandes mm->clear_avail -= gpu_buddy_block_size(mm, block); 5114a9671a0SJoel Fernandes } 5124a9671a0SJoel Fernandes 5134a9671a0SJoel Fernandes rbtree_insert(mm, block, dst_tree); 5144a9671a0SJoel Fernandes } 5154a9671a0SJoel Fernandes } 5164a9671a0SJoel Fernandes } 517ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_reset_clear); 5184a9671a0SJoel Fernandes 5194a9671a0SJoel Fernandes /** 520ba110db8SJoel Fernandes * gpu_buddy_free_block - free a block 5214a9671a0SJoel Fernandes * 522ba110db8SJoel Fernandes * @mm: GPU buddy manager 5234a9671a0SJoel Fernandes * @block: block to be freed 5244a9671a0SJoel Fernandes */ 525ba110db8SJoel Fernandes void gpu_buddy_free_block(struct gpu_buddy *mm, 526ba110db8SJoel Fernandes struct gpu_buddy_block *block) 5274a9671a0SJoel Fernandes { 528ba110db8SJoel Fernandes BUG_ON(!gpu_buddy_block_is_allocated(block)); 529ba110db8SJoel Fernandes mm->avail += gpu_buddy_block_size(mm, block); 530ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) 531ba110db8SJoel Fernandes mm->clear_avail += gpu_buddy_block_size(mm, block); 5324a9671a0SJoel Fernandes 533ba110db8SJoel Fernandes __gpu_buddy_free(mm, block, false); 5344a9671a0SJoel Fernandes } 535ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_free_block); 5364a9671a0SJoel Fernandes 537ba110db8SJoel Fernandes static void __gpu_buddy_free_list(struct gpu_buddy *mm, 5384a9671a0SJoel Fernandes struct list_head *objects, 5394a9671a0SJoel Fernandes bool mark_clear, 5404a9671a0SJoel Fernandes bool mark_dirty) 5414a9671a0SJoel Fernandes { 542ba110db8SJoel Fernandes struct gpu_buddy_block *block, *on; 5434a9671a0SJoel Fernandes 5444a9671a0SJoel Fernandes WARN_ON(mark_dirty && mark_clear); 5454a9671a0SJoel Fernandes 5464a9671a0SJoel Fernandes list_for_each_entry_safe(block, on, objects, link) { 5474a9671a0SJoel Fernandes if (mark_clear) 5484a9671a0SJoel Fernandes mark_cleared(block); 5494a9671a0SJoel Fernandes else if (mark_dirty) 5504a9671a0SJoel Fernandes clear_reset(block); 551ba110db8SJoel Fernandes gpu_buddy_free_block(mm, block); 5524a9671a0SJoel Fernandes cond_resched(); 5534a9671a0SJoel Fernandes } 5544a9671a0SJoel Fernandes INIT_LIST_HEAD(objects); 5554a9671a0SJoel Fernandes } 5564a9671a0SJoel Fernandes 557ba110db8SJoel Fernandes static void gpu_buddy_free_list_internal(struct gpu_buddy *mm, 5584a9671a0SJoel Fernandes struct list_head *objects) 5594a9671a0SJoel Fernandes { 5604a9671a0SJoel Fernandes /* 5614a9671a0SJoel Fernandes * Don't touch the clear/dirty bit, since allocation is still internal 5624a9671a0SJoel Fernandes * at this point. For example we might have just failed part of the 5634a9671a0SJoel Fernandes * allocation. 5644a9671a0SJoel Fernandes */ 565ba110db8SJoel Fernandes __gpu_buddy_free_list(mm, objects, false, false); 5664a9671a0SJoel Fernandes } 5674a9671a0SJoel Fernandes 5684a9671a0SJoel Fernandes /** 569ba110db8SJoel Fernandes * gpu_buddy_free_list - free blocks 5704a9671a0SJoel Fernandes * 571ba110db8SJoel Fernandes * @mm: GPU buddy manager 5724a9671a0SJoel Fernandes * @objects: input list head to free blocks 573ba110db8SJoel Fernandes * @flags: optional flags like GPU_BUDDY_CLEARED 5744a9671a0SJoel Fernandes */ 575ba110db8SJoel Fernandes void gpu_buddy_free_list(struct gpu_buddy *mm, 5764a9671a0SJoel Fernandes struct list_head *objects, 5774a9671a0SJoel Fernandes unsigned int flags) 5784a9671a0SJoel Fernandes { 579ba110db8SJoel Fernandes bool mark_clear = flags & GPU_BUDDY_CLEARED; 5804a9671a0SJoel Fernandes 581ba110db8SJoel Fernandes __gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear); 5824a9671a0SJoel Fernandes } 583ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_free_list); 5844a9671a0SJoel Fernandes 585ba110db8SJoel Fernandes static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags) 5864a9671a0SJoel Fernandes { 587ba110db8SJoel Fernandes bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION; 5884a9671a0SJoel Fernandes 589ba110db8SJoel Fernandes return needs_clear != gpu_buddy_block_is_clear(block); 5904a9671a0SJoel Fernandes } 5914a9671a0SJoel Fernandes 592ba110db8SJoel Fernandes static struct gpu_buddy_block * 593ba110db8SJoel Fernandes __alloc_range_bias(struct gpu_buddy *mm, 5944a9671a0SJoel Fernandes u64 start, u64 end, 5954a9671a0SJoel Fernandes unsigned int order, 5964a9671a0SJoel Fernandes unsigned long flags, 5974a9671a0SJoel Fernandes bool fallback) 5984a9671a0SJoel Fernandes { 5994a9671a0SJoel Fernandes u64 req_size = mm->chunk_size << order; 600ba110db8SJoel Fernandes struct gpu_buddy_block *block; 601ba110db8SJoel Fernandes struct gpu_buddy_block *buddy; 6024a9671a0SJoel Fernandes LIST_HEAD(dfs); 6034a9671a0SJoel Fernandes int err; 6044a9671a0SJoel Fernandes int i; 6054a9671a0SJoel Fernandes 6064a9671a0SJoel Fernandes end = end - 1; 6074a9671a0SJoel Fernandes 6084a9671a0SJoel Fernandes for (i = 0; i < mm->n_roots; ++i) 6094a9671a0SJoel Fernandes list_add_tail(&mm->roots[i]->tmp_link, &dfs); 6104a9671a0SJoel Fernandes 6114a9671a0SJoel Fernandes do { 6124a9671a0SJoel Fernandes u64 block_start; 6134a9671a0SJoel Fernandes u64 block_end; 6144a9671a0SJoel Fernandes 6154a9671a0SJoel Fernandes block = list_first_entry_or_null(&dfs, 616ba110db8SJoel Fernandes struct gpu_buddy_block, 6174a9671a0SJoel Fernandes tmp_link); 6184a9671a0SJoel Fernandes if (!block) 6194a9671a0SJoel Fernandes break; 6204a9671a0SJoel Fernandes 6214a9671a0SJoel Fernandes list_del(&block->tmp_link); 6224a9671a0SJoel Fernandes 623ba110db8SJoel Fernandes if (gpu_buddy_block_order(block) < order) 6244a9671a0SJoel Fernandes continue; 6254a9671a0SJoel Fernandes 626ba110db8SJoel Fernandes block_start = gpu_buddy_block_offset(block); 627ba110db8SJoel Fernandes block_end = block_start + gpu_buddy_block_size(mm, block) - 1; 6284a9671a0SJoel Fernandes 6294a9671a0SJoel Fernandes if (!overlaps(start, end, block_start, block_end)) 6304a9671a0SJoel Fernandes continue; 6314a9671a0SJoel Fernandes 632ba110db8SJoel Fernandes if (gpu_buddy_block_is_allocated(block)) 6334a9671a0SJoel Fernandes continue; 6344a9671a0SJoel Fernandes 6354a9671a0SJoel Fernandes if (block_start < start || block_end > end) { 6364a9671a0SJoel Fernandes u64 adjusted_start = max(block_start, start); 6374a9671a0SJoel Fernandes u64 adjusted_end = min(block_end, end); 6384a9671a0SJoel Fernandes 6394a9671a0SJoel Fernandes if (round_down(adjusted_end + 1, req_size) <= 6404a9671a0SJoel Fernandes round_up(adjusted_start, req_size)) 6414a9671a0SJoel Fernandes continue; 6424a9671a0SJoel Fernandes } 6434a9671a0SJoel Fernandes 6444a9671a0SJoel Fernandes if (!fallback && block_incompatible(block, flags)) 6454a9671a0SJoel Fernandes continue; 6464a9671a0SJoel Fernandes 6474a9671a0SJoel Fernandes if (contains(start, end, block_start, block_end) && 648ba110db8SJoel Fernandes order == gpu_buddy_block_order(block)) { 6494a9671a0SJoel Fernandes /* 6504a9671a0SJoel Fernandes * Find the free block within the range. 6514a9671a0SJoel Fernandes */ 652ba110db8SJoel Fernandes if (gpu_buddy_block_is_free(block)) 6534a9671a0SJoel Fernandes return block; 6544a9671a0SJoel Fernandes 6554a9671a0SJoel Fernandes continue; 6564a9671a0SJoel Fernandes } 6574a9671a0SJoel Fernandes 658ba110db8SJoel Fernandes if (!gpu_buddy_block_is_split(block)) { 6594a9671a0SJoel Fernandes err = split_block(mm, block); 6604a9671a0SJoel Fernandes if (unlikely(err)) 6614a9671a0SJoel Fernandes goto err_undo; 6624a9671a0SJoel Fernandes } 6634a9671a0SJoel Fernandes 6644a9671a0SJoel Fernandes list_add(&block->right->tmp_link, &dfs); 6654a9671a0SJoel Fernandes list_add(&block->left->tmp_link, &dfs); 6664a9671a0SJoel Fernandes } while (1); 6674a9671a0SJoel Fernandes 6684a9671a0SJoel Fernandes return ERR_PTR(-ENOSPC); 6694a9671a0SJoel Fernandes 6704a9671a0SJoel Fernandes err_undo: 6714a9671a0SJoel Fernandes /* 6724a9671a0SJoel Fernandes * We really don't want to leave around a bunch of split blocks, since 6734a9671a0SJoel Fernandes * bigger is better, so make sure we merge everything back before we 6744a9671a0SJoel Fernandes * free the allocated blocks. 6754a9671a0SJoel Fernandes */ 6764a9671a0SJoel Fernandes buddy = __get_buddy(block); 6774a9671a0SJoel Fernandes if (buddy && 678ba110db8SJoel Fernandes (gpu_buddy_block_is_free(block) && 679ba110db8SJoel Fernandes gpu_buddy_block_is_free(buddy))) 680ba110db8SJoel Fernandes __gpu_buddy_free(mm, block, false); 6814a9671a0SJoel Fernandes return ERR_PTR(err); 6824a9671a0SJoel Fernandes } 6834a9671a0SJoel Fernandes 684ba110db8SJoel Fernandes static struct gpu_buddy_block * 685ba110db8SJoel Fernandes __gpu_buddy_alloc_range_bias(struct gpu_buddy *mm, 6864a9671a0SJoel Fernandes u64 start, u64 end, 6874a9671a0SJoel Fernandes unsigned int order, 6884a9671a0SJoel Fernandes unsigned long flags) 6894a9671a0SJoel Fernandes { 690ba110db8SJoel Fernandes struct gpu_buddy_block *block; 6914a9671a0SJoel Fernandes bool fallback = false; 6924a9671a0SJoel Fernandes 6934a9671a0SJoel Fernandes block = __alloc_range_bias(mm, start, end, order, 6944a9671a0SJoel Fernandes flags, fallback); 6954a9671a0SJoel Fernandes if (IS_ERR(block)) 6964a9671a0SJoel Fernandes return __alloc_range_bias(mm, start, end, order, 6974a9671a0SJoel Fernandes flags, !fallback); 6984a9671a0SJoel Fernandes 6994a9671a0SJoel Fernandes return block; 7004a9671a0SJoel Fernandes } 7014a9671a0SJoel Fernandes 702ba110db8SJoel Fernandes static struct gpu_buddy_block * 703ba110db8SJoel Fernandes get_maxblock(struct gpu_buddy *mm, 7044a9671a0SJoel Fernandes unsigned int order, 705ba110db8SJoel Fernandes enum gpu_buddy_free_tree tree) 7064a9671a0SJoel Fernandes { 707ba110db8SJoel Fernandes struct gpu_buddy_block *max_block = NULL, *block = NULL; 7084a9671a0SJoel Fernandes struct rb_root *root; 7094a9671a0SJoel Fernandes unsigned int i; 7104a9671a0SJoel Fernandes 7114a9671a0SJoel Fernandes for (i = order; i <= mm->max_order; ++i) { 7124a9671a0SJoel Fernandes root = &mm->free_trees[tree][i]; 7134a9671a0SJoel Fernandes block = rbtree_last_free_block(root); 7144a9671a0SJoel Fernandes if (!block) 7154a9671a0SJoel Fernandes continue; 7164a9671a0SJoel Fernandes 7174a9671a0SJoel Fernandes if (!max_block) { 7184a9671a0SJoel Fernandes max_block = block; 7194a9671a0SJoel Fernandes continue; 7204a9671a0SJoel Fernandes } 7214a9671a0SJoel Fernandes 722ba110db8SJoel Fernandes if (gpu_buddy_block_offset(block) > 723ba110db8SJoel Fernandes gpu_buddy_block_offset(max_block)) { 7244a9671a0SJoel Fernandes max_block = block; 7254a9671a0SJoel Fernandes } 7264a9671a0SJoel Fernandes } 7274a9671a0SJoel Fernandes 7284a9671a0SJoel Fernandes return max_block; 7294a9671a0SJoel Fernandes } 7304a9671a0SJoel Fernandes 731ba110db8SJoel Fernandes static struct gpu_buddy_block * 732ba110db8SJoel Fernandes alloc_from_freetree(struct gpu_buddy *mm, 7334a9671a0SJoel Fernandes unsigned int order, 7344a9671a0SJoel Fernandes unsigned long flags) 7354a9671a0SJoel Fernandes { 736ba110db8SJoel Fernandes struct gpu_buddy_block *block = NULL; 7374a9671a0SJoel Fernandes struct rb_root *root; 738ba110db8SJoel Fernandes enum gpu_buddy_free_tree tree; 7394a9671a0SJoel Fernandes unsigned int tmp; 7404a9671a0SJoel Fernandes int err; 7414a9671a0SJoel Fernandes 742ba110db8SJoel Fernandes tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ? 743ba110db8SJoel Fernandes GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE; 7444a9671a0SJoel Fernandes 745ba110db8SJoel Fernandes if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) { 7464a9671a0SJoel Fernandes block = get_maxblock(mm, order, tree); 7474a9671a0SJoel Fernandes if (block) 7484a9671a0SJoel Fernandes /* Store the obtained block order */ 749ba110db8SJoel Fernandes tmp = gpu_buddy_block_order(block); 7504a9671a0SJoel Fernandes } else { 7514a9671a0SJoel Fernandes for (tmp = order; tmp <= mm->max_order; ++tmp) { 7524a9671a0SJoel Fernandes /* Get RB tree root for this order and tree */ 7534a9671a0SJoel Fernandes root = &mm->free_trees[tree][tmp]; 7544a9671a0SJoel Fernandes block = rbtree_last_free_block(root); 7554a9671a0SJoel Fernandes if (block) 7564a9671a0SJoel Fernandes break; 7574a9671a0SJoel Fernandes } 7584a9671a0SJoel Fernandes } 7594a9671a0SJoel Fernandes 7604a9671a0SJoel Fernandes if (!block) { 7614a9671a0SJoel Fernandes /* Try allocating from the other tree */ 762ba110db8SJoel Fernandes tree = (tree == GPU_BUDDY_CLEAR_TREE) ? 763ba110db8SJoel Fernandes GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE; 7644a9671a0SJoel Fernandes 7654a9671a0SJoel Fernandes for (tmp = order; tmp <= mm->max_order; ++tmp) { 7664a9671a0SJoel Fernandes root = &mm->free_trees[tree][tmp]; 7674a9671a0SJoel Fernandes block = rbtree_last_free_block(root); 7684a9671a0SJoel Fernandes if (block) 7694a9671a0SJoel Fernandes break; 7704a9671a0SJoel Fernandes } 7714a9671a0SJoel Fernandes 7724a9671a0SJoel Fernandes if (!block) 7734a9671a0SJoel Fernandes return ERR_PTR(-ENOSPC); 7744a9671a0SJoel Fernandes } 7754a9671a0SJoel Fernandes 776ba110db8SJoel Fernandes BUG_ON(!gpu_buddy_block_is_free(block)); 7774a9671a0SJoel Fernandes 7784a9671a0SJoel Fernandes while (tmp != order) { 7794a9671a0SJoel Fernandes err = split_block(mm, block); 7804a9671a0SJoel Fernandes if (unlikely(err)) 7814a9671a0SJoel Fernandes goto err_undo; 7824a9671a0SJoel Fernandes 7834a9671a0SJoel Fernandes block = block->right; 7844a9671a0SJoel Fernandes tmp--; 7854a9671a0SJoel Fernandes } 7864a9671a0SJoel Fernandes return block; 7874a9671a0SJoel Fernandes 7884a9671a0SJoel Fernandes err_undo: 7894a9671a0SJoel Fernandes if (tmp != order) 790ba110db8SJoel Fernandes __gpu_buddy_free(mm, block, false); 7914a9671a0SJoel Fernandes return ERR_PTR(err); 7924a9671a0SJoel Fernandes } 7934a9671a0SJoel Fernandes 794ba110db8SJoel Fernandes static int __alloc_range(struct gpu_buddy *mm, 7954a9671a0SJoel Fernandes struct list_head *dfs, 7964a9671a0SJoel Fernandes u64 start, u64 size, 7974a9671a0SJoel Fernandes struct list_head *blocks, 7984a9671a0SJoel Fernandes u64 *total_allocated_on_err) 7994a9671a0SJoel Fernandes { 800ba110db8SJoel Fernandes struct gpu_buddy_block *block; 801ba110db8SJoel Fernandes struct gpu_buddy_block *buddy; 8024a9671a0SJoel Fernandes u64 total_allocated = 0; 8034a9671a0SJoel Fernandes LIST_HEAD(allocated); 8044a9671a0SJoel Fernandes u64 end; 8054a9671a0SJoel Fernandes int err; 8064a9671a0SJoel Fernandes 8074a9671a0SJoel Fernandes end = start + size - 1; 8084a9671a0SJoel Fernandes 8094a9671a0SJoel Fernandes do { 8104a9671a0SJoel Fernandes u64 block_start; 8114a9671a0SJoel Fernandes u64 block_end; 8124a9671a0SJoel Fernandes 8134a9671a0SJoel Fernandes block = list_first_entry_or_null(dfs, 814ba110db8SJoel Fernandes struct gpu_buddy_block, 8154a9671a0SJoel Fernandes tmp_link); 8164a9671a0SJoel Fernandes if (!block) 8174a9671a0SJoel Fernandes break; 8184a9671a0SJoel Fernandes 8194a9671a0SJoel Fernandes list_del(&block->tmp_link); 8204a9671a0SJoel Fernandes 821ba110db8SJoel Fernandes block_start = gpu_buddy_block_offset(block); 822ba110db8SJoel Fernandes block_end = block_start + gpu_buddy_block_size(mm, block) - 1; 8234a9671a0SJoel Fernandes 8244a9671a0SJoel Fernandes if (!overlaps(start, end, block_start, block_end)) 8254a9671a0SJoel Fernandes continue; 8264a9671a0SJoel Fernandes 827ba110db8SJoel Fernandes if (gpu_buddy_block_is_allocated(block)) { 8284a9671a0SJoel Fernandes err = -ENOSPC; 8294a9671a0SJoel Fernandes goto err_free; 8304a9671a0SJoel Fernandes } 8314a9671a0SJoel Fernandes 8324a9671a0SJoel Fernandes if (contains(start, end, block_start, block_end)) { 833ba110db8SJoel Fernandes if (gpu_buddy_block_is_free(block)) { 8344a9671a0SJoel Fernandes mark_allocated(mm, block); 835ba110db8SJoel Fernandes total_allocated += gpu_buddy_block_size(mm, block); 836ba110db8SJoel Fernandes mm->avail -= gpu_buddy_block_size(mm, block); 837ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) 838ba110db8SJoel Fernandes mm->clear_avail -= gpu_buddy_block_size(mm, block); 8394a9671a0SJoel Fernandes list_add_tail(&block->link, &allocated); 8404a9671a0SJoel Fernandes continue; 8414a9671a0SJoel Fernandes } else if (!mm->clear_avail) { 8424a9671a0SJoel Fernandes err = -ENOSPC; 8434a9671a0SJoel Fernandes goto err_free; 8444a9671a0SJoel Fernandes } 8454a9671a0SJoel Fernandes } 8464a9671a0SJoel Fernandes 847ba110db8SJoel Fernandes if (!gpu_buddy_block_is_split(block)) { 8484a9671a0SJoel Fernandes err = split_block(mm, block); 8494a9671a0SJoel Fernandes if (unlikely(err)) 8504a9671a0SJoel Fernandes goto err_undo; 8514a9671a0SJoel Fernandes } 8524a9671a0SJoel Fernandes 8534a9671a0SJoel Fernandes list_add(&block->right->tmp_link, dfs); 8544a9671a0SJoel Fernandes list_add(&block->left->tmp_link, dfs); 8554a9671a0SJoel Fernandes } while (1); 8564a9671a0SJoel Fernandes 8574a9671a0SJoel Fernandes if (total_allocated < size) { 8584a9671a0SJoel Fernandes err = -ENOSPC; 8594a9671a0SJoel Fernandes goto err_free; 8604a9671a0SJoel Fernandes } 8614a9671a0SJoel Fernandes 8624a9671a0SJoel Fernandes list_splice_tail(&allocated, blocks); 8634a9671a0SJoel Fernandes 8644a9671a0SJoel Fernandes return 0; 8654a9671a0SJoel Fernandes 8664a9671a0SJoel Fernandes err_undo: 8674a9671a0SJoel Fernandes /* 8684a9671a0SJoel Fernandes * We really don't want to leave around a bunch of split blocks, since 8694a9671a0SJoel Fernandes * bigger is better, so make sure we merge everything back before we 8704a9671a0SJoel Fernandes * free the allocated blocks. 8714a9671a0SJoel Fernandes */ 8724a9671a0SJoel Fernandes buddy = __get_buddy(block); 8734a9671a0SJoel Fernandes if (buddy && 874ba110db8SJoel Fernandes (gpu_buddy_block_is_free(block) && 875ba110db8SJoel Fernandes gpu_buddy_block_is_free(buddy))) 876ba110db8SJoel Fernandes __gpu_buddy_free(mm, block, false); 8774a9671a0SJoel Fernandes 8784a9671a0SJoel Fernandes err_free: 8794a9671a0SJoel Fernandes if (err == -ENOSPC && total_allocated_on_err) { 8804a9671a0SJoel Fernandes list_splice_tail(&allocated, blocks); 8814a9671a0SJoel Fernandes *total_allocated_on_err = total_allocated; 8824a9671a0SJoel Fernandes } else { 883ba110db8SJoel Fernandes gpu_buddy_free_list_internal(mm, &allocated); 8844a9671a0SJoel Fernandes } 8854a9671a0SJoel Fernandes 8864a9671a0SJoel Fernandes return err; 8874a9671a0SJoel Fernandes } 8884a9671a0SJoel Fernandes 889ba110db8SJoel Fernandes static int __gpu_buddy_alloc_range(struct gpu_buddy *mm, 8904a9671a0SJoel Fernandes u64 start, 8914a9671a0SJoel Fernandes u64 size, 8924a9671a0SJoel Fernandes u64 *total_allocated_on_err, 8934a9671a0SJoel Fernandes struct list_head *blocks) 8944a9671a0SJoel Fernandes { 8954a9671a0SJoel Fernandes LIST_HEAD(dfs); 8964a9671a0SJoel Fernandes int i; 8974a9671a0SJoel Fernandes 8984a9671a0SJoel Fernandes for (i = 0; i < mm->n_roots; ++i) 8994a9671a0SJoel Fernandes list_add_tail(&mm->roots[i]->tmp_link, &dfs); 9004a9671a0SJoel Fernandes 9014a9671a0SJoel Fernandes return __alloc_range(mm, &dfs, start, size, 9024a9671a0SJoel Fernandes blocks, total_allocated_on_err); 9034a9671a0SJoel Fernandes } 9044a9671a0SJoel Fernandes 905ba110db8SJoel Fernandes static int __alloc_contig_try_harder(struct gpu_buddy *mm, 9064a9671a0SJoel Fernandes u64 size, 9074a9671a0SJoel Fernandes u64 min_block_size, 9084a9671a0SJoel Fernandes struct list_head *blocks) 9094a9671a0SJoel Fernandes { 9104a9671a0SJoel Fernandes u64 rhs_offset, lhs_offset, lhs_size, filled; 911ba110db8SJoel Fernandes struct gpu_buddy_block *block; 9124a9671a0SJoel Fernandes unsigned int tree, order; 9134a9671a0SJoel Fernandes LIST_HEAD(blocks_lhs); 9144a9671a0SJoel Fernandes unsigned long pages; 9154a9671a0SJoel Fernandes u64 modify_size; 9164a9671a0SJoel Fernandes int err; 9174a9671a0SJoel Fernandes 9184a9671a0SJoel Fernandes modify_size = rounddown_pow_of_two(size); 9194a9671a0SJoel Fernandes pages = modify_size >> ilog2(mm->chunk_size); 9204a9671a0SJoel Fernandes order = fls(pages) - 1; 9214a9671a0SJoel Fernandes if (order == 0) 9224a9671a0SJoel Fernandes return -ENOSPC; 9234a9671a0SJoel Fernandes 9244a9671a0SJoel Fernandes for_each_free_tree(tree) { 9254a9671a0SJoel Fernandes struct rb_root *root; 9264a9671a0SJoel Fernandes struct rb_node *iter; 9274a9671a0SJoel Fernandes 9284a9671a0SJoel Fernandes root = &mm->free_trees[tree][order]; 9294a9671a0SJoel Fernandes if (rbtree_is_empty(root)) 9304a9671a0SJoel Fernandes continue; 9314a9671a0SJoel Fernandes 9324a9671a0SJoel Fernandes iter = rb_last(root); 9334a9671a0SJoel Fernandes while (iter) { 9344a9671a0SJoel Fernandes block = rbtree_get_free_block(iter); 9354a9671a0SJoel Fernandes 9364a9671a0SJoel Fernandes /* Allocate blocks traversing RHS */ 937ba110db8SJoel Fernandes rhs_offset = gpu_buddy_block_offset(block); 938ba110db8SJoel Fernandes err = __gpu_buddy_alloc_range(mm, rhs_offset, size, 9394a9671a0SJoel Fernandes &filled, blocks); 9404a9671a0SJoel Fernandes if (!err || err != -ENOSPC) 9414a9671a0SJoel Fernandes return err; 9424a9671a0SJoel Fernandes 9434a9671a0SJoel Fernandes lhs_size = max((size - filled), min_block_size); 9444a9671a0SJoel Fernandes if (!IS_ALIGNED(lhs_size, min_block_size)) 9454a9671a0SJoel Fernandes lhs_size = round_up(lhs_size, min_block_size); 9464a9671a0SJoel Fernandes 9474a9671a0SJoel Fernandes /* Allocate blocks traversing LHS */ 948ba110db8SJoel Fernandes lhs_offset = gpu_buddy_block_offset(block) - lhs_size; 949ba110db8SJoel Fernandes err = __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size, 9504a9671a0SJoel Fernandes NULL, &blocks_lhs); 9514a9671a0SJoel Fernandes if (!err) { 9524a9671a0SJoel Fernandes list_splice(&blocks_lhs, blocks); 9534a9671a0SJoel Fernandes return 0; 9544a9671a0SJoel Fernandes } else if (err != -ENOSPC) { 955ba110db8SJoel Fernandes gpu_buddy_free_list_internal(mm, blocks); 9564a9671a0SJoel Fernandes return err; 9574a9671a0SJoel Fernandes } 9584a9671a0SJoel Fernandes /* Free blocks for the next iteration */ 959ba110db8SJoel Fernandes gpu_buddy_free_list_internal(mm, blocks); 9604a9671a0SJoel Fernandes 9614a9671a0SJoel Fernandes iter = rb_prev(iter); 9624a9671a0SJoel Fernandes } 9634a9671a0SJoel Fernandes } 9644a9671a0SJoel Fernandes 9654a9671a0SJoel Fernandes return -ENOSPC; 9664a9671a0SJoel Fernandes } 9674a9671a0SJoel Fernandes 9684a9671a0SJoel Fernandes /** 969ba110db8SJoel Fernandes * gpu_buddy_block_trim - free unused pages 9704a9671a0SJoel Fernandes * 971ba110db8SJoel Fernandes * @mm: GPU buddy manager 9724a9671a0SJoel Fernandes * @start: start address to begin the trimming. 9734a9671a0SJoel Fernandes * @new_size: original size requested 9744a9671a0SJoel Fernandes * @blocks: Input and output list of allocated blocks. 9754a9671a0SJoel Fernandes * MUST contain single block as input to be trimmed. 9764a9671a0SJoel Fernandes * On success will contain the newly allocated blocks 9774a9671a0SJoel Fernandes * making up the @new_size. Blocks always appear in 9784a9671a0SJoel Fernandes * ascending order 9794a9671a0SJoel Fernandes * 9804a9671a0SJoel Fernandes * For contiguous allocation, we round up the size to the nearest 9814a9671a0SJoel Fernandes * power of two value, drivers consume *actual* size, so remaining 9824a9671a0SJoel Fernandes * portions are unused and can be optionally freed with this function 9834a9671a0SJoel Fernandes * 9844a9671a0SJoel Fernandes * Returns: 9854a9671a0SJoel Fernandes * 0 on success, error code on failure. 9864a9671a0SJoel Fernandes */ 987ba110db8SJoel Fernandes int gpu_buddy_block_trim(struct gpu_buddy *mm, 9884a9671a0SJoel Fernandes u64 *start, 9894a9671a0SJoel Fernandes u64 new_size, 9904a9671a0SJoel Fernandes struct list_head *blocks) 9914a9671a0SJoel Fernandes { 992ba110db8SJoel Fernandes struct gpu_buddy_block *parent; 993ba110db8SJoel Fernandes struct gpu_buddy_block *block; 9944a9671a0SJoel Fernandes u64 block_start, block_end; 9954a9671a0SJoel Fernandes LIST_HEAD(dfs); 9964a9671a0SJoel Fernandes u64 new_start; 9974a9671a0SJoel Fernandes int err; 9984a9671a0SJoel Fernandes 9994a9671a0SJoel Fernandes if (!list_is_singular(blocks)) 10004a9671a0SJoel Fernandes return -EINVAL; 10014a9671a0SJoel Fernandes 10024a9671a0SJoel Fernandes block = list_first_entry(blocks, 1003ba110db8SJoel Fernandes struct gpu_buddy_block, 10044a9671a0SJoel Fernandes link); 10054a9671a0SJoel Fernandes 1006ba110db8SJoel Fernandes block_start = gpu_buddy_block_offset(block); 1007ba110db8SJoel Fernandes block_end = block_start + gpu_buddy_block_size(mm, block); 10084a9671a0SJoel Fernandes 1009ba110db8SJoel Fernandes if (WARN_ON(!gpu_buddy_block_is_allocated(block))) 10104a9671a0SJoel Fernandes return -EINVAL; 10114a9671a0SJoel Fernandes 1012ba110db8SJoel Fernandes if (new_size > gpu_buddy_block_size(mm, block)) 10134a9671a0SJoel Fernandes return -EINVAL; 10144a9671a0SJoel Fernandes 10154a9671a0SJoel Fernandes if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size)) 10164a9671a0SJoel Fernandes return -EINVAL; 10174a9671a0SJoel Fernandes 1018ba110db8SJoel Fernandes if (new_size == gpu_buddy_block_size(mm, block)) 10194a9671a0SJoel Fernandes return 0; 10204a9671a0SJoel Fernandes 10214a9671a0SJoel Fernandes new_start = block_start; 10224a9671a0SJoel Fernandes if (start) { 10234a9671a0SJoel Fernandes new_start = *start; 10244a9671a0SJoel Fernandes 10254a9671a0SJoel Fernandes if (new_start < block_start) 10264a9671a0SJoel Fernandes return -EINVAL; 10274a9671a0SJoel Fernandes 10284a9671a0SJoel Fernandes if (!IS_ALIGNED(new_start, mm->chunk_size)) 10294a9671a0SJoel Fernandes return -EINVAL; 10304a9671a0SJoel Fernandes 10314a9671a0SJoel Fernandes if (range_overflows(new_start, new_size, block_end)) 10324a9671a0SJoel Fernandes return -EINVAL; 10334a9671a0SJoel Fernandes } 10344a9671a0SJoel Fernandes 10354a9671a0SJoel Fernandes list_del(&block->link); 10364a9671a0SJoel Fernandes mark_free(mm, block); 1037ba110db8SJoel Fernandes mm->avail += gpu_buddy_block_size(mm, block); 1038ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) 1039ba110db8SJoel Fernandes mm->clear_avail += gpu_buddy_block_size(mm, block); 10404a9671a0SJoel Fernandes 10414a9671a0SJoel Fernandes /* Prevent recursively freeing this node */ 10424a9671a0SJoel Fernandes parent = block->parent; 10434a9671a0SJoel Fernandes block->parent = NULL; 10444a9671a0SJoel Fernandes 10454a9671a0SJoel Fernandes list_add(&block->tmp_link, &dfs); 10464a9671a0SJoel Fernandes err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); 10474a9671a0SJoel Fernandes if (err) { 10484a9671a0SJoel Fernandes mark_allocated(mm, block); 1049ba110db8SJoel Fernandes mm->avail -= gpu_buddy_block_size(mm, block); 1050ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) 1051ba110db8SJoel Fernandes mm->clear_avail -= gpu_buddy_block_size(mm, block); 10524a9671a0SJoel Fernandes list_add(&block->link, blocks); 10534a9671a0SJoel Fernandes } 10544a9671a0SJoel Fernandes 10554a9671a0SJoel Fernandes block->parent = parent; 10564a9671a0SJoel Fernandes return err; 10574a9671a0SJoel Fernandes } 1058ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_block_trim); 10594a9671a0SJoel Fernandes 1060ba110db8SJoel Fernandes static struct gpu_buddy_block * 1061ba110db8SJoel Fernandes __gpu_buddy_alloc_blocks(struct gpu_buddy *mm, 10624a9671a0SJoel Fernandes u64 start, u64 end, 10634a9671a0SJoel Fernandes unsigned int order, 10644a9671a0SJoel Fernandes unsigned long flags) 10654a9671a0SJoel Fernandes { 1066ba110db8SJoel Fernandes if (flags & GPU_BUDDY_RANGE_ALLOCATION) 10674a9671a0SJoel Fernandes /* Allocate traversing within the range */ 1068ba110db8SJoel Fernandes return __gpu_buddy_alloc_range_bias(mm, start, end, 10694a9671a0SJoel Fernandes order, flags); 10704a9671a0SJoel Fernandes else 10714a9671a0SJoel Fernandes /* Allocate from freetree */ 10724a9671a0SJoel Fernandes return alloc_from_freetree(mm, order, flags); 10734a9671a0SJoel Fernandes } 10744a9671a0SJoel Fernandes 10754a9671a0SJoel Fernandes /** 1076ba110db8SJoel Fernandes * gpu_buddy_alloc_blocks - allocate power-of-two blocks 10774a9671a0SJoel Fernandes * 1078ba110db8SJoel Fernandes * @mm: GPU buddy manager to allocate from 10794a9671a0SJoel Fernandes * @start: start of the allowed range for this block 10804a9671a0SJoel Fernandes * @end: end of the allowed range for this block 10814a9671a0SJoel Fernandes * @size: size of the allocation in bytes 10824a9671a0SJoel Fernandes * @min_block_size: alignment of the allocation 10834a9671a0SJoel Fernandes * @blocks: output list head to add allocated blocks 1084ba110db8SJoel Fernandes * @flags: GPU_BUDDY_*_ALLOCATION flags 10854a9671a0SJoel Fernandes * 10864a9671a0SJoel Fernandes * alloc_range_bias() called on range limitations, which traverses 10874a9671a0SJoel Fernandes * the tree and returns the desired block. 10884a9671a0SJoel Fernandes * 10894a9671a0SJoel Fernandes * alloc_from_freetree() called when *no* range restrictions 10904a9671a0SJoel Fernandes * are enforced, which picks the block from the freetree. 10914a9671a0SJoel Fernandes * 10924a9671a0SJoel Fernandes * Returns: 10934a9671a0SJoel Fernandes * 0 on success, error code on failure. 10944a9671a0SJoel Fernandes */ 1095ba110db8SJoel Fernandes int gpu_buddy_alloc_blocks(struct gpu_buddy *mm, 10964a9671a0SJoel Fernandes u64 start, u64 end, u64 size, 10974a9671a0SJoel Fernandes u64 min_block_size, 10984a9671a0SJoel Fernandes struct list_head *blocks, 10994a9671a0SJoel Fernandes unsigned long flags) 11004a9671a0SJoel Fernandes { 1101ba110db8SJoel Fernandes struct gpu_buddy_block *block = NULL; 11024a9671a0SJoel Fernandes u64 original_size, original_min_size; 11034a9671a0SJoel Fernandes unsigned int min_order, order; 11044a9671a0SJoel Fernandes LIST_HEAD(allocated); 11054a9671a0SJoel Fernandes unsigned long pages; 11064a9671a0SJoel Fernandes int err; 11074a9671a0SJoel Fernandes 11084a9671a0SJoel Fernandes if (size < mm->chunk_size) 11094a9671a0SJoel Fernandes return -EINVAL; 11104a9671a0SJoel Fernandes 11114a9671a0SJoel Fernandes if (min_block_size < mm->chunk_size) 11124a9671a0SJoel Fernandes return -EINVAL; 11134a9671a0SJoel Fernandes 11144a9671a0SJoel Fernandes if (!is_power_of_2(min_block_size)) 11154a9671a0SJoel Fernandes return -EINVAL; 11164a9671a0SJoel Fernandes 11174a9671a0SJoel Fernandes if (!IS_ALIGNED(start | end | size, mm->chunk_size)) 11184a9671a0SJoel Fernandes return -EINVAL; 11194a9671a0SJoel Fernandes 11204a9671a0SJoel Fernandes if (end > mm->size) 11214a9671a0SJoel Fernandes return -EINVAL; 11224a9671a0SJoel Fernandes 11234a9671a0SJoel Fernandes if (range_overflows(start, size, mm->size)) 11244a9671a0SJoel Fernandes return -EINVAL; 11254a9671a0SJoel Fernandes 11264a9671a0SJoel Fernandes /* Actual range allocation */ 11274a9671a0SJoel Fernandes if (start + size == end) { 11284a9671a0SJoel Fernandes if (!IS_ALIGNED(start | end, min_block_size)) 11294a9671a0SJoel Fernandes return -EINVAL; 11304a9671a0SJoel Fernandes 1131ba110db8SJoel Fernandes return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks); 11324a9671a0SJoel Fernandes } 11334a9671a0SJoel Fernandes 11344a9671a0SJoel Fernandes original_size = size; 11354a9671a0SJoel Fernandes original_min_size = min_block_size; 11364a9671a0SJoel Fernandes 11374a9671a0SJoel Fernandes /* Roundup the size to power of 2 */ 1138ba110db8SJoel Fernandes if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) { 11394a9671a0SJoel Fernandes size = roundup_pow_of_two(size); 11404a9671a0SJoel Fernandes min_block_size = size; 11414a9671a0SJoel Fernandes /* Align size value to min_block_size */ 11424a9671a0SJoel Fernandes } else if (!IS_ALIGNED(size, min_block_size)) { 11434a9671a0SJoel Fernandes size = round_up(size, min_block_size); 11444a9671a0SJoel Fernandes } 11454a9671a0SJoel Fernandes 11464a9671a0SJoel Fernandes pages = size >> ilog2(mm->chunk_size); 11474a9671a0SJoel Fernandes order = fls(pages) - 1; 11484a9671a0SJoel Fernandes min_order = ilog2(min_block_size) - ilog2(mm->chunk_size); 11494a9671a0SJoel Fernandes 11504a9671a0SJoel Fernandes if (order > mm->max_order || size > mm->size) { 1151ba110db8SJoel Fernandes if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) && 1152ba110db8SJoel Fernandes !(flags & GPU_BUDDY_RANGE_ALLOCATION)) 11534a9671a0SJoel Fernandes return __alloc_contig_try_harder(mm, original_size, 11544a9671a0SJoel Fernandes original_min_size, blocks); 11554a9671a0SJoel Fernandes 11564a9671a0SJoel Fernandes return -EINVAL; 11574a9671a0SJoel Fernandes } 11584a9671a0SJoel Fernandes 11594a9671a0SJoel Fernandes do { 11604a9671a0SJoel Fernandes order = min(order, (unsigned int)fls(pages) - 1); 11614a9671a0SJoel Fernandes BUG_ON(order > mm->max_order); 11624a9671a0SJoel Fernandes BUG_ON(order < min_order); 11634a9671a0SJoel Fernandes 11644a9671a0SJoel Fernandes do { 1165ba110db8SJoel Fernandes block = __gpu_buddy_alloc_blocks(mm, start, 11664a9671a0SJoel Fernandes end, 11674a9671a0SJoel Fernandes order, 11684a9671a0SJoel Fernandes flags); 11694a9671a0SJoel Fernandes if (!IS_ERR(block)) 11704a9671a0SJoel Fernandes break; 11714a9671a0SJoel Fernandes 11724a9671a0SJoel Fernandes if (order-- == min_order) { 11734a9671a0SJoel Fernandes /* Try allocation through force merge method */ 11744a9671a0SJoel Fernandes if (mm->clear_avail && 11754a9671a0SJoel Fernandes !__force_merge(mm, start, end, min_order)) { 1176ba110db8SJoel Fernandes block = __gpu_buddy_alloc_blocks(mm, start, 11774a9671a0SJoel Fernandes end, 11784a9671a0SJoel Fernandes min_order, 11794a9671a0SJoel Fernandes flags); 11804a9671a0SJoel Fernandes if (!IS_ERR(block)) { 11814a9671a0SJoel Fernandes order = min_order; 11824a9671a0SJoel Fernandes break; 11834a9671a0SJoel Fernandes } 11844a9671a0SJoel Fernandes } 11854a9671a0SJoel Fernandes 11864a9671a0SJoel Fernandes /* 11874a9671a0SJoel Fernandes * Try contiguous block allocation through 11884a9671a0SJoel Fernandes * try harder method. 11894a9671a0SJoel Fernandes */ 1190ba110db8SJoel Fernandes if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION && 1191ba110db8SJoel Fernandes !(flags & GPU_BUDDY_RANGE_ALLOCATION)) 11924a9671a0SJoel Fernandes return __alloc_contig_try_harder(mm, 11934a9671a0SJoel Fernandes original_size, 11944a9671a0SJoel Fernandes original_min_size, 11954a9671a0SJoel Fernandes blocks); 11964a9671a0SJoel Fernandes err = -ENOSPC; 11974a9671a0SJoel Fernandes goto err_free; 11984a9671a0SJoel Fernandes } 11994a9671a0SJoel Fernandes } while (1); 12004a9671a0SJoel Fernandes 12014a9671a0SJoel Fernandes mark_allocated(mm, block); 1202ba110db8SJoel Fernandes mm->avail -= gpu_buddy_block_size(mm, block); 1203ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) 1204ba110db8SJoel Fernandes mm->clear_avail -= gpu_buddy_block_size(mm, block); 12054a9671a0SJoel Fernandes kmemleak_update_trace(block); 12064a9671a0SJoel Fernandes list_add_tail(&block->link, &allocated); 12074a9671a0SJoel Fernandes 12084a9671a0SJoel Fernandes pages -= BIT(order); 12094a9671a0SJoel Fernandes 12104a9671a0SJoel Fernandes if (!pages) 12114a9671a0SJoel Fernandes break; 12124a9671a0SJoel Fernandes } while (1); 12134a9671a0SJoel Fernandes 12144a9671a0SJoel Fernandes /* Trim the allocated block to the required size */ 1215ba110db8SJoel Fernandes if (!(flags & GPU_BUDDY_TRIM_DISABLE) && 12164a9671a0SJoel Fernandes original_size != size) { 12174a9671a0SJoel Fernandes struct list_head *trim_list; 12184a9671a0SJoel Fernandes LIST_HEAD(temp); 12194a9671a0SJoel Fernandes u64 trim_size; 12204a9671a0SJoel Fernandes 12214a9671a0SJoel Fernandes trim_list = &allocated; 12224a9671a0SJoel Fernandes trim_size = original_size; 12234a9671a0SJoel Fernandes 12244a9671a0SJoel Fernandes if (!list_is_singular(&allocated)) { 12254a9671a0SJoel Fernandes block = list_last_entry(&allocated, typeof(*block), link); 12264a9671a0SJoel Fernandes list_move(&block->link, &temp); 12274a9671a0SJoel Fernandes trim_list = &temp; 1228ba110db8SJoel Fernandes trim_size = gpu_buddy_block_size(mm, block) - 12294a9671a0SJoel Fernandes (size - original_size); 12304a9671a0SJoel Fernandes } 12314a9671a0SJoel Fernandes 1232ba110db8SJoel Fernandes gpu_buddy_block_trim(mm, 12334a9671a0SJoel Fernandes NULL, 12344a9671a0SJoel Fernandes trim_size, 12354a9671a0SJoel Fernandes trim_list); 12364a9671a0SJoel Fernandes 12374a9671a0SJoel Fernandes if (!list_empty(&temp)) 12384a9671a0SJoel Fernandes list_splice_tail(trim_list, &allocated); 12394a9671a0SJoel Fernandes } 12404a9671a0SJoel Fernandes 12414a9671a0SJoel Fernandes list_splice_tail(&allocated, blocks); 12424a9671a0SJoel Fernandes return 0; 12434a9671a0SJoel Fernandes 12444a9671a0SJoel Fernandes err_free: 1245ba110db8SJoel Fernandes gpu_buddy_free_list_internal(mm, &allocated); 12464a9671a0SJoel Fernandes return err; 12474a9671a0SJoel Fernandes } 1248ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_alloc_blocks); 12494a9671a0SJoel Fernandes 12504a9671a0SJoel Fernandes /** 1251ba110db8SJoel Fernandes * gpu_buddy_block_print - print block information 12524a9671a0SJoel Fernandes * 1253ba110db8SJoel Fernandes * @mm: GPU buddy manager 1254ba110db8SJoel Fernandes * @block: GPU buddy block 12554a9671a0SJoel Fernandes */ 1256ba110db8SJoel Fernandes void gpu_buddy_block_print(struct gpu_buddy *mm, 1257ba110db8SJoel Fernandes struct gpu_buddy_block *block) 12584a9671a0SJoel Fernandes { 1259ba110db8SJoel Fernandes u64 start = gpu_buddy_block_offset(block); 1260ba110db8SJoel Fernandes u64 size = gpu_buddy_block_size(mm, block); 12614a9671a0SJoel Fernandes 1262ba110db8SJoel Fernandes pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size); 12634a9671a0SJoel Fernandes } 1264ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_block_print); 12654a9671a0SJoel Fernandes 12664a9671a0SJoel Fernandes /** 1267ba110db8SJoel Fernandes * gpu_buddy_print - print allocator state 12684a9671a0SJoel Fernandes * 1269ba110db8SJoel Fernandes * @mm: GPU buddy manager 1270ba110db8SJoel Fernandes * @p: GPU printer to use 12714a9671a0SJoel Fernandes */ 1272ba110db8SJoel Fernandes void gpu_buddy_print(struct gpu_buddy *mm) 12734a9671a0SJoel Fernandes { 12744a9671a0SJoel Fernandes int order; 12754a9671a0SJoel Fernandes 1276ba110db8SJoel Fernandes pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n", 12774a9671a0SJoel Fernandes mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20); 12784a9671a0SJoel Fernandes 12794a9671a0SJoel Fernandes for (order = mm->max_order; order >= 0; order--) { 1280ba110db8SJoel Fernandes struct gpu_buddy_block *block, *tmp; 12814a9671a0SJoel Fernandes struct rb_root *root; 12824a9671a0SJoel Fernandes u64 count = 0, free; 12834a9671a0SJoel Fernandes unsigned int tree; 12844a9671a0SJoel Fernandes 12854a9671a0SJoel Fernandes for_each_free_tree(tree) { 12864a9671a0SJoel Fernandes root = &mm->free_trees[tree][order]; 12874a9671a0SJoel Fernandes 12884a9671a0SJoel Fernandes rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) { 1289ba110db8SJoel Fernandes BUG_ON(!gpu_buddy_block_is_free(block)); 12904a9671a0SJoel Fernandes count++; 12914a9671a0SJoel Fernandes } 12924a9671a0SJoel Fernandes } 12934a9671a0SJoel Fernandes 12944a9671a0SJoel Fernandes free = count * (mm->chunk_size << order); 12954a9671a0SJoel Fernandes if (free < SZ_1M) 1296ba110db8SJoel Fernandes pr_info("order-%2d free: %8llu KiB, blocks: %llu\n", 1297ba110db8SJoel Fernandes order, free >> 10, count); 12984a9671a0SJoel Fernandes else 1299ba110db8SJoel Fernandes pr_info("order-%2d free: %8llu MiB, blocks: %llu\n", 1300ba110db8SJoel Fernandes order, free >> 20, count); 13014a9671a0SJoel Fernandes } 13024a9671a0SJoel Fernandes } 1303ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_print); 13044a9671a0SJoel Fernandes 1305ba110db8SJoel Fernandes static void gpu_buddy_module_exit(void) 13064a9671a0SJoel Fernandes { 13074a9671a0SJoel Fernandes kmem_cache_destroy(slab_blocks); 13084a9671a0SJoel Fernandes } 13094a9671a0SJoel Fernandes 1310ba110db8SJoel Fernandes static int __init gpu_buddy_module_init(void) 13114a9671a0SJoel Fernandes { 1312ba110db8SJoel Fernandes slab_blocks = KMEM_CACHE(gpu_buddy_block, 0); 13134a9671a0SJoel Fernandes if (!slab_blocks) 13144a9671a0SJoel Fernandes return -ENOMEM; 13154a9671a0SJoel Fernandes 13164a9671a0SJoel Fernandes return 0; 13174a9671a0SJoel Fernandes } 13184a9671a0SJoel Fernandes 1319ba110db8SJoel Fernandes module_init(gpu_buddy_module_init); 1320ba110db8SJoel Fernandes module_exit(gpu_buddy_module_exit); 13214a9671a0SJoel Fernandes 1322ba110db8SJoel Fernandes MODULE_DESCRIPTION("GPU Buddy Allocator"); 13234a9671a0SJoel Fernandes MODULE_LICENSE("Dual MIT/GPL"); 1324