// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <kunit/test-bug.h>

#include <linux/export.h>
#include <linux/kmemleak.h>
#include <linux/module.h>
#include <linux/sizes.h>

#include <linux/gpu_buddy.h>

/* Slab cache backing all struct gpu_buddy_block allocations. */
static struct kmem_cache *slab_blocks;

/*
 * Allocate and initialize the metadata for one buddy block.
 *
 * The byte @offset and the block @order are packed together into
 * block->header (offset in the high bits, order in the low bits); the
 * trailing BUG_ON verifies the packing did not spill into the reserved
 * GPU_BUDDY_HEADER_UNUSED bits, i.e. @offset is assumed to leave the
 * low header bits free (offset aligned to the chunk size).
 *
 * @parent may be NULL for a root block. Returns NULL on allocation
 * failure.
 */
static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
					       struct gpu_buddy_block *parent,
					       unsigned int order,
					       u64 offset)
{
	struct gpu_buddy_block *block;

	BUG_ON(order > GPU_BUDDY_MAX_ORDER);

	/* Zeroed allocation: state bits, left/right and rb node start clean. */
	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
	if (!block)
		return NULL;

	block->header = offset;
	block->header |= order;
	block->parent = parent;

	RB_CLEAR_NODE(&block->rb);

	BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
	return block;
}

/* Release block metadata back to the slab cache (metadata only — no
 * accounting or tree manipulation happens here). */
static void gpu_block_free(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	kmem_cache_free(slab_blocks, block);
}
/*
 * Pick the free tree a block belongs in: cleared blocks live in the
 * CLEAR tree, everything else in the DIRTY tree.
 */
static enum gpu_buddy_free_tree
get_block_tree(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_is_clear(block) ?
	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
}

/* Map an rb_node back to its containing block; tolerates NULL. */
static struct gpu_buddy_block *
rbtree_get_free_block(const struct rb_node *node)
{
	return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
}

/* Highest-offset free block of @root (trees are ordered by offset),
 * or NULL when the tree is empty. */
static struct gpu_buddy_block *
rbtree_last_free_block(struct rb_root *root)
{
	return rbtree_get_free_block(rb_last(root));
}

static bool rbtree_is_empty(struct rb_root *root)
{
	return RB_EMPTY_ROOT(root);
}

/* Ordering predicate: free blocks are keyed by their byte offset. */
static bool gpu_buddy_block_offset_less(const struct gpu_buddy_block *block,
					const struct gpu_buddy_block *node)
{
	return gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node);
}

/* rb_add() adapter around gpu_buddy_block_offset_less(). */
static bool rbtree_block_offset_less(struct rb_node *block,
				     const struct rb_node *node)
{
	return gpu_buddy_block_offset_less(rbtree_get_free_block(block),
					   rbtree_get_free_block(node));
}

/*
 * Insert @block into the per-order root of the requested free @tree.
 * The caller chooses the tree explicitly (see mark_free() vs
 * gpu_buddy_reset_clear(), which migrates between trees).
 */
static void rbtree_insert(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block,
			  enum gpu_buddy_free_tree tree)
{
	rb_add(&block->rb,
	       &mm->free_trees[tree][gpu_buddy_block_order(block)],
	       rbtree_block_offset_less);
}

/*
 * Remove @block from its free tree. The tree is derived from the
 * block's current clear bit, so this must run *before* any change to
 * that bit or the wrong tree would be targeted.
 */
static void rbtree_remove(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block)
{
	unsigned int order = gpu_buddy_block_order(block);
	enum gpu_buddy_free_tree tree;
	struct rb_root *root;

	tree = get_block_tree(block);
	root = &mm->free_trees[tree][order];

	rb_erase(&block->rb, root);
	RB_CLEAR_NODE(&block->rb);
}

/* Clear the "memory is zeroed" bit in the packed header. */
static void clear_reset(struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_CLEAR;
}

/* Set the "memory is zeroed" bit in the packed header. */
static void mark_cleared(struct gpu_buddy_block *block)
{
	block->header |= GPU_BUDDY_HEADER_CLEAR;
}

/* Transition FREE -> ALLOCATED and drop the block from its free tree. */
static void mark_allocated(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_ALLOCATED;

	rbtree_remove(mm, block);
}

/*
 * Transition to FREE and insert the block into the free tree matching
 * its current clear state.
 */
static void mark_free(struct gpu_buddy *mm,
		      struct gpu_buddy_block *block)
{
	enum gpu_buddy_free_tree tree;

	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_FREE;

	tree = get_block_tree(block);
	rbtree_insert(mm, block, tree);
}

/* Transition FREE -> SPLIT (block now lives through its children) and
 * drop it from its free tree. */
static void mark_split(struct gpu_buddy *mm,
		       struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_SPLIT;

	rbtree_remove(mm, block);
}

/* Inclusive range intersection test on [s1,e1] vs [s2,e2]. */
static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
{
	return s1 <= e2 && e1 >= s2;
}

/* True when [s2,e2] lies entirely inside [s1,e1] (inclusive bounds). */
static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
{
	return s1 <= s2 && e1 >= e2;
}

/*
 * Return the sibling of @block under its parent, or NULL for a root
 * block (which has no buddy to merge with).
 */
static struct gpu_buddy_block *
__get_buddy(struct gpu_buddy_block *block)
{
	struct gpu_buddy_block *parent;

	parent = block->parent;
	if (!parent)
		return NULL;

	if (parent->left == block)
		return parent->right;

	return parent->left;
}

/*
 * Free @block back into the allocator, merging it with its buddy up
 * the tree as far as possible.
 *
 * Normal mode (!@force_merge): merging stops when the buddy's clear
 * state differs from the block's, so clear accounting stays exact; when
 * both halves are clear the parent inherits the clear bit.
 *
 * Force mode (@force_merge): clear-state differences are ignored and
 * mm->clear_avail is decremented for any clear buddy consumed, since
 * the merged parent is not marked clear.
 *
 * The child metadata is released after each merge; the final
 * (unmergeable) block is marked free. Returns the order of that block,
 * which callers (see __force_merge()) use to detect whether a large
 * enough block was produced.
 */
static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
				     struct gpu_buddy_block *block,
				     bool force_merge)
{
	struct gpu_buddy_block *parent;
	unsigned int order;

	while ((parent = block->parent)) {
		struct gpu_buddy_block *buddy;

		buddy = __get_buddy(block);

		if (!gpu_buddy_block_is_free(buddy))
			break;

		if (!force_merge) {
			/*
			 * Check the block and its buddy clear state and exit
			 * the loop if they both have the dissimilar state.
			 */
			if (gpu_buddy_block_is_clear(block) !=
			    gpu_buddy_block_is_clear(buddy))
				break;

			if (gpu_buddy_block_is_clear(block))
				mark_cleared(parent);
		}

		/* Buddy must leave its free tree before its metadata dies. */
		rbtree_remove(mm, buddy);
		if (force_merge && gpu_buddy_block_is_clear(buddy))
			mm->clear_avail -= gpu_buddy_block_size(mm, buddy);

		gpu_block_free(mm, block);
		gpu_block_free(mm, buddy);

		block = parent;
	}

	order = gpu_buddy_block_order(block);
	mark_free(mm, block);

	return order;
}

/*
 * Scan the free trees below @min_order and force-merge buddy pairs
 * whose span lies inside [@start, @end], until a free block of at least
 * @min_order emerges.
 *
 * Only blocks whose buddy is also free are candidates; by construction
 * the pair must have differing clear state (same-state pairs would
 * already have merged on the normal free path), which the WARN_ON
 * asserts. Returns 0 once a big-enough block exists, -ENOMEM when no
 * further merging is possible, -EINVAL for an out-of-range @min_order.
 */
static int __force_merge(struct gpu_buddy *mm,
			 u64 start,
			 u64 end,
			 unsigned int min_order)
{
	unsigned int tree, order;
	int i;

	if (!min_order)
		return -ENOMEM;

	if (min_order > mm->max_order)
		return -EINVAL;

	for_each_free_tree(tree) {
		/* Walk orders high-to-low; fewer merges needed from higher orders. */
		for (i = min_order - 1; i >= 0; i--) {
			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);

			while (iter) {
				struct gpu_buddy_block *block, *buddy;
				u64 block_start, block_end;

				block = rbtree_get_free_block(iter);
				iter = rb_prev(iter);

				/* Root blocks have no buddy to merge with. */
				if (!block || !block->parent)
					continue;

				block_start = gpu_buddy_block_offset(block);
				block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

				if (!contains(start, end, block_start, block_end))
					continue;

				buddy = __get_buddy(block);
				if (!gpu_buddy_block_is_free(buddy))
					continue;

				WARN_ON(gpu_buddy_block_is_clear(block) ==
					gpu_buddy_block_is_clear(buddy));

				/*
				 * Advance to the next node when the current node is the buddy,
				 * as freeing the block will also remove its buddy from the tree.
				 */
				if (iter == &buddy->rb)
					iter = rb_prev(iter);

				rbtree_remove(mm, block);
				if (gpu_buddy_block_is_clear(block))
					mm->clear_avail -= gpu_buddy_block_size(mm, block);

				order = __gpu_buddy_free(mm, block, true);
				if (order >= min_order)
					return 0;
			}
		}
	}

	return -ENOMEM;
}

/**
 * gpu_buddy_init - init memory manager
 *
 * @mm: GPU buddy manager to initialize
 * @size: size in bytes to manage
 * @chunk_size: minimum page size in bytes for our allocations
 *
 * Initializes the memory manager and its resources.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
{
	unsigned int i, j, root_count = 0;
	u64 offset = 0;

	if (size < chunk_size)
		return -EINVAL;

	/* Minimum granularity is one 4K page. */
	if (chunk_size < SZ_4K)
		return -EINVAL;

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	size = round_down(size, chunk_size);

	mm->size = size;
	mm->avail = size;
	mm->clear_avail = 0;
	mm->chunk_size = chunk_size;
	mm->max_order = ilog2(size) - ilog2(chunk_size);

	BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);

	/* One array of per-order rb_roots per free tree (clear/dirty). */
	mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
				       sizeof(*mm->free_trees),
				       GFP_KERNEL);
	if (!mm->free_trees)
		return -ENOMEM;

	for_each_free_tree(i) {
		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
						  sizeof(struct rb_root),
						  GFP_KERNEL);
		if (!mm->free_trees[i])
			goto out_free_tree;

		for (j = 0; j <= mm->max_order; ++j)
			mm->free_trees[i][j] = RB_ROOT;
	}

	/* One root block per set bit: size decomposes into powers of two. */
	mm->n_roots = hweight64(size);

	mm->roots = kmalloc_array(mm->n_roots,
				  sizeof(struct gpu_buddy_block *),
				  GFP_KERNEL);
	if (!mm->roots)
		goto out_free_tree;

	/*
	 * Split into power-of-two blocks, in case we are given a size that is
	 * not itself a power-of-two.
	 */
	do {
		struct gpu_buddy_block *root;
		unsigned int order;
		u64 root_size;

		/* Largest power-of-two chunk that still fits in the remainder. */
		order = ilog2(size) - ilog2(chunk_size);
		root_size = chunk_size << order;

		root = gpu_block_alloc(mm, NULL, order, offset);
		if (!root)
			goto out_free_roots;

		mark_free(mm, root);

		BUG_ON(root_count > mm->max_order);
		BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);

		mm->roots[root_count] = root;

		offset += root_size;
		size -= root_size;
		root_count++;
	} while (size);

	return 0;

out_free_roots:
	while (root_count--)
		gpu_block_free(mm, mm->roots[root_count]);
	kfree(mm->roots);
out_free_tree:
	/* i is the tree index left over from the for_each_free_tree() loop:
	 * trees [0, i) were successfully allocated and are freed here. */
	while (i--)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	return -ENOMEM;
}
EXPORT_SYMBOL(gpu_buddy_init);

/**
 * gpu_buddy_fini - tear down the memory manager
 *
 * @mm: GPU buddy manager to free
 *
 * Cleanup memory manager resources and the freetree
 */
void gpu_buddy_fini(struct gpu_buddy *mm)
{
	u64 root_size, size, start;
	unsigned int order;
	int i;

	size = mm->size;

	for (i = 0; i < mm->n_roots; ++i) {
		/* Recompute each root's order the same way init did. */
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		/* Coalesce any split-but-free remnants back into the root. */
		__force_merge(mm, start, start + size, order);

		/* A non-free root here means the caller leaked allocations. */
		if (WARN_ON(!gpu_buddy_block_is_free(mm->roots[i])))
			kunit_fail_current_test("buddy_fini() root");

		gpu_block_free(mm, mm->roots[i]);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	WARN_ON(mm->avail != mm->size);

	for_each_free_tree(i)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	kfree(mm->roots);
}
EXPORT_SYMBOL(gpu_buddy_fini);

/*
 * Split a free block into two half-order children.
 *
 * Children inherit the parent's clear bit (and the parent's bit is
 * reset, since clear state is only tracked on leaves); both children
 * are inserted into the free trees and the parent transitions to
 * SPLIT. Returns -ENOMEM if either child's metadata cannot be
 * allocated, leaving @block untouched.
 */
static int split_block(struct gpu_buddy *mm,
		       struct gpu_buddy_block *block)
{
	unsigned int block_order = gpu_buddy_block_order(block) - 1;
	u64 offset = gpu_buddy_block_offset(block);

	BUG_ON(!gpu_buddy_block_is_free(block));
	/* Order-0 blocks are the minimum granularity and cannot split. */
	BUG_ON(!gpu_buddy_block_order(block));

	block->left = gpu_block_alloc(mm, block, block_order, offset);
	if (!block->left)
		return -ENOMEM;

	/* Right child starts at the midpoint of the parent's span. */
	block->right = gpu_block_alloc(mm, block, block_order,
				       offset + (mm->chunk_size << block_order));
	if (!block->right) {
		gpu_block_free(mm, block->left);
		return -ENOMEM;
	}

	mark_split(mm, block);

	if (gpu_buddy_block_is_clear(block)) {
		mark_cleared(block->left);
		mark_cleared(block->right);
		clear_reset(block);
	}

	mark_free(mm, block->left);
	mark_free(mm, block->right);

	return 0;
}

/**
 * gpu_get_buddy - get buddy address
 *
 * @block: GPU buddy block
 *
 * Returns the corresponding buddy block for @block, or NULL
 * if this is a root block and can't be merged further.
 * Requires some kind of locking to protect against
 * any concurrent allocate and free operations.
 */
struct gpu_buddy_block *
gpu_get_buddy(struct gpu_buddy_block *block)
{
	return __get_buddy(block);
}
EXPORT_SYMBOL(gpu_get_buddy);

/**
 * gpu_buddy_reset_clear - reset blocks clear state
 *
 * @mm: GPU buddy manager
 * @is_clear: blocks clear state
 *
 * Reset the clear state based on @is_clear value for each block
 * in the freetree.
 */
void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
{
	enum gpu_buddy_free_tree src_tree, dst_tree;
	u64 root_size, size, start;
	unsigned int order;
	int i;

	/* First coalesce as much as possible so fewer blocks need moving. */
	size = mm->size;
	for (i = 0; i < mm->n_roots; ++i) {
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		__force_merge(mm, start, start + size, order);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	/* Every free block migrates from the opposite tree into the one
	 * matching the requested state. */
	src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
	dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	for (i = 0; i <= mm->max_order; ++i) {
		struct rb_root *root = &mm->free_trees[src_tree][i];
		struct gpu_buddy_block *block, *tmp;

		/*
		 * NOTE(review): blocks are rb_erase()d out of the tree being
		 * walked; rbtree_postorder_for_each_entry_safe() caches the
		 * next entry, but rb_erase() rebalances — confirm this
		 * traversal pattern is safe for this rbtree variant.
		 */
		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
			/* Remove first: rbtree_remove() derives the tree from
			 * the clear bit, which is about to change. */
			rbtree_remove(mm, block);
			if (is_clear) {
				mark_cleared(block);
				mm->clear_avail += gpu_buddy_block_size(mm, block);
			} else {
				clear_reset(block);
				mm->clear_avail -= gpu_buddy_block_size(mm, block);
			}

			rbtree_insert(mm, block, dst_tree);
		}
	}
}
EXPORT_SYMBOL(gpu_buddy_reset_clear);

/**
 * gpu_buddy_free_block - free a block
 *
 * @mm: GPU buddy manager
 * @block: block to be freed
 */
void gpu_buddy_free_block(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block)
{
	BUG_ON(!gpu_buddy_block_is_allocated(block));
	mm->avail += gpu_buddy_block_size(mm, block);
	if (gpu_buddy_block_is_clear(block))
		mm->clear_avail += gpu_buddy_block_size(mm, block);

	/* Normal free: merge with same-clear-state buddies only. */
	__gpu_buddy_free(mm, block, false);
}
EXPORT_SYMBOL(gpu_buddy_free_block);
/*
 * Free every block on @objects, optionally overriding each block's
 * clear bit first (@mark_clear and @mark_dirty are mutually exclusive;
 * both false leaves the recorded state untouched). The list head is
 * re-initialized empty on return. cond_resched() keeps long lists from
 * hogging the CPU.
 */
static void __gpu_buddy_free_list(struct gpu_buddy *mm,
				  struct list_head *objects,
				  bool mark_clear,
				  bool mark_dirty)
{
	struct gpu_buddy_block *block, *on;

	WARN_ON(mark_dirty && mark_clear);

	list_for_each_entry_safe(block, on, objects, link) {
		if (mark_clear)
			mark_cleared(block);
		else if (mark_dirty)
			clear_reset(block);
		gpu_buddy_free_block(mm, block);
		cond_resched();
	}
	INIT_LIST_HEAD(objects);
}

/* Free a list of blocks without changing their clear/dirty state —
 * used for unwinding partially-completed internal allocations. */
static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
					 struct list_head *objects)
{
	/*
	 * Don't touch the clear/dirty bit, since allocation is still internal
	 * at this point. For example we might have just failed part of the
	 * allocation.
	 */
	__gpu_buddy_free_list(mm, objects, false, false);
}

/**
 * gpu_buddy_free_list - free blocks
 *
 * @mm: GPU buddy manager
 * @objects: input list head to free blocks
 * @flags: optional flags like GPU_BUDDY_CLEARED
 */
void gpu_buddy_free_list(struct gpu_buddy *mm,
			 struct list_head *objects,
			 unsigned int flags)
{
	/* External frees always pick a definite state: cleared when the
	 * caller says so, otherwise dirty. */
	bool mark_clear = flags & GPU_BUDDY_CLEARED;

	__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
}
EXPORT_SYMBOL(gpu_buddy_free_list);

/* A block is incompatible when its clear state does not match what the
 * allocation @flags requested (GPU_BUDDY_CLEAR_ALLOCATION). */
static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
{
	bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;

	return needs_clear != gpu_buddy_block_is_clear(block);
}

/*
 * Depth-first search for a free block of exactly @order inside
 * [@start, @end), splitting larger candidate blocks on the way down.
 *
 * With @fallback false, blocks whose clear state mismatches @flags are
 * skipped; the caller retries with @fallback true to accept any state
 * (see __gpu_buddy_alloc_range_bias()). Returns the found block,
 * ERR_PTR(-ENOSPC) when nothing fits, or the split_block() error.
 */
static struct gpu_buddy_block *
__alloc_range_bias(struct gpu_buddy *mm,
		   u64 start, u64 end,
		   unsigned int order,
		   unsigned long flags,
		   bool fallback)
{
	u64 req_size = mm->chunk_size << order;
	struct gpu_buddy_block *block;
	struct gpu_buddy_block *buddy;
	LIST_HEAD(dfs);
	int err;
	int i;

	/* Work with an inclusive end bound from here on. */
	end = end - 1;

	for (i = 0; i < mm->n_roots; ++i)
		list_add_tail(&mm->roots[i]->tmp_link, &dfs);

	do {
		u64 block_start;
		u64 block_end;

		block = list_first_entry_or_null(&dfs,
						 struct gpu_buddy_block,
						 tmp_link);
		if (!block)
			break;

		list_del(&block->tmp_link);

		if (gpu_buddy_block_order(block) < order)
			continue;

		block_start = gpu_buddy_block_offset(block);
		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

		if (!overlaps(start, end, block_start, block_end))
			continue;

		if (gpu_buddy_block_is_allocated(block))
			continue;

		/* Partially-overlapping block: descend only if the usable
		 * intersection can still hold an aligned req_size chunk. */
		if (block_start < start || block_end > end) {
			u64 adjusted_start = max(block_start, start);
			u64 adjusted_end = min(block_end, end);

			if (round_down(adjusted_end + 1, req_size) <=
			    round_up(adjusted_start, req_size))
				continue;
		}

		if (!fallback && block_incompatible(block, flags))
			continue;

		if (contains(start, end, block_start, block_end) &&
		    order == gpu_buddy_block_order(block)) {
			/*
			 * Find the free block within the range.
			 */
			if (gpu_buddy_block_is_free(block))
				return block;

			continue;
		}

		if (!gpu_buddy_block_is_split(block)) {
			err = split_block(mm, block);
			if (unlikely(err))
				goto err_undo;
		}

		/* LIFO push: left child is visited first (lowest offsets). */
		list_add(&block->right->tmp_link, &dfs);
		list_add(&block->left->tmp_link, &dfs);
	} while (1);

	return ERR_PTR(-ENOSPC);

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}

/*
 * Range-biased allocation wrapper: first try honoring the requested
 * clear state, then fall back to accepting any clear state if that
 * first pass finds nothing.
 */
static struct gpu_buddy_block *
__gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
			     u64 start, u64 end,
			     unsigned int order,
			     unsigned long flags)
{
	struct gpu_buddy_block *block;
	bool fallback = false;

	block = __alloc_range_bias(mm, start, end, order,
				   flags, fallback);
	if (IS_ERR(block))
		return __alloc_range_bias(mm, start, end, order,
					  flags, !fallback);

	return block;
}

static struct gpu_buddy_block *
get_maxblock(struct gpu_buddy *mm,
	     unsigned int order,
	     enum gpu_buddy_free_tree tree)
{
	struct gpu_buddy_block *max_block = NULL, *block = NULL;
	struct rb_root *root;
	unsigned int i;

	for (i = order; i <= mm->max_order; ++i) {
		root = &mm->free_trees[tree][i];
		block = rbtree_last_free_block(root);
		if (!block)
continue; 7154a9671a0SJoel Fernandes 7164a9671a0SJoel Fernandes if (!max_block) { 7174a9671a0SJoel Fernandes max_block = block; 7184a9671a0SJoel Fernandes continue; 7194a9671a0SJoel Fernandes } 7204a9671a0SJoel Fernandes 721*ba110db8SJoel Fernandes if (gpu_buddy_block_offset(block) > 722*ba110db8SJoel Fernandes gpu_buddy_block_offset(max_block)) { 7234a9671a0SJoel Fernandes max_block = block; 7244a9671a0SJoel Fernandes } 7254a9671a0SJoel Fernandes } 7264a9671a0SJoel Fernandes 7274a9671a0SJoel Fernandes return max_block; 7284a9671a0SJoel Fernandes } 7294a9671a0SJoel Fernandes 730*ba110db8SJoel Fernandes static struct gpu_buddy_block * 731*ba110db8SJoel Fernandes alloc_from_freetree(struct gpu_buddy *mm, 7324a9671a0SJoel Fernandes unsigned int order, 7334a9671a0SJoel Fernandes unsigned long flags) 7344a9671a0SJoel Fernandes { 735*ba110db8SJoel Fernandes struct gpu_buddy_block *block = NULL; 7364a9671a0SJoel Fernandes struct rb_root *root; 737*ba110db8SJoel Fernandes enum gpu_buddy_free_tree tree; 7384a9671a0SJoel Fernandes unsigned int tmp; 7394a9671a0SJoel Fernandes int err; 7404a9671a0SJoel Fernandes 741*ba110db8SJoel Fernandes tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ? 
742*ba110db8SJoel Fernandes GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE; 7434a9671a0SJoel Fernandes 744*ba110db8SJoel Fernandes if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) { 7454a9671a0SJoel Fernandes block = get_maxblock(mm, order, tree); 7464a9671a0SJoel Fernandes if (block) 7474a9671a0SJoel Fernandes /* Store the obtained block order */ 748*ba110db8SJoel Fernandes tmp = gpu_buddy_block_order(block); 7494a9671a0SJoel Fernandes } else { 7504a9671a0SJoel Fernandes for (tmp = order; tmp <= mm->max_order; ++tmp) { 7514a9671a0SJoel Fernandes /* Get RB tree root for this order and tree */ 7524a9671a0SJoel Fernandes root = &mm->free_trees[tree][tmp]; 7534a9671a0SJoel Fernandes block = rbtree_last_free_block(root); 7544a9671a0SJoel Fernandes if (block) 7554a9671a0SJoel Fernandes break; 7564a9671a0SJoel Fernandes } 7574a9671a0SJoel Fernandes } 7584a9671a0SJoel Fernandes 7594a9671a0SJoel Fernandes if (!block) { 7604a9671a0SJoel Fernandes /* Try allocating from the other tree */ 761*ba110db8SJoel Fernandes tree = (tree == GPU_BUDDY_CLEAR_TREE) ? 
762*ba110db8SJoel Fernandes GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE; 7634a9671a0SJoel Fernandes 7644a9671a0SJoel Fernandes for (tmp = order; tmp <= mm->max_order; ++tmp) { 7654a9671a0SJoel Fernandes root = &mm->free_trees[tree][tmp]; 7664a9671a0SJoel Fernandes block = rbtree_last_free_block(root); 7674a9671a0SJoel Fernandes if (block) 7684a9671a0SJoel Fernandes break; 7694a9671a0SJoel Fernandes } 7704a9671a0SJoel Fernandes 7714a9671a0SJoel Fernandes if (!block) 7724a9671a0SJoel Fernandes return ERR_PTR(-ENOSPC); 7734a9671a0SJoel Fernandes } 7744a9671a0SJoel Fernandes 775*ba110db8SJoel Fernandes BUG_ON(!gpu_buddy_block_is_free(block)); 7764a9671a0SJoel Fernandes 7774a9671a0SJoel Fernandes while (tmp != order) { 7784a9671a0SJoel Fernandes err = split_block(mm, block); 7794a9671a0SJoel Fernandes if (unlikely(err)) 7804a9671a0SJoel Fernandes goto err_undo; 7814a9671a0SJoel Fernandes 7824a9671a0SJoel Fernandes block = block->right; 7834a9671a0SJoel Fernandes tmp--; 7844a9671a0SJoel Fernandes } 7854a9671a0SJoel Fernandes return block; 7864a9671a0SJoel Fernandes 7874a9671a0SJoel Fernandes err_undo: 7884a9671a0SJoel Fernandes if (tmp != order) 789*ba110db8SJoel Fernandes __gpu_buddy_free(mm, block, false); 7904a9671a0SJoel Fernandes return ERR_PTR(err); 7914a9671a0SJoel Fernandes } 7924a9671a0SJoel Fernandes 793*ba110db8SJoel Fernandes static int __alloc_range(struct gpu_buddy *mm, 7944a9671a0SJoel Fernandes struct list_head *dfs, 7954a9671a0SJoel Fernandes u64 start, u64 size, 7964a9671a0SJoel Fernandes struct list_head *blocks, 7974a9671a0SJoel Fernandes u64 *total_allocated_on_err) 7984a9671a0SJoel Fernandes { 799*ba110db8SJoel Fernandes struct gpu_buddy_block *block; 800*ba110db8SJoel Fernandes struct gpu_buddy_block *buddy; 8014a9671a0SJoel Fernandes u64 total_allocated = 0; 8024a9671a0SJoel Fernandes LIST_HEAD(allocated); 8034a9671a0SJoel Fernandes u64 end; 8044a9671a0SJoel Fernandes int err; 8054a9671a0SJoel Fernandes 8064a9671a0SJoel Fernandes end = start + 
size - 1; 8074a9671a0SJoel Fernandes 8084a9671a0SJoel Fernandes do { 8094a9671a0SJoel Fernandes u64 block_start; 8104a9671a0SJoel Fernandes u64 block_end; 8114a9671a0SJoel Fernandes 8124a9671a0SJoel Fernandes block = list_first_entry_or_null(dfs, 813*ba110db8SJoel Fernandes struct gpu_buddy_block, 8144a9671a0SJoel Fernandes tmp_link); 8154a9671a0SJoel Fernandes if (!block) 8164a9671a0SJoel Fernandes break; 8174a9671a0SJoel Fernandes 8184a9671a0SJoel Fernandes list_del(&block->tmp_link); 8194a9671a0SJoel Fernandes 820*ba110db8SJoel Fernandes block_start = gpu_buddy_block_offset(block); 821*ba110db8SJoel Fernandes block_end = block_start + gpu_buddy_block_size(mm, block) - 1; 8224a9671a0SJoel Fernandes 8234a9671a0SJoel Fernandes if (!overlaps(start, end, block_start, block_end)) 8244a9671a0SJoel Fernandes continue; 8254a9671a0SJoel Fernandes 826*ba110db8SJoel Fernandes if (gpu_buddy_block_is_allocated(block)) { 8274a9671a0SJoel Fernandes err = -ENOSPC; 8284a9671a0SJoel Fernandes goto err_free; 8294a9671a0SJoel Fernandes } 8304a9671a0SJoel Fernandes 8314a9671a0SJoel Fernandes if (contains(start, end, block_start, block_end)) { 832*ba110db8SJoel Fernandes if (gpu_buddy_block_is_free(block)) { 8334a9671a0SJoel Fernandes mark_allocated(mm, block); 834*ba110db8SJoel Fernandes total_allocated += gpu_buddy_block_size(mm, block); 835*ba110db8SJoel Fernandes mm->avail -= gpu_buddy_block_size(mm, block); 836*ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) 837*ba110db8SJoel Fernandes mm->clear_avail -= gpu_buddy_block_size(mm, block); 8384a9671a0SJoel Fernandes list_add_tail(&block->link, &allocated); 8394a9671a0SJoel Fernandes continue; 8404a9671a0SJoel Fernandes } else if (!mm->clear_avail) { 8414a9671a0SJoel Fernandes err = -ENOSPC; 8424a9671a0SJoel Fernandes goto err_free; 8434a9671a0SJoel Fernandes } 8444a9671a0SJoel Fernandes } 8454a9671a0SJoel Fernandes 846*ba110db8SJoel Fernandes if (!gpu_buddy_block_is_split(block)) { 8474a9671a0SJoel Fernandes err = 
split_block(mm, block); 8484a9671a0SJoel Fernandes if (unlikely(err)) 8494a9671a0SJoel Fernandes goto err_undo; 8504a9671a0SJoel Fernandes } 8514a9671a0SJoel Fernandes 8524a9671a0SJoel Fernandes list_add(&block->right->tmp_link, dfs); 8534a9671a0SJoel Fernandes list_add(&block->left->tmp_link, dfs); 8544a9671a0SJoel Fernandes } while (1); 8554a9671a0SJoel Fernandes 8564a9671a0SJoel Fernandes if (total_allocated < size) { 8574a9671a0SJoel Fernandes err = -ENOSPC; 8584a9671a0SJoel Fernandes goto err_free; 8594a9671a0SJoel Fernandes } 8604a9671a0SJoel Fernandes 8614a9671a0SJoel Fernandes list_splice_tail(&allocated, blocks); 8624a9671a0SJoel Fernandes 8634a9671a0SJoel Fernandes return 0; 8644a9671a0SJoel Fernandes 8654a9671a0SJoel Fernandes err_undo: 8664a9671a0SJoel Fernandes /* 8674a9671a0SJoel Fernandes * We really don't want to leave around a bunch of split blocks, since 8684a9671a0SJoel Fernandes * bigger is better, so make sure we merge everything back before we 8694a9671a0SJoel Fernandes * free the allocated blocks. 
8704a9671a0SJoel Fernandes */ 8714a9671a0SJoel Fernandes buddy = __get_buddy(block); 8724a9671a0SJoel Fernandes if (buddy && 873*ba110db8SJoel Fernandes (gpu_buddy_block_is_free(block) && 874*ba110db8SJoel Fernandes gpu_buddy_block_is_free(buddy))) 875*ba110db8SJoel Fernandes __gpu_buddy_free(mm, block, false); 8764a9671a0SJoel Fernandes 8774a9671a0SJoel Fernandes err_free: 8784a9671a0SJoel Fernandes if (err == -ENOSPC && total_allocated_on_err) { 8794a9671a0SJoel Fernandes list_splice_tail(&allocated, blocks); 8804a9671a0SJoel Fernandes *total_allocated_on_err = total_allocated; 8814a9671a0SJoel Fernandes } else { 882*ba110db8SJoel Fernandes gpu_buddy_free_list_internal(mm, &allocated); 8834a9671a0SJoel Fernandes } 8844a9671a0SJoel Fernandes 8854a9671a0SJoel Fernandes return err; 8864a9671a0SJoel Fernandes } 8874a9671a0SJoel Fernandes 888*ba110db8SJoel Fernandes static int __gpu_buddy_alloc_range(struct gpu_buddy *mm, 8894a9671a0SJoel Fernandes u64 start, 8904a9671a0SJoel Fernandes u64 size, 8914a9671a0SJoel Fernandes u64 *total_allocated_on_err, 8924a9671a0SJoel Fernandes struct list_head *blocks) 8934a9671a0SJoel Fernandes { 8944a9671a0SJoel Fernandes LIST_HEAD(dfs); 8954a9671a0SJoel Fernandes int i; 8964a9671a0SJoel Fernandes 8974a9671a0SJoel Fernandes for (i = 0; i < mm->n_roots; ++i) 8984a9671a0SJoel Fernandes list_add_tail(&mm->roots[i]->tmp_link, &dfs); 8994a9671a0SJoel Fernandes 9004a9671a0SJoel Fernandes return __alloc_range(mm, &dfs, start, size, 9014a9671a0SJoel Fernandes blocks, total_allocated_on_err); 9024a9671a0SJoel Fernandes } 9034a9671a0SJoel Fernandes 904*ba110db8SJoel Fernandes static int __alloc_contig_try_harder(struct gpu_buddy *mm, 9054a9671a0SJoel Fernandes u64 size, 9064a9671a0SJoel Fernandes u64 min_block_size, 9074a9671a0SJoel Fernandes struct list_head *blocks) 9084a9671a0SJoel Fernandes { 9094a9671a0SJoel Fernandes u64 rhs_offset, lhs_offset, lhs_size, filled; 910*ba110db8SJoel Fernandes struct gpu_buddy_block *block; 9114a9671a0SJoel 
Fernandes unsigned int tree, order; 9124a9671a0SJoel Fernandes LIST_HEAD(blocks_lhs); 9134a9671a0SJoel Fernandes unsigned long pages; 9144a9671a0SJoel Fernandes u64 modify_size; 9154a9671a0SJoel Fernandes int err; 9164a9671a0SJoel Fernandes 9174a9671a0SJoel Fernandes modify_size = rounddown_pow_of_two(size); 9184a9671a0SJoel Fernandes pages = modify_size >> ilog2(mm->chunk_size); 9194a9671a0SJoel Fernandes order = fls(pages) - 1; 9204a9671a0SJoel Fernandes if (order == 0) 9214a9671a0SJoel Fernandes return -ENOSPC; 9224a9671a0SJoel Fernandes 9234a9671a0SJoel Fernandes for_each_free_tree(tree) { 9244a9671a0SJoel Fernandes struct rb_root *root; 9254a9671a0SJoel Fernandes struct rb_node *iter; 9264a9671a0SJoel Fernandes 9274a9671a0SJoel Fernandes root = &mm->free_trees[tree][order]; 9284a9671a0SJoel Fernandes if (rbtree_is_empty(root)) 9294a9671a0SJoel Fernandes continue; 9304a9671a0SJoel Fernandes 9314a9671a0SJoel Fernandes iter = rb_last(root); 9324a9671a0SJoel Fernandes while (iter) { 9334a9671a0SJoel Fernandes block = rbtree_get_free_block(iter); 9344a9671a0SJoel Fernandes 9354a9671a0SJoel Fernandes /* Allocate blocks traversing RHS */ 936*ba110db8SJoel Fernandes rhs_offset = gpu_buddy_block_offset(block); 937*ba110db8SJoel Fernandes err = __gpu_buddy_alloc_range(mm, rhs_offset, size, 9384a9671a0SJoel Fernandes &filled, blocks); 9394a9671a0SJoel Fernandes if (!err || err != -ENOSPC) 9404a9671a0SJoel Fernandes return err; 9414a9671a0SJoel Fernandes 9424a9671a0SJoel Fernandes lhs_size = max((size - filled), min_block_size); 9434a9671a0SJoel Fernandes if (!IS_ALIGNED(lhs_size, min_block_size)) 9444a9671a0SJoel Fernandes lhs_size = round_up(lhs_size, min_block_size); 9454a9671a0SJoel Fernandes 9464a9671a0SJoel Fernandes /* Allocate blocks traversing LHS */ 947*ba110db8SJoel Fernandes lhs_offset = gpu_buddy_block_offset(block) - lhs_size; 948*ba110db8SJoel Fernandes err = __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size, 9494a9671a0SJoel Fernandes NULL, &blocks_lhs); 
9504a9671a0SJoel Fernandes if (!err) { 9514a9671a0SJoel Fernandes list_splice(&blocks_lhs, blocks); 9524a9671a0SJoel Fernandes return 0; 9534a9671a0SJoel Fernandes } else if (err != -ENOSPC) { 954*ba110db8SJoel Fernandes gpu_buddy_free_list_internal(mm, blocks); 9554a9671a0SJoel Fernandes return err; 9564a9671a0SJoel Fernandes } 9574a9671a0SJoel Fernandes /* Free blocks for the next iteration */ 958*ba110db8SJoel Fernandes gpu_buddy_free_list_internal(mm, blocks); 9594a9671a0SJoel Fernandes 9604a9671a0SJoel Fernandes iter = rb_prev(iter); 9614a9671a0SJoel Fernandes } 9624a9671a0SJoel Fernandes } 9634a9671a0SJoel Fernandes 9644a9671a0SJoel Fernandes return -ENOSPC; 9654a9671a0SJoel Fernandes } 9664a9671a0SJoel Fernandes 9674a9671a0SJoel Fernandes /** 968*ba110db8SJoel Fernandes * gpu_buddy_block_trim - free unused pages 9694a9671a0SJoel Fernandes * 970*ba110db8SJoel Fernandes * @mm: GPU buddy manager 9714a9671a0SJoel Fernandes * @start: start address to begin the trimming. 9724a9671a0SJoel Fernandes * @new_size: original size requested 9734a9671a0SJoel Fernandes * @blocks: Input and output list of allocated blocks. 9744a9671a0SJoel Fernandes * MUST contain single block as input to be trimmed. 9754a9671a0SJoel Fernandes * On success will contain the newly allocated blocks 9764a9671a0SJoel Fernandes * making up the @new_size. Blocks always appear in 9774a9671a0SJoel Fernandes * ascending order 9784a9671a0SJoel Fernandes * 9794a9671a0SJoel Fernandes * For contiguous allocation, we round up the size to the nearest 9804a9671a0SJoel Fernandes * power of two value, drivers consume *actual* size, so remaining 9814a9671a0SJoel Fernandes * portions are unused and can be optionally freed with this function 9824a9671a0SJoel Fernandes * 9834a9671a0SJoel Fernandes * Returns: 9844a9671a0SJoel Fernandes * 0 on success, error code on failure. 
9854a9671a0SJoel Fernandes */ 986*ba110db8SJoel Fernandes int gpu_buddy_block_trim(struct gpu_buddy *mm, 9874a9671a0SJoel Fernandes u64 *start, 9884a9671a0SJoel Fernandes u64 new_size, 9894a9671a0SJoel Fernandes struct list_head *blocks) 9904a9671a0SJoel Fernandes { 991*ba110db8SJoel Fernandes struct gpu_buddy_block *parent; 992*ba110db8SJoel Fernandes struct gpu_buddy_block *block; 9934a9671a0SJoel Fernandes u64 block_start, block_end; 9944a9671a0SJoel Fernandes LIST_HEAD(dfs); 9954a9671a0SJoel Fernandes u64 new_start; 9964a9671a0SJoel Fernandes int err; 9974a9671a0SJoel Fernandes 9984a9671a0SJoel Fernandes if (!list_is_singular(blocks)) 9994a9671a0SJoel Fernandes return -EINVAL; 10004a9671a0SJoel Fernandes 10014a9671a0SJoel Fernandes block = list_first_entry(blocks, 1002*ba110db8SJoel Fernandes struct gpu_buddy_block, 10034a9671a0SJoel Fernandes link); 10044a9671a0SJoel Fernandes 1005*ba110db8SJoel Fernandes block_start = gpu_buddy_block_offset(block); 1006*ba110db8SJoel Fernandes block_end = block_start + gpu_buddy_block_size(mm, block); 10074a9671a0SJoel Fernandes 1008*ba110db8SJoel Fernandes if (WARN_ON(!gpu_buddy_block_is_allocated(block))) 10094a9671a0SJoel Fernandes return -EINVAL; 10104a9671a0SJoel Fernandes 1011*ba110db8SJoel Fernandes if (new_size > gpu_buddy_block_size(mm, block)) 10124a9671a0SJoel Fernandes return -EINVAL; 10134a9671a0SJoel Fernandes 10144a9671a0SJoel Fernandes if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size)) 10154a9671a0SJoel Fernandes return -EINVAL; 10164a9671a0SJoel Fernandes 1017*ba110db8SJoel Fernandes if (new_size == gpu_buddy_block_size(mm, block)) 10184a9671a0SJoel Fernandes return 0; 10194a9671a0SJoel Fernandes 10204a9671a0SJoel Fernandes new_start = block_start; 10214a9671a0SJoel Fernandes if (start) { 10224a9671a0SJoel Fernandes new_start = *start; 10234a9671a0SJoel Fernandes 10244a9671a0SJoel Fernandes if (new_start < block_start) 10254a9671a0SJoel Fernandes return -EINVAL; 10264a9671a0SJoel Fernandes 
10274a9671a0SJoel Fernandes if (!IS_ALIGNED(new_start, mm->chunk_size)) 10284a9671a0SJoel Fernandes return -EINVAL; 10294a9671a0SJoel Fernandes 10304a9671a0SJoel Fernandes if (range_overflows(new_start, new_size, block_end)) 10314a9671a0SJoel Fernandes return -EINVAL; 10324a9671a0SJoel Fernandes } 10334a9671a0SJoel Fernandes 10344a9671a0SJoel Fernandes list_del(&block->link); 10354a9671a0SJoel Fernandes mark_free(mm, block); 1036*ba110db8SJoel Fernandes mm->avail += gpu_buddy_block_size(mm, block); 1037*ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) 1038*ba110db8SJoel Fernandes mm->clear_avail += gpu_buddy_block_size(mm, block); 10394a9671a0SJoel Fernandes 10404a9671a0SJoel Fernandes /* Prevent recursively freeing this node */ 10414a9671a0SJoel Fernandes parent = block->parent; 10424a9671a0SJoel Fernandes block->parent = NULL; 10434a9671a0SJoel Fernandes 10444a9671a0SJoel Fernandes list_add(&block->tmp_link, &dfs); 10454a9671a0SJoel Fernandes err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); 10464a9671a0SJoel Fernandes if (err) { 10474a9671a0SJoel Fernandes mark_allocated(mm, block); 1048*ba110db8SJoel Fernandes mm->avail -= gpu_buddy_block_size(mm, block); 1049*ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) 1050*ba110db8SJoel Fernandes mm->clear_avail -= gpu_buddy_block_size(mm, block); 10514a9671a0SJoel Fernandes list_add(&block->link, blocks); 10524a9671a0SJoel Fernandes } 10534a9671a0SJoel Fernandes 10544a9671a0SJoel Fernandes block->parent = parent; 10554a9671a0SJoel Fernandes return err; 10564a9671a0SJoel Fernandes } 1057*ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_block_trim); 10584a9671a0SJoel Fernandes 1059*ba110db8SJoel Fernandes static struct gpu_buddy_block * 1060*ba110db8SJoel Fernandes __gpu_buddy_alloc_blocks(struct gpu_buddy *mm, 10614a9671a0SJoel Fernandes u64 start, u64 end, 10624a9671a0SJoel Fernandes unsigned int order, 10634a9671a0SJoel Fernandes unsigned long flags) 10644a9671a0SJoel Fernandes { 
1065*ba110db8SJoel Fernandes if (flags & GPU_BUDDY_RANGE_ALLOCATION) 10664a9671a0SJoel Fernandes /* Allocate traversing within the range */ 1067*ba110db8SJoel Fernandes return __gpu_buddy_alloc_range_bias(mm, start, end, 10684a9671a0SJoel Fernandes order, flags); 10694a9671a0SJoel Fernandes else 10704a9671a0SJoel Fernandes /* Allocate from freetree */ 10714a9671a0SJoel Fernandes return alloc_from_freetree(mm, order, flags); 10724a9671a0SJoel Fernandes } 10734a9671a0SJoel Fernandes 10744a9671a0SJoel Fernandes /** 1075*ba110db8SJoel Fernandes * gpu_buddy_alloc_blocks - allocate power-of-two blocks 10764a9671a0SJoel Fernandes * 1077*ba110db8SJoel Fernandes * @mm: GPU buddy manager to allocate from 10784a9671a0SJoel Fernandes * @start: start of the allowed range for this block 10794a9671a0SJoel Fernandes * @end: end of the allowed range for this block 10804a9671a0SJoel Fernandes * @size: size of the allocation in bytes 10814a9671a0SJoel Fernandes * @min_block_size: alignment of the allocation 10824a9671a0SJoel Fernandes * @blocks: output list head to add allocated blocks 1083*ba110db8SJoel Fernandes * @flags: GPU_BUDDY_*_ALLOCATION flags 10844a9671a0SJoel Fernandes * 10854a9671a0SJoel Fernandes * alloc_range_bias() called on range limitations, which traverses 10864a9671a0SJoel Fernandes * the tree and returns the desired block. 10874a9671a0SJoel Fernandes * 10884a9671a0SJoel Fernandes * alloc_from_freetree() called when *no* range restrictions 10894a9671a0SJoel Fernandes * are enforced, which picks the block from the freetree. 10904a9671a0SJoel Fernandes * 10914a9671a0SJoel Fernandes * Returns: 10924a9671a0SJoel Fernandes * 0 on success, error code on failure. 
10934a9671a0SJoel Fernandes */ 1094*ba110db8SJoel Fernandes int gpu_buddy_alloc_blocks(struct gpu_buddy *mm, 10954a9671a0SJoel Fernandes u64 start, u64 end, u64 size, 10964a9671a0SJoel Fernandes u64 min_block_size, 10974a9671a0SJoel Fernandes struct list_head *blocks, 10984a9671a0SJoel Fernandes unsigned long flags) 10994a9671a0SJoel Fernandes { 1100*ba110db8SJoel Fernandes struct gpu_buddy_block *block = NULL; 11014a9671a0SJoel Fernandes u64 original_size, original_min_size; 11024a9671a0SJoel Fernandes unsigned int min_order, order; 11034a9671a0SJoel Fernandes LIST_HEAD(allocated); 11044a9671a0SJoel Fernandes unsigned long pages; 11054a9671a0SJoel Fernandes int err; 11064a9671a0SJoel Fernandes 11074a9671a0SJoel Fernandes if (size < mm->chunk_size) 11084a9671a0SJoel Fernandes return -EINVAL; 11094a9671a0SJoel Fernandes 11104a9671a0SJoel Fernandes if (min_block_size < mm->chunk_size) 11114a9671a0SJoel Fernandes return -EINVAL; 11124a9671a0SJoel Fernandes 11134a9671a0SJoel Fernandes if (!is_power_of_2(min_block_size)) 11144a9671a0SJoel Fernandes return -EINVAL; 11154a9671a0SJoel Fernandes 11164a9671a0SJoel Fernandes if (!IS_ALIGNED(start | end | size, mm->chunk_size)) 11174a9671a0SJoel Fernandes return -EINVAL; 11184a9671a0SJoel Fernandes 11194a9671a0SJoel Fernandes if (end > mm->size) 11204a9671a0SJoel Fernandes return -EINVAL; 11214a9671a0SJoel Fernandes 11224a9671a0SJoel Fernandes if (range_overflows(start, size, mm->size)) 11234a9671a0SJoel Fernandes return -EINVAL; 11244a9671a0SJoel Fernandes 11254a9671a0SJoel Fernandes /* Actual range allocation */ 11264a9671a0SJoel Fernandes if (start + size == end) { 11274a9671a0SJoel Fernandes if (!IS_ALIGNED(start | end, min_block_size)) 11284a9671a0SJoel Fernandes return -EINVAL; 11294a9671a0SJoel Fernandes 1130*ba110db8SJoel Fernandes return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks); 11314a9671a0SJoel Fernandes } 11324a9671a0SJoel Fernandes 11334a9671a0SJoel Fernandes original_size = size; 11344a9671a0SJoel 
Fernandes original_min_size = min_block_size; 11354a9671a0SJoel Fernandes 11364a9671a0SJoel Fernandes /* Roundup the size to power of 2 */ 1137*ba110db8SJoel Fernandes if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) { 11384a9671a0SJoel Fernandes size = roundup_pow_of_two(size); 11394a9671a0SJoel Fernandes min_block_size = size; 11404a9671a0SJoel Fernandes /* Align size value to min_block_size */ 11414a9671a0SJoel Fernandes } else if (!IS_ALIGNED(size, min_block_size)) { 11424a9671a0SJoel Fernandes size = round_up(size, min_block_size); 11434a9671a0SJoel Fernandes } 11444a9671a0SJoel Fernandes 11454a9671a0SJoel Fernandes pages = size >> ilog2(mm->chunk_size); 11464a9671a0SJoel Fernandes order = fls(pages) - 1; 11474a9671a0SJoel Fernandes min_order = ilog2(min_block_size) - ilog2(mm->chunk_size); 11484a9671a0SJoel Fernandes 11494a9671a0SJoel Fernandes if (order > mm->max_order || size > mm->size) { 1150*ba110db8SJoel Fernandes if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) && 1151*ba110db8SJoel Fernandes !(flags & GPU_BUDDY_RANGE_ALLOCATION)) 11524a9671a0SJoel Fernandes return __alloc_contig_try_harder(mm, original_size, 11534a9671a0SJoel Fernandes original_min_size, blocks); 11544a9671a0SJoel Fernandes 11554a9671a0SJoel Fernandes return -EINVAL; 11564a9671a0SJoel Fernandes } 11574a9671a0SJoel Fernandes 11584a9671a0SJoel Fernandes do { 11594a9671a0SJoel Fernandes order = min(order, (unsigned int)fls(pages) - 1); 11604a9671a0SJoel Fernandes BUG_ON(order > mm->max_order); 11614a9671a0SJoel Fernandes BUG_ON(order < min_order); 11624a9671a0SJoel Fernandes 11634a9671a0SJoel Fernandes do { 1164*ba110db8SJoel Fernandes block = __gpu_buddy_alloc_blocks(mm, start, 11654a9671a0SJoel Fernandes end, 11664a9671a0SJoel Fernandes order, 11674a9671a0SJoel Fernandes flags); 11684a9671a0SJoel Fernandes if (!IS_ERR(block)) 11694a9671a0SJoel Fernandes break; 11704a9671a0SJoel Fernandes 11714a9671a0SJoel Fernandes if (order-- == min_order) { 11724a9671a0SJoel Fernandes /* Try allocation 
through force merge method */ 11734a9671a0SJoel Fernandes if (mm->clear_avail && 11744a9671a0SJoel Fernandes !__force_merge(mm, start, end, min_order)) { 1175*ba110db8SJoel Fernandes block = __gpu_buddy_alloc_blocks(mm, start, 11764a9671a0SJoel Fernandes end, 11774a9671a0SJoel Fernandes min_order, 11784a9671a0SJoel Fernandes flags); 11794a9671a0SJoel Fernandes if (!IS_ERR(block)) { 11804a9671a0SJoel Fernandes order = min_order; 11814a9671a0SJoel Fernandes break; 11824a9671a0SJoel Fernandes } 11834a9671a0SJoel Fernandes } 11844a9671a0SJoel Fernandes 11854a9671a0SJoel Fernandes /* 11864a9671a0SJoel Fernandes * Try contiguous block allocation through 11874a9671a0SJoel Fernandes * try harder method. 11884a9671a0SJoel Fernandes */ 1189*ba110db8SJoel Fernandes if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION && 1190*ba110db8SJoel Fernandes !(flags & GPU_BUDDY_RANGE_ALLOCATION)) 11914a9671a0SJoel Fernandes return __alloc_contig_try_harder(mm, 11924a9671a0SJoel Fernandes original_size, 11934a9671a0SJoel Fernandes original_min_size, 11944a9671a0SJoel Fernandes blocks); 11954a9671a0SJoel Fernandes err = -ENOSPC; 11964a9671a0SJoel Fernandes goto err_free; 11974a9671a0SJoel Fernandes } 11984a9671a0SJoel Fernandes } while (1); 11994a9671a0SJoel Fernandes 12004a9671a0SJoel Fernandes mark_allocated(mm, block); 1201*ba110db8SJoel Fernandes mm->avail -= gpu_buddy_block_size(mm, block); 1202*ba110db8SJoel Fernandes if (gpu_buddy_block_is_clear(block)) 1203*ba110db8SJoel Fernandes mm->clear_avail -= gpu_buddy_block_size(mm, block); 12044a9671a0SJoel Fernandes kmemleak_update_trace(block); 12054a9671a0SJoel Fernandes list_add_tail(&block->link, &allocated); 12064a9671a0SJoel Fernandes 12074a9671a0SJoel Fernandes pages -= BIT(order); 12084a9671a0SJoel Fernandes 12094a9671a0SJoel Fernandes if (!pages) 12104a9671a0SJoel Fernandes break; 12114a9671a0SJoel Fernandes } while (1); 12124a9671a0SJoel Fernandes 12134a9671a0SJoel Fernandes /* Trim the allocated block to the required size */ 
1214*ba110db8SJoel Fernandes if (!(flags & GPU_BUDDY_TRIM_DISABLE) && 12154a9671a0SJoel Fernandes original_size != size) { 12164a9671a0SJoel Fernandes struct list_head *trim_list; 12174a9671a0SJoel Fernandes LIST_HEAD(temp); 12184a9671a0SJoel Fernandes u64 trim_size; 12194a9671a0SJoel Fernandes 12204a9671a0SJoel Fernandes trim_list = &allocated; 12214a9671a0SJoel Fernandes trim_size = original_size; 12224a9671a0SJoel Fernandes 12234a9671a0SJoel Fernandes if (!list_is_singular(&allocated)) { 12244a9671a0SJoel Fernandes block = list_last_entry(&allocated, typeof(*block), link); 12254a9671a0SJoel Fernandes list_move(&block->link, &temp); 12264a9671a0SJoel Fernandes trim_list = &temp; 1227*ba110db8SJoel Fernandes trim_size = gpu_buddy_block_size(mm, block) - 12284a9671a0SJoel Fernandes (size - original_size); 12294a9671a0SJoel Fernandes } 12304a9671a0SJoel Fernandes 1231*ba110db8SJoel Fernandes gpu_buddy_block_trim(mm, 12324a9671a0SJoel Fernandes NULL, 12334a9671a0SJoel Fernandes trim_size, 12344a9671a0SJoel Fernandes trim_list); 12354a9671a0SJoel Fernandes 12364a9671a0SJoel Fernandes if (!list_empty(&temp)) 12374a9671a0SJoel Fernandes list_splice_tail(trim_list, &allocated); 12384a9671a0SJoel Fernandes } 12394a9671a0SJoel Fernandes 12404a9671a0SJoel Fernandes list_splice_tail(&allocated, blocks); 12414a9671a0SJoel Fernandes return 0; 12424a9671a0SJoel Fernandes 12434a9671a0SJoel Fernandes err_free: 1244*ba110db8SJoel Fernandes gpu_buddy_free_list_internal(mm, &allocated); 12454a9671a0SJoel Fernandes return err; 12464a9671a0SJoel Fernandes } 1247*ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_alloc_blocks); 12484a9671a0SJoel Fernandes 12494a9671a0SJoel Fernandes /** 1250*ba110db8SJoel Fernandes * gpu_buddy_block_print - print block information 12514a9671a0SJoel Fernandes * 1252*ba110db8SJoel Fernandes * @mm: GPU buddy manager 1253*ba110db8SJoel Fernandes * @block: GPU buddy block 12544a9671a0SJoel Fernandes */ 1255*ba110db8SJoel Fernandes void 
gpu_buddy_block_print(struct gpu_buddy *mm, 1256*ba110db8SJoel Fernandes struct gpu_buddy_block *block) 12574a9671a0SJoel Fernandes { 1258*ba110db8SJoel Fernandes u64 start = gpu_buddy_block_offset(block); 1259*ba110db8SJoel Fernandes u64 size = gpu_buddy_block_size(mm, block); 12604a9671a0SJoel Fernandes 1261*ba110db8SJoel Fernandes pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size); 12624a9671a0SJoel Fernandes } 1263*ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_block_print); 12644a9671a0SJoel Fernandes 12654a9671a0SJoel Fernandes /** 1266*ba110db8SJoel Fernandes * gpu_buddy_print - print allocator state 12674a9671a0SJoel Fernandes * 1268*ba110db8SJoel Fernandes * @mm: GPU buddy manager 1269*ba110db8SJoel Fernandes * @p: GPU printer to use 12704a9671a0SJoel Fernandes */ 1271*ba110db8SJoel Fernandes void gpu_buddy_print(struct gpu_buddy *mm) 12724a9671a0SJoel Fernandes { 12734a9671a0SJoel Fernandes int order; 12744a9671a0SJoel Fernandes 1275*ba110db8SJoel Fernandes pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n", 12764a9671a0SJoel Fernandes mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20); 12774a9671a0SJoel Fernandes 12784a9671a0SJoel Fernandes for (order = mm->max_order; order >= 0; order--) { 1279*ba110db8SJoel Fernandes struct gpu_buddy_block *block, *tmp; 12804a9671a0SJoel Fernandes struct rb_root *root; 12814a9671a0SJoel Fernandes u64 count = 0, free; 12824a9671a0SJoel Fernandes unsigned int tree; 12834a9671a0SJoel Fernandes 12844a9671a0SJoel Fernandes for_each_free_tree(tree) { 12854a9671a0SJoel Fernandes root = &mm->free_trees[tree][order]; 12864a9671a0SJoel Fernandes 12874a9671a0SJoel Fernandes rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) { 1288*ba110db8SJoel Fernandes BUG_ON(!gpu_buddy_block_is_free(block)); 12894a9671a0SJoel Fernandes count++; 12904a9671a0SJoel Fernandes } 12914a9671a0SJoel Fernandes } 12924a9671a0SJoel Fernandes 12934a9671a0SJoel Fernandes free 
= count * (mm->chunk_size << order); 12944a9671a0SJoel Fernandes if (free < SZ_1M) 1295*ba110db8SJoel Fernandes pr_info("order-%2d free: %8llu KiB, blocks: %llu\n", 1296*ba110db8SJoel Fernandes order, free >> 10, count); 12974a9671a0SJoel Fernandes else 1298*ba110db8SJoel Fernandes pr_info("order-%2d free: %8llu MiB, blocks: %llu\n", 1299*ba110db8SJoel Fernandes order, free >> 20, count); 13004a9671a0SJoel Fernandes } 13014a9671a0SJoel Fernandes } 1302*ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_print); 13034a9671a0SJoel Fernandes 1304*ba110db8SJoel Fernandes static void gpu_buddy_module_exit(void) 13054a9671a0SJoel Fernandes { 13064a9671a0SJoel Fernandes kmem_cache_destroy(slab_blocks); 13074a9671a0SJoel Fernandes } 13084a9671a0SJoel Fernandes 1309*ba110db8SJoel Fernandes static int __init gpu_buddy_module_init(void) 13104a9671a0SJoel Fernandes { 1311*ba110db8SJoel Fernandes slab_blocks = KMEM_CACHE(gpu_buddy_block, 0); 13124a9671a0SJoel Fernandes if (!slab_blocks) 13134a9671a0SJoel Fernandes return -ENOMEM; 13144a9671a0SJoel Fernandes 13154a9671a0SJoel Fernandes return 0; 13164a9671a0SJoel Fernandes } 13174a9671a0SJoel Fernandes 1318*ba110db8SJoel Fernandes module_init(gpu_buddy_module_init); 1319*ba110db8SJoel Fernandes module_exit(gpu_buddy_module_exit); 13204a9671a0SJoel Fernandes 1321*ba110db8SJoel Fernandes MODULE_DESCRIPTION("GPU Buddy Allocator"); 13224a9671a0SJoel Fernandes MODULE_LICENSE("Dual MIT/GPL"); 1323