xref: /linux/drivers/gpu/buddy.c (revision ba110db8e1bc206c13fd7d985e79b033f53bfdea)
14a9671a0SJoel Fernandes // SPDX-License-Identifier: MIT
24a9671a0SJoel Fernandes /*
34a9671a0SJoel Fernandes  * Copyright © 2021 Intel Corporation
44a9671a0SJoel Fernandes  */
54a9671a0SJoel Fernandes 
64a9671a0SJoel Fernandes #include <kunit/test-bug.h>
74a9671a0SJoel Fernandes 
84a9671a0SJoel Fernandes #include <linux/export.h>
94a9671a0SJoel Fernandes #include <linux/kmemleak.h>
104a9671a0SJoel Fernandes #include <linux/module.h>
114a9671a0SJoel Fernandes #include <linux/sizes.h>
124a9671a0SJoel Fernandes 
134a9671a0SJoel Fernandes #include <linux/gpu_buddy.h>
144a9671a0SJoel Fernandes 
154a9671a0SJoel Fernandes static struct kmem_cache *slab_blocks;
164a9671a0SJoel Fernandes 
17*ba110db8SJoel Fernandes static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
18*ba110db8SJoel Fernandes 					       struct gpu_buddy_block *parent,
194a9671a0SJoel Fernandes 					       unsigned int order,
204a9671a0SJoel Fernandes 					       u64 offset)
214a9671a0SJoel Fernandes {
22*ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
234a9671a0SJoel Fernandes 
24*ba110db8SJoel Fernandes 	BUG_ON(order > GPU_BUDDY_MAX_ORDER);
254a9671a0SJoel Fernandes 
264a9671a0SJoel Fernandes 	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
274a9671a0SJoel Fernandes 	if (!block)
284a9671a0SJoel Fernandes 		return NULL;
294a9671a0SJoel Fernandes 
304a9671a0SJoel Fernandes 	block->header = offset;
314a9671a0SJoel Fernandes 	block->header |= order;
324a9671a0SJoel Fernandes 	block->parent = parent;
334a9671a0SJoel Fernandes 
344a9671a0SJoel Fernandes 	RB_CLEAR_NODE(&block->rb);
354a9671a0SJoel Fernandes 
36*ba110db8SJoel Fernandes 	BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
374a9671a0SJoel Fernandes 	return block;
384a9671a0SJoel Fernandes }
394a9671a0SJoel Fernandes 
/* Return a block descriptor to the slab cache; @mm is unused but kept for symmetry. */
static void gpu_block_free(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	kmem_cache_free(slab_blocks, block);
}
454a9671a0SJoel Fernandes 
46*ba110db8SJoel Fernandes static enum gpu_buddy_free_tree
47*ba110db8SJoel Fernandes get_block_tree(struct gpu_buddy_block *block)
484a9671a0SJoel Fernandes {
49*ba110db8SJoel Fernandes 	return gpu_buddy_block_is_clear(block) ?
50*ba110db8SJoel Fernandes 	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
514a9671a0SJoel Fernandes }
524a9671a0SJoel Fernandes 
53*ba110db8SJoel Fernandes static struct gpu_buddy_block *
544a9671a0SJoel Fernandes rbtree_get_free_block(const struct rb_node *node)
554a9671a0SJoel Fernandes {
56*ba110db8SJoel Fernandes 	return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
574a9671a0SJoel Fernandes }
584a9671a0SJoel Fernandes 
/* Highest-offset free block in @root, or NULL when the tree is empty. */
static struct gpu_buddy_block *
rbtree_last_free_block(struct rb_root *root)
{
	struct rb_node *last = rb_last(root);

	return rbtree_get_free_block(last);
}
644a9671a0SJoel Fernandes 
/* True when the given free tree holds no blocks. */
static bool rbtree_is_empty(struct rb_root *root)
{
	return RB_EMPTY_ROOT(root);
}
694a9671a0SJoel Fernandes 
70*ba110db8SJoel Fernandes static bool gpu_buddy_block_offset_less(const struct gpu_buddy_block *block,
71*ba110db8SJoel Fernandes 					const struct gpu_buddy_block *node)
724a9671a0SJoel Fernandes {
73*ba110db8SJoel Fernandes 	return gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node);
744a9671a0SJoel Fernandes }
754a9671a0SJoel Fernandes 
764a9671a0SJoel Fernandes static bool rbtree_block_offset_less(struct rb_node *block,
774a9671a0SJoel Fernandes 				     const struct rb_node *node)
784a9671a0SJoel Fernandes {
79*ba110db8SJoel Fernandes 	return gpu_buddy_block_offset_less(rbtree_get_free_block(block),
804a9671a0SJoel Fernandes 					   rbtree_get_free_block(node));
814a9671a0SJoel Fernandes }
824a9671a0SJoel Fernandes 
83*ba110db8SJoel Fernandes static void rbtree_insert(struct gpu_buddy *mm,
84*ba110db8SJoel Fernandes 			  struct gpu_buddy_block *block,
85*ba110db8SJoel Fernandes 			  enum gpu_buddy_free_tree tree)
864a9671a0SJoel Fernandes {
874a9671a0SJoel Fernandes 	rb_add(&block->rb,
88*ba110db8SJoel Fernandes 	       &mm->free_trees[tree][gpu_buddy_block_order(block)],
894a9671a0SJoel Fernandes 	       rbtree_block_offset_less);
904a9671a0SJoel Fernandes }
914a9671a0SJoel Fernandes 
92*ba110db8SJoel Fernandes static void rbtree_remove(struct gpu_buddy *mm,
93*ba110db8SJoel Fernandes 			  struct gpu_buddy_block *block)
944a9671a0SJoel Fernandes {
95*ba110db8SJoel Fernandes 	unsigned int order = gpu_buddy_block_order(block);
96*ba110db8SJoel Fernandes 	enum gpu_buddy_free_tree tree;
974a9671a0SJoel Fernandes 	struct rb_root *root;
984a9671a0SJoel Fernandes 
994a9671a0SJoel Fernandes 	tree = get_block_tree(block);
1004a9671a0SJoel Fernandes 	root = &mm->free_trees[tree][order];
1014a9671a0SJoel Fernandes 
1024a9671a0SJoel Fernandes 	rb_erase(&block->rb, root);
1034a9671a0SJoel Fernandes 	RB_CLEAR_NODE(&block->rb);
1044a9671a0SJoel Fernandes }
1054a9671a0SJoel Fernandes 
/* Drop the cleared bit: the block's memory is considered dirty again. */
static void clear_reset(struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_CLEAR;
}
1104a9671a0SJoel Fernandes 
/* Set the cleared bit: the block's memory is known to be zeroed. */
static void mark_cleared(struct gpu_buddy_block *block)
{
	block->header |= GPU_BUDDY_HEADER_CLEAR;
}
1154a9671a0SJoel Fernandes 
116*ba110db8SJoel Fernandes static void mark_allocated(struct gpu_buddy *mm,
117*ba110db8SJoel Fernandes 			   struct gpu_buddy_block *block)
1184a9671a0SJoel Fernandes {
119*ba110db8SJoel Fernandes 	block->header &= ~GPU_BUDDY_HEADER_STATE;
120*ba110db8SJoel Fernandes 	block->header |= GPU_BUDDY_ALLOCATED;
1214a9671a0SJoel Fernandes 
1224a9671a0SJoel Fernandes 	rbtree_remove(mm, block);
1234a9671a0SJoel Fernandes }
1244a9671a0SJoel Fernandes 
125*ba110db8SJoel Fernandes static void mark_free(struct gpu_buddy *mm,
126*ba110db8SJoel Fernandes 		      struct gpu_buddy_block *block)
1274a9671a0SJoel Fernandes {
128*ba110db8SJoel Fernandes 	enum gpu_buddy_free_tree tree;
1294a9671a0SJoel Fernandes 
130*ba110db8SJoel Fernandes 	block->header &= ~GPU_BUDDY_HEADER_STATE;
131*ba110db8SJoel Fernandes 	block->header |= GPU_BUDDY_FREE;
1324a9671a0SJoel Fernandes 
1334a9671a0SJoel Fernandes 	tree = get_block_tree(block);
1344a9671a0SJoel Fernandes 	rbtree_insert(mm, block, tree);
1354a9671a0SJoel Fernandes }
1364a9671a0SJoel Fernandes 
137*ba110db8SJoel Fernandes static void mark_split(struct gpu_buddy *mm,
138*ba110db8SJoel Fernandes 		       struct gpu_buddy_block *block)
1394a9671a0SJoel Fernandes {
140*ba110db8SJoel Fernandes 	block->header &= ~GPU_BUDDY_HEADER_STATE;
141*ba110db8SJoel Fernandes 	block->header |= GPU_BUDDY_SPLIT;
1424a9671a0SJoel Fernandes 
1434a9671a0SJoel Fernandes 	rbtree_remove(mm, block);
1444a9671a0SJoel Fernandes }
1454a9671a0SJoel Fernandes 
1464a9671a0SJoel Fernandes static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
1474a9671a0SJoel Fernandes {
1484a9671a0SJoel Fernandes 	return s1 <= e2 && e1 >= s2;
1494a9671a0SJoel Fernandes }
1504a9671a0SJoel Fernandes 
1514a9671a0SJoel Fernandes static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
1524a9671a0SJoel Fernandes {
1534a9671a0SJoel Fernandes 	return s1 <= s2 && e1 >= e2;
1544a9671a0SJoel Fernandes }
1554a9671a0SJoel Fernandes 
156*ba110db8SJoel Fernandes static struct gpu_buddy_block *
157*ba110db8SJoel Fernandes __get_buddy(struct gpu_buddy_block *block)
1584a9671a0SJoel Fernandes {
159*ba110db8SJoel Fernandes 	struct gpu_buddy_block *parent;
1604a9671a0SJoel Fernandes 
1614a9671a0SJoel Fernandes 	parent = block->parent;
1624a9671a0SJoel Fernandes 	if (!parent)
1634a9671a0SJoel Fernandes 		return NULL;
1644a9671a0SJoel Fernandes 
1654a9671a0SJoel Fernandes 	if (parent->left == block)
1664a9671a0SJoel Fernandes 		return parent->right;
1674a9671a0SJoel Fernandes 
1684a9671a0SJoel Fernandes 	return parent->left;
1694a9671a0SJoel Fernandes }
1704a9671a0SJoel Fernandes 
/*
 * Release @block into the free trees, iteratively merging it with its
 * buddy up the parent chain while the buddy is also free.
 *
 * Without @force_merge, coalescing stops at the first clear-state
 * mismatch so clear/dirty accounting stays exact; when both halves are
 * clear, the merged parent inherits the cleared bit.  With @force_merge,
 * mismatched halves are merged anyway (the caller has already accounted
 * for @block) and any cleared buddy consumed on the way up is subtracted
 * from mm->clear_avail.
 *
 * Returns the order of the final (possibly merged) free block.
 */
static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
				     struct gpu_buddy_block *block,
				     bool force_merge)
{
	struct gpu_buddy_block *parent;
	unsigned int order;

	while ((parent = block->parent)) {
		struct gpu_buddy_block *buddy;

		buddy = __get_buddy(block);

		if (!gpu_buddy_block_is_free(buddy))
			break;

		if (!force_merge) {
			/*
			 * Check the block and its buddy clear state and exit
			 * the loop if they both have the dissimilar state.
			 */
			if (gpu_buddy_block_is_clear(block) !=
			    gpu_buddy_block_is_clear(buddy))
				break;

			if (gpu_buddy_block_is_clear(block))
				mark_cleared(parent);
		}

		/* Both halves collapse into the parent; drop them. */
		rbtree_remove(mm, buddy);
		if (force_merge && gpu_buddy_block_is_clear(buddy))
			mm->clear_avail -= gpu_buddy_block_size(mm, buddy);

		gpu_block_free(mm, block);
		gpu_block_free(mm, buddy);

		block = parent;
	}

	order = gpu_buddy_block_order(block);
	mark_free(mm, block);

	return order;
}
2144a9671a0SJoel Fernandes 
/*
 * Try to produce a free block of at least @min_order inside the inclusive
 * range [@start, @end] by force-merging free buddy pairs whose clear
 * states differ (same-state pairs were already merged on the normal free
 * path).
 *
 * Returns 0 as soon as a merge yields a block of @min_order or larger,
 * -EINVAL if @min_order exceeds the manager's maximum, -ENOMEM otherwise.
 */
static int __force_merge(struct gpu_buddy *mm,
			 u64 start,
			 u64 end,
			 unsigned int min_order)
{
	unsigned int tree, order;
	int i;

	if (!min_order)
		return -ENOMEM;

	if (min_order > mm->max_order)
		return -EINVAL;

	for_each_free_tree(tree) {
		/* Only orders below min_order can contribute a merge. */
		for (i = min_order - 1; i >= 0; i--) {
			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);

			while (iter) {
				struct gpu_buddy_block *block, *buddy;
				u64 block_start, block_end;

				block = rbtree_get_free_block(iter);
				iter = rb_prev(iter);

				/* Root blocks have no buddy to merge with. */
				if (!block || !block->parent)
					continue;

				block_start = gpu_buddy_block_offset(block);
				block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

				if (!contains(start, end, block_start, block_end))
					continue;

				buddy = __get_buddy(block);
				if (!gpu_buddy_block_is_free(buddy))
					continue;

				/* Same-state pairs should have merged on free already. */
				WARN_ON(gpu_buddy_block_is_clear(block) ==
					gpu_buddy_block_is_clear(buddy));

				/*
				 * Advance to the next node when the current node is the buddy,
				 * as freeing the block will also remove its buddy from the tree.
				 */
				if (iter == &buddy->rb)
					iter = rb_prev(iter);

				rbtree_remove(mm, block);
				if (gpu_buddy_block_is_clear(block))
					mm->clear_avail -= gpu_buddy_block_size(mm, block);

				order = __gpu_buddy_free(mm, block, true);
				if (order >= min_order)
					return 0;
			}
		}
	}

	return -ENOMEM;
}
2764a9671a0SJoel Fernandes 
/**
 * gpu_buddy_init - init memory manager
 *
 * @mm: GPU buddy manager to initialize
 * @size: size in bytes to manage
 * @chunk_size: minimum page size in bytes for our allocations
 *
 * Initializes the memory manager and its resources.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
{
	unsigned int i, j, root_count = 0;
	u64 offset = 0;

	if (size < chunk_size)
		return -EINVAL;

	/* The minimum chunk is one 4K page. */
	if (chunk_size < SZ_4K)
		return -EINVAL;

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	size = round_down(size, chunk_size);

	mm->size = size;
	mm->avail = size;
	mm->clear_avail = 0;
	mm->chunk_size = chunk_size;
	mm->max_order = ilog2(size) - ilog2(chunk_size);

	BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);

	/* One array of per-order rb roots for each free tree (clear/dirty). */
	mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
				       sizeof(*mm->free_trees),
				       GFP_KERNEL);
	if (!mm->free_trees)
		return -ENOMEM;

	for_each_free_tree(i) {
		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
						  sizeof(struct rb_root),
						  GFP_KERNEL);
		if (!mm->free_trees[i])
			goto out_free_tree;

		for (j = 0; j <= mm->max_order; ++j)
			mm->free_trees[i][j] = RB_ROOT;
	}

	/* One root block per set bit: @size need not be a power of two. */
	mm->n_roots = hweight64(size);

	mm->roots = kmalloc_array(mm->n_roots,
				  sizeof(struct gpu_buddy_block *),
				  GFP_KERNEL);
	if (!mm->roots)
		goto out_free_tree;

	/*
	 * Split into power-of-two blocks, in case we are given a size that is
	 * not itself a power-of-two.
	 */
	do {
		struct gpu_buddy_block *root;
		unsigned int order;
		u64 root_size;

		order = ilog2(size) - ilog2(chunk_size);
		root_size = chunk_size << order;

		root = gpu_block_alloc(mm, NULL, order, offset);
		if (!root)
			goto out_free_roots;

		mark_free(mm, root);

		BUG_ON(root_count > mm->max_order);
		BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);

		mm->roots[root_count] = root;

		offset += root_size;
		size -= root_size;
		root_count++;
	} while (size);

	return 0;

out_free_roots:
	while (root_count--)
		gpu_block_free(mm, mm->roots[root_count]);
	kfree(mm->roots);
out_free_tree:
	/* i holds the count of successfully allocated per-order arrays. */
	while (i--)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	return -ENOMEM;
}
EXPORT_SYMBOL(gpu_buddy_init);
3794a9671a0SJoel Fernandes 
/**
 * gpu_buddy_fini - tear down the memory manager
 *
 * @mm: GPU buddy manager to free
 *
 * Cleanup memory manager resources and the freetree. All blocks must
 * have been freed back to the manager before calling this.
 */
void gpu_buddy_fini(struct gpu_buddy *mm)
{
	u64 root_size, size, start;
	unsigned int order;
	int i;

	size = mm->size;

	for (i = 0; i < mm->n_roots; ++i) {
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		/* Coalesce any state-mismatched free buddies under this root. */
		__force_merge(mm, start, start + size, order);

		/* A non-free root at teardown means a caller leaked blocks. */
		if (WARN_ON(!gpu_buddy_block_is_free(mm->roots[i])))
			kunit_fail_current_test("buddy_fini() root");

		gpu_block_free(mm, mm->roots[i]);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	WARN_ON(mm->avail != mm->size);

	for_each_free_tree(i)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	kfree(mm->roots);
}
EXPORT_SYMBOL(gpu_buddy_fini);
4174a9671a0SJoel Fernandes 
/*
 * Split a free block into two buddies of half the order.  The parent is
 * marked split, its cleared bit (if set) is pushed down to both children,
 * and both children enter the free trees.
 *
 * Returns 0 on success, -ENOMEM if a child allocation fails (the block
 * is left unsplit in that case).
 */
static int split_block(struct gpu_buddy *mm,
		       struct gpu_buddy_block *block)
{
	unsigned int block_order = gpu_buddy_block_order(block) - 1;
	u64 offset = gpu_buddy_block_offset(block);

	BUG_ON(!gpu_buddy_block_is_free(block));
	BUG_ON(!gpu_buddy_block_order(block));

	block->left = gpu_block_alloc(mm, block, block_order, offset);
	if (!block->left)
		return -ENOMEM;

	block->right = gpu_block_alloc(mm, block, block_order,
				       offset + (mm->chunk_size << block_order));
	if (!block->right) {
		gpu_block_free(mm, block->left);
		return -ENOMEM;
	}

	mark_split(mm, block);

	/* Clear state lives on leaves: push it down and reset the parent. */
	if (gpu_buddy_block_is_clear(block)) {
		mark_cleared(block->left);
		mark_cleared(block->right);
		clear_reset(block);
	}

	mark_free(mm, block->left);
	mark_free(mm, block->right);

	return 0;
}
4514a9671a0SJoel Fernandes 
/**
 * gpu_get_buddy - get buddy address
 *
 * @block: GPU buddy block
 *
 * Returns the corresponding buddy block for @block, or NULL
 * if this is a root block and can't be merged further.
 * Requires some kind of locking to protect against
 * any concurrent allocate and free operations.
 */
struct gpu_buddy_block *
gpu_get_buddy(struct gpu_buddy_block *block)
{
	/* Thin exported wrapper around the internal helper. */
	return __get_buddy(block);
}
EXPORT_SYMBOL(gpu_get_buddy);
4684a9671a0SJoel Fernandes 
/**
 * gpu_buddy_reset_clear - reset blocks clear state
 *
 * @mm: GPU buddy manager
 * @is_clear: blocks clear state
 *
 * Reset the clear state based on @is_clear value for each block
 * in the freetree.
 */
void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
{
	enum gpu_buddy_free_tree src_tree, dst_tree;
	u64 root_size, size, start;
	unsigned int order;
	int i;

	/* First force-merge each root so the trees hold maximal blocks. */
	size = mm->size;
	for (i = 0; i < mm->n_roots; ++i) {
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		__force_merge(mm, start, start + size, order);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	/* Migrate every free block from the opposite-state tree. */
	src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
	dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	for (i = 0; i <= mm->max_order; ++i) {
		struct rb_root *root = &mm->free_trees[src_tree][i];
		struct gpu_buddy_block *block, *tmp;

		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
			/* Remove before flipping state: removal keys off the clear bit. */
			rbtree_remove(mm, block);
			if (is_clear) {
				mark_cleared(block);
				mm->clear_avail += gpu_buddy_block_size(mm, block);
			} else {
				clear_reset(block);
				mm->clear_avail -= gpu_buddy_block_size(mm, block);
			}

			rbtree_insert(mm, block, dst_tree);
		}
	}
}
EXPORT_SYMBOL(gpu_buddy_reset_clear);
5174a9671a0SJoel Fernandes 
5184a9671a0SJoel Fernandes /**
519*ba110db8SJoel Fernandes  * gpu_buddy_free_block - free a block
5204a9671a0SJoel Fernandes  *
521*ba110db8SJoel Fernandes  * @mm: GPU buddy manager
5224a9671a0SJoel Fernandes  * @block: block to be freed
5234a9671a0SJoel Fernandes  */
524*ba110db8SJoel Fernandes void gpu_buddy_free_block(struct gpu_buddy *mm,
525*ba110db8SJoel Fernandes 			  struct gpu_buddy_block *block)
5264a9671a0SJoel Fernandes {
527*ba110db8SJoel Fernandes 	BUG_ON(!gpu_buddy_block_is_allocated(block));
528*ba110db8SJoel Fernandes 	mm->avail += gpu_buddy_block_size(mm, block);
529*ba110db8SJoel Fernandes 	if (gpu_buddy_block_is_clear(block))
530*ba110db8SJoel Fernandes 		mm->clear_avail += gpu_buddy_block_size(mm, block);
5314a9671a0SJoel Fernandes 
532*ba110db8SJoel Fernandes 	__gpu_buddy_free(mm, block, false);
5334a9671a0SJoel Fernandes }
534*ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_free_block);
5354a9671a0SJoel Fernandes 
536*ba110db8SJoel Fernandes static void __gpu_buddy_free_list(struct gpu_buddy *mm,
5374a9671a0SJoel Fernandes 				  struct list_head *objects,
5384a9671a0SJoel Fernandes 				  bool mark_clear,
5394a9671a0SJoel Fernandes 				  bool mark_dirty)
5404a9671a0SJoel Fernandes {
541*ba110db8SJoel Fernandes 	struct gpu_buddy_block *block, *on;
5424a9671a0SJoel Fernandes 
5434a9671a0SJoel Fernandes 	WARN_ON(mark_dirty && mark_clear);
5444a9671a0SJoel Fernandes 
5454a9671a0SJoel Fernandes 	list_for_each_entry_safe(block, on, objects, link) {
5464a9671a0SJoel Fernandes 		if (mark_clear)
5474a9671a0SJoel Fernandes 			mark_cleared(block);
5484a9671a0SJoel Fernandes 		else if (mark_dirty)
5494a9671a0SJoel Fernandes 			clear_reset(block);
550*ba110db8SJoel Fernandes 		gpu_buddy_free_block(mm, block);
5514a9671a0SJoel Fernandes 		cond_resched();
5524a9671a0SJoel Fernandes 	}
5534a9671a0SJoel Fernandes 	INIT_LIST_HEAD(objects);
5544a9671a0SJoel Fernandes }
5554a9671a0SJoel Fernandes 
/* Free blocks from an internal (possibly partial) allocation attempt. */
static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
					 struct list_head *objects)
{
	/*
	 * Don't touch the clear/dirty bit, since allocation is still internal
	 * at this point. For example we might have just failed part of the
	 * allocation.
	 */
	__gpu_buddy_free_list(mm, objects, false, false);
}
5664a9671a0SJoel Fernandes 
5674a9671a0SJoel Fernandes /**
568*ba110db8SJoel Fernandes  * gpu_buddy_free_list - free blocks
5694a9671a0SJoel Fernandes  *
570*ba110db8SJoel Fernandes  * @mm: GPU buddy manager
5714a9671a0SJoel Fernandes  * @objects: input list head to free blocks
572*ba110db8SJoel Fernandes  * @flags: optional flags like GPU_BUDDY_CLEARED
5734a9671a0SJoel Fernandes  */
574*ba110db8SJoel Fernandes void gpu_buddy_free_list(struct gpu_buddy *mm,
5754a9671a0SJoel Fernandes 			 struct list_head *objects,
5764a9671a0SJoel Fernandes 			 unsigned int flags)
5774a9671a0SJoel Fernandes {
578*ba110db8SJoel Fernandes 	bool mark_clear = flags & GPU_BUDDY_CLEARED;
5794a9671a0SJoel Fernandes 
580*ba110db8SJoel Fernandes 	__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
5814a9671a0SJoel Fernandes }
582*ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_free_list);
5834a9671a0SJoel Fernandes 
584*ba110db8SJoel Fernandes static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
5854a9671a0SJoel Fernandes {
586*ba110db8SJoel Fernandes 	bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;
5874a9671a0SJoel Fernandes 
588*ba110db8SJoel Fernandes 	return needs_clear != gpu_buddy_block_is_clear(block);
5894a9671a0SJoel Fernandes }
5904a9671a0SJoel Fernandes 
/*
 * Depth-first search for a free block of exactly @order inside
 * [@start, @end), splitting larger candidates on the way down.  Unless
 * @fallback is set, blocks whose clear state conflicts with @flags are
 * skipped.
 *
 * Returns the found free block, ERR_PTR(-ENOSPC) when no candidate
 * exists, or ERR_PTR(-ENOMEM) if a split fails.
 */
static struct gpu_buddy_block *
__alloc_range_bias(struct gpu_buddy *mm,
		   u64 start, u64 end,
		   unsigned int order,
		   unsigned long flags,
		   bool fallback)
{
	u64 req_size = mm->chunk_size << order;
	struct gpu_buddy_block *block;
	struct gpu_buddy_block *buddy;
	LIST_HEAD(dfs);
	int err;
	int i;

	/* Work with an inclusive end offset from here on. */
	end = end - 1;

	for (i = 0; i < mm->n_roots; ++i)
		list_add_tail(&mm->roots[i]->tmp_link, &dfs);

	do {
		u64 block_start;
		u64 block_end;

		block = list_first_entry_or_null(&dfs,
						 struct gpu_buddy_block,
						 tmp_link);
		if (!block)
			break;

		list_del(&block->tmp_link);

		if (gpu_buddy_block_order(block) < order)
			continue;

		block_start = gpu_buddy_block_offset(block);
		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

		if (!overlaps(start, end, block_start, block_end))
			continue;

		if (gpu_buddy_block_is_allocated(block))
			continue;

		/*
		 * A partially overlapping block only helps if the overlap
		 * still has room for an aligned req_size chunk.
		 */
		if (block_start < start || block_end > end) {
			u64 adjusted_start = max(block_start, start);
			u64 adjusted_end = min(block_end, end);

			if (round_down(adjusted_end + 1, req_size) <=
			    round_up(adjusted_start, req_size))
				continue;
		}

		if (!fallback && block_incompatible(block, flags))
			continue;

		if (contains(start, end, block_start, block_end) &&
		    order == gpu_buddy_block_order(block)) {
			/*
			 * Find the free block within the range.
			 */
			if (gpu_buddy_block_is_free(block))
				return block;

			continue;
		}

		if (!gpu_buddy_block_is_split(block)) {
			err = split_block(mm, block);
			if (unlikely(err))
				goto err_undo;
		}

		/* Push left last so lower offsets are visited first. */
		list_add(&block->right->tmp_link, &dfs);
		list_add(&block->left->tmp_link, &dfs);
	} while (1);

	return ERR_PTR(-ENOSPC);

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}
6824a9671a0SJoel Fernandes 
683*ba110db8SJoel Fernandes static struct gpu_buddy_block *
684*ba110db8SJoel Fernandes __gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
6854a9671a0SJoel Fernandes 			     u64 start, u64 end,
6864a9671a0SJoel Fernandes 			     unsigned int order,
6874a9671a0SJoel Fernandes 			     unsigned long flags)
6884a9671a0SJoel Fernandes {
689*ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
6904a9671a0SJoel Fernandes 	bool fallback = false;
6914a9671a0SJoel Fernandes 
6924a9671a0SJoel Fernandes 	block = __alloc_range_bias(mm, start, end, order,
6934a9671a0SJoel Fernandes 				   flags, fallback);
6944a9671a0SJoel Fernandes 	if (IS_ERR(block))
6954a9671a0SJoel Fernandes 		return __alloc_range_bias(mm, start, end, order,
6964a9671a0SJoel Fernandes 					  flags, !fallback);
6974a9671a0SJoel Fernandes 
6984a9671a0SJoel Fernandes 	return block;
6994a9671a0SJoel Fernandes }
7004a9671a0SJoel Fernandes 
701*ba110db8SJoel Fernandes static struct gpu_buddy_block *
702*ba110db8SJoel Fernandes get_maxblock(struct gpu_buddy *mm,
7034a9671a0SJoel Fernandes 	     unsigned int order,
704*ba110db8SJoel Fernandes 	     enum gpu_buddy_free_tree tree)
7054a9671a0SJoel Fernandes {
706*ba110db8SJoel Fernandes 	struct gpu_buddy_block *max_block = NULL, *block = NULL;
7074a9671a0SJoel Fernandes 	struct rb_root *root;
7084a9671a0SJoel Fernandes 	unsigned int i;
7094a9671a0SJoel Fernandes 
7104a9671a0SJoel Fernandes 	for (i = order; i <= mm->max_order; ++i) {
7114a9671a0SJoel Fernandes 		root = &mm->free_trees[tree][i];
7124a9671a0SJoel Fernandes 		block = rbtree_last_free_block(root);
7134a9671a0SJoel Fernandes 		if (!block)
7144a9671a0SJoel Fernandes 			continue;
7154a9671a0SJoel Fernandes 
7164a9671a0SJoel Fernandes 		if (!max_block) {
7174a9671a0SJoel Fernandes 			max_block = block;
7184a9671a0SJoel Fernandes 			continue;
7194a9671a0SJoel Fernandes 		}
7204a9671a0SJoel Fernandes 
721*ba110db8SJoel Fernandes 		if (gpu_buddy_block_offset(block) >
722*ba110db8SJoel Fernandes 		    gpu_buddy_block_offset(max_block)) {
7234a9671a0SJoel Fernandes 			max_block = block;
7244a9671a0SJoel Fernandes 		}
7254a9671a0SJoel Fernandes 	}
7264a9671a0SJoel Fernandes 
7274a9671a0SJoel Fernandes 	return max_block;
7284a9671a0SJoel Fernandes }
7294a9671a0SJoel Fernandes 
730*ba110db8SJoel Fernandes static struct gpu_buddy_block *
731*ba110db8SJoel Fernandes alloc_from_freetree(struct gpu_buddy *mm,
7324a9671a0SJoel Fernandes 		    unsigned int order,
7334a9671a0SJoel Fernandes 		    unsigned long flags)
7344a9671a0SJoel Fernandes {
735*ba110db8SJoel Fernandes 	struct gpu_buddy_block *block = NULL;
7364a9671a0SJoel Fernandes 	struct rb_root *root;
737*ba110db8SJoel Fernandes 	enum gpu_buddy_free_tree tree;
7384a9671a0SJoel Fernandes 	unsigned int tmp;
7394a9671a0SJoel Fernandes 	int err;
7404a9671a0SJoel Fernandes 
741*ba110db8SJoel Fernandes 	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
742*ba110db8SJoel Fernandes 		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
7434a9671a0SJoel Fernandes 
744*ba110db8SJoel Fernandes 	if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
7454a9671a0SJoel Fernandes 		block = get_maxblock(mm, order, tree);
7464a9671a0SJoel Fernandes 		if (block)
7474a9671a0SJoel Fernandes 			/* Store the obtained block order */
748*ba110db8SJoel Fernandes 			tmp = gpu_buddy_block_order(block);
7494a9671a0SJoel Fernandes 	} else {
7504a9671a0SJoel Fernandes 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
7514a9671a0SJoel Fernandes 			/* Get RB tree root for this order and tree */
7524a9671a0SJoel Fernandes 			root = &mm->free_trees[tree][tmp];
7534a9671a0SJoel Fernandes 			block = rbtree_last_free_block(root);
7544a9671a0SJoel Fernandes 			if (block)
7554a9671a0SJoel Fernandes 				break;
7564a9671a0SJoel Fernandes 		}
7574a9671a0SJoel Fernandes 	}
7584a9671a0SJoel Fernandes 
7594a9671a0SJoel Fernandes 	if (!block) {
7604a9671a0SJoel Fernandes 		/* Try allocating from the other tree */
761*ba110db8SJoel Fernandes 		tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
762*ba110db8SJoel Fernandes 			GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
7634a9671a0SJoel Fernandes 
7644a9671a0SJoel Fernandes 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
7654a9671a0SJoel Fernandes 			root = &mm->free_trees[tree][tmp];
7664a9671a0SJoel Fernandes 			block = rbtree_last_free_block(root);
7674a9671a0SJoel Fernandes 			if (block)
7684a9671a0SJoel Fernandes 				break;
7694a9671a0SJoel Fernandes 		}
7704a9671a0SJoel Fernandes 
7714a9671a0SJoel Fernandes 		if (!block)
7724a9671a0SJoel Fernandes 			return ERR_PTR(-ENOSPC);
7734a9671a0SJoel Fernandes 	}
7744a9671a0SJoel Fernandes 
775*ba110db8SJoel Fernandes 	BUG_ON(!gpu_buddy_block_is_free(block));
7764a9671a0SJoel Fernandes 
7774a9671a0SJoel Fernandes 	while (tmp != order) {
7784a9671a0SJoel Fernandes 		err = split_block(mm, block);
7794a9671a0SJoel Fernandes 		if (unlikely(err))
7804a9671a0SJoel Fernandes 			goto err_undo;
7814a9671a0SJoel Fernandes 
7824a9671a0SJoel Fernandes 		block = block->right;
7834a9671a0SJoel Fernandes 		tmp--;
7844a9671a0SJoel Fernandes 	}
7854a9671a0SJoel Fernandes 	return block;
7864a9671a0SJoel Fernandes 
7874a9671a0SJoel Fernandes err_undo:
7884a9671a0SJoel Fernandes 	if (tmp != order)
789*ba110db8SJoel Fernandes 		__gpu_buddy_free(mm, block, false);
7904a9671a0SJoel Fernandes 	return ERR_PTR(err);
7914a9671a0SJoel Fernandes }
7924a9671a0SJoel Fernandes 
793*ba110db8SJoel Fernandes static int __alloc_range(struct gpu_buddy *mm,
7944a9671a0SJoel Fernandes 			 struct list_head *dfs,
7954a9671a0SJoel Fernandes 			 u64 start, u64 size,
7964a9671a0SJoel Fernandes 			 struct list_head *blocks,
7974a9671a0SJoel Fernandes 			 u64 *total_allocated_on_err)
7984a9671a0SJoel Fernandes {
799*ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
800*ba110db8SJoel Fernandes 	struct gpu_buddy_block *buddy;
8014a9671a0SJoel Fernandes 	u64 total_allocated = 0;
8024a9671a0SJoel Fernandes 	LIST_HEAD(allocated);
8034a9671a0SJoel Fernandes 	u64 end;
8044a9671a0SJoel Fernandes 	int err;
8054a9671a0SJoel Fernandes 
8064a9671a0SJoel Fernandes 	end = start + size - 1;
8074a9671a0SJoel Fernandes 
8084a9671a0SJoel Fernandes 	do {
8094a9671a0SJoel Fernandes 		u64 block_start;
8104a9671a0SJoel Fernandes 		u64 block_end;
8114a9671a0SJoel Fernandes 
8124a9671a0SJoel Fernandes 		block = list_first_entry_or_null(dfs,
813*ba110db8SJoel Fernandes 						 struct gpu_buddy_block,
8144a9671a0SJoel Fernandes 						 tmp_link);
8154a9671a0SJoel Fernandes 		if (!block)
8164a9671a0SJoel Fernandes 			break;
8174a9671a0SJoel Fernandes 
8184a9671a0SJoel Fernandes 		list_del(&block->tmp_link);
8194a9671a0SJoel Fernandes 
820*ba110db8SJoel Fernandes 		block_start = gpu_buddy_block_offset(block);
821*ba110db8SJoel Fernandes 		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
8224a9671a0SJoel Fernandes 
8234a9671a0SJoel Fernandes 		if (!overlaps(start, end, block_start, block_end))
8244a9671a0SJoel Fernandes 			continue;
8254a9671a0SJoel Fernandes 
826*ba110db8SJoel Fernandes 		if (gpu_buddy_block_is_allocated(block)) {
8274a9671a0SJoel Fernandes 			err = -ENOSPC;
8284a9671a0SJoel Fernandes 			goto err_free;
8294a9671a0SJoel Fernandes 		}
8304a9671a0SJoel Fernandes 
8314a9671a0SJoel Fernandes 		if (contains(start, end, block_start, block_end)) {
832*ba110db8SJoel Fernandes 			if (gpu_buddy_block_is_free(block)) {
8334a9671a0SJoel Fernandes 				mark_allocated(mm, block);
834*ba110db8SJoel Fernandes 				total_allocated += gpu_buddy_block_size(mm, block);
835*ba110db8SJoel Fernandes 				mm->avail -= gpu_buddy_block_size(mm, block);
836*ba110db8SJoel Fernandes 				if (gpu_buddy_block_is_clear(block))
837*ba110db8SJoel Fernandes 					mm->clear_avail -= gpu_buddy_block_size(mm, block);
8384a9671a0SJoel Fernandes 				list_add_tail(&block->link, &allocated);
8394a9671a0SJoel Fernandes 				continue;
8404a9671a0SJoel Fernandes 			} else if (!mm->clear_avail) {
8414a9671a0SJoel Fernandes 				err = -ENOSPC;
8424a9671a0SJoel Fernandes 				goto err_free;
8434a9671a0SJoel Fernandes 			}
8444a9671a0SJoel Fernandes 		}
8454a9671a0SJoel Fernandes 
846*ba110db8SJoel Fernandes 		if (!gpu_buddy_block_is_split(block)) {
8474a9671a0SJoel Fernandes 			err = split_block(mm, block);
8484a9671a0SJoel Fernandes 			if (unlikely(err))
8494a9671a0SJoel Fernandes 				goto err_undo;
8504a9671a0SJoel Fernandes 		}
8514a9671a0SJoel Fernandes 
8524a9671a0SJoel Fernandes 		list_add(&block->right->tmp_link, dfs);
8534a9671a0SJoel Fernandes 		list_add(&block->left->tmp_link, dfs);
8544a9671a0SJoel Fernandes 	} while (1);
8554a9671a0SJoel Fernandes 
8564a9671a0SJoel Fernandes 	if (total_allocated < size) {
8574a9671a0SJoel Fernandes 		err = -ENOSPC;
8584a9671a0SJoel Fernandes 		goto err_free;
8594a9671a0SJoel Fernandes 	}
8604a9671a0SJoel Fernandes 
8614a9671a0SJoel Fernandes 	list_splice_tail(&allocated, blocks);
8624a9671a0SJoel Fernandes 
8634a9671a0SJoel Fernandes 	return 0;
8644a9671a0SJoel Fernandes 
8654a9671a0SJoel Fernandes err_undo:
8664a9671a0SJoel Fernandes 	/*
8674a9671a0SJoel Fernandes 	 * We really don't want to leave around a bunch of split blocks, since
8684a9671a0SJoel Fernandes 	 * bigger is better, so make sure we merge everything back before we
8694a9671a0SJoel Fernandes 	 * free the allocated blocks.
8704a9671a0SJoel Fernandes 	 */
8714a9671a0SJoel Fernandes 	buddy = __get_buddy(block);
8724a9671a0SJoel Fernandes 	if (buddy &&
873*ba110db8SJoel Fernandes 	    (gpu_buddy_block_is_free(block) &&
874*ba110db8SJoel Fernandes 	     gpu_buddy_block_is_free(buddy)))
875*ba110db8SJoel Fernandes 		__gpu_buddy_free(mm, block, false);
8764a9671a0SJoel Fernandes 
8774a9671a0SJoel Fernandes err_free:
8784a9671a0SJoel Fernandes 	if (err == -ENOSPC && total_allocated_on_err) {
8794a9671a0SJoel Fernandes 		list_splice_tail(&allocated, blocks);
8804a9671a0SJoel Fernandes 		*total_allocated_on_err = total_allocated;
8814a9671a0SJoel Fernandes 	} else {
882*ba110db8SJoel Fernandes 		gpu_buddy_free_list_internal(mm, &allocated);
8834a9671a0SJoel Fernandes 	}
8844a9671a0SJoel Fernandes 
8854a9671a0SJoel Fernandes 	return err;
8864a9671a0SJoel Fernandes }
8874a9671a0SJoel Fernandes 
888*ba110db8SJoel Fernandes static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
8894a9671a0SJoel Fernandes 				   u64 start,
8904a9671a0SJoel Fernandes 				   u64 size,
8914a9671a0SJoel Fernandes 				   u64 *total_allocated_on_err,
8924a9671a0SJoel Fernandes 				   struct list_head *blocks)
8934a9671a0SJoel Fernandes {
8944a9671a0SJoel Fernandes 	LIST_HEAD(dfs);
8954a9671a0SJoel Fernandes 	int i;
8964a9671a0SJoel Fernandes 
8974a9671a0SJoel Fernandes 	for (i = 0; i < mm->n_roots; ++i)
8984a9671a0SJoel Fernandes 		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
8994a9671a0SJoel Fernandes 
9004a9671a0SJoel Fernandes 	return __alloc_range(mm, &dfs, start, size,
9014a9671a0SJoel Fernandes 			     blocks, total_allocated_on_err);
9024a9671a0SJoel Fernandes }
9034a9671a0SJoel Fernandes 
904*ba110db8SJoel Fernandes static int __alloc_contig_try_harder(struct gpu_buddy *mm,
9054a9671a0SJoel Fernandes 				     u64 size,
9064a9671a0SJoel Fernandes 				     u64 min_block_size,
9074a9671a0SJoel Fernandes 				     struct list_head *blocks)
9084a9671a0SJoel Fernandes {
9094a9671a0SJoel Fernandes 	u64 rhs_offset, lhs_offset, lhs_size, filled;
910*ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
9114a9671a0SJoel Fernandes 	unsigned int tree, order;
9124a9671a0SJoel Fernandes 	LIST_HEAD(blocks_lhs);
9134a9671a0SJoel Fernandes 	unsigned long pages;
9144a9671a0SJoel Fernandes 	u64 modify_size;
9154a9671a0SJoel Fernandes 	int err;
9164a9671a0SJoel Fernandes 
9174a9671a0SJoel Fernandes 	modify_size = rounddown_pow_of_two(size);
9184a9671a0SJoel Fernandes 	pages = modify_size >> ilog2(mm->chunk_size);
9194a9671a0SJoel Fernandes 	order = fls(pages) - 1;
9204a9671a0SJoel Fernandes 	if (order == 0)
9214a9671a0SJoel Fernandes 		return -ENOSPC;
9224a9671a0SJoel Fernandes 
9234a9671a0SJoel Fernandes 	for_each_free_tree(tree) {
9244a9671a0SJoel Fernandes 		struct rb_root *root;
9254a9671a0SJoel Fernandes 		struct rb_node *iter;
9264a9671a0SJoel Fernandes 
9274a9671a0SJoel Fernandes 		root = &mm->free_trees[tree][order];
9284a9671a0SJoel Fernandes 		if (rbtree_is_empty(root))
9294a9671a0SJoel Fernandes 			continue;
9304a9671a0SJoel Fernandes 
9314a9671a0SJoel Fernandes 		iter = rb_last(root);
9324a9671a0SJoel Fernandes 		while (iter) {
9334a9671a0SJoel Fernandes 			block = rbtree_get_free_block(iter);
9344a9671a0SJoel Fernandes 
9354a9671a0SJoel Fernandes 			/* Allocate blocks traversing RHS */
936*ba110db8SJoel Fernandes 			rhs_offset = gpu_buddy_block_offset(block);
937*ba110db8SJoel Fernandes 			err =  __gpu_buddy_alloc_range(mm, rhs_offset, size,
9384a9671a0SJoel Fernandes 						       &filled, blocks);
9394a9671a0SJoel Fernandes 			if (!err || err != -ENOSPC)
9404a9671a0SJoel Fernandes 				return err;
9414a9671a0SJoel Fernandes 
9424a9671a0SJoel Fernandes 			lhs_size = max((size - filled), min_block_size);
9434a9671a0SJoel Fernandes 			if (!IS_ALIGNED(lhs_size, min_block_size))
9444a9671a0SJoel Fernandes 				lhs_size = round_up(lhs_size, min_block_size);
9454a9671a0SJoel Fernandes 
9464a9671a0SJoel Fernandes 			/* Allocate blocks traversing LHS */
947*ba110db8SJoel Fernandes 			lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
948*ba110db8SJoel Fernandes 			err =  __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
9494a9671a0SJoel Fernandes 						       NULL, &blocks_lhs);
9504a9671a0SJoel Fernandes 			if (!err) {
9514a9671a0SJoel Fernandes 				list_splice(&blocks_lhs, blocks);
9524a9671a0SJoel Fernandes 				return 0;
9534a9671a0SJoel Fernandes 			} else if (err != -ENOSPC) {
954*ba110db8SJoel Fernandes 				gpu_buddy_free_list_internal(mm, blocks);
9554a9671a0SJoel Fernandes 				return err;
9564a9671a0SJoel Fernandes 			}
9574a9671a0SJoel Fernandes 			/* Free blocks for the next iteration */
958*ba110db8SJoel Fernandes 			gpu_buddy_free_list_internal(mm, blocks);
9594a9671a0SJoel Fernandes 
9604a9671a0SJoel Fernandes 			iter = rb_prev(iter);
9614a9671a0SJoel Fernandes 		}
9624a9671a0SJoel Fernandes 	}
9634a9671a0SJoel Fernandes 
9644a9671a0SJoel Fernandes 	return -ENOSPC;
9654a9671a0SJoel Fernandes }
9664a9671a0SJoel Fernandes 
9674a9671a0SJoel Fernandes /**
968*ba110db8SJoel Fernandes  * gpu_buddy_block_trim - free unused pages
9694a9671a0SJoel Fernandes  *
970*ba110db8SJoel Fernandes  * @mm: GPU buddy manager
9714a9671a0SJoel Fernandes  * @start: start address to begin the trimming.
9724a9671a0SJoel Fernandes  * @new_size: original size requested
9734a9671a0SJoel Fernandes  * @blocks: Input and output list of allocated blocks.
9744a9671a0SJoel Fernandes  * MUST contain single block as input to be trimmed.
9754a9671a0SJoel Fernandes  * On success will contain the newly allocated blocks
9764a9671a0SJoel Fernandes  * making up the @new_size. Blocks always appear in
9774a9671a0SJoel Fernandes  * ascending order
9784a9671a0SJoel Fernandes  *
9794a9671a0SJoel Fernandes  * For contiguous allocation, we round up the size to the nearest
9804a9671a0SJoel Fernandes  * power of two value, drivers consume *actual* size, so remaining
9814a9671a0SJoel Fernandes  * portions are unused and can be optionally freed with this function
9824a9671a0SJoel Fernandes  *
9834a9671a0SJoel Fernandes  * Returns:
9844a9671a0SJoel Fernandes  * 0 on success, error code on failure.
9854a9671a0SJoel Fernandes  */
986*ba110db8SJoel Fernandes int gpu_buddy_block_trim(struct gpu_buddy *mm,
9874a9671a0SJoel Fernandes 			 u64 *start,
9884a9671a0SJoel Fernandes 			 u64 new_size,
9894a9671a0SJoel Fernandes 			 struct list_head *blocks)
9904a9671a0SJoel Fernandes {
991*ba110db8SJoel Fernandes 	struct gpu_buddy_block *parent;
992*ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
9934a9671a0SJoel Fernandes 	u64 block_start, block_end;
9944a9671a0SJoel Fernandes 	LIST_HEAD(dfs);
9954a9671a0SJoel Fernandes 	u64 new_start;
9964a9671a0SJoel Fernandes 	int err;
9974a9671a0SJoel Fernandes 
9984a9671a0SJoel Fernandes 	if (!list_is_singular(blocks))
9994a9671a0SJoel Fernandes 		return -EINVAL;
10004a9671a0SJoel Fernandes 
10014a9671a0SJoel Fernandes 	block = list_first_entry(blocks,
1002*ba110db8SJoel Fernandes 				 struct gpu_buddy_block,
10034a9671a0SJoel Fernandes 				 link);
10044a9671a0SJoel Fernandes 
1005*ba110db8SJoel Fernandes 	block_start = gpu_buddy_block_offset(block);
1006*ba110db8SJoel Fernandes 	block_end = block_start + gpu_buddy_block_size(mm, block);
10074a9671a0SJoel Fernandes 
1008*ba110db8SJoel Fernandes 	if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
10094a9671a0SJoel Fernandes 		return -EINVAL;
10104a9671a0SJoel Fernandes 
1011*ba110db8SJoel Fernandes 	if (new_size > gpu_buddy_block_size(mm, block))
10124a9671a0SJoel Fernandes 		return -EINVAL;
10134a9671a0SJoel Fernandes 
10144a9671a0SJoel Fernandes 	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
10154a9671a0SJoel Fernandes 		return -EINVAL;
10164a9671a0SJoel Fernandes 
1017*ba110db8SJoel Fernandes 	if (new_size == gpu_buddy_block_size(mm, block))
10184a9671a0SJoel Fernandes 		return 0;
10194a9671a0SJoel Fernandes 
10204a9671a0SJoel Fernandes 	new_start = block_start;
10214a9671a0SJoel Fernandes 	if (start) {
10224a9671a0SJoel Fernandes 		new_start = *start;
10234a9671a0SJoel Fernandes 
10244a9671a0SJoel Fernandes 		if (new_start < block_start)
10254a9671a0SJoel Fernandes 			return -EINVAL;
10264a9671a0SJoel Fernandes 
10274a9671a0SJoel Fernandes 		if (!IS_ALIGNED(new_start, mm->chunk_size))
10284a9671a0SJoel Fernandes 			return -EINVAL;
10294a9671a0SJoel Fernandes 
10304a9671a0SJoel Fernandes 		if (range_overflows(new_start, new_size, block_end))
10314a9671a0SJoel Fernandes 			return -EINVAL;
10324a9671a0SJoel Fernandes 	}
10334a9671a0SJoel Fernandes 
10344a9671a0SJoel Fernandes 	list_del(&block->link);
10354a9671a0SJoel Fernandes 	mark_free(mm, block);
1036*ba110db8SJoel Fernandes 	mm->avail += gpu_buddy_block_size(mm, block);
1037*ba110db8SJoel Fernandes 	if (gpu_buddy_block_is_clear(block))
1038*ba110db8SJoel Fernandes 		mm->clear_avail += gpu_buddy_block_size(mm, block);
10394a9671a0SJoel Fernandes 
10404a9671a0SJoel Fernandes 	/* Prevent recursively freeing this node */
10414a9671a0SJoel Fernandes 	parent = block->parent;
10424a9671a0SJoel Fernandes 	block->parent = NULL;
10434a9671a0SJoel Fernandes 
10444a9671a0SJoel Fernandes 	list_add(&block->tmp_link, &dfs);
10454a9671a0SJoel Fernandes 	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
10464a9671a0SJoel Fernandes 	if (err) {
10474a9671a0SJoel Fernandes 		mark_allocated(mm, block);
1048*ba110db8SJoel Fernandes 		mm->avail -= gpu_buddy_block_size(mm, block);
1049*ba110db8SJoel Fernandes 		if (gpu_buddy_block_is_clear(block))
1050*ba110db8SJoel Fernandes 			mm->clear_avail -= gpu_buddy_block_size(mm, block);
10514a9671a0SJoel Fernandes 		list_add(&block->link, blocks);
10524a9671a0SJoel Fernandes 	}
10534a9671a0SJoel Fernandes 
10544a9671a0SJoel Fernandes 	block->parent = parent;
10554a9671a0SJoel Fernandes 	return err;
10564a9671a0SJoel Fernandes }
1057*ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_block_trim);
10584a9671a0SJoel Fernandes 
1059*ba110db8SJoel Fernandes static struct gpu_buddy_block *
1060*ba110db8SJoel Fernandes __gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
10614a9671a0SJoel Fernandes 			 u64 start, u64 end,
10624a9671a0SJoel Fernandes 			 unsigned int order,
10634a9671a0SJoel Fernandes 			 unsigned long flags)
10644a9671a0SJoel Fernandes {
1065*ba110db8SJoel Fernandes 	if (flags & GPU_BUDDY_RANGE_ALLOCATION)
10664a9671a0SJoel Fernandes 		/* Allocate traversing within the range */
1067*ba110db8SJoel Fernandes 		return  __gpu_buddy_alloc_range_bias(mm, start, end,
10684a9671a0SJoel Fernandes 						     order, flags);
10694a9671a0SJoel Fernandes 	else
10704a9671a0SJoel Fernandes 		/* Allocate from freetree */
10714a9671a0SJoel Fernandes 		return alloc_from_freetree(mm, order, flags);
10724a9671a0SJoel Fernandes }
10734a9671a0SJoel Fernandes 
10744a9671a0SJoel Fernandes /**
1075*ba110db8SJoel Fernandes  * gpu_buddy_alloc_blocks - allocate power-of-two blocks
10764a9671a0SJoel Fernandes  *
1077*ba110db8SJoel Fernandes  * @mm: GPU buddy manager to allocate from
10784a9671a0SJoel Fernandes  * @start: start of the allowed range for this block
10794a9671a0SJoel Fernandes  * @end: end of the allowed range for this block
10804a9671a0SJoel Fernandes  * @size: size of the allocation in bytes
10814a9671a0SJoel Fernandes  * @min_block_size: alignment of the allocation
10824a9671a0SJoel Fernandes  * @blocks: output list head to add allocated blocks
1083*ba110db8SJoel Fernandes  * @flags: GPU_BUDDY_*_ALLOCATION flags
10844a9671a0SJoel Fernandes  *
10854a9671a0SJoel Fernandes  * alloc_range_bias() called on range limitations, which traverses
10864a9671a0SJoel Fernandes  * the tree and returns the desired block.
10874a9671a0SJoel Fernandes  *
10884a9671a0SJoel Fernandes  * alloc_from_freetree() called when *no* range restrictions
10894a9671a0SJoel Fernandes  * are enforced, which picks the block from the freetree.
10904a9671a0SJoel Fernandes  *
10914a9671a0SJoel Fernandes  * Returns:
10924a9671a0SJoel Fernandes  * 0 on success, error code on failure.
10934a9671a0SJoel Fernandes  */
1094*ba110db8SJoel Fernandes int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
10954a9671a0SJoel Fernandes 			   u64 start, u64 end, u64 size,
10964a9671a0SJoel Fernandes 			   u64 min_block_size,
10974a9671a0SJoel Fernandes 			   struct list_head *blocks,
10984a9671a0SJoel Fernandes 			   unsigned long flags)
10994a9671a0SJoel Fernandes {
1100*ba110db8SJoel Fernandes 	struct gpu_buddy_block *block = NULL;
11014a9671a0SJoel Fernandes 	u64 original_size, original_min_size;
11024a9671a0SJoel Fernandes 	unsigned int min_order, order;
11034a9671a0SJoel Fernandes 	LIST_HEAD(allocated);
11044a9671a0SJoel Fernandes 	unsigned long pages;
11054a9671a0SJoel Fernandes 	int err;
11064a9671a0SJoel Fernandes 
11074a9671a0SJoel Fernandes 	if (size < mm->chunk_size)
11084a9671a0SJoel Fernandes 		return -EINVAL;
11094a9671a0SJoel Fernandes 
11104a9671a0SJoel Fernandes 	if (min_block_size < mm->chunk_size)
11114a9671a0SJoel Fernandes 		return -EINVAL;
11124a9671a0SJoel Fernandes 
11134a9671a0SJoel Fernandes 	if (!is_power_of_2(min_block_size))
11144a9671a0SJoel Fernandes 		return -EINVAL;
11154a9671a0SJoel Fernandes 
11164a9671a0SJoel Fernandes 	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
11174a9671a0SJoel Fernandes 		return -EINVAL;
11184a9671a0SJoel Fernandes 
11194a9671a0SJoel Fernandes 	if (end > mm->size)
11204a9671a0SJoel Fernandes 		return -EINVAL;
11214a9671a0SJoel Fernandes 
11224a9671a0SJoel Fernandes 	if (range_overflows(start, size, mm->size))
11234a9671a0SJoel Fernandes 		return -EINVAL;
11244a9671a0SJoel Fernandes 
11254a9671a0SJoel Fernandes 	/* Actual range allocation */
11264a9671a0SJoel Fernandes 	if (start + size == end) {
11274a9671a0SJoel Fernandes 		if (!IS_ALIGNED(start | end, min_block_size))
11284a9671a0SJoel Fernandes 			return -EINVAL;
11294a9671a0SJoel Fernandes 
1130*ba110db8SJoel Fernandes 		return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
11314a9671a0SJoel Fernandes 	}
11324a9671a0SJoel Fernandes 
11334a9671a0SJoel Fernandes 	original_size = size;
11344a9671a0SJoel Fernandes 	original_min_size = min_block_size;
11354a9671a0SJoel Fernandes 
11364a9671a0SJoel Fernandes 	/* Roundup the size to power of 2 */
1137*ba110db8SJoel Fernandes 	if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
11384a9671a0SJoel Fernandes 		size = roundup_pow_of_two(size);
11394a9671a0SJoel Fernandes 		min_block_size = size;
11404a9671a0SJoel Fernandes 	/* Align size value to min_block_size */
11414a9671a0SJoel Fernandes 	} else if (!IS_ALIGNED(size, min_block_size)) {
11424a9671a0SJoel Fernandes 		size = round_up(size, min_block_size);
11434a9671a0SJoel Fernandes 	}
11444a9671a0SJoel Fernandes 
11454a9671a0SJoel Fernandes 	pages = size >> ilog2(mm->chunk_size);
11464a9671a0SJoel Fernandes 	order = fls(pages) - 1;
11474a9671a0SJoel Fernandes 	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
11484a9671a0SJoel Fernandes 
11494a9671a0SJoel Fernandes 	if (order > mm->max_order || size > mm->size) {
1150*ba110db8SJoel Fernandes 		if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
1151*ba110db8SJoel Fernandes 		    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
11524a9671a0SJoel Fernandes 			return __alloc_contig_try_harder(mm, original_size,
11534a9671a0SJoel Fernandes 							 original_min_size, blocks);
11544a9671a0SJoel Fernandes 
11554a9671a0SJoel Fernandes 		return -EINVAL;
11564a9671a0SJoel Fernandes 	}
11574a9671a0SJoel Fernandes 
11584a9671a0SJoel Fernandes 	do {
11594a9671a0SJoel Fernandes 		order = min(order, (unsigned int)fls(pages) - 1);
11604a9671a0SJoel Fernandes 		BUG_ON(order > mm->max_order);
11614a9671a0SJoel Fernandes 		BUG_ON(order < min_order);
11624a9671a0SJoel Fernandes 
11634a9671a0SJoel Fernandes 		do {
1164*ba110db8SJoel Fernandes 			block = __gpu_buddy_alloc_blocks(mm, start,
11654a9671a0SJoel Fernandes 							 end,
11664a9671a0SJoel Fernandes 							 order,
11674a9671a0SJoel Fernandes 							 flags);
11684a9671a0SJoel Fernandes 			if (!IS_ERR(block))
11694a9671a0SJoel Fernandes 				break;
11704a9671a0SJoel Fernandes 
11714a9671a0SJoel Fernandes 			if (order-- == min_order) {
11724a9671a0SJoel Fernandes 				/* Try allocation through force merge method */
11734a9671a0SJoel Fernandes 				if (mm->clear_avail &&
11744a9671a0SJoel Fernandes 				    !__force_merge(mm, start, end, min_order)) {
1175*ba110db8SJoel Fernandes 					block = __gpu_buddy_alloc_blocks(mm, start,
11764a9671a0SJoel Fernandes 									 end,
11774a9671a0SJoel Fernandes 									 min_order,
11784a9671a0SJoel Fernandes 									 flags);
11794a9671a0SJoel Fernandes 					if (!IS_ERR(block)) {
11804a9671a0SJoel Fernandes 						order = min_order;
11814a9671a0SJoel Fernandes 						break;
11824a9671a0SJoel Fernandes 					}
11834a9671a0SJoel Fernandes 				}
11844a9671a0SJoel Fernandes 
11854a9671a0SJoel Fernandes 				/*
11864a9671a0SJoel Fernandes 				 * Try contiguous block allocation through
11874a9671a0SJoel Fernandes 				 * try harder method.
11884a9671a0SJoel Fernandes 				 */
1189*ba110db8SJoel Fernandes 				if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
1190*ba110db8SJoel Fernandes 				    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
11914a9671a0SJoel Fernandes 					return __alloc_contig_try_harder(mm,
11924a9671a0SJoel Fernandes 									 original_size,
11934a9671a0SJoel Fernandes 									 original_min_size,
11944a9671a0SJoel Fernandes 									 blocks);
11954a9671a0SJoel Fernandes 				err = -ENOSPC;
11964a9671a0SJoel Fernandes 				goto err_free;
11974a9671a0SJoel Fernandes 			}
11984a9671a0SJoel Fernandes 		} while (1);
11994a9671a0SJoel Fernandes 
12004a9671a0SJoel Fernandes 		mark_allocated(mm, block);
1201*ba110db8SJoel Fernandes 		mm->avail -= gpu_buddy_block_size(mm, block);
1202*ba110db8SJoel Fernandes 		if (gpu_buddy_block_is_clear(block))
1203*ba110db8SJoel Fernandes 			mm->clear_avail -= gpu_buddy_block_size(mm, block);
12044a9671a0SJoel Fernandes 		kmemleak_update_trace(block);
12054a9671a0SJoel Fernandes 		list_add_tail(&block->link, &allocated);
12064a9671a0SJoel Fernandes 
12074a9671a0SJoel Fernandes 		pages -= BIT(order);
12084a9671a0SJoel Fernandes 
12094a9671a0SJoel Fernandes 		if (!pages)
12104a9671a0SJoel Fernandes 			break;
12114a9671a0SJoel Fernandes 	} while (1);
12124a9671a0SJoel Fernandes 
12134a9671a0SJoel Fernandes 	/* Trim the allocated block to the required size */
1214*ba110db8SJoel Fernandes 	if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
12154a9671a0SJoel Fernandes 	    original_size != size) {
12164a9671a0SJoel Fernandes 		struct list_head *trim_list;
12174a9671a0SJoel Fernandes 		LIST_HEAD(temp);
12184a9671a0SJoel Fernandes 		u64 trim_size;
12194a9671a0SJoel Fernandes 
12204a9671a0SJoel Fernandes 		trim_list = &allocated;
12214a9671a0SJoel Fernandes 		trim_size = original_size;
12224a9671a0SJoel Fernandes 
12234a9671a0SJoel Fernandes 		if (!list_is_singular(&allocated)) {
12244a9671a0SJoel Fernandes 			block = list_last_entry(&allocated, typeof(*block), link);
12254a9671a0SJoel Fernandes 			list_move(&block->link, &temp);
12264a9671a0SJoel Fernandes 			trim_list = &temp;
1227*ba110db8SJoel Fernandes 			trim_size = gpu_buddy_block_size(mm, block) -
12284a9671a0SJoel Fernandes 				(size - original_size);
12294a9671a0SJoel Fernandes 		}
12304a9671a0SJoel Fernandes 
1231*ba110db8SJoel Fernandes 		gpu_buddy_block_trim(mm,
12324a9671a0SJoel Fernandes 				     NULL,
12334a9671a0SJoel Fernandes 				     trim_size,
12344a9671a0SJoel Fernandes 				     trim_list);
12354a9671a0SJoel Fernandes 
12364a9671a0SJoel Fernandes 		if (!list_empty(&temp))
12374a9671a0SJoel Fernandes 			list_splice_tail(trim_list, &allocated);
12384a9671a0SJoel Fernandes 	}
12394a9671a0SJoel Fernandes 
12404a9671a0SJoel Fernandes 	list_splice_tail(&allocated, blocks);
12414a9671a0SJoel Fernandes 	return 0;
12424a9671a0SJoel Fernandes 
12434a9671a0SJoel Fernandes err_free:
1244*ba110db8SJoel Fernandes 	gpu_buddy_free_list_internal(mm, &allocated);
12454a9671a0SJoel Fernandes 	return err;
12464a9671a0SJoel Fernandes }
1247*ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_alloc_blocks);
12484a9671a0SJoel Fernandes 
12494a9671a0SJoel Fernandes /**
1250*ba110db8SJoel Fernandes  * gpu_buddy_block_print - print block information
12514a9671a0SJoel Fernandes  *
1252*ba110db8SJoel Fernandes  * @mm: GPU buddy manager
1253*ba110db8SJoel Fernandes  * @block: GPU buddy block
12544a9671a0SJoel Fernandes  */
1255*ba110db8SJoel Fernandes void gpu_buddy_block_print(struct gpu_buddy *mm,
1256*ba110db8SJoel Fernandes 			   struct gpu_buddy_block *block)
12574a9671a0SJoel Fernandes {
1258*ba110db8SJoel Fernandes 	u64 start = gpu_buddy_block_offset(block);
1259*ba110db8SJoel Fernandes 	u64 size = gpu_buddy_block_size(mm, block);
12604a9671a0SJoel Fernandes 
1261*ba110db8SJoel Fernandes 	pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
12624a9671a0SJoel Fernandes }
1263*ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_block_print);
12644a9671a0SJoel Fernandes 
12654a9671a0SJoel Fernandes /**
1266*ba110db8SJoel Fernandes  * gpu_buddy_print - print allocator state
12674a9671a0SJoel Fernandes  *
1268*ba110db8SJoel Fernandes  * @mm: GPU buddy manager
1269*ba110db8SJoel Fernandes  * @p: GPU printer to use
12704a9671a0SJoel Fernandes  */
1271*ba110db8SJoel Fernandes void gpu_buddy_print(struct gpu_buddy *mm)
12724a9671a0SJoel Fernandes {
12734a9671a0SJoel Fernandes 	int order;
12744a9671a0SJoel Fernandes 
1275*ba110db8SJoel Fernandes 	pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
12764a9671a0SJoel Fernandes 		mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
12774a9671a0SJoel Fernandes 
12784a9671a0SJoel Fernandes 	for (order = mm->max_order; order >= 0; order--) {
1279*ba110db8SJoel Fernandes 		struct gpu_buddy_block *block, *tmp;
12804a9671a0SJoel Fernandes 		struct rb_root *root;
12814a9671a0SJoel Fernandes 		u64 count = 0, free;
12824a9671a0SJoel Fernandes 		unsigned int tree;
12834a9671a0SJoel Fernandes 
12844a9671a0SJoel Fernandes 		for_each_free_tree(tree) {
12854a9671a0SJoel Fernandes 			root = &mm->free_trees[tree][order];
12864a9671a0SJoel Fernandes 
12874a9671a0SJoel Fernandes 			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
1288*ba110db8SJoel Fernandes 				BUG_ON(!gpu_buddy_block_is_free(block));
12894a9671a0SJoel Fernandes 				count++;
12904a9671a0SJoel Fernandes 			}
12914a9671a0SJoel Fernandes 		}
12924a9671a0SJoel Fernandes 
12934a9671a0SJoel Fernandes 		free = count * (mm->chunk_size << order);
12944a9671a0SJoel Fernandes 		if (free < SZ_1M)
1295*ba110db8SJoel Fernandes 			pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
1296*ba110db8SJoel Fernandes 				order, free >> 10, count);
12974a9671a0SJoel Fernandes 		else
1298*ba110db8SJoel Fernandes 			pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
1299*ba110db8SJoel Fernandes 				order, free >> 20, count);
13004a9671a0SJoel Fernandes 	}
13014a9671a0SJoel Fernandes }
1302*ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_print);
13034a9671a0SJoel Fernandes 
1304*ba110db8SJoel Fernandes static void gpu_buddy_module_exit(void)
13054a9671a0SJoel Fernandes {
13064a9671a0SJoel Fernandes 	kmem_cache_destroy(slab_blocks);
13074a9671a0SJoel Fernandes }
13084a9671a0SJoel Fernandes 
1309*ba110db8SJoel Fernandes static int __init gpu_buddy_module_init(void)
13104a9671a0SJoel Fernandes {
1311*ba110db8SJoel Fernandes 	slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
13124a9671a0SJoel Fernandes 	if (!slab_blocks)
13134a9671a0SJoel Fernandes 		return -ENOMEM;
13144a9671a0SJoel Fernandes 
13154a9671a0SJoel Fernandes 	return 0;
13164a9671a0SJoel Fernandes }
13174a9671a0SJoel Fernandes 
1318*ba110db8SJoel Fernandes module_init(gpu_buddy_module_init);
1319*ba110db8SJoel Fernandes module_exit(gpu_buddy_module_exit);
13204a9671a0SJoel Fernandes 
1321*ba110db8SJoel Fernandes MODULE_DESCRIPTION("GPU Buddy Allocator");
13224a9671a0SJoel Fernandes MODULE_LICENSE("Dual MIT/GPL");
1323