xref: /linux/drivers/gpu/buddy.c (revision df8c7892e06efa5df2aa780a338f33a4f666370b)
14a9671a0SJoel Fernandes // SPDX-License-Identifier: MIT
24a9671a0SJoel Fernandes /*
34a9671a0SJoel Fernandes  * Copyright © 2021 Intel Corporation
44a9671a0SJoel Fernandes  */
54a9671a0SJoel Fernandes 
64a9671a0SJoel Fernandes #include <kunit/test-bug.h>
74a9671a0SJoel Fernandes 
84a9671a0SJoel Fernandes #include <linux/export.h>
94a9671a0SJoel Fernandes #include <linux/kmemleak.h>
104a9671a0SJoel Fernandes #include <linux/module.h>
114a9671a0SJoel Fernandes #include <linux/sizes.h>
124a9671a0SJoel Fernandes 
134a9671a0SJoel Fernandes #include <linux/gpu_buddy.h>
144a9671a0SJoel Fernandes 
154a9671a0SJoel Fernandes static struct kmem_cache *slab_blocks;
164a9671a0SJoel Fernandes 
17*df8c7892SSanjay Yadav static unsigned int
18*df8c7892SSanjay Yadav gpu_buddy_block_state(struct gpu_buddy_block *block)
19*df8c7892SSanjay Yadav {
20*df8c7892SSanjay Yadav 	return block->header & GPU_BUDDY_HEADER_STATE;
21*df8c7892SSanjay Yadav }
22*df8c7892SSanjay Yadav 
23*df8c7892SSanjay Yadav static bool
24*df8c7892SSanjay Yadav gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
25*df8c7892SSanjay Yadav {
26*df8c7892SSanjay Yadav 	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
27*df8c7892SSanjay Yadav }
28*df8c7892SSanjay Yadav 
29*df8c7892SSanjay Yadav static bool
30*df8c7892SSanjay Yadav gpu_buddy_block_is_split(struct gpu_buddy_block *block)
31*df8c7892SSanjay Yadav {
32*df8c7892SSanjay Yadav 	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
33*df8c7892SSanjay Yadav }
34*df8c7892SSanjay Yadav 
35ba110db8SJoel Fernandes static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
36ba110db8SJoel Fernandes 					       struct gpu_buddy_block *parent,
374a9671a0SJoel Fernandes 					       unsigned int order,
384a9671a0SJoel Fernandes 					       u64 offset)
394a9671a0SJoel Fernandes {
40ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
414a9671a0SJoel Fernandes 
42ba110db8SJoel Fernandes 	BUG_ON(order > GPU_BUDDY_MAX_ORDER);
434a9671a0SJoel Fernandes 
444a9671a0SJoel Fernandes 	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
454a9671a0SJoel Fernandes 	if (!block)
464a9671a0SJoel Fernandes 		return NULL;
474a9671a0SJoel Fernandes 
484a9671a0SJoel Fernandes 	block->header = offset;
494a9671a0SJoel Fernandes 	block->header |= order;
504a9671a0SJoel Fernandes 	block->parent = parent;
514a9671a0SJoel Fernandes 
524a9671a0SJoel Fernandes 	RB_CLEAR_NODE(&block->rb);
534a9671a0SJoel Fernandes 
54ba110db8SJoel Fernandes 	BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
554a9671a0SJoel Fernandes 	return block;
564a9671a0SJoel Fernandes }
574a9671a0SJoel Fernandes 
58ba110db8SJoel Fernandes static void gpu_block_free(struct gpu_buddy *mm,
59ba110db8SJoel Fernandes 			   struct gpu_buddy_block *block)
604a9671a0SJoel Fernandes {
614a9671a0SJoel Fernandes 	kmem_cache_free(slab_blocks, block);
624a9671a0SJoel Fernandes }
634a9671a0SJoel Fernandes 
64ba110db8SJoel Fernandes static enum gpu_buddy_free_tree
65ba110db8SJoel Fernandes get_block_tree(struct gpu_buddy_block *block)
664a9671a0SJoel Fernandes {
67ba110db8SJoel Fernandes 	return gpu_buddy_block_is_clear(block) ?
68ba110db8SJoel Fernandes 	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
694a9671a0SJoel Fernandes }
704a9671a0SJoel Fernandes 
71ba110db8SJoel Fernandes static struct gpu_buddy_block *
724a9671a0SJoel Fernandes rbtree_get_free_block(const struct rb_node *node)
734a9671a0SJoel Fernandes {
74ba110db8SJoel Fernandes 	return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
754a9671a0SJoel Fernandes }
764a9671a0SJoel Fernandes 
77ba110db8SJoel Fernandes static struct gpu_buddy_block *
784a9671a0SJoel Fernandes rbtree_last_free_block(struct rb_root *root)
794a9671a0SJoel Fernandes {
804a9671a0SJoel Fernandes 	return rbtree_get_free_block(rb_last(root));
814a9671a0SJoel Fernandes }
824a9671a0SJoel Fernandes 
834a9671a0SJoel Fernandes static bool rbtree_is_empty(struct rb_root *root)
844a9671a0SJoel Fernandes {
854a9671a0SJoel Fernandes 	return RB_EMPTY_ROOT(root);
864a9671a0SJoel Fernandes }
874a9671a0SJoel Fernandes 
88ba110db8SJoel Fernandes static bool gpu_buddy_block_offset_less(const struct gpu_buddy_block *block,
89ba110db8SJoel Fernandes 					const struct gpu_buddy_block *node)
904a9671a0SJoel Fernandes {
91ba110db8SJoel Fernandes 	return gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node);
924a9671a0SJoel Fernandes }
934a9671a0SJoel Fernandes 
944a9671a0SJoel Fernandes static bool rbtree_block_offset_less(struct rb_node *block,
954a9671a0SJoel Fernandes 				     const struct rb_node *node)
964a9671a0SJoel Fernandes {
97ba110db8SJoel Fernandes 	return gpu_buddy_block_offset_less(rbtree_get_free_block(block),
984a9671a0SJoel Fernandes 					   rbtree_get_free_block(node));
994a9671a0SJoel Fernandes }
1004a9671a0SJoel Fernandes 
101ba110db8SJoel Fernandes static void rbtree_insert(struct gpu_buddy *mm,
102ba110db8SJoel Fernandes 			  struct gpu_buddy_block *block,
103ba110db8SJoel Fernandes 			  enum gpu_buddy_free_tree tree)
1044a9671a0SJoel Fernandes {
1054a9671a0SJoel Fernandes 	rb_add(&block->rb,
106ba110db8SJoel Fernandes 	       &mm->free_trees[tree][gpu_buddy_block_order(block)],
1074a9671a0SJoel Fernandes 	       rbtree_block_offset_less);
1084a9671a0SJoel Fernandes }
1094a9671a0SJoel Fernandes 
110ba110db8SJoel Fernandes static void rbtree_remove(struct gpu_buddy *mm,
111ba110db8SJoel Fernandes 			  struct gpu_buddy_block *block)
1124a9671a0SJoel Fernandes {
113ba110db8SJoel Fernandes 	unsigned int order = gpu_buddy_block_order(block);
114ba110db8SJoel Fernandes 	enum gpu_buddy_free_tree tree;
1154a9671a0SJoel Fernandes 	struct rb_root *root;
1164a9671a0SJoel Fernandes 
1174a9671a0SJoel Fernandes 	tree = get_block_tree(block);
1184a9671a0SJoel Fernandes 	root = &mm->free_trees[tree][order];
1194a9671a0SJoel Fernandes 
1204a9671a0SJoel Fernandes 	rb_erase(&block->rb, root);
1214a9671a0SJoel Fernandes 	RB_CLEAR_NODE(&block->rb);
1224a9671a0SJoel Fernandes }
1234a9671a0SJoel Fernandes 
124ba110db8SJoel Fernandes static void clear_reset(struct gpu_buddy_block *block)
1254a9671a0SJoel Fernandes {
126ba110db8SJoel Fernandes 	block->header &= ~GPU_BUDDY_HEADER_CLEAR;
1274a9671a0SJoel Fernandes }
1284a9671a0SJoel Fernandes 
129ba110db8SJoel Fernandes static void mark_cleared(struct gpu_buddy_block *block)
1304a9671a0SJoel Fernandes {
131ba110db8SJoel Fernandes 	block->header |= GPU_BUDDY_HEADER_CLEAR;
1324a9671a0SJoel Fernandes }
1334a9671a0SJoel Fernandes 
134ba110db8SJoel Fernandes static void mark_allocated(struct gpu_buddy *mm,
135ba110db8SJoel Fernandes 			   struct gpu_buddy_block *block)
1364a9671a0SJoel Fernandes {
137ba110db8SJoel Fernandes 	block->header &= ~GPU_BUDDY_HEADER_STATE;
138ba110db8SJoel Fernandes 	block->header |= GPU_BUDDY_ALLOCATED;
1394a9671a0SJoel Fernandes 
1404a9671a0SJoel Fernandes 	rbtree_remove(mm, block);
1414a9671a0SJoel Fernandes }
1424a9671a0SJoel Fernandes 
143ba110db8SJoel Fernandes static void mark_free(struct gpu_buddy *mm,
144ba110db8SJoel Fernandes 		      struct gpu_buddy_block *block)
1454a9671a0SJoel Fernandes {
146ba110db8SJoel Fernandes 	enum gpu_buddy_free_tree tree;
1474a9671a0SJoel Fernandes 
148ba110db8SJoel Fernandes 	block->header &= ~GPU_BUDDY_HEADER_STATE;
149ba110db8SJoel Fernandes 	block->header |= GPU_BUDDY_FREE;
1504a9671a0SJoel Fernandes 
1514a9671a0SJoel Fernandes 	tree = get_block_tree(block);
1524a9671a0SJoel Fernandes 	rbtree_insert(mm, block, tree);
1534a9671a0SJoel Fernandes }
1544a9671a0SJoel Fernandes 
155ba110db8SJoel Fernandes static void mark_split(struct gpu_buddy *mm,
156ba110db8SJoel Fernandes 		       struct gpu_buddy_block *block)
1574a9671a0SJoel Fernandes {
158ba110db8SJoel Fernandes 	block->header &= ~GPU_BUDDY_HEADER_STATE;
159ba110db8SJoel Fernandes 	block->header |= GPU_BUDDY_SPLIT;
1604a9671a0SJoel Fernandes 
1614a9671a0SJoel Fernandes 	rbtree_remove(mm, block);
1624a9671a0SJoel Fernandes }
1634a9671a0SJoel Fernandes 
1644a9671a0SJoel Fernandes static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
1654a9671a0SJoel Fernandes {
1664a9671a0SJoel Fernandes 	return s1 <= e2 && e1 >= s2;
1674a9671a0SJoel Fernandes }
1684a9671a0SJoel Fernandes 
1694a9671a0SJoel Fernandes static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
1704a9671a0SJoel Fernandes {
1714a9671a0SJoel Fernandes 	return s1 <= s2 && e1 >= e2;
1724a9671a0SJoel Fernandes }
1734a9671a0SJoel Fernandes 
174ba110db8SJoel Fernandes static struct gpu_buddy_block *
175ba110db8SJoel Fernandes __get_buddy(struct gpu_buddy_block *block)
1764a9671a0SJoel Fernandes {
177ba110db8SJoel Fernandes 	struct gpu_buddy_block *parent;
1784a9671a0SJoel Fernandes 
1794a9671a0SJoel Fernandes 	parent = block->parent;
1804a9671a0SJoel Fernandes 	if (!parent)
1814a9671a0SJoel Fernandes 		return NULL;
1824a9671a0SJoel Fernandes 
1834a9671a0SJoel Fernandes 	if (parent->left == block)
1844a9671a0SJoel Fernandes 		return parent->right;
1854a9671a0SJoel Fernandes 
1864a9671a0SJoel Fernandes 	return parent->left;
1874a9671a0SJoel Fernandes }
1884a9671a0SJoel Fernandes 
189ba110db8SJoel Fernandes static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
190ba110db8SJoel Fernandes 				     struct gpu_buddy_block *block,
1914a9671a0SJoel Fernandes 				     bool force_merge)
1924a9671a0SJoel Fernandes {
193ba110db8SJoel Fernandes 	struct gpu_buddy_block *parent;
1944a9671a0SJoel Fernandes 	unsigned int order;
1954a9671a0SJoel Fernandes 
1964a9671a0SJoel Fernandes 	while ((parent = block->parent)) {
197ba110db8SJoel Fernandes 		struct gpu_buddy_block *buddy;
1984a9671a0SJoel Fernandes 
1994a9671a0SJoel Fernandes 		buddy = __get_buddy(block);
2004a9671a0SJoel Fernandes 
201ba110db8SJoel Fernandes 		if (!gpu_buddy_block_is_free(buddy))
2024a9671a0SJoel Fernandes 			break;
2034a9671a0SJoel Fernandes 
2044a9671a0SJoel Fernandes 		if (!force_merge) {
2054a9671a0SJoel Fernandes 			/*
2064a9671a0SJoel Fernandes 			 * Check the block and its buddy clear state and exit
2074a9671a0SJoel Fernandes 			 * the loop if they both have the dissimilar state.
2084a9671a0SJoel Fernandes 			 */
209ba110db8SJoel Fernandes 			if (gpu_buddy_block_is_clear(block) !=
210ba110db8SJoel Fernandes 			    gpu_buddy_block_is_clear(buddy))
2114a9671a0SJoel Fernandes 				break;
2124a9671a0SJoel Fernandes 
213ba110db8SJoel Fernandes 			if (gpu_buddy_block_is_clear(block))
2144a9671a0SJoel Fernandes 				mark_cleared(parent);
2154a9671a0SJoel Fernandes 		}
2164a9671a0SJoel Fernandes 
2174a9671a0SJoel Fernandes 		rbtree_remove(mm, buddy);
218ba110db8SJoel Fernandes 		if (force_merge && gpu_buddy_block_is_clear(buddy))
219ba110db8SJoel Fernandes 			mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
2204a9671a0SJoel Fernandes 
221ba110db8SJoel Fernandes 		gpu_block_free(mm, block);
222ba110db8SJoel Fernandes 		gpu_block_free(mm, buddy);
2234a9671a0SJoel Fernandes 
2244a9671a0SJoel Fernandes 		block = parent;
2254a9671a0SJoel Fernandes 	}
2264a9671a0SJoel Fernandes 
227ba110db8SJoel Fernandes 	order = gpu_buddy_block_order(block);
2284a9671a0SJoel Fernandes 	mark_free(mm, block);
2294a9671a0SJoel Fernandes 
2304a9671a0SJoel Fernandes 	return order;
2314a9671a0SJoel Fernandes }
2324a9671a0SJoel Fernandes 
233ba110db8SJoel Fernandes static int __force_merge(struct gpu_buddy *mm,
2344a9671a0SJoel Fernandes 			 u64 start,
2354a9671a0SJoel Fernandes 			 u64 end,
2364a9671a0SJoel Fernandes 			 unsigned int min_order)
2374a9671a0SJoel Fernandes {
2384a9671a0SJoel Fernandes 	unsigned int tree, order;
2394a9671a0SJoel Fernandes 	int i;
2404a9671a0SJoel Fernandes 
2414a9671a0SJoel Fernandes 	if (!min_order)
2424a9671a0SJoel Fernandes 		return -ENOMEM;
2434a9671a0SJoel Fernandes 
2444a9671a0SJoel Fernandes 	if (min_order > mm->max_order)
2454a9671a0SJoel Fernandes 		return -EINVAL;
2464a9671a0SJoel Fernandes 
2474a9671a0SJoel Fernandes 	for_each_free_tree(tree) {
2484a9671a0SJoel Fernandes 		for (i = min_order - 1; i >= 0; i--) {
2494a9671a0SJoel Fernandes 			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
2504a9671a0SJoel Fernandes 
2514a9671a0SJoel Fernandes 			while (iter) {
252ba110db8SJoel Fernandes 				struct gpu_buddy_block *block, *buddy;
2534a9671a0SJoel Fernandes 				u64 block_start, block_end;
2544a9671a0SJoel Fernandes 
2554a9671a0SJoel Fernandes 				block = rbtree_get_free_block(iter);
2564a9671a0SJoel Fernandes 				iter = rb_prev(iter);
2574a9671a0SJoel Fernandes 
2584a9671a0SJoel Fernandes 				if (!block || !block->parent)
2594a9671a0SJoel Fernandes 					continue;
2604a9671a0SJoel Fernandes 
261ba110db8SJoel Fernandes 				block_start = gpu_buddy_block_offset(block);
262ba110db8SJoel Fernandes 				block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
2634a9671a0SJoel Fernandes 
2644a9671a0SJoel Fernandes 				if (!contains(start, end, block_start, block_end))
2654a9671a0SJoel Fernandes 					continue;
2664a9671a0SJoel Fernandes 
2674a9671a0SJoel Fernandes 				buddy = __get_buddy(block);
268ba110db8SJoel Fernandes 				if (!gpu_buddy_block_is_free(buddy))
2694a9671a0SJoel Fernandes 					continue;
2704a9671a0SJoel Fernandes 
271ba110db8SJoel Fernandes 				WARN_ON(gpu_buddy_block_is_clear(block) ==
272ba110db8SJoel Fernandes 					gpu_buddy_block_is_clear(buddy));
2734a9671a0SJoel Fernandes 
2744a9671a0SJoel Fernandes 				/*
2754a9671a0SJoel Fernandes 				 * Advance to the next node when the current node is the buddy,
2764a9671a0SJoel Fernandes 				 * as freeing the block will also remove its buddy from the tree.
2774a9671a0SJoel Fernandes 				 */
2784a9671a0SJoel Fernandes 				if (iter == &buddy->rb)
2794a9671a0SJoel Fernandes 					iter = rb_prev(iter);
2804a9671a0SJoel Fernandes 
2814a9671a0SJoel Fernandes 				rbtree_remove(mm, block);
282ba110db8SJoel Fernandes 				if (gpu_buddy_block_is_clear(block))
283ba110db8SJoel Fernandes 					mm->clear_avail -= gpu_buddy_block_size(mm, block);
2844a9671a0SJoel Fernandes 
285ba110db8SJoel Fernandes 				order = __gpu_buddy_free(mm, block, true);
2864a9671a0SJoel Fernandes 				if (order >= min_order)
2874a9671a0SJoel Fernandes 					return 0;
2884a9671a0SJoel Fernandes 			}
2894a9671a0SJoel Fernandes 		}
2904a9671a0SJoel Fernandes 	}
2914a9671a0SJoel Fernandes 
2924a9671a0SJoel Fernandes 	return -ENOMEM;
2934a9671a0SJoel Fernandes }
2944a9671a0SJoel Fernandes 
2954a9671a0SJoel Fernandes /**
296ba110db8SJoel Fernandes  * gpu_buddy_init - init memory manager
2974a9671a0SJoel Fernandes  *
298ba110db8SJoel Fernandes  * @mm: GPU buddy manager to initialize
2994a9671a0SJoel Fernandes  * @size: size in bytes to manage
3004a9671a0SJoel Fernandes  * @chunk_size: minimum page size in bytes for our allocations
3014a9671a0SJoel Fernandes  *
3024a9671a0SJoel Fernandes  * Initializes the memory manager and its resources.
3034a9671a0SJoel Fernandes  *
3044a9671a0SJoel Fernandes  * Returns:
3054a9671a0SJoel Fernandes  * 0 on success, error code on failure.
3064a9671a0SJoel Fernandes  */
307ba110db8SJoel Fernandes int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
3084a9671a0SJoel Fernandes {
3094a9671a0SJoel Fernandes 	unsigned int i, j, root_count = 0;
3104a9671a0SJoel Fernandes 	u64 offset = 0;
3114a9671a0SJoel Fernandes 
3124a9671a0SJoel Fernandes 	if (size < chunk_size)
3134a9671a0SJoel Fernandes 		return -EINVAL;
3144a9671a0SJoel Fernandes 
3154a9671a0SJoel Fernandes 	if (chunk_size < SZ_4K)
3164a9671a0SJoel Fernandes 		return -EINVAL;
3174a9671a0SJoel Fernandes 
3184a9671a0SJoel Fernandes 	if (!is_power_of_2(chunk_size))
3194a9671a0SJoel Fernandes 		return -EINVAL;
3204a9671a0SJoel Fernandes 
3214a9671a0SJoel Fernandes 	size = round_down(size, chunk_size);
3224a9671a0SJoel Fernandes 
3234a9671a0SJoel Fernandes 	mm->size = size;
3244a9671a0SJoel Fernandes 	mm->avail = size;
3254a9671a0SJoel Fernandes 	mm->clear_avail = 0;
3264a9671a0SJoel Fernandes 	mm->chunk_size = chunk_size;
3274a9671a0SJoel Fernandes 	mm->max_order = ilog2(size) - ilog2(chunk_size);
3284a9671a0SJoel Fernandes 
329ba110db8SJoel Fernandes 	BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);
3304a9671a0SJoel Fernandes 
331ba110db8SJoel Fernandes 	mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
3324a9671a0SJoel Fernandes 				       sizeof(*mm->free_trees),
3334a9671a0SJoel Fernandes 				       GFP_KERNEL);
3344a9671a0SJoel Fernandes 	if (!mm->free_trees)
3354a9671a0SJoel Fernandes 		return -ENOMEM;
3364a9671a0SJoel Fernandes 
3374a9671a0SJoel Fernandes 	for_each_free_tree(i) {
3384a9671a0SJoel Fernandes 		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
3394a9671a0SJoel Fernandes 						  sizeof(struct rb_root),
3404a9671a0SJoel Fernandes 						  GFP_KERNEL);
3414a9671a0SJoel Fernandes 		if (!mm->free_trees[i])
3424a9671a0SJoel Fernandes 			goto out_free_tree;
3434a9671a0SJoel Fernandes 
3444a9671a0SJoel Fernandes 		for (j = 0; j <= mm->max_order; ++j)
3454a9671a0SJoel Fernandes 			mm->free_trees[i][j] = RB_ROOT;
3464a9671a0SJoel Fernandes 	}
3474a9671a0SJoel Fernandes 
3484a9671a0SJoel Fernandes 	mm->n_roots = hweight64(size);
3494a9671a0SJoel Fernandes 
3504a9671a0SJoel Fernandes 	mm->roots = kmalloc_array(mm->n_roots,
351ba110db8SJoel Fernandes 				  sizeof(struct gpu_buddy_block *),
3524a9671a0SJoel Fernandes 				  GFP_KERNEL);
3534a9671a0SJoel Fernandes 	if (!mm->roots)
3544a9671a0SJoel Fernandes 		goto out_free_tree;
3554a9671a0SJoel Fernandes 
3564a9671a0SJoel Fernandes 	/*
3574a9671a0SJoel Fernandes 	 * Split into power-of-two blocks, in case we are given a size that is
3584a9671a0SJoel Fernandes 	 * not itself a power-of-two.
3594a9671a0SJoel Fernandes 	 */
3604a9671a0SJoel Fernandes 	do {
361ba110db8SJoel Fernandes 		struct gpu_buddy_block *root;
3624a9671a0SJoel Fernandes 		unsigned int order;
3634a9671a0SJoel Fernandes 		u64 root_size;
3644a9671a0SJoel Fernandes 
3654a9671a0SJoel Fernandes 		order = ilog2(size) - ilog2(chunk_size);
3664a9671a0SJoel Fernandes 		root_size = chunk_size << order;
3674a9671a0SJoel Fernandes 
368ba110db8SJoel Fernandes 		root = gpu_block_alloc(mm, NULL, order, offset);
3694a9671a0SJoel Fernandes 		if (!root)
3704a9671a0SJoel Fernandes 			goto out_free_roots;
3714a9671a0SJoel Fernandes 
3724a9671a0SJoel Fernandes 		mark_free(mm, root);
3734a9671a0SJoel Fernandes 
3744a9671a0SJoel Fernandes 		BUG_ON(root_count > mm->max_order);
375ba110db8SJoel Fernandes 		BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);
3764a9671a0SJoel Fernandes 
3774a9671a0SJoel Fernandes 		mm->roots[root_count] = root;
3784a9671a0SJoel Fernandes 
3794a9671a0SJoel Fernandes 		offset += root_size;
3804a9671a0SJoel Fernandes 		size -= root_size;
3814a9671a0SJoel Fernandes 		root_count++;
3824a9671a0SJoel Fernandes 	} while (size);
3834a9671a0SJoel Fernandes 
3844a9671a0SJoel Fernandes 	return 0;
3854a9671a0SJoel Fernandes 
3864a9671a0SJoel Fernandes out_free_roots:
3874a9671a0SJoel Fernandes 	while (root_count--)
388ba110db8SJoel Fernandes 		gpu_block_free(mm, mm->roots[root_count]);
3894a9671a0SJoel Fernandes 	kfree(mm->roots);
3904a9671a0SJoel Fernandes out_free_tree:
3914a9671a0SJoel Fernandes 	while (i--)
3924a9671a0SJoel Fernandes 		kfree(mm->free_trees[i]);
3934a9671a0SJoel Fernandes 	kfree(mm->free_trees);
3944a9671a0SJoel Fernandes 	return -ENOMEM;
3954a9671a0SJoel Fernandes }
396ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_init);
3974a9671a0SJoel Fernandes 
3984a9671a0SJoel Fernandes /**
399ba110db8SJoel Fernandes  * gpu_buddy_fini - tear down the memory manager
4004a9671a0SJoel Fernandes  *
401ba110db8SJoel Fernandes  * @mm: GPU buddy manager to free
4024a9671a0SJoel Fernandes  *
4034a9671a0SJoel Fernandes  * Cleanup memory manager resources and the freetree
4044a9671a0SJoel Fernandes  */
405ba110db8SJoel Fernandes void gpu_buddy_fini(struct gpu_buddy *mm)
4064a9671a0SJoel Fernandes {
4074a9671a0SJoel Fernandes 	u64 root_size, size, start;
4084a9671a0SJoel Fernandes 	unsigned int order;
4094a9671a0SJoel Fernandes 	int i;
4104a9671a0SJoel Fernandes 
4114a9671a0SJoel Fernandes 	size = mm->size;
4124a9671a0SJoel Fernandes 
4134a9671a0SJoel Fernandes 	for (i = 0; i < mm->n_roots; ++i) {
4144a9671a0SJoel Fernandes 		order = ilog2(size) - ilog2(mm->chunk_size);
415ba110db8SJoel Fernandes 		start = gpu_buddy_block_offset(mm->roots[i]);
4164a9671a0SJoel Fernandes 		__force_merge(mm, start, start + size, order);
4174a9671a0SJoel Fernandes 
418ba110db8SJoel Fernandes 		if (WARN_ON(!gpu_buddy_block_is_free(mm->roots[i])))
4194a9671a0SJoel Fernandes 			kunit_fail_current_test("buddy_fini() root");
4204a9671a0SJoel Fernandes 
421ba110db8SJoel Fernandes 		gpu_block_free(mm, mm->roots[i]);
4224a9671a0SJoel Fernandes 
4234a9671a0SJoel Fernandes 		root_size = mm->chunk_size << order;
4244a9671a0SJoel Fernandes 		size -= root_size;
4254a9671a0SJoel Fernandes 	}
4264a9671a0SJoel Fernandes 
4274a9671a0SJoel Fernandes 	WARN_ON(mm->avail != mm->size);
4284a9671a0SJoel Fernandes 
4294a9671a0SJoel Fernandes 	for_each_free_tree(i)
4304a9671a0SJoel Fernandes 		kfree(mm->free_trees[i]);
4314a9671a0SJoel Fernandes 	kfree(mm->free_trees);
4324a9671a0SJoel Fernandes 	kfree(mm->roots);
4334a9671a0SJoel Fernandes }
434ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_fini);
4354a9671a0SJoel Fernandes 
436ba110db8SJoel Fernandes static int split_block(struct gpu_buddy *mm,
437ba110db8SJoel Fernandes 		       struct gpu_buddy_block *block)
4384a9671a0SJoel Fernandes {
439ba110db8SJoel Fernandes 	unsigned int block_order = gpu_buddy_block_order(block) - 1;
440ba110db8SJoel Fernandes 	u64 offset = gpu_buddy_block_offset(block);
4414a9671a0SJoel Fernandes 
442ba110db8SJoel Fernandes 	BUG_ON(!gpu_buddy_block_is_free(block));
443ba110db8SJoel Fernandes 	BUG_ON(!gpu_buddy_block_order(block));
4444a9671a0SJoel Fernandes 
445ba110db8SJoel Fernandes 	block->left = gpu_block_alloc(mm, block, block_order, offset);
4464a9671a0SJoel Fernandes 	if (!block->left)
4474a9671a0SJoel Fernandes 		return -ENOMEM;
4484a9671a0SJoel Fernandes 
449ba110db8SJoel Fernandes 	block->right = gpu_block_alloc(mm, block, block_order,
4504a9671a0SJoel Fernandes 				       offset + (mm->chunk_size << block_order));
4514a9671a0SJoel Fernandes 	if (!block->right) {
452ba110db8SJoel Fernandes 		gpu_block_free(mm, block->left);
4534a9671a0SJoel Fernandes 		return -ENOMEM;
4544a9671a0SJoel Fernandes 	}
4554a9671a0SJoel Fernandes 
4564a9671a0SJoel Fernandes 	mark_split(mm, block);
4574a9671a0SJoel Fernandes 
458ba110db8SJoel Fernandes 	if (gpu_buddy_block_is_clear(block)) {
4594a9671a0SJoel Fernandes 		mark_cleared(block->left);
4604a9671a0SJoel Fernandes 		mark_cleared(block->right);
4614a9671a0SJoel Fernandes 		clear_reset(block);
4624a9671a0SJoel Fernandes 	}
4634a9671a0SJoel Fernandes 
4644a9671a0SJoel Fernandes 	mark_free(mm, block->left);
4654a9671a0SJoel Fernandes 	mark_free(mm, block->right);
4664a9671a0SJoel Fernandes 
4674a9671a0SJoel Fernandes 	return 0;
4684a9671a0SJoel Fernandes }
4694a9671a0SJoel Fernandes 
4704a9671a0SJoel Fernandes /**
471ba110db8SJoel Fernandes  * gpu_buddy_reset_clear - reset blocks clear state
4724a9671a0SJoel Fernandes  *
473ba110db8SJoel Fernandes  * @mm: GPU buddy manager
4744a9671a0SJoel Fernandes  * @is_clear: blocks clear state
4754a9671a0SJoel Fernandes  *
4764a9671a0SJoel Fernandes  * Reset the clear state based on @is_clear value for each block
4774a9671a0SJoel Fernandes  * in the freetree.
4784a9671a0SJoel Fernandes  */
479ba110db8SJoel Fernandes void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
4804a9671a0SJoel Fernandes {
481ba110db8SJoel Fernandes 	enum gpu_buddy_free_tree src_tree, dst_tree;
4824a9671a0SJoel Fernandes 	u64 root_size, size, start;
4834a9671a0SJoel Fernandes 	unsigned int order;
4844a9671a0SJoel Fernandes 	int i;
4854a9671a0SJoel Fernandes 
4864a9671a0SJoel Fernandes 	size = mm->size;
4874a9671a0SJoel Fernandes 	for (i = 0; i < mm->n_roots; ++i) {
4884a9671a0SJoel Fernandes 		order = ilog2(size) - ilog2(mm->chunk_size);
489ba110db8SJoel Fernandes 		start = gpu_buddy_block_offset(mm->roots[i]);
4904a9671a0SJoel Fernandes 		__force_merge(mm, start, start + size, order);
4914a9671a0SJoel Fernandes 
4924a9671a0SJoel Fernandes 		root_size = mm->chunk_size << order;
4934a9671a0SJoel Fernandes 		size -= root_size;
4944a9671a0SJoel Fernandes 	}
4954a9671a0SJoel Fernandes 
496ba110db8SJoel Fernandes 	src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
497ba110db8SJoel Fernandes 	dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
4984a9671a0SJoel Fernandes 
4994a9671a0SJoel Fernandes 	for (i = 0; i <= mm->max_order; ++i) {
5004a9671a0SJoel Fernandes 		struct rb_root *root = &mm->free_trees[src_tree][i];
501ba110db8SJoel Fernandes 		struct gpu_buddy_block *block, *tmp;
5024a9671a0SJoel Fernandes 
5034a9671a0SJoel Fernandes 		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
5044a9671a0SJoel Fernandes 			rbtree_remove(mm, block);
5054a9671a0SJoel Fernandes 			if (is_clear) {
5064a9671a0SJoel Fernandes 				mark_cleared(block);
507ba110db8SJoel Fernandes 				mm->clear_avail += gpu_buddy_block_size(mm, block);
5084a9671a0SJoel Fernandes 			} else {
5094a9671a0SJoel Fernandes 				clear_reset(block);
510ba110db8SJoel Fernandes 				mm->clear_avail -= gpu_buddy_block_size(mm, block);
5114a9671a0SJoel Fernandes 			}
5124a9671a0SJoel Fernandes 
5134a9671a0SJoel Fernandes 			rbtree_insert(mm, block, dst_tree);
5144a9671a0SJoel Fernandes 		}
5154a9671a0SJoel Fernandes 	}
5164a9671a0SJoel Fernandes }
517ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_reset_clear);
5184a9671a0SJoel Fernandes 
5194a9671a0SJoel Fernandes /**
520ba110db8SJoel Fernandes  * gpu_buddy_free_block - free a block
5214a9671a0SJoel Fernandes  *
522ba110db8SJoel Fernandes  * @mm: GPU buddy manager
5234a9671a0SJoel Fernandes  * @block: block to be freed
5244a9671a0SJoel Fernandes  */
525ba110db8SJoel Fernandes void gpu_buddy_free_block(struct gpu_buddy *mm,
526ba110db8SJoel Fernandes 			  struct gpu_buddy_block *block)
5274a9671a0SJoel Fernandes {
528ba110db8SJoel Fernandes 	BUG_ON(!gpu_buddy_block_is_allocated(block));
529ba110db8SJoel Fernandes 	mm->avail += gpu_buddy_block_size(mm, block);
530ba110db8SJoel Fernandes 	if (gpu_buddy_block_is_clear(block))
531ba110db8SJoel Fernandes 		mm->clear_avail += gpu_buddy_block_size(mm, block);
5324a9671a0SJoel Fernandes 
533ba110db8SJoel Fernandes 	__gpu_buddy_free(mm, block, false);
5344a9671a0SJoel Fernandes }
535ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_free_block);
5364a9671a0SJoel Fernandes 
537ba110db8SJoel Fernandes static void __gpu_buddy_free_list(struct gpu_buddy *mm,
5384a9671a0SJoel Fernandes 				  struct list_head *objects,
5394a9671a0SJoel Fernandes 				  bool mark_clear,
5404a9671a0SJoel Fernandes 				  bool mark_dirty)
5414a9671a0SJoel Fernandes {
542ba110db8SJoel Fernandes 	struct gpu_buddy_block *block, *on;
5434a9671a0SJoel Fernandes 
5444a9671a0SJoel Fernandes 	WARN_ON(mark_dirty && mark_clear);
5454a9671a0SJoel Fernandes 
5464a9671a0SJoel Fernandes 	list_for_each_entry_safe(block, on, objects, link) {
5474a9671a0SJoel Fernandes 		if (mark_clear)
5484a9671a0SJoel Fernandes 			mark_cleared(block);
5494a9671a0SJoel Fernandes 		else if (mark_dirty)
5504a9671a0SJoel Fernandes 			clear_reset(block);
551ba110db8SJoel Fernandes 		gpu_buddy_free_block(mm, block);
5524a9671a0SJoel Fernandes 		cond_resched();
5534a9671a0SJoel Fernandes 	}
5544a9671a0SJoel Fernandes 	INIT_LIST_HEAD(objects);
5554a9671a0SJoel Fernandes }
5564a9671a0SJoel Fernandes 
557ba110db8SJoel Fernandes static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
5584a9671a0SJoel Fernandes 					 struct list_head *objects)
5594a9671a0SJoel Fernandes {
5604a9671a0SJoel Fernandes 	/*
5614a9671a0SJoel Fernandes 	 * Don't touch the clear/dirty bit, since allocation is still internal
5624a9671a0SJoel Fernandes 	 * at this point. For example we might have just failed part of the
5634a9671a0SJoel Fernandes 	 * allocation.
5644a9671a0SJoel Fernandes 	 */
565ba110db8SJoel Fernandes 	__gpu_buddy_free_list(mm, objects, false, false);
5664a9671a0SJoel Fernandes }
5674a9671a0SJoel Fernandes 
5684a9671a0SJoel Fernandes /**
569ba110db8SJoel Fernandes  * gpu_buddy_free_list - free blocks
5704a9671a0SJoel Fernandes  *
571ba110db8SJoel Fernandes  * @mm: GPU buddy manager
5724a9671a0SJoel Fernandes  * @objects: input list head to free blocks
573ba110db8SJoel Fernandes  * @flags: optional flags like GPU_BUDDY_CLEARED
5744a9671a0SJoel Fernandes  */
575ba110db8SJoel Fernandes void gpu_buddy_free_list(struct gpu_buddy *mm,
5764a9671a0SJoel Fernandes 			 struct list_head *objects,
5774a9671a0SJoel Fernandes 			 unsigned int flags)
5784a9671a0SJoel Fernandes {
579ba110db8SJoel Fernandes 	bool mark_clear = flags & GPU_BUDDY_CLEARED;
5804a9671a0SJoel Fernandes 
581ba110db8SJoel Fernandes 	__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
5824a9671a0SJoel Fernandes }
583ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_free_list);
5844a9671a0SJoel Fernandes 
585ba110db8SJoel Fernandes static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
5864a9671a0SJoel Fernandes {
587ba110db8SJoel Fernandes 	bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;
5884a9671a0SJoel Fernandes 
589ba110db8SJoel Fernandes 	return needs_clear != gpu_buddy_block_is_clear(block);
5904a9671a0SJoel Fernandes }
5914a9671a0SJoel Fernandes 
592ba110db8SJoel Fernandes static struct gpu_buddy_block *
593ba110db8SJoel Fernandes __alloc_range_bias(struct gpu_buddy *mm,
5944a9671a0SJoel Fernandes 		   u64 start, u64 end,
5954a9671a0SJoel Fernandes 		   unsigned int order,
5964a9671a0SJoel Fernandes 		   unsigned long flags,
5974a9671a0SJoel Fernandes 		   bool fallback)
5984a9671a0SJoel Fernandes {
5994a9671a0SJoel Fernandes 	u64 req_size = mm->chunk_size << order;
600ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
601ba110db8SJoel Fernandes 	struct gpu_buddy_block *buddy;
6024a9671a0SJoel Fernandes 	LIST_HEAD(dfs);
6034a9671a0SJoel Fernandes 	int err;
6044a9671a0SJoel Fernandes 	int i;
6054a9671a0SJoel Fernandes 
6064a9671a0SJoel Fernandes 	end = end - 1;
6074a9671a0SJoel Fernandes 
6084a9671a0SJoel Fernandes 	for (i = 0; i < mm->n_roots; ++i)
6094a9671a0SJoel Fernandes 		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
6104a9671a0SJoel Fernandes 
6114a9671a0SJoel Fernandes 	do {
6124a9671a0SJoel Fernandes 		u64 block_start;
6134a9671a0SJoel Fernandes 		u64 block_end;
6144a9671a0SJoel Fernandes 
6154a9671a0SJoel Fernandes 		block = list_first_entry_or_null(&dfs,
616ba110db8SJoel Fernandes 						 struct gpu_buddy_block,
6174a9671a0SJoel Fernandes 						 tmp_link);
6184a9671a0SJoel Fernandes 		if (!block)
6194a9671a0SJoel Fernandes 			break;
6204a9671a0SJoel Fernandes 
6214a9671a0SJoel Fernandes 		list_del(&block->tmp_link);
6224a9671a0SJoel Fernandes 
623ba110db8SJoel Fernandes 		if (gpu_buddy_block_order(block) < order)
6244a9671a0SJoel Fernandes 			continue;
6254a9671a0SJoel Fernandes 
626ba110db8SJoel Fernandes 		block_start = gpu_buddy_block_offset(block);
627ba110db8SJoel Fernandes 		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
6284a9671a0SJoel Fernandes 
6294a9671a0SJoel Fernandes 		if (!overlaps(start, end, block_start, block_end))
6304a9671a0SJoel Fernandes 			continue;
6314a9671a0SJoel Fernandes 
632ba110db8SJoel Fernandes 		if (gpu_buddy_block_is_allocated(block))
6334a9671a0SJoel Fernandes 			continue;
6344a9671a0SJoel Fernandes 
6354a9671a0SJoel Fernandes 		if (block_start < start || block_end > end) {
6364a9671a0SJoel Fernandes 			u64 adjusted_start = max(block_start, start);
6374a9671a0SJoel Fernandes 			u64 adjusted_end = min(block_end, end);
6384a9671a0SJoel Fernandes 
6394a9671a0SJoel Fernandes 			if (round_down(adjusted_end + 1, req_size) <=
6404a9671a0SJoel Fernandes 			    round_up(adjusted_start, req_size))
6414a9671a0SJoel Fernandes 				continue;
6424a9671a0SJoel Fernandes 		}
6434a9671a0SJoel Fernandes 
6444a9671a0SJoel Fernandes 		if (!fallback && block_incompatible(block, flags))
6454a9671a0SJoel Fernandes 			continue;
6464a9671a0SJoel Fernandes 
6474a9671a0SJoel Fernandes 		if (contains(start, end, block_start, block_end) &&
648ba110db8SJoel Fernandes 		    order == gpu_buddy_block_order(block)) {
6494a9671a0SJoel Fernandes 			/*
6504a9671a0SJoel Fernandes 			 * Find the free block within the range.
6514a9671a0SJoel Fernandes 			 */
652ba110db8SJoel Fernandes 			if (gpu_buddy_block_is_free(block))
6534a9671a0SJoel Fernandes 				return block;
6544a9671a0SJoel Fernandes 
6554a9671a0SJoel Fernandes 			continue;
6564a9671a0SJoel Fernandes 		}
6574a9671a0SJoel Fernandes 
658ba110db8SJoel Fernandes 		if (!gpu_buddy_block_is_split(block)) {
6594a9671a0SJoel Fernandes 			err = split_block(mm, block);
6604a9671a0SJoel Fernandes 			if (unlikely(err))
6614a9671a0SJoel Fernandes 				goto err_undo;
6624a9671a0SJoel Fernandes 		}
6634a9671a0SJoel Fernandes 
6644a9671a0SJoel Fernandes 		list_add(&block->right->tmp_link, &dfs);
6654a9671a0SJoel Fernandes 		list_add(&block->left->tmp_link, &dfs);
6664a9671a0SJoel Fernandes 	} while (1);
6674a9671a0SJoel Fernandes 
6684a9671a0SJoel Fernandes 	return ERR_PTR(-ENOSPC);
6694a9671a0SJoel Fernandes 
6704a9671a0SJoel Fernandes err_undo:
6714a9671a0SJoel Fernandes 	/*
6724a9671a0SJoel Fernandes 	 * We really don't want to leave around a bunch of split blocks, since
6734a9671a0SJoel Fernandes 	 * bigger is better, so make sure we merge everything back before we
6744a9671a0SJoel Fernandes 	 * free the allocated blocks.
6754a9671a0SJoel Fernandes 	 */
6764a9671a0SJoel Fernandes 	buddy = __get_buddy(block);
6774a9671a0SJoel Fernandes 	if (buddy &&
678ba110db8SJoel Fernandes 	    (gpu_buddy_block_is_free(block) &&
679ba110db8SJoel Fernandes 	     gpu_buddy_block_is_free(buddy)))
680ba110db8SJoel Fernandes 		__gpu_buddy_free(mm, block, false);
6814a9671a0SJoel Fernandes 	return ERR_PTR(err);
6824a9671a0SJoel Fernandes }
6834a9671a0SJoel Fernandes 
684ba110db8SJoel Fernandes static struct gpu_buddy_block *
685ba110db8SJoel Fernandes __gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
6864a9671a0SJoel Fernandes 			     u64 start, u64 end,
6874a9671a0SJoel Fernandes 			     unsigned int order,
6884a9671a0SJoel Fernandes 			     unsigned long flags)
6894a9671a0SJoel Fernandes {
690ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
6914a9671a0SJoel Fernandes 	bool fallback = false;
6924a9671a0SJoel Fernandes 
6934a9671a0SJoel Fernandes 	block = __alloc_range_bias(mm, start, end, order,
6944a9671a0SJoel Fernandes 				   flags, fallback);
6954a9671a0SJoel Fernandes 	if (IS_ERR(block))
6964a9671a0SJoel Fernandes 		return __alloc_range_bias(mm, start, end, order,
6974a9671a0SJoel Fernandes 					  flags, !fallback);
6984a9671a0SJoel Fernandes 
6994a9671a0SJoel Fernandes 	return block;
7004a9671a0SJoel Fernandes }
7014a9671a0SJoel Fernandes 
702ba110db8SJoel Fernandes static struct gpu_buddy_block *
703ba110db8SJoel Fernandes get_maxblock(struct gpu_buddy *mm,
7044a9671a0SJoel Fernandes 	     unsigned int order,
705ba110db8SJoel Fernandes 	     enum gpu_buddy_free_tree tree)
7064a9671a0SJoel Fernandes {
707ba110db8SJoel Fernandes 	struct gpu_buddy_block *max_block = NULL, *block = NULL;
7084a9671a0SJoel Fernandes 	struct rb_root *root;
7094a9671a0SJoel Fernandes 	unsigned int i;
7104a9671a0SJoel Fernandes 
7114a9671a0SJoel Fernandes 	for (i = order; i <= mm->max_order; ++i) {
7124a9671a0SJoel Fernandes 		root = &mm->free_trees[tree][i];
7134a9671a0SJoel Fernandes 		block = rbtree_last_free_block(root);
7144a9671a0SJoel Fernandes 		if (!block)
7154a9671a0SJoel Fernandes 			continue;
7164a9671a0SJoel Fernandes 
7174a9671a0SJoel Fernandes 		if (!max_block) {
7184a9671a0SJoel Fernandes 			max_block = block;
7194a9671a0SJoel Fernandes 			continue;
7204a9671a0SJoel Fernandes 		}
7214a9671a0SJoel Fernandes 
722ba110db8SJoel Fernandes 		if (gpu_buddy_block_offset(block) >
723ba110db8SJoel Fernandes 		    gpu_buddy_block_offset(max_block)) {
7244a9671a0SJoel Fernandes 			max_block = block;
7254a9671a0SJoel Fernandes 		}
7264a9671a0SJoel Fernandes 	}
7274a9671a0SJoel Fernandes 
7284a9671a0SJoel Fernandes 	return max_block;
7294a9671a0SJoel Fernandes }
7304a9671a0SJoel Fernandes 
/*
 * Allocate a block of at least @order with no range restriction.
 *
 * The first tree to search (clear vs dirty) is chosen from @flags; if it
 * cannot provide a block of sufficient order, the other tree is tried.
 * An oversized block is split down until it matches @order exactly.
 *
 * Returns the block on success, ERR_PTR(-ENOSPC) when both trees are
 * exhausted, or an ERR_PTR from a failed split.
 */
static struct gpu_buddy_block *
alloc_from_freetree(struct gpu_buddy *mm,
		    unsigned int order,
		    unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	struct rb_root *root;
	enum gpu_buddy_free_tree tree;
	unsigned int tmp;
	int err;

	/* Clear-allocation requests prefer the clear tree, others the dirty one. */
	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
		/* Topdown: highest-offset free block across all orders >= @order. */
		block = get_maxblock(mm, order, tree);
		if (block)
			/* Store the obtained block order */
			tmp = gpu_buddy_block_order(block);
	} else {
		/* Bottom-up: first order that has any free block at all. */
		for (tmp = order; tmp <= mm->max_order; ++tmp) {
			/* Get RB tree root for this order and tree */
			root = &mm->free_trees[tree][tmp];
			block = rbtree_last_free_block(root);
			if (block)
				break;
		}
	}

	if (!block) {
		/* Try allocating from the other tree */
		tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
			GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;

		for (tmp = order; tmp <= mm->max_order; ++tmp) {
			root = &mm->free_trees[tree][tmp];
			block = rbtree_last_free_block(root);
			if (block)
				break;
		}

		if (!block)
			return ERR_PTR(-ENOSPC);
	}

	BUG_ON(!gpu_buddy_block_is_free(block));

	/*
	 * Split down until the block matches the requested order, keeping
	 * the right child at each level (the left sibling stays free).
	 */
	while (tmp != order) {
		err = split_block(mm, block);
		if (unlikely(err))
			goto err_undo;

		block = block->right;
		tmp--;
	}
	return block;

err_undo:
	/* Undo partial splits by freeing (and merging) the current block. */
	if (tmp != order)
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}
7934a9671a0SJoel Fernandes 
/*
 * Allocate every free block overlapping [start, start + size) by walking
 * the DFS list @dfs, splitting partially-overlapping blocks as needed.
 *
 * Fully-contained free blocks are marked allocated and collected on
 * @blocks in ascending offset order.  On -ENOSPC, if
 * @total_allocated_on_err is non-NULL the partial allocation is kept on
 * @blocks and its size reported through the pointer; otherwise the
 * partial allocation is rolled back.
 *
 * Returns 0 on success or a negative errno.
 */
static int __alloc_range(struct gpu_buddy *mm,
			 struct list_head *dfs,
			 u64 start, u64 size,
			 struct list_head *blocks,
			 u64 *total_allocated_on_err)
{
	struct gpu_buddy_block *block;
	struct gpu_buddy_block *buddy;
	u64 total_allocated = 0;
	LIST_HEAD(allocated);
	u64 end;
	int err;

	end = start + size - 1;	/* inclusive end of the requested range */

	do {
		u64 block_start;
		u64 block_end;

		block = list_first_entry_or_null(dfs,
						 struct gpu_buddy_block,
						 tmp_link);
		if (!block)
			break;

		list_del(&block->tmp_link);

		block_start = gpu_buddy_block_offset(block);
		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

		/* Skip subtrees entirely outside the requested range. */
		if (!overlaps(start, end, block_start, block_end))
			continue;

		/* An already-allocated block inside the range is fatal. */
		if (gpu_buddy_block_is_allocated(block)) {
			err = -ENOSPC;
			goto err_free;
		}

		if (contains(start, end, block_start, block_end)) {
			/* Fully inside the range: take the whole block. */
			if (gpu_buddy_block_is_free(block)) {
				mark_allocated(mm, block);
				total_allocated += gpu_buddy_block_size(mm, block);
				mm->avail -= gpu_buddy_block_size(mm, block);
				if (gpu_buddy_block_is_clear(block))
					mm->clear_avail -= gpu_buddy_block_size(mm, block);
				list_add_tail(&block->link, &allocated);
				continue;
			} else if (!mm->clear_avail) {
				err = -ENOSPC;
				goto err_free;
			}
		}

		/* Partial overlap: descend into the block's children. */
		if (!gpu_buddy_block_is_split(block)) {
			err = split_block(mm, block);
			if (unlikely(err))
				goto err_undo;
		}

		/* Push left last so it is visited first (ascending offsets). */
		list_add(&block->right->tmp_link, dfs);
		list_add(&block->left->tmp_link, dfs);
	} while (1);

	if (total_allocated < size) {
		err = -ENOSPC;
		goto err_free;
	}

	list_splice_tail(&allocated, blocks);

	return 0;

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);

err_free:
	/* Keep the partial allocation only when the caller asked for it. */
	if (err == -ENOSPC && total_allocated_on_err) {
		list_splice_tail(&allocated, blocks);
		*total_allocated_on_err = total_allocated;
	} else {
		gpu_buddy_free_list_internal(mm, &allocated);
	}

	return err;
}
8884a9671a0SJoel Fernandes 
889ba110db8SJoel Fernandes static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
8904a9671a0SJoel Fernandes 				   u64 start,
8914a9671a0SJoel Fernandes 				   u64 size,
8924a9671a0SJoel Fernandes 				   u64 *total_allocated_on_err,
8934a9671a0SJoel Fernandes 				   struct list_head *blocks)
8944a9671a0SJoel Fernandes {
8954a9671a0SJoel Fernandes 	LIST_HEAD(dfs);
8964a9671a0SJoel Fernandes 	int i;
8974a9671a0SJoel Fernandes 
8984a9671a0SJoel Fernandes 	for (i = 0; i < mm->n_roots; ++i)
8994a9671a0SJoel Fernandes 		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
9004a9671a0SJoel Fernandes 
9014a9671a0SJoel Fernandes 	return __alloc_range(mm, &dfs, start, size,
9024a9671a0SJoel Fernandes 			     blocks, total_allocated_on_err);
9034a9671a0SJoel Fernandes }
9044a9671a0SJoel Fernandes 
/*
 * Last-resort contiguous allocation: anchor on a free block whose order
 * is the largest power of two not exceeding @size, allocate rightwards
 * (RHS) from the anchor's offset, and if that comes up short, extend
 * leftwards (LHS).  Candidate anchors are walked from highest offset
 * down, across both free trees.
 *
 * Returns 0 on success, -ENOSPC when no anchor works, or another
 * negative errno from the range allocator.
 */
static int __alloc_contig_try_harder(struct gpu_buddy *mm,
				     u64 size,
				     u64 min_block_size,
				     struct list_head *blocks)
{
	u64 rhs_offset, lhs_offset, lhs_size, filled;
	struct gpu_buddy_block *block;
	unsigned int tree, order;
	LIST_HEAD(blocks_lhs);
	unsigned long pages;
	u64 modify_size;
	int err;

	/* Anchor order: largest power-of-two chunk count that fits @size. */
	modify_size = rounddown_pow_of_two(size);
	pages = modify_size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;
	if (order == 0)
		return -ENOSPC;

	for_each_free_tree(tree) {
		struct rb_root *root;
		struct rb_node *iter;

		root = &mm->free_trees[tree][order];
		if (rbtree_is_empty(root))
			continue;

		/* Walk anchors from the highest offset downwards. */
		iter = rb_last(root);
		while (iter) {
			block = rbtree_get_free_block(iter);

			/* Allocate blocks traversing RHS */
			rhs_offset = gpu_buddy_block_offset(block);
			err =  __gpu_buddy_alloc_range(mm, rhs_offset, size,
						       &filled, blocks);
			if (!err || err != -ENOSPC)
				return err;

			/* Remaining bytes, rounded up to the LHS alignment. */
			lhs_size = max((size - filled), min_block_size);
			if (!IS_ALIGNED(lhs_size, min_block_size))
				lhs_size = round_up(lhs_size, min_block_size);

			/* Allocate blocks traversing LHS */
			lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
			err =  __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
						       NULL, &blocks_lhs);
			if (!err) {
				list_splice(&blocks_lhs, blocks);
				return 0;
			} else if (err != -ENOSPC) {
				gpu_buddy_free_list_internal(mm, blocks);
				return err;
			}
			/* Free blocks for the next iteration */
			gpu_buddy_free_list_internal(mm, blocks);

			iter = rb_prev(iter);
		}
	}

	return -ENOSPC;
}
9674a9671a0SJoel Fernandes 
9684a9671a0SJoel Fernandes /**
969ba110db8SJoel Fernandes  * gpu_buddy_block_trim - free unused pages
9704a9671a0SJoel Fernandes  *
971ba110db8SJoel Fernandes  * @mm: GPU buddy manager
 * @start: optional start address to begin the trimming; may be NULL, in
 * which case trimming begins at the block's own start offset.
9734a9671a0SJoel Fernandes  * @new_size: original size requested
9744a9671a0SJoel Fernandes  * @blocks: Input and output list of allocated blocks.
9754a9671a0SJoel Fernandes  * MUST contain single block as input to be trimmed.
9764a9671a0SJoel Fernandes  * On success will contain the newly allocated blocks
9774a9671a0SJoel Fernandes  * making up the @new_size. Blocks always appear in
9784a9671a0SJoel Fernandes  * ascending order
9794a9671a0SJoel Fernandes  *
9804a9671a0SJoel Fernandes  * For contiguous allocation, we round up the size to the nearest
9814a9671a0SJoel Fernandes  * power of two value, drivers consume *actual* size, so remaining
9824a9671a0SJoel Fernandes  * portions are unused and can be optionally freed with this function
9834a9671a0SJoel Fernandes  *
9844a9671a0SJoel Fernandes  * Returns:
9854a9671a0SJoel Fernandes  * 0 on success, error code on failure.
9864a9671a0SJoel Fernandes  */
int gpu_buddy_block_trim(struct gpu_buddy *mm,
			 u64 *start,
			 u64 new_size,
			 struct list_head *blocks)
{
	struct gpu_buddy_block *parent;
	struct gpu_buddy_block *block;
	u64 block_start, block_end;
	LIST_HEAD(dfs);
	u64 new_start;
	int err;

	/* Exactly one block must be passed in for trimming. */
	if (!list_is_singular(blocks))
		return -EINVAL;

	block = list_first_entry(blocks,
				 struct gpu_buddy_block,
				 link);

	block_start = gpu_buddy_block_offset(block);
	block_end = block_start + gpu_buddy_block_size(mm, block);

	if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
		return -EINVAL;

	/* Can only shrink: the new size must fit inside the block... */
	if (new_size > gpu_buddy_block_size(mm, block))
		return -EINVAL;

	/* ...be non-zero, and chunk-aligned. */
	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
		return -EINVAL;

	/* Nothing to trim. */
	if (new_size == gpu_buddy_block_size(mm, block))
		return 0;

	/* Optional caller-chosen start for the retained region. */
	new_start = block_start;
	if (start) {
		new_start = *start;

		if (new_start < block_start)
			return -EINVAL;

		if (!IS_ALIGNED(new_start, mm->chunk_size))
			return -EINVAL;

		if (range_overflows(new_start, new_size, block_end))
			return -EINVAL;
	}

	/*
	 * Temporarily return the block to the free pool, then re-allocate
	 * just [new_start, new_start + new_size) out of it below.
	 */
	list_del(&block->link);
	mark_free(mm, block);
	mm->avail += gpu_buddy_block_size(mm, block);
	if (gpu_buddy_block_is_clear(block))
		mm->clear_avail += gpu_buddy_block_size(mm, block);

	/* Prevent recursively freeing this node */
	parent = block->parent;
	block->parent = NULL;

	list_add(&block->tmp_link, &dfs);
	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
	if (err) {
		/* Roll back: restore the original allocation untouched. */
		mark_allocated(mm, block);
		mm->avail -= gpu_buddy_block_size(mm, block);
		if (gpu_buddy_block_is_clear(block))
			mm->clear_avail -= gpu_buddy_block_size(mm, block);
		list_add(&block->link, blocks);
	}

	/* Restore the parent link severed above. */
	block->parent = parent;
	return err;
}
EXPORT_SYMBOL(gpu_buddy_block_trim);
10594a9671a0SJoel Fernandes 
1060ba110db8SJoel Fernandes static struct gpu_buddy_block *
1061ba110db8SJoel Fernandes __gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
10624a9671a0SJoel Fernandes 			 u64 start, u64 end,
10634a9671a0SJoel Fernandes 			 unsigned int order,
10644a9671a0SJoel Fernandes 			 unsigned long flags)
10654a9671a0SJoel Fernandes {
1066ba110db8SJoel Fernandes 	if (flags & GPU_BUDDY_RANGE_ALLOCATION)
10674a9671a0SJoel Fernandes 		/* Allocate traversing within the range */
1068ba110db8SJoel Fernandes 		return  __gpu_buddy_alloc_range_bias(mm, start, end,
10694a9671a0SJoel Fernandes 						     order, flags);
10704a9671a0SJoel Fernandes 	else
10714a9671a0SJoel Fernandes 		/* Allocate from freetree */
10724a9671a0SJoel Fernandes 		return alloc_from_freetree(mm, order, flags);
10734a9671a0SJoel Fernandes }
10744a9671a0SJoel Fernandes 
10754a9671a0SJoel Fernandes /**
1076ba110db8SJoel Fernandes  * gpu_buddy_alloc_blocks - allocate power-of-two blocks
10774a9671a0SJoel Fernandes  *
1078ba110db8SJoel Fernandes  * @mm: GPU buddy manager to allocate from
10794a9671a0SJoel Fernandes  * @start: start of the allowed range for this block
10804a9671a0SJoel Fernandes  * @end: end of the allowed range for this block
10814a9671a0SJoel Fernandes  * @size: size of the allocation in bytes
10824a9671a0SJoel Fernandes  * @min_block_size: alignment of the allocation
10834a9671a0SJoel Fernandes  * @blocks: output list head to add allocated blocks
1084ba110db8SJoel Fernandes  * @flags: GPU_BUDDY_*_ALLOCATION flags
10854a9671a0SJoel Fernandes  *
10864a9671a0SJoel Fernandes  * alloc_range_bias() called on range limitations, which traverses
10874a9671a0SJoel Fernandes  * the tree and returns the desired block.
10884a9671a0SJoel Fernandes  *
10894a9671a0SJoel Fernandes  * alloc_from_freetree() called when *no* range restrictions
10904a9671a0SJoel Fernandes  * are enforced, which picks the block from the freetree.
10914a9671a0SJoel Fernandes  *
10924a9671a0SJoel Fernandes  * Returns:
10934a9671a0SJoel Fernandes  * 0 on success, error code on failure.
10944a9671a0SJoel Fernandes  */
int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
			   u64 start, u64 end, u64 size,
			   u64 min_block_size,
			   struct list_head *blocks,
			   unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	u64 original_size, original_min_size;
	unsigned int min_order, order;
	LIST_HEAD(allocated);
	unsigned long pages;
	int err;

	/* Sanity-check sizes and alignment against the chunk size. */
	if (size < mm->chunk_size)
		return -EINVAL;

	if (min_block_size < mm->chunk_size)
		return -EINVAL;

	if (!is_power_of_2(min_block_size))
		return -EINVAL;

	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
		return -EINVAL;

	if (end > mm->size)
		return -EINVAL;

	if (range_overflows(start, size, mm->size))
		return -EINVAL;

	/* Actual range allocation */
	if (start + size == end) {
		/* The range exactly covers the request: allocate all of it. */
		if (!IS_ALIGNED(start | end, min_block_size))
			return -EINVAL;

		return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
	}

	/* Remember the caller's values before rounding, for trim/fallback. */
	original_size = size;
	original_min_size = min_block_size;

	/* Roundup the size to power of 2 */
	if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
		size = roundup_pow_of_two(size);
		min_block_size = size;
	/* Align size value to min_block_size */
	} else if (!IS_ALIGNED(size, min_block_size)) {
		size = round_up(size, min_block_size);
	}

	/* Convert byte sizes into buddy orders. */
	pages = size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;
	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);

	if (order > mm->max_order || size > mm->size) {
		/*
		 * Too big for a single block: contiguous requests without a
		 * range restriction can still be stitched from smaller blocks.
		 */
		if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
		    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
			return __alloc_contig_try_harder(mm, original_size,
							 original_min_size, blocks);

		return -EINVAL;
	}

	do {
		/* Never request a larger order than the remaining pages allow. */
		order = min(order, (unsigned int)fls(pages) - 1);
		BUG_ON(order > mm->max_order);
		BUG_ON(order < min_order);

		do {
			block = __gpu_buddy_alloc_blocks(mm, start,
							 end,
							 order,
							 flags);
			if (!IS_ERR(block))
				break;

			/* Step down one order; once at min_order, try recovery. */
			if (order-- == min_order) {
				/* Try allocation through force merge method */
				if (mm->clear_avail &&
				    !__force_merge(mm, start, end, min_order)) {
					block = __gpu_buddy_alloc_blocks(mm, start,
									 end,
									 min_order,
									 flags);
					if (!IS_ERR(block)) {
						order = min_order;
						break;
					}
				}

				/*
				 * Try contiguous block allocation through
				 * try harder method.
				 */
				if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
				    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
					return __alloc_contig_try_harder(mm,
									 original_size,
									 original_min_size,
									 blocks);
				err = -ENOSPC;
				goto err_free;
			}
		} while (1);

		/* Account for the block and collect it on the local list. */
		mark_allocated(mm, block);
		mm->avail -= gpu_buddy_block_size(mm, block);
		if (gpu_buddy_block_is_clear(block))
			mm->clear_avail -= gpu_buddy_block_size(mm, block);
		kmemleak_update_trace(block);
		list_add_tail(&block->link, &allocated);

		pages -= BIT(order);

		if (!pages)
			break;
	} while (1);

	/* Trim the allocated block to the required size */
	if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
	    original_size != size) {
		struct list_head *trim_list;
		LIST_HEAD(temp);
		u64 trim_size;

		trim_list = &allocated;
		trim_size = original_size;

		/* With multiple blocks, only the last one needs trimming. */
		if (!list_is_singular(&allocated)) {
			block = list_last_entry(&allocated, typeof(*block), link);
			list_move(&block->link, &temp);
			trim_list = &temp;
			trim_size = gpu_buddy_block_size(mm, block) -
				(size - original_size);
		}

		/* Best effort: on trim failure the block stays untrimmed. */
		gpu_buddy_block_trim(mm,
				     NULL,
				     trim_size,
				     trim_list);

		if (!list_empty(&temp))
			list_splice_tail(trim_list, &allocated);
	}

	list_splice_tail(&allocated, blocks);
	return 0;

err_free:
	gpu_buddy_free_list_internal(mm, &allocated);
	return err;
}
EXPORT_SYMBOL(gpu_buddy_alloc_blocks);
12494a9671a0SJoel Fernandes 
12504a9671a0SJoel Fernandes /**
1251ba110db8SJoel Fernandes  * gpu_buddy_block_print - print block information
12524a9671a0SJoel Fernandes  *
1253ba110db8SJoel Fernandes  * @mm: GPU buddy manager
1254ba110db8SJoel Fernandes  * @block: GPU buddy block
12554a9671a0SJoel Fernandes  */
1256ba110db8SJoel Fernandes void gpu_buddy_block_print(struct gpu_buddy *mm,
1257ba110db8SJoel Fernandes 			   struct gpu_buddy_block *block)
12584a9671a0SJoel Fernandes {
1259ba110db8SJoel Fernandes 	u64 start = gpu_buddy_block_offset(block);
1260ba110db8SJoel Fernandes 	u64 size = gpu_buddy_block_size(mm, block);
12614a9671a0SJoel Fernandes 
1262ba110db8SJoel Fernandes 	pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
12634a9671a0SJoel Fernandes }
1264ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_block_print);
12654a9671a0SJoel Fernandes 
/**
 * gpu_buddy_print - print allocator state
 *
 * @mm: GPU buddy manager
 */
1272ba110db8SJoel Fernandes void gpu_buddy_print(struct gpu_buddy *mm)
12734a9671a0SJoel Fernandes {
12744a9671a0SJoel Fernandes 	int order;
12754a9671a0SJoel Fernandes 
1276ba110db8SJoel Fernandes 	pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
12774a9671a0SJoel Fernandes 		mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
12784a9671a0SJoel Fernandes 
12794a9671a0SJoel Fernandes 	for (order = mm->max_order; order >= 0; order--) {
1280ba110db8SJoel Fernandes 		struct gpu_buddy_block *block, *tmp;
12814a9671a0SJoel Fernandes 		struct rb_root *root;
12824a9671a0SJoel Fernandes 		u64 count = 0, free;
12834a9671a0SJoel Fernandes 		unsigned int tree;
12844a9671a0SJoel Fernandes 
12854a9671a0SJoel Fernandes 		for_each_free_tree(tree) {
12864a9671a0SJoel Fernandes 			root = &mm->free_trees[tree][order];
12874a9671a0SJoel Fernandes 
12884a9671a0SJoel Fernandes 			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
1289ba110db8SJoel Fernandes 				BUG_ON(!gpu_buddy_block_is_free(block));
12904a9671a0SJoel Fernandes 				count++;
12914a9671a0SJoel Fernandes 			}
12924a9671a0SJoel Fernandes 		}
12934a9671a0SJoel Fernandes 
12944a9671a0SJoel Fernandes 		free = count * (mm->chunk_size << order);
12954a9671a0SJoel Fernandes 		if (free < SZ_1M)
1296ba110db8SJoel Fernandes 			pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
1297ba110db8SJoel Fernandes 				order, free >> 10, count);
12984a9671a0SJoel Fernandes 		else
1299ba110db8SJoel Fernandes 			pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
1300ba110db8SJoel Fernandes 				order, free >> 20, count);
13014a9671a0SJoel Fernandes 	}
13024a9671a0SJoel Fernandes }
1303ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_print);
13044a9671a0SJoel Fernandes 
1305ba110db8SJoel Fernandes static void gpu_buddy_module_exit(void)
13064a9671a0SJoel Fernandes {
13074a9671a0SJoel Fernandes 	kmem_cache_destroy(slab_blocks);
13084a9671a0SJoel Fernandes }
13094a9671a0SJoel Fernandes 
1310ba110db8SJoel Fernandes static int __init gpu_buddy_module_init(void)
13114a9671a0SJoel Fernandes {
1312ba110db8SJoel Fernandes 	slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
13134a9671a0SJoel Fernandes 	if (!slab_blocks)
13144a9671a0SJoel Fernandes 		return -ENOMEM;
13154a9671a0SJoel Fernandes 
13164a9671a0SJoel Fernandes 	return 0;
13174a9671a0SJoel Fernandes }
13184a9671a0SJoel Fernandes 
1319ba110db8SJoel Fernandes module_init(gpu_buddy_module_init);
1320ba110db8SJoel Fernandes module_exit(gpu_buddy_module_exit);
13214a9671a0SJoel Fernandes 
1322ba110db8SJoel Fernandes MODULE_DESCRIPTION("GPU Buddy Allocator");
13234a9671a0SJoel Fernandes MODULE_LICENSE("Dual MIT/GPL");
1324