xref: /linux/drivers/gpu/buddy.c (revision 4a57e0913e8c7fff407e97909f4ae48caa84d612)
14a9671a0SJoel Fernandes // SPDX-License-Identifier: MIT
24a9671a0SJoel Fernandes /*
34a9671a0SJoel Fernandes  * Copyright © 2021 Intel Corporation
44a9671a0SJoel Fernandes  */
54a9671a0SJoel Fernandes 
6abdcd4c3SSanjay Yadav #include <linux/bug.h>
74a9671a0SJoel Fernandes #include <linux/export.h>
84a9671a0SJoel Fernandes #include <linux/kmemleak.h>
94a9671a0SJoel Fernandes #include <linux/module.h>
104a9671a0SJoel Fernandes #include <linux/sizes.h>
114a9671a0SJoel Fernandes 
124a9671a0SJoel Fernandes #include <linux/gpu_buddy.h>
134a9671a0SJoel Fernandes 
14abdcd4c3SSanjay Yadav /**
15abdcd4c3SSanjay Yadav  * gpu_buddy_assert - assert a condition in the buddy allocator
16abdcd4c3SSanjay Yadav  * @condition: condition expected to be true
17abdcd4c3SSanjay Yadav  *
18abdcd4c3SSanjay Yadav  * When CONFIG_KUNIT is enabled, evaluates @condition and, if false, triggers
19abdcd4c3SSanjay Yadav  * a WARN_ON() and also calls kunit_fail_current_test() so that any running
20abdcd4c3SSanjay Yadav  * kunit test is properly marked as failed. The stringified condition is
21abdcd4c3SSanjay Yadav  * included in the failure message for easy identification.
22abdcd4c3SSanjay Yadav  *
23abdcd4c3SSanjay Yadav  * When CONFIG_KUNIT is not enabled, this reduces to WARN_ON() so production
24abdcd4c3SSanjay Yadav  * builds retain the same warning semantics as before.
25abdcd4c3SSanjay Yadav  */
26abdcd4c3SSanjay Yadav #if IS_ENABLED(CONFIG_KUNIT)
27abdcd4c3SSanjay Yadav #include <kunit/test-bug.h>
28abdcd4c3SSanjay Yadav #define gpu_buddy_assert(condition) do {						\
29abdcd4c3SSanjay Yadav 	if (WARN_ON(!(condition)))						\
30abdcd4c3SSanjay Yadav 		kunit_fail_current_test("gpu_buddy_assert(" #condition ")");	\
31abdcd4c3SSanjay Yadav } while (0)
32abdcd4c3SSanjay Yadav #else
33abdcd4c3SSanjay Yadav #define gpu_buddy_assert(condition) WARN_ON(!(condition))
34abdcd4c3SSanjay Yadav #endif
35abdcd4c3SSanjay Yadav 
364a9671a0SJoel Fernandes static struct kmem_cache *slab_blocks;
374a9671a0SJoel Fernandes 
38df8c7892SSanjay Yadav static unsigned int
39df8c7892SSanjay Yadav gpu_buddy_block_state(struct gpu_buddy_block *block)
40df8c7892SSanjay Yadav {
41df8c7892SSanjay Yadav 	return block->header & GPU_BUDDY_HEADER_STATE;
42df8c7892SSanjay Yadav }
43df8c7892SSanjay Yadav 
44df8c7892SSanjay Yadav static bool
45df8c7892SSanjay Yadav gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
46df8c7892SSanjay Yadav {
47df8c7892SSanjay Yadav 	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
48df8c7892SSanjay Yadav }
49df8c7892SSanjay Yadav 
50df8c7892SSanjay Yadav static bool
51df8c7892SSanjay Yadav gpu_buddy_block_is_split(struct gpu_buddy_block *block)
52df8c7892SSanjay Yadav {
53df8c7892SSanjay Yadav 	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
54df8c7892SSanjay Yadav }
55df8c7892SSanjay Yadav 
56*493740d7SArunpravin Paneer Selvam static unsigned int gpu_buddy_block_offset_alignment(struct gpu_buddy_block *block)
57*493740d7SArunpravin Paneer Selvam {
58*493740d7SArunpravin Paneer Selvam 	u64 offset = gpu_buddy_block_offset(block);
59*493740d7SArunpravin Paneer Selvam 
60*493740d7SArunpravin Paneer Selvam 	if (!offset)
61*493740d7SArunpravin Paneer Selvam 		/*
62*493740d7SArunpravin Paneer Selvam 		 * __ffs64(0) is undefined; offset 0 is maximally aligned, so return
63*493740d7SArunpravin Paneer Selvam 		 * a value greater than any possible alignment.
64*493740d7SArunpravin Paneer Selvam 		 */
65*493740d7SArunpravin Paneer Selvam 		return 64 + 1;
66*493740d7SArunpravin Paneer Selvam 
67*493740d7SArunpravin Paneer Selvam 	return __ffs64(offset);
68*493740d7SArunpravin Paneer Selvam }
69*493740d7SArunpravin Paneer Selvam 
70*493740d7SArunpravin Paneer Selvam RB_DECLARE_CALLBACKS_MAX(static, gpu_buddy_augment_cb,
71*493740d7SArunpravin Paneer Selvam 			 struct gpu_buddy_block, rb,
72*493740d7SArunpravin Paneer Selvam 			 unsigned int, subtree_max_alignment,
73*493740d7SArunpravin Paneer Selvam 			 gpu_buddy_block_offset_alignment);
74*493740d7SArunpravin Paneer Selvam 
75ba110db8SJoel Fernandes static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
76ba110db8SJoel Fernandes 					       struct gpu_buddy_block *parent,
774a9671a0SJoel Fernandes 					       unsigned int order,
784a9671a0SJoel Fernandes 					       u64 offset)
794a9671a0SJoel Fernandes {
80ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
814a9671a0SJoel Fernandes 
82ba110db8SJoel Fernandes 	BUG_ON(order > GPU_BUDDY_MAX_ORDER);
834a9671a0SJoel Fernandes 
844a9671a0SJoel Fernandes 	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
854a9671a0SJoel Fernandes 	if (!block)
864a9671a0SJoel Fernandes 		return NULL;
874a9671a0SJoel Fernandes 
884a9671a0SJoel Fernandes 	block->header = offset;
894a9671a0SJoel Fernandes 	block->header |= order;
904a9671a0SJoel Fernandes 	block->parent = parent;
914a9671a0SJoel Fernandes 
924a9671a0SJoel Fernandes 	RB_CLEAR_NODE(&block->rb);
934a9671a0SJoel Fernandes 
94ba110db8SJoel Fernandes 	BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
954a9671a0SJoel Fernandes 	return block;
964a9671a0SJoel Fernandes }
974a9671a0SJoel Fernandes 
98ba110db8SJoel Fernandes static void gpu_block_free(struct gpu_buddy *mm,
99ba110db8SJoel Fernandes 			   struct gpu_buddy_block *block)
1004a9671a0SJoel Fernandes {
1014a9671a0SJoel Fernandes 	kmem_cache_free(slab_blocks, block);
1024a9671a0SJoel Fernandes }
1034a9671a0SJoel Fernandes 
104ba110db8SJoel Fernandes static enum gpu_buddy_free_tree
105ba110db8SJoel Fernandes get_block_tree(struct gpu_buddy_block *block)
1064a9671a0SJoel Fernandes {
107ba110db8SJoel Fernandes 	return gpu_buddy_block_is_clear(block) ?
108ba110db8SJoel Fernandes 	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
1094a9671a0SJoel Fernandes }
1104a9671a0SJoel Fernandes 
111ba110db8SJoel Fernandes static struct gpu_buddy_block *
1124a9671a0SJoel Fernandes rbtree_get_free_block(const struct rb_node *node)
1134a9671a0SJoel Fernandes {
114ba110db8SJoel Fernandes 	return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
1154a9671a0SJoel Fernandes }
1164a9671a0SJoel Fernandes 
117ba110db8SJoel Fernandes static struct gpu_buddy_block *
1184a9671a0SJoel Fernandes rbtree_last_free_block(struct rb_root *root)
1194a9671a0SJoel Fernandes {
1204a9671a0SJoel Fernandes 	return rbtree_get_free_block(rb_last(root));
1214a9671a0SJoel Fernandes }
1224a9671a0SJoel Fernandes 
1234a9671a0SJoel Fernandes static bool rbtree_is_empty(struct rb_root *root)
1244a9671a0SJoel Fernandes {
1254a9671a0SJoel Fernandes 	return RB_EMPTY_ROOT(root);
1264a9671a0SJoel Fernandes }
1274a9671a0SJoel Fernandes 
128ba110db8SJoel Fernandes static void rbtree_insert(struct gpu_buddy *mm,
129ba110db8SJoel Fernandes 			  struct gpu_buddy_block *block,
130ba110db8SJoel Fernandes 			  enum gpu_buddy_free_tree tree)
1314a9671a0SJoel Fernandes {
132*493740d7SArunpravin Paneer Selvam 	struct rb_node **link, *parent = NULL;
133*493740d7SArunpravin Paneer Selvam 	unsigned int block_alignment, order;
134*493740d7SArunpravin Paneer Selvam 	struct gpu_buddy_block *node;
135*493740d7SArunpravin Paneer Selvam 	struct rb_root *root;
136*493740d7SArunpravin Paneer Selvam 
137*493740d7SArunpravin Paneer Selvam 	order = gpu_buddy_block_order(block);
138*493740d7SArunpravin Paneer Selvam 	block_alignment = gpu_buddy_block_offset_alignment(block);
139*493740d7SArunpravin Paneer Selvam 
140*493740d7SArunpravin Paneer Selvam 	root = &mm->free_trees[tree][order];
141*493740d7SArunpravin Paneer Selvam 	link = &root->rb_node;
142*493740d7SArunpravin Paneer Selvam 
143*493740d7SArunpravin Paneer Selvam 	while (*link) {
144*493740d7SArunpravin Paneer Selvam 		parent = *link;
145*493740d7SArunpravin Paneer Selvam 		node = rbtree_get_free_block(parent);
146*493740d7SArunpravin Paneer Selvam 		/*
147*493740d7SArunpravin Paneer Selvam 		 * Manual augmentation update during insertion traversal. Required
148*493740d7SArunpravin Paneer Selvam 		 * because rb_insert_augmented() only calls rotate callback during
149*493740d7SArunpravin Paneer Selvam 		 * rotations. This ensures all ancestors on the insertion path have
150*493740d7SArunpravin Paneer Selvam 		 * correct subtree_max_alignment values.
151*493740d7SArunpravin Paneer Selvam 		 */
152*493740d7SArunpravin Paneer Selvam 		if (node->subtree_max_alignment < block_alignment)
153*493740d7SArunpravin Paneer Selvam 			node->subtree_max_alignment = block_alignment;
154*493740d7SArunpravin Paneer Selvam 
155*493740d7SArunpravin Paneer Selvam 		if (gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node))
156*493740d7SArunpravin Paneer Selvam 			link = &parent->rb_left;
157*493740d7SArunpravin Paneer Selvam 		else
158*493740d7SArunpravin Paneer Selvam 			link = &parent->rb_right;
159*493740d7SArunpravin Paneer Selvam 	}
160*493740d7SArunpravin Paneer Selvam 
161*493740d7SArunpravin Paneer Selvam 	block->subtree_max_alignment = block_alignment;
162*493740d7SArunpravin Paneer Selvam 	rb_link_node(&block->rb, parent, link);
163*493740d7SArunpravin Paneer Selvam 	rb_insert_augmented(&block->rb, root, &gpu_buddy_augment_cb);
1644a9671a0SJoel Fernandes }
1654a9671a0SJoel Fernandes 
166ba110db8SJoel Fernandes static void rbtree_remove(struct gpu_buddy *mm,
167ba110db8SJoel Fernandes 			  struct gpu_buddy_block *block)
1684a9671a0SJoel Fernandes {
169ba110db8SJoel Fernandes 	unsigned int order = gpu_buddy_block_order(block);
170ba110db8SJoel Fernandes 	enum gpu_buddy_free_tree tree;
1714a9671a0SJoel Fernandes 	struct rb_root *root;
1724a9671a0SJoel Fernandes 
1734a9671a0SJoel Fernandes 	tree = get_block_tree(block);
1744a9671a0SJoel Fernandes 	root = &mm->free_trees[tree][order];
1754a9671a0SJoel Fernandes 
176*493740d7SArunpravin Paneer Selvam 	rb_erase_augmented(&block->rb, root, &gpu_buddy_augment_cb);
1774a9671a0SJoel Fernandes 	RB_CLEAR_NODE(&block->rb);
1784a9671a0SJoel Fernandes }
1794a9671a0SJoel Fernandes 
180ba110db8SJoel Fernandes static void clear_reset(struct gpu_buddy_block *block)
1814a9671a0SJoel Fernandes {
182ba110db8SJoel Fernandes 	block->header &= ~GPU_BUDDY_HEADER_CLEAR;
1834a9671a0SJoel Fernandes }
1844a9671a0SJoel Fernandes 
185ba110db8SJoel Fernandes static void mark_cleared(struct gpu_buddy_block *block)
1864a9671a0SJoel Fernandes {
187ba110db8SJoel Fernandes 	block->header |= GPU_BUDDY_HEADER_CLEAR;
1884a9671a0SJoel Fernandes }
1894a9671a0SJoel Fernandes 
190ba110db8SJoel Fernandes static void mark_allocated(struct gpu_buddy *mm,
191ba110db8SJoel Fernandes 			   struct gpu_buddy_block *block)
1924a9671a0SJoel Fernandes {
193ba110db8SJoel Fernandes 	block->header &= ~GPU_BUDDY_HEADER_STATE;
194ba110db8SJoel Fernandes 	block->header |= GPU_BUDDY_ALLOCATED;
1954a9671a0SJoel Fernandes 
1964a9671a0SJoel Fernandes 	rbtree_remove(mm, block);
1974a9671a0SJoel Fernandes }
1984a9671a0SJoel Fernandes 
199ba110db8SJoel Fernandes static void mark_free(struct gpu_buddy *mm,
200ba110db8SJoel Fernandes 		      struct gpu_buddy_block *block)
2014a9671a0SJoel Fernandes {
202ba110db8SJoel Fernandes 	enum gpu_buddy_free_tree tree;
2034a9671a0SJoel Fernandes 
204ba110db8SJoel Fernandes 	block->header &= ~GPU_BUDDY_HEADER_STATE;
205ba110db8SJoel Fernandes 	block->header |= GPU_BUDDY_FREE;
2064a9671a0SJoel Fernandes 
2074a9671a0SJoel Fernandes 	tree = get_block_tree(block);
2084a9671a0SJoel Fernandes 	rbtree_insert(mm, block, tree);
2094a9671a0SJoel Fernandes }
2104a9671a0SJoel Fernandes 
211ba110db8SJoel Fernandes static void mark_split(struct gpu_buddy *mm,
212ba110db8SJoel Fernandes 		       struct gpu_buddy_block *block)
2134a9671a0SJoel Fernandes {
214ba110db8SJoel Fernandes 	block->header &= ~GPU_BUDDY_HEADER_STATE;
215ba110db8SJoel Fernandes 	block->header |= GPU_BUDDY_SPLIT;
2164a9671a0SJoel Fernandes 
2174a9671a0SJoel Fernandes 	rbtree_remove(mm, block);
2184a9671a0SJoel Fernandes }
2194a9671a0SJoel Fernandes 
2204a9671a0SJoel Fernandes static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
2214a9671a0SJoel Fernandes {
2224a9671a0SJoel Fernandes 	return s1 <= e2 && e1 >= s2;
2234a9671a0SJoel Fernandes }
2244a9671a0SJoel Fernandes 
2254a9671a0SJoel Fernandes static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
2264a9671a0SJoel Fernandes {
2274a9671a0SJoel Fernandes 	return s1 <= s2 && e1 >= e2;
2284a9671a0SJoel Fernandes }
2294a9671a0SJoel Fernandes 
230ba110db8SJoel Fernandes static struct gpu_buddy_block *
231ba110db8SJoel Fernandes __get_buddy(struct gpu_buddy_block *block)
2324a9671a0SJoel Fernandes {
233ba110db8SJoel Fernandes 	struct gpu_buddy_block *parent;
2344a9671a0SJoel Fernandes 
2354a9671a0SJoel Fernandes 	parent = block->parent;
2364a9671a0SJoel Fernandes 	if (!parent)
2374a9671a0SJoel Fernandes 		return NULL;
2384a9671a0SJoel Fernandes 
2394a9671a0SJoel Fernandes 	if (parent->left == block)
2404a9671a0SJoel Fernandes 		return parent->right;
2414a9671a0SJoel Fernandes 
2424a9671a0SJoel Fernandes 	return parent->left;
2434a9671a0SJoel Fernandes }
2444a9671a0SJoel Fernandes 
245ba110db8SJoel Fernandes static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
246ba110db8SJoel Fernandes 				     struct gpu_buddy_block *block,
2474a9671a0SJoel Fernandes 				     bool force_merge)
2484a9671a0SJoel Fernandes {
249ba110db8SJoel Fernandes 	struct gpu_buddy_block *parent;
2504a9671a0SJoel Fernandes 	unsigned int order;
2514a9671a0SJoel Fernandes 
2524a9671a0SJoel Fernandes 	while ((parent = block->parent)) {
253ba110db8SJoel Fernandes 		struct gpu_buddy_block *buddy;
2544a9671a0SJoel Fernandes 
2554a9671a0SJoel Fernandes 		buddy = __get_buddy(block);
2564a9671a0SJoel Fernandes 
257ba110db8SJoel Fernandes 		if (!gpu_buddy_block_is_free(buddy))
2584a9671a0SJoel Fernandes 			break;
2594a9671a0SJoel Fernandes 
2604a9671a0SJoel Fernandes 		if (!force_merge) {
2614a9671a0SJoel Fernandes 			/*
2624a9671a0SJoel Fernandes 			 * Check the block and its buddy clear state and exit
2634a9671a0SJoel Fernandes 			 * the loop if they both have the dissimilar state.
2644a9671a0SJoel Fernandes 			 */
265ba110db8SJoel Fernandes 			if (gpu_buddy_block_is_clear(block) !=
266ba110db8SJoel Fernandes 			    gpu_buddy_block_is_clear(buddy))
2674a9671a0SJoel Fernandes 				break;
2684a9671a0SJoel Fernandes 
269ba110db8SJoel Fernandes 			if (gpu_buddy_block_is_clear(block))
2704a9671a0SJoel Fernandes 				mark_cleared(parent);
2714a9671a0SJoel Fernandes 		}
2724a9671a0SJoel Fernandes 
2734a9671a0SJoel Fernandes 		rbtree_remove(mm, buddy);
274ba110db8SJoel Fernandes 		if (force_merge && gpu_buddy_block_is_clear(buddy))
275ba110db8SJoel Fernandes 			mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
2764a9671a0SJoel Fernandes 
277ba110db8SJoel Fernandes 		gpu_block_free(mm, block);
278ba110db8SJoel Fernandes 		gpu_block_free(mm, buddy);
2794a9671a0SJoel Fernandes 
2804a9671a0SJoel Fernandes 		block = parent;
2814a9671a0SJoel Fernandes 	}
2824a9671a0SJoel Fernandes 
283ba110db8SJoel Fernandes 	order = gpu_buddy_block_order(block);
2844a9671a0SJoel Fernandes 	mark_free(mm, block);
2854a9671a0SJoel Fernandes 
2864a9671a0SJoel Fernandes 	return order;
2874a9671a0SJoel Fernandes }
2884a9671a0SJoel Fernandes 
289ba110db8SJoel Fernandes static int __force_merge(struct gpu_buddy *mm,
2904a9671a0SJoel Fernandes 			 u64 start,
2914a9671a0SJoel Fernandes 			 u64 end,
2924a9671a0SJoel Fernandes 			 unsigned int min_order)
2934a9671a0SJoel Fernandes {
2944a9671a0SJoel Fernandes 	unsigned int tree, order;
2954a9671a0SJoel Fernandes 	int i;
2964a9671a0SJoel Fernandes 
2974a9671a0SJoel Fernandes 	if (!min_order)
2984a9671a0SJoel Fernandes 		return -ENOMEM;
2994a9671a0SJoel Fernandes 
3004a9671a0SJoel Fernandes 	if (min_order > mm->max_order)
3014a9671a0SJoel Fernandes 		return -EINVAL;
3024a9671a0SJoel Fernandes 
3034a9671a0SJoel Fernandes 	for_each_free_tree(tree) {
3044a9671a0SJoel Fernandes 		for (i = min_order - 1; i >= 0; i--) {
3054a9671a0SJoel Fernandes 			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
3064a9671a0SJoel Fernandes 
3074a9671a0SJoel Fernandes 			while (iter) {
308ba110db8SJoel Fernandes 				struct gpu_buddy_block *block, *buddy;
3094a9671a0SJoel Fernandes 				u64 block_start, block_end;
3104a9671a0SJoel Fernandes 
3114a9671a0SJoel Fernandes 				block = rbtree_get_free_block(iter);
3124a9671a0SJoel Fernandes 				iter = rb_prev(iter);
3134a9671a0SJoel Fernandes 
3144a9671a0SJoel Fernandes 				if (!block || !block->parent)
3154a9671a0SJoel Fernandes 					continue;
3164a9671a0SJoel Fernandes 
317ba110db8SJoel Fernandes 				block_start = gpu_buddy_block_offset(block);
318ba110db8SJoel Fernandes 				block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
3194a9671a0SJoel Fernandes 
3204a9671a0SJoel Fernandes 				if (!contains(start, end, block_start, block_end))
3214a9671a0SJoel Fernandes 					continue;
3224a9671a0SJoel Fernandes 
3234a9671a0SJoel Fernandes 				buddy = __get_buddy(block);
324ba110db8SJoel Fernandes 				if (!gpu_buddy_block_is_free(buddy))
3254a9671a0SJoel Fernandes 					continue;
3264a9671a0SJoel Fernandes 
327abdcd4c3SSanjay Yadav 				gpu_buddy_assert(gpu_buddy_block_is_clear(block) !=
328ba110db8SJoel Fernandes 						 gpu_buddy_block_is_clear(buddy));
3294a9671a0SJoel Fernandes 
3304a9671a0SJoel Fernandes 				/*
3314a9671a0SJoel Fernandes 				 * Advance to the next node when the current node is the buddy,
3324a9671a0SJoel Fernandes 				 * as freeing the block will also remove its buddy from the tree.
3334a9671a0SJoel Fernandes 				 */
3344a9671a0SJoel Fernandes 				if (iter == &buddy->rb)
3354a9671a0SJoel Fernandes 					iter = rb_prev(iter);
3364a9671a0SJoel Fernandes 
3374a9671a0SJoel Fernandes 				rbtree_remove(mm, block);
338ba110db8SJoel Fernandes 				if (gpu_buddy_block_is_clear(block))
339ba110db8SJoel Fernandes 					mm->clear_avail -= gpu_buddy_block_size(mm, block);
3404a9671a0SJoel Fernandes 
341ba110db8SJoel Fernandes 				order = __gpu_buddy_free(mm, block, true);
3424a9671a0SJoel Fernandes 				if (order >= min_order)
3434a9671a0SJoel Fernandes 					return 0;
3444a9671a0SJoel Fernandes 			}
3454a9671a0SJoel Fernandes 		}
3464a9671a0SJoel Fernandes 	}
3474a9671a0SJoel Fernandes 
3484a9671a0SJoel Fernandes 	return -ENOMEM;
3494a9671a0SJoel Fernandes }
3504a9671a0SJoel Fernandes 
3514a9671a0SJoel Fernandes /**
352ba110db8SJoel Fernandes  * gpu_buddy_init - init memory manager
3534a9671a0SJoel Fernandes  *
354ba110db8SJoel Fernandes  * @mm: GPU buddy manager to initialize
3554a9671a0SJoel Fernandes  * @size: size in bytes to manage
3564a9671a0SJoel Fernandes  * @chunk_size: minimum page size in bytes for our allocations
3574a9671a0SJoel Fernandes  *
3584a9671a0SJoel Fernandes  * Initializes the memory manager and its resources.
3594a9671a0SJoel Fernandes  *
3604a9671a0SJoel Fernandes  * Returns:
3614a9671a0SJoel Fernandes  * 0 on success, error code on failure.
3624a9671a0SJoel Fernandes  */
363ba110db8SJoel Fernandes int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
3644a9671a0SJoel Fernandes {
3654a9671a0SJoel Fernandes 	unsigned int i, j, root_count = 0;
3664a9671a0SJoel Fernandes 	u64 offset = 0;
3674a9671a0SJoel Fernandes 
3684a9671a0SJoel Fernandes 	if (size < chunk_size)
3694a9671a0SJoel Fernandes 		return -EINVAL;
3704a9671a0SJoel Fernandes 
3714a9671a0SJoel Fernandes 	if (chunk_size < SZ_4K)
3724a9671a0SJoel Fernandes 		return -EINVAL;
3734a9671a0SJoel Fernandes 
3744a9671a0SJoel Fernandes 	if (!is_power_of_2(chunk_size))
3754a9671a0SJoel Fernandes 		return -EINVAL;
3764a9671a0SJoel Fernandes 
3774a9671a0SJoel Fernandes 	size = round_down(size, chunk_size);
3784a9671a0SJoel Fernandes 
3794a9671a0SJoel Fernandes 	mm->size = size;
3804a9671a0SJoel Fernandes 	mm->avail = size;
3814a9671a0SJoel Fernandes 	mm->clear_avail = 0;
3824a9671a0SJoel Fernandes 	mm->chunk_size = chunk_size;
3834a9671a0SJoel Fernandes 	mm->max_order = ilog2(size) - ilog2(chunk_size);
3844a9671a0SJoel Fernandes 
385ba110db8SJoel Fernandes 	BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);
3864a9671a0SJoel Fernandes 
387ba110db8SJoel Fernandes 	mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
3884a9671a0SJoel Fernandes 				       sizeof(*mm->free_trees),
3894a9671a0SJoel Fernandes 				       GFP_KERNEL);
3904a9671a0SJoel Fernandes 	if (!mm->free_trees)
3914a9671a0SJoel Fernandes 		return -ENOMEM;
3924a9671a0SJoel Fernandes 
3934a9671a0SJoel Fernandes 	for_each_free_tree(i) {
3944a9671a0SJoel Fernandes 		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
3954a9671a0SJoel Fernandes 						  sizeof(struct rb_root),
3964a9671a0SJoel Fernandes 						  GFP_KERNEL);
3974a9671a0SJoel Fernandes 		if (!mm->free_trees[i])
3984a9671a0SJoel Fernandes 			goto out_free_tree;
3994a9671a0SJoel Fernandes 
4004a9671a0SJoel Fernandes 		for (j = 0; j <= mm->max_order; ++j)
4014a9671a0SJoel Fernandes 			mm->free_trees[i][j] = RB_ROOT;
4024a9671a0SJoel Fernandes 	}
4034a9671a0SJoel Fernandes 
4044a9671a0SJoel Fernandes 	mm->n_roots = hweight64(size);
4054a9671a0SJoel Fernandes 
4064a9671a0SJoel Fernandes 	mm->roots = kmalloc_array(mm->n_roots,
407ba110db8SJoel Fernandes 				  sizeof(struct gpu_buddy_block *),
4084a9671a0SJoel Fernandes 				  GFP_KERNEL);
4094a9671a0SJoel Fernandes 	if (!mm->roots)
4104a9671a0SJoel Fernandes 		goto out_free_tree;
4114a9671a0SJoel Fernandes 
4124a9671a0SJoel Fernandes 	/*
4134a9671a0SJoel Fernandes 	 * Split into power-of-two blocks, in case we are given a size that is
4144a9671a0SJoel Fernandes 	 * not itself a power-of-two.
4154a9671a0SJoel Fernandes 	 */
4164a9671a0SJoel Fernandes 	do {
417ba110db8SJoel Fernandes 		struct gpu_buddy_block *root;
4184a9671a0SJoel Fernandes 		unsigned int order;
4194a9671a0SJoel Fernandes 		u64 root_size;
4204a9671a0SJoel Fernandes 
4214a9671a0SJoel Fernandes 		order = ilog2(size) - ilog2(chunk_size);
4224a9671a0SJoel Fernandes 		root_size = chunk_size << order;
4234a9671a0SJoel Fernandes 
424ba110db8SJoel Fernandes 		root = gpu_block_alloc(mm, NULL, order, offset);
4254a9671a0SJoel Fernandes 		if (!root)
4264a9671a0SJoel Fernandes 			goto out_free_roots;
4274a9671a0SJoel Fernandes 
4284a9671a0SJoel Fernandes 		mark_free(mm, root);
4294a9671a0SJoel Fernandes 
4304a9671a0SJoel Fernandes 		BUG_ON(root_count > mm->max_order);
431ba110db8SJoel Fernandes 		BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);
4324a9671a0SJoel Fernandes 
4334a9671a0SJoel Fernandes 		mm->roots[root_count] = root;
4344a9671a0SJoel Fernandes 
4354a9671a0SJoel Fernandes 		offset += root_size;
4364a9671a0SJoel Fernandes 		size -= root_size;
4374a9671a0SJoel Fernandes 		root_count++;
4384a9671a0SJoel Fernandes 	} while (size);
4394a9671a0SJoel Fernandes 
4404a9671a0SJoel Fernandes 	return 0;
4414a9671a0SJoel Fernandes 
4424a9671a0SJoel Fernandes out_free_roots:
4434a9671a0SJoel Fernandes 	while (root_count--)
444ba110db8SJoel Fernandes 		gpu_block_free(mm, mm->roots[root_count]);
4454a9671a0SJoel Fernandes 	kfree(mm->roots);
4464a9671a0SJoel Fernandes out_free_tree:
4474a9671a0SJoel Fernandes 	while (i--)
4484a9671a0SJoel Fernandes 		kfree(mm->free_trees[i]);
4494a9671a0SJoel Fernandes 	kfree(mm->free_trees);
4504a9671a0SJoel Fernandes 	return -ENOMEM;
4514a9671a0SJoel Fernandes }
452ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_init);
4534a9671a0SJoel Fernandes 
4544a9671a0SJoel Fernandes /**
455ba110db8SJoel Fernandes  * gpu_buddy_fini - tear down the memory manager
4564a9671a0SJoel Fernandes  *
457ba110db8SJoel Fernandes  * @mm: GPU buddy manager to free
4584a9671a0SJoel Fernandes  *
4594a9671a0SJoel Fernandes  * Cleanup memory manager resources and the freetree
4604a9671a0SJoel Fernandes  */
461ba110db8SJoel Fernandes void gpu_buddy_fini(struct gpu_buddy *mm)
4624a9671a0SJoel Fernandes {
4634a9671a0SJoel Fernandes 	u64 root_size, size, start;
4644a9671a0SJoel Fernandes 	unsigned int order;
4654a9671a0SJoel Fernandes 	int i;
4664a9671a0SJoel Fernandes 
4674a9671a0SJoel Fernandes 	size = mm->size;
4684a9671a0SJoel Fernandes 
4694a9671a0SJoel Fernandes 	for (i = 0; i < mm->n_roots; ++i) {
4704a9671a0SJoel Fernandes 		order = ilog2(size) - ilog2(mm->chunk_size);
471ba110db8SJoel Fernandes 		start = gpu_buddy_block_offset(mm->roots[i]);
4724a9671a0SJoel Fernandes 		__force_merge(mm, start, start + size, order);
4734a9671a0SJoel Fernandes 
474abdcd4c3SSanjay Yadav 		gpu_buddy_assert(gpu_buddy_block_is_free(mm->roots[i]));
4754a9671a0SJoel Fernandes 
476ba110db8SJoel Fernandes 		gpu_block_free(mm, mm->roots[i]);
4774a9671a0SJoel Fernandes 
4784a9671a0SJoel Fernandes 		root_size = mm->chunk_size << order;
4794a9671a0SJoel Fernandes 		size -= root_size;
4804a9671a0SJoel Fernandes 	}
4814a9671a0SJoel Fernandes 
482abdcd4c3SSanjay Yadav 	gpu_buddy_assert(mm->avail == mm->size);
4834a9671a0SJoel Fernandes 
4844a9671a0SJoel Fernandes 	for_each_free_tree(i)
4854a9671a0SJoel Fernandes 		kfree(mm->free_trees[i]);
4864a9671a0SJoel Fernandes 	kfree(mm->free_trees);
4874a9671a0SJoel Fernandes 	kfree(mm->roots);
4884a9671a0SJoel Fernandes }
489ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_fini);
4904a9671a0SJoel Fernandes 
491ba110db8SJoel Fernandes static int split_block(struct gpu_buddy *mm,
492ba110db8SJoel Fernandes 		       struct gpu_buddy_block *block)
4934a9671a0SJoel Fernandes {
494ba110db8SJoel Fernandes 	unsigned int block_order = gpu_buddy_block_order(block) - 1;
495ba110db8SJoel Fernandes 	u64 offset = gpu_buddy_block_offset(block);
4964a9671a0SJoel Fernandes 
497ba110db8SJoel Fernandes 	BUG_ON(!gpu_buddy_block_is_free(block));
498ba110db8SJoel Fernandes 	BUG_ON(!gpu_buddy_block_order(block));
4994a9671a0SJoel Fernandes 
500ba110db8SJoel Fernandes 	block->left = gpu_block_alloc(mm, block, block_order, offset);
5014a9671a0SJoel Fernandes 	if (!block->left)
5024a9671a0SJoel Fernandes 		return -ENOMEM;
5034a9671a0SJoel Fernandes 
504ba110db8SJoel Fernandes 	block->right = gpu_block_alloc(mm, block, block_order,
5054a9671a0SJoel Fernandes 				       offset + (mm->chunk_size << block_order));
5064a9671a0SJoel Fernandes 	if (!block->right) {
507ba110db8SJoel Fernandes 		gpu_block_free(mm, block->left);
5084a9671a0SJoel Fernandes 		return -ENOMEM;
5094a9671a0SJoel Fernandes 	}
5104a9671a0SJoel Fernandes 
5114a9671a0SJoel Fernandes 	mark_split(mm, block);
5124a9671a0SJoel Fernandes 
513ba110db8SJoel Fernandes 	if (gpu_buddy_block_is_clear(block)) {
5144a9671a0SJoel Fernandes 		mark_cleared(block->left);
5154a9671a0SJoel Fernandes 		mark_cleared(block->right);
5164a9671a0SJoel Fernandes 		clear_reset(block);
5174a9671a0SJoel Fernandes 	}
5184a9671a0SJoel Fernandes 
5194a9671a0SJoel Fernandes 	mark_free(mm, block->left);
5204a9671a0SJoel Fernandes 	mark_free(mm, block->right);
5214a9671a0SJoel Fernandes 
5224a9671a0SJoel Fernandes 	return 0;
5234a9671a0SJoel Fernandes }
5244a9671a0SJoel Fernandes 
5254a9671a0SJoel Fernandes /**
526ba110db8SJoel Fernandes  * gpu_buddy_reset_clear - reset blocks clear state
5274a9671a0SJoel Fernandes  *
528ba110db8SJoel Fernandes  * @mm: GPU buddy manager
5294a9671a0SJoel Fernandes  * @is_clear: blocks clear state
5304a9671a0SJoel Fernandes  *
5314a9671a0SJoel Fernandes  * Reset the clear state based on @is_clear value for each block
5324a9671a0SJoel Fernandes  * in the freetree.
5334a9671a0SJoel Fernandes  */
534ba110db8SJoel Fernandes void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
5354a9671a0SJoel Fernandes {
536ba110db8SJoel Fernandes 	enum gpu_buddy_free_tree src_tree, dst_tree;
5374a9671a0SJoel Fernandes 	u64 root_size, size, start;
5384a9671a0SJoel Fernandes 	unsigned int order;
5394a9671a0SJoel Fernandes 	int i;
5404a9671a0SJoel Fernandes 
5414a9671a0SJoel Fernandes 	size = mm->size;
5424a9671a0SJoel Fernandes 	for (i = 0; i < mm->n_roots; ++i) {
5434a9671a0SJoel Fernandes 		order = ilog2(size) - ilog2(mm->chunk_size);
544ba110db8SJoel Fernandes 		start = gpu_buddy_block_offset(mm->roots[i]);
5454a9671a0SJoel Fernandes 		__force_merge(mm, start, start + size, order);
5464a9671a0SJoel Fernandes 
5474a9671a0SJoel Fernandes 		root_size = mm->chunk_size << order;
5484a9671a0SJoel Fernandes 		size -= root_size;
5494a9671a0SJoel Fernandes 	}
5504a9671a0SJoel Fernandes 
551ba110db8SJoel Fernandes 	src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
552ba110db8SJoel Fernandes 	dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
5534a9671a0SJoel Fernandes 
5544a9671a0SJoel Fernandes 	for (i = 0; i <= mm->max_order; ++i) {
5554a9671a0SJoel Fernandes 		struct rb_root *root = &mm->free_trees[src_tree][i];
556ba110db8SJoel Fernandes 		struct gpu_buddy_block *block, *tmp;
5574a9671a0SJoel Fernandes 
5584a9671a0SJoel Fernandes 		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
5594a9671a0SJoel Fernandes 			rbtree_remove(mm, block);
5604a9671a0SJoel Fernandes 			if (is_clear) {
5614a9671a0SJoel Fernandes 				mark_cleared(block);
562ba110db8SJoel Fernandes 				mm->clear_avail += gpu_buddy_block_size(mm, block);
5634a9671a0SJoel Fernandes 			} else {
5644a9671a0SJoel Fernandes 				clear_reset(block);
565ba110db8SJoel Fernandes 				mm->clear_avail -= gpu_buddy_block_size(mm, block);
5664a9671a0SJoel Fernandes 			}
5674a9671a0SJoel Fernandes 
5684a9671a0SJoel Fernandes 			rbtree_insert(mm, block, dst_tree);
5694a9671a0SJoel Fernandes 		}
5704a9671a0SJoel Fernandes 	}
5714a9671a0SJoel Fernandes }
572ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_reset_clear);
5734a9671a0SJoel Fernandes 
5744a9671a0SJoel Fernandes /**
575ba110db8SJoel Fernandes  * gpu_buddy_free_block - free a block
5764a9671a0SJoel Fernandes  *
577ba110db8SJoel Fernandes  * @mm: GPU buddy manager
5784a9671a0SJoel Fernandes  * @block: block to be freed
5794a9671a0SJoel Fernandes  */
580ba110db8SJoel Fernandes void gpu_buddy_free_block(struct gpu_buddy *mm,
581ba110db8SJoel Fernandes 			  struct gpu_buddy_block *block)
5824a9671a0SJoel Fernandes {
583ba110db8SJoel Fernandes 	BUG_ON(!gpu_buddy_block_is_allocated(block));
584ba110db8SJoel Fernandes 	mm->avail += gpu_buddy_block_size(mm, block);
585ba110db8SJoel Fernandes 	if (gpu_buddy_block_is_clear(block))
586ba110db8SJoel Fernandes 		mm->clear_avail += gpu_buddy_block_size(mm, block);
5874a9671a0SJoel Fernandes 
588ba110db8SJoel Fernandes 	__gpu_buddy_free(mm, block, false);
5894a9671a0SJoel Fernandes }
590ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_free_block);
5914a9671a0SJoel Fernandes 
592ba110db8SJoel Fernandes static void __gpu_buddy_free_list(struct gpu_buddy *mm,
5934a9671a0SJoel Fernandes 				  struct list_head *objects,
5944a9671a0SJoel Fernandes 				  bool mark_clear,
5954a9671a0SJoel Fernandes 				  bool mark_dirty)
5964a9671a0SJoel Fernandes {
597ba110db8SJoel Fernandes 	struct gpu_buddy_block *block, *on;
5984a9671a0SJoel Fernandes 
599abdcd4c3SSanjay Yadav 	gpu_buddy_assert(!(mark_dirty && mark_clear));
6004a9671a0SJoel Fernandes 
6014a9671a0SJoel Fernandes 	list_for_each_entry_safe(block, on, objects, link) {
6024a9671a0SJoel Fernandes 		if (mark_clear)
6034a9671a0SJoel Fernandes 			mark_cleared(block);
6044a9671a0SJoel Fernandes 		else if (mark_dirty)
6054a9671a0SJoel Fernandes 			clear_reset(block);
606ba110db8SJoel Fernandes 		gpu_buddy_free_block(mm, block);
6074a9671a0SJoel Fernandes 		cond_resched();
6084a9671a0SJoel Fernandes 	}
6094a9671a0SJoel Fernandes 	INIT_LIST_HEAD(objects);
6104a9671a0SJoel Fernandes }
6114a9671a0SJoel Fernandes 
612ba110db8SJoel Fernandes static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
6134a9671a0SJoel Fernandes 					 struct list_head *objects)
6144a9671a0SJoel Fernandes {
6154a9671a0SJoel Fernandes 	/*
6164a9671a0SJoel Fernandes 	 * Don't touch the clear/dirty bit, since allocation is still internal
6174a9671a0SJoel Fernandes 	 * at this point. For example we might have just failed part of the
6184a9671a0SJoel Fernandes 	 * allocation.
6194a9671a0SJoel Fernandes 	 */
620ba110db8SJoel Fernandes 	__gpu_buddy_free_list(mm, objects, false, false);
6214a9671a0SJoel Fernandes }
6224a9671a0SJoel Fernandes 
6234a9671a0SJoel Fernandes /**
624ba110db8SJoel Fernandes  * gpu_buddy_free_list - free blocks
6254a9671a0SJoel Fernandes  *
626ba110db8SJoel Fernandes  * @mm: GPU buddy manager
6274a9671a0SJoel Fernandes  * @objects: input list head to free blocks
628ba110db8SJoel Fernandes  * @flags: optional flags like GPU_BUDDY_CLEARED
6294a9671a0SJoel Fernandes  */
630ba110db8SJoel Fernandes void gpu_buddy_free_list(struct gpu_buddy *mm,
6314a9671a0SJoel Fernandes 			 struct list_head *objects,
6324a9671a0SJoel Fernandes 			 unsigned int flags)
6334a9671a0SJoel Fernandes {
634ba110db8SJoel Fernandes 	bool mark_clear = flags & GPU_BUDDY_CLEARED;
6354a9671a0SJoel Fernandes 
636ba110db8SJoel Fernandes 	__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
6374a9671a0SJoel Fernandes }
638ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_free_list);
6394a9671a0SJoel Fernandes 
640ba110db8SJoel Fernandes static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
6414a9671a0SJoel Fernandes {
642ba110db8SJoel Fernandes 	bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;
6434a9671a0SJoel Fernandes 
644ba110db8SJoel Fernandes 	return needs_clear != gpu_buddy_block_is_clear(block);
6454a9671a0SJoel Fernandes }
6464a9671a0SJoel Fernandes 
647ba110db8SJoel Fernandes static struct gpu_buddy_block *
648ba110db8SJoel Fernandes __alloc_range_bias(struct gpu_buddy *mm,
6494a9671a0SJoel Fernandes 		   u64 start, u64 end,
6504a9671a0SJoel Fernandes 		   unsigned int order,
6514a9671a0SJoel Fernandes 		   unsigned long flags,
6524a9671a0SJoel Fernandes 		   bool fallback)
6534a9671a0SJoel Fernandes {
6544a9671a0SJoel Fernandes 	u64 req_size = mm->chunk_size << order;
655ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
656ba110db8SJoel Fernandes 	struct gpu_buddy_block *buddy;
6574a9671a0SJoel Fernandes 	LIST_HEAD(dfs);
6584a9671a0SJoel Fernandes 	int err;
6594a9671a0SJoel Fernandes 	int i;
6604a9671a0SJoel Fernandes 
6614a9671a0SJoel Fernandes 	end = end - 1;
6624a9671a0SJoel Fernandes 
6634a9671a0SJoel Fernandes 	for (i = 0; i < mm->n_roots; ++i)
6644a9671a0SJoel Fernandes 		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
6654a9671a0SJoel Fernandes 
6664a9671a0SJoel Fernandes 	do {
6674a9671a0SJoel Fernandes 		u64 block_start;
6684a9671a0SJoel Fernandes 		u64 block_end;
6694a9671a0SJoel Fernandes 
6704a9671a0SJoel Fernandes 		block = list_first_entry_or_null(&dfs,
671ba110db8SJoel Fernandes 						 struct gpu_buddy_block,
6724a9671a0SJoel Fernandes 						 tmp_link);
6734a9671a0SJoel Fernandes 		if (!block)
6744a9671a0SJoel Fernandes 			break;
6754a9671a0SJoel Fernandes 
6764a9671a0SJoel Fernandes 		list_del(&block->tmp_link);
6774a9671a0SJoel Fernandes 
678ba110db8SJoel Fernandes 		if (gpu_buddy_block_order(block) < order)
6794a9671a0SJoel Fernandes 			continue;
6804a9671a0SJoel Fernandes 
681ba110db8SJoel Fernandes 		block_start = gpu_buddy_block_offset(block);
682ba110db8SJoel Fernandes 		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
6834a9671a0SJoel Fernandes 
6844a9671a0SJoel Fernandes 		if (!overlaps(start, end, block_start, block_end))
6854a9671a0SJoel Fernandes 			continue;
6864a9671a0SJoel Fernandes 
687ba110db8SJoel Fernandes 		if (gpu_buddy_block_is_allocated(block))
6884a9671a0SJoel Fernandes 			continue;
6894a9671a0SJoel Fernandes 
6904a9671a0SJoel Fernandes 		if (block_start < start || block_end > end) {
6914a9671a0SJoel Fernandes 			u64 adjusted_start = max(block_start, start);
6924a9671a0SJoel Fernandes 			u64 adjusted_end = min(block_end, end);
6934a9671a0SJoel Fernandes 
6944a9671a0SJoel Fernandes 			if (round_down(adjusted_end + 1, req_size) <=
6954a9671a0SJoel Fernandes 			    round_up(adjusted_start, req_size))
6964a9671a0SJoel Fernandes 				continue;
6974a9671a0SJoel Fernandes 		}
6984a9671a0SJoel Fernandes 
6994a9671a0SJoel Fernandes 		if (!fallback && block_incompatible(block, flags))
7004a9671a0SJoel Fernandes 			continue;
7014a9671a0SJoel Fernandes 
7024a9671a0SJoel Fernandes 		if (contains(start, end, block_start, block_end) &&
703ba110db8SJoel Fernandes 		    order == gpu_buddy_block_order(block)) {
7044a9671a0SJoel Fernandes 			/*
7054a9671a0SJoel Fernandes 			 * Find the free block within the range.
7064a9671a0SJoel Fernandes 			 */
707ba110db8SJoel Fernandes 			if (gpu_buddy_block_is_free(block))
7084a9671a0SJoel Fernandes 				return block;
7094a9671a0SJoel Fernandes 
7104a9671a0SJoel Fernandes 			continue;
7114a9671a0SJoel Fernandes 		}
7124a9671a0SJoel Fernandes 
713ba110db8SJoel Fernandes 		if (!gpu_buddy_block_is_split(block)) {
7144a9671a0SJoel Fernandes 			err = split_block(mm, block);
7154a9671a0SJoel Fernandes 			if (unlikely(err))
7164a9671a0SJoel Fernandes 				goto err_undo;
7174a9671a0SJoel Fernandes 		}
7184a9671a0SJoel Fernandes 
7194a9671a0SJoel Fernandes 		list_add(&block->right->tmp_link, &dfs);
7204a9671a0SJoel Fernandes 		list_add(&block->left->tmp_link, &dfs);
7214a9671a0SJoel Fernandes 	} while (1);
7224a9671a0SJoel Fernandes 
7234a9671a0SJoel Fernandes 	return ERR_PTR(-ENOSPC);
7244a9671a0SJoel Fernandes 
7254a9671a0SJoel Fernandes err_undo:
7264a9671a0SJoel Fernandes 	/*
7274a9671a0SJoel Fernandes 	 * We really don't want to leave around a bunch of split blocks, since
7284a9671a0SJoel Fernandes 	 * bigger is better, so make sure we merge everything back before we
7294a9671a0SJoel Fernandes 	 * free the allocated blocks.
7304a9671a0SJoel Fernandes 	 */
7314a9671a0SJoel Fernandes 	buddy = __get_buddy(block);
7324a9671a0SJoel Fernandes 	if (buddy &&
733ba110db8SJoel Fernandes 	    (gpu_buddy_block_is_free(block) &&
734ba110db8SJoel Fernandes 	     gpu_buddy_block_is_free(buddy)))
735ba110db8SJoel Fernandes 		__gpu_buddy_free(mm, block, false);
7364a9671a0SJoel Fernandes 	return ERR_PTR(err);
7374a9671a0SJoel Fernandes }
7384a9671a0SJoel Fernandes 
739ba110db8SJoel Fernandes static struct gpu_buddy_block *
740ba110db8SJoel Fernandes __gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
7414a9671a0SJoel Fernandes 			     u64 start, u64 end,
7424a9671a0SJoel Fernandes 			     unsigned int order,
7434a9671a0SJoel Fernandes 			     unsigned long flags)
7444a9671a0SJoel Fernandes {
745ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
7464a9671a0SJoel Fernandes 	bool fallback = false;
7474a9671a0SJoel Fernandes 
7484a9671a0SJoel Fernandes 	block = __alloc_range_bias(mm, start, end, order,
7494a9671a0SJoel Fernandes 				   flags, fallback);
7504a9671a0SJoel Fernandes 	if (IS_ERR(block))
7514a9671a0SJoel Fernandes 		return __alloc_range_bias(mm, start, end, order,
7524a9671a0SJoel Fernandes 					  flags, !fallback);
7534a9671a0SJoel Fernandes 
7544a9671a0SJoel Fernandes 	return block;
7554a9671a0SJoel Fernandes }
7564a9671a0SJoel Fernandes 
757ba110db8SJoel Fernandes static struct gpu_buddy_block *
758ba110db8SJoel Fernandes get_maxblock(struct gpu_buddy *mm,
7594a9671a0SJoel Fernandes 	     unsigned int order,
760ba110db8SJoel Fernandes 	     enum gpu_buddy_free_tree tree)
7614a9671a0SJoel Fernandes {
762ba110db8SJoel Fernandes 	struct gpu_buddy_block *max_block = NULL, *block = NULL;
7634a9671a0SJoel Fernandes 	struct rb_root *root;
7644a9671a0SJoel Fernandes 	unsigned int i;
7654a9671a0SJoel Fernandes 
7664a9671a0SJoel Fernandes 	for (i = order; i <= mm->max_order; ++i) {
7674a9671a0SJoel Fernandes 		root = &mm->free_trees[tree][i];
7684a9671a0SJoel Fernandes 		block = rbtree_last_free_block(root);
7694a9671a0SJoel Fernandes 		if (!block)
7704a9671a0SJoel Fernandes 			continue;
7714a9671a0SJoel Fernandes 
7724a9671a0SJoel Fernandes 		if (!max_block) {
7734a9671a0SJoel Fernandes 			max_block = block;
7744a9671a0SJoel Fernandes 			continue;
7754a9671a0SJoel Fernandes 		}
7764a9671a0SJoel Fernandes 
777ba110db8SJoel Fernandes 		if (gpu_buddy_block_offset(block) >
778ba110db8SJoel Fernandes 		    gpu_buddy_block_offset(max_block)) {
7794a9671a0SJoel Fernandes 			max_block = block;
7804a9671a0SJoel Fernandes 		}
7814a9671a0SJoel Fernandes 	}
7824a9671a0SJoel Fernandes 
7834a9671a0SJoel Fernandes 	return max_block;
7844a9671a0SJoel Fernandes }
7854a9671a0SJoel Fernandes 
786ba110db8SJoel Fernandes static struct gpu_buddy_block *
787ba110db8SJoel Fernandes alloc_from_freetree(struct gpu_buddy *mm,
7884a9671a0SJoel Fernandes 		    unsigned int order,
7894a9671a0SJoel Fernandes 		    unsigned long flags)
7904a9671a0SJoel Fernandes {
791ba110db8SJoel Fernandes 	struct gpu_buddy_block *block = NULL;
7924a9671a0SJoel Fernandes 	struct rb_root *root;
793ba110db8SJoel Fernandes 	enum gpu_buddy_free_tree tree;
7944a9671a0SJoel Fernandes 	unsigned int tmp;
7954a9671a0SJoel Fernandes 	int err;
7964a9671a0SJoel Fernandes 
797ba110db8SJoel Fernandes 	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
798ba110db8SJoel Fernandes 		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
7994a9671a0SJoel Fernandes 
800ba110db8SJoel Fernandes 	if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
8014a9671a0SJoel Fernandes 		block = get_maxblock(mm, order, tree);
8024a9671a0SJoel Fernandes 		if (block)
8034a9671a0SJoel Fernandes 			/* Store the obtained block order */
804ba110db8SJoel Fernandes 			tmp = gpu_buddy_block_order(block);
8054a9671a0SJoel Fernandes 	} else {
8064a9671a0SJoel Fernandes 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
8074a9671a0SJoel Fernandes 			/* Get RB tree root for this order and tree */
8084a9671a0SJoel Fernandes 			root = &mm->free_trees[tree][tmp];
8094a9671a0SJoel Fernandes 			block = rbtree_last_free_block(root);
8104a9671a0SJoel Fernandes 			if (block)
8114a9671a0SJoel Fernandes 				break;
8124a9671a0SJoel Fernandes 		}
8134a9671a0SJoel Fernandes 	}
8144a9671a0SJoel Fernandes 
8154a9671a0SJoel Fernandes 	if (!block) {
8164a9671a0SJoel Fernandes 		/* Try allocating from the other tree */
817ba110db8SJoel Fernandes 		tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
818ba110db8SJoel Fernandes 			GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
8194a9671a0SJoel Fernandes 
8204a9671a0SJoel Fernandes 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
8214a9671a0SJoel Fernandes 			root = &mm->free_trees[tree][tmp];
8224a9671a0SJoel Fernandes 			block = rbtree_last_free_block(root);
8234a9671a0SJoel Fernandes 			if (block)
8244a9671a0SJoel Fernandes 				break;
8254a9671a0SJoel Fernandes 		}
8264a9671a0SJoel Fernandes 
8274a9671a0SJoel Fernandes 		if (!block)
8284a9671a0SJoel Fernandes 			return ERR_PTR(-ENOSPC);
8294a9671a0SJoel Fernandes 	}
8304a9671a0SJoel Fernandes 
831ba110db8SJoel Fernandes 	BUG_ON(!gpu_buddy_block_is_free(block));
8324a9671a0SJoel Fernandes 
8334a9671a0SJoel Fernandes 	while (tmp != order) {
8344a9671a0SJoel Fernandes 		err = split_block(mm, block);
8354a9671a0SJoel Fernandes 		if (unlikely(err))
8364a9671a0SJoel Fernandes 			goto err_undo;
8374a9671a0SJoel Fernandes 
8384a9671a0SJoel Fernandes 		block = block->right;
8394a9671a0SJoel Fernandes 		tmp--;
8404a9671a0SJoel Fernandes 	}
8414a9671a0SJoel Fernandes 	return block;
8424a9671a0SJoel Fernandes 
8434a9671a0SJoel Fernandes err_undo:
8444a9671a0SJoel Fernandes 	if (tmp != order)
845ba110db8SJoel Fernandes 		__gpu_buddy_free(mm, block, false);
8464a9671a0SJoel Fernandes 	return ERR_PTR(err);
8474a9671a0SJoel Fernandes }
8484a9671a0SJoel Fernandes 
849*493740d7SArunpravin Paneer Selvam static bool
850*493740d7SArunpravin Paneer Selvam gpu_buddy_can_offset_align(u64 size, u64 min_block_size)
851*493740d7SArunpravin Paneer Selvam {
852*493740d7SArunpravin Paneer Selvam 	return size < min_block_size && is_power_of_2(size);
853*493740d7SArunpravin Paneer Selvam }
854*493740d7SArunpravin Paneer Selvam 
855*493740d7SArunpravin Paneer Selvam static bool gpu_buddy_subtree_can_satisfy(struct rb_node *node,
856*493740d7SArunpravin Paneer Selvam 					  unsigned int alignment)
857*493740d7SArunpravin Paneer Selvam {
858*493740d7SArunpravin Paneer Selvam 	struct gpu_buddy_block *block;
859*493740d7SArunpravin Paneer Selvam 
860*493740d7SArunpravin Paneer Selvam 	block = rbtree_get_free_block(node);
861*493740d7SArunpravin Paneer Selvam 	return block->subtree_max_alignment >= alignment;
862*493740d7SArunpravin Paneer Selvam }
863*493740d7SArunpravin Paneer Selvam 
864*493740d7SArunpravin Paneer Selvam static struct gpu_buddy_block *
865*493740d7SArunpravin Paneer Selvam gpu_buddy_find_block_aligned(struct gpu_buddy *mm,
866*493740d7SArunpravin Paneer Selvam 			     enum gpu_buddy_free_tree tree,
867*493740d7SArunpravin Paneer Selvam 			     unsigned int order,
868*493740d7SArunpravin Paneer Selvam 			     unsigned int alignment,
869*493740d7SArunpravin Paneer Selvam 			     unsigned long flags)
870*493740d7SArunpravin Paneer Selvam {
871*493740d7SArunpravin Paneer Selvam 	struct rb_root *root = &mm->free_trees[tree][order];
872*493740d7SArunpravin Paneer Selvam 	struct rb_node *rb = root->rb_node;
873*493740d7SArunpravin Paneer Selvam 
874*493740d7SArunpravin Paneer Selvam 	while (rb) {
875*493740d7SArunpravin Paneer Selvam 		struct gpu_buddy_block *block = rbtree_get_free_block(rb);
876*493740d7SArunpravin Paneer Selvam 		struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
877*493740d7SArunpravin Paneer Selvam 
878*493740d7SArunpravin Paneer Selvam 		if (right_node) {
879*493740d7SArunpravin Paneer Selvam 			if (gpu_buddy_subtree_can_satisfy(right_node, alignment)) {
880*493740d7SArunpravin Paneer Selvam 				rb = right_node;
881*493740d7SArunpravin Paneer Selvam 				continue;
882*493740d7SArunpravin Paneer Selvam 			}
883*493740d7SArunpravin Paneer Selvam 		}
884*493740d7SArunpravin Paneer Selvam 
885*493740d7SArunpravin Paneer Selvam 		if (gpu_buddy_block_offset_alignment(block) >= alignment)
886*493740d7SArunpravin Paneer Selvam 			return block;
887*493740d7SArunpravin Paneer Selvam 
888*493740d7SArunpravin Paneer Selvam 		if (left_node) {
889*493740d7SArunpravin Paneer Selvam 			if (gpu_buddy_subtree_can_satisfy(left_node, alignment)) {
890*493740d7SArunpravin Paneer Selvam 				rb = left_node;
891*493740d7SArunpravin Paneer Selvam 				continue;
892*493740d7SArunpravin Paneer Selvam 			}
893*493740d7SArunpravin Paneer Selvam 		}
894*493740d7SArunpravin Paneer Selvam 
895*493740d7SArunpravin Paneer Selvam 		break;
896*493740d7SArunpravin Paneer Selvam 	}
897*493740d7SArunpravin Paneer Selvam 
898*493740d7SArunpravin Paneer Selvam 	return NULL;
899*493740d7SArunpravin Paneer Selvam }
900*493740d7SArunpravin Paneer Selvam 
901*493740d7SArunpravin Paneer Selvam static struct gpu_buddy_block *
902*493740d7SArunpravin Paneer Selvam gpu_buddy_offset_aligned_allocation(struct gpu_buddy *mm,
903*493740d7SArunpravin Paneer Selvam 				    u64 size,
904*493740d7SArunpravin Paneer Selvam 				    u64 min_block_size,
905*493740d7SArunpravin Paneer Selvam 				    unsigned long flags)
906*493740d7SArunpravin Paneer Selvam {
907*493740d7SArunpravin Paneer Selvam 	struct gpu_buddy_block *block = NULL;
908*493740d7SArunpravin Paneer Selvam 	unsigned int order, tmp, alignment;
909*493740d7SArunpravin Paneer Selvam 	struct gpu_buddy_block *buddy;
910*493740d7SArunpravin Paneer Selvam 	enum gpu_buddy_free_tree tree;
911*493740d7SArunpravin Paneer Selvam 	unsigned long pages;
912*493740d7SArunpravin Paneer Selvam 	int err;
913*493740d7SArunpravin Paneer Selvam 
914*493740d7SArunpravin Paneer Selvam 	alignment = ilog2(min_block_size);
915*493740d7SArunpravin Paneer Selvam 	pages = size >> ilog2(mm->chunk_size);
916*493740d7SArunpravin Paneer Selvam 	order = fls(pages) - 1;
917*493740d7SArunpravin Paneer Selvam 
918*493740d7SArunpravin Paneer Selvam 	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
919*493740d7SArunpravin Paneer Selvam 		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
920*493740d7SArunpravin Paneer Selvam 
921*493740d7SArunpravin Paneer Selvam 	for (tmp = order; tmp <= mm->max_order; ++tmp) {
922*493740d7SArunpravin Paneer Selvam 		block = gpu_buddy_find_block_aligned(mm, tree, tmp,
923*493740d7SArunpravin Paneer Selvam 						     alignment, flags);
924*493740d7SArunpravin Paneer Selvam 		if (!block) {
925*493740d7SArunpravin Paneer Selvam 			tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
926*493740d7SArunpravin Paneer Selvam 				GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
927*493740d7SArunpravin Paneer Selvam 			block = gpu_buddy_find_block_aligned(mm, tree, tmp,
928*493740d7SArunpravin Paneer Selvam 							     alignment, flags);
929*493740d7SArunpravin Paneer Selvam 		}
930*493740d7SArunpravin Paneer Selvam 
931*493740d7SArunpravin Paneer Selvam 		if (block)
932*493740d7SArunpravin Paneer Selvam 			break;
933*493740d7SArunpravin Paneer Selvam 	}
934*493740d7SArunpravin Paneer Selvam 
935*493740d7SArunpravin Paneer Selvam 	if (!block)
936*493740d7SArunpravin Paneer Selvam 		return ERR_PTR(-ENOSPC);
937*493740d7SArunpravin Paneer Selvam 
938*493740d7SArunpravin Paneer Selvam 	while (gpu_buddy_block_order(block) > order) {
939*493740d7SArunpravin Paneer Selvam 		struct gpu_buddy_block *left, *right;
940*493740d7SArunpravin Paneer Selvam 
941*493740d7SArunpravin Paneer Selvam 		err = split_block(mm, block);
942*493740d7SArunpravin Paneer Selvam 		if (unlikely(err))
943*493740d7SArunpravin Paneer Selvam 			goto err_undo;
944*493740d7SArunpravin Paneer Selvam 
945*493740d7SArunpravin Paneer Selvam 		left  = block->left;
946*493740d7SArunpravin Paneer Selvam 		right = block->right;
947*493740d7SArunpravin Paneer Selvam 
948*493740d7SArunpravin Paneer Selvam 		if (gpu_buddy_block_offset_alignment(right) >= alignment)
949*493740d7SArunpravin Paneer Selvam 			block = right;
950*493740d7SArunpravin Paneer Selvam 		else
951*493740d7SArunpravin Paneer Selvam 			block = left;
952*493740d7SArunpravin Paneer Selvam 	}
953*493740d7SArunpravin Paneer Selvam 
954*493740d7SArunpravin Paneer Selvam 	return block;
955*493740d7SArunpravin Paneer Selvam 
956*493740d7SArunpravin Paneer Selvam err_undo:
957*493740d7SArunpravin Paneer Selvam 	/*
958*493740d7SArunpravin Paneer Selvam 	 * We really don't want to leave around a bunch of split blocks, since
959*493740d7SArunpravin Paneer Selvam 	 * bigger is better, so make sure we merge everything back before we
960*493740d7SArunpravin Paneer Selvam 	 * free the allocated blocks.
961*493740d7SArunpravin Paneer Selvam 	 */
962*493740d7SArunpravin Paneer Selvam 	buddy = __get_buddy(block);
963*493740d7SArunpravin Paneer Selvam 	if (buddy &&
964*493740d7SArunpravin Paneer Selvam 	    (gpu_buddy_block_is_free(block) &&
965*493740d7SArunpravin Paneer Selvam 	     gpu_buddy_block_is_free(buddy)))
966*493740d7SArunpravin Paneer Selvam 		__gpu_buddy_free(mm, block, false);
967*493740d7SArunpravin Paneer Selvam 	return ERR_PTR(err);
968*493740d7SArunpravin Paneer Selvam }
969*493740d7SArunpravin Paneer Selvam 
970ba110db8SJoel Fernandes static int __alloc_range(struct gpu_buddy *mm,
9714a9671a0SJoel Fernandes 			 struct list_head *dfs,
9724a9671a0SJoel Fernandes 			 u64 start, u64 size,
9734a9671a0SJoel Fernandes 			 struct list_head *blocks,
9744a9671a0SJoel Fernandes 			 u64 *total_allocated_on_err)
9754a9671a0SJoel Fernandes {
976ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
977ba110db8SJoel Fernandes 	struct gpu_buddy_block *buddy;
9784a9671a0SJoel Fernandes 	u64 total_allocated = 0;
9794a9671a0SJoel Fernandes 	LIST_HEAD(allocated);
9804a9671a0SJoel Fernandes 	u64 end;
9814a9671a0SJoel Fernandes 	int err;
9824a9671a0SJoel Fernandes 
9834a9671a0SJoel Fernandes 	end = start + size - 1;
9844a9671a0SJoel Fernandes 
9854a9671a0SJoel Fernandes 	do {
9864a9671a0SJoel Fernandes 		u64 block_start;
9874a9671a0SJoel Fernandes 		u64 block_end;
9884a9671a0SJoel Fernandes 
9894a9671a0SJoel Fernandes 		block = list_first_entry_or_null(dfs,
990ba110db8SJoel Fernandes 						 struct gpu_buddy_block,
9914a9671a0SJoel Fernandes 						 tmp_link);
9924a9671a0SJoel Fernandes 		if (!block)
9934a9671a0SJoel Fernandes 			break;
9944a9671a0SJoel Fernandes 
9954a9671a0SJoel Fernandes 		list_del(&block->tmp_link);
9964a9671a0SJoel Fernandes 
997ba110db8SJoel Fernandes 		block_start = gpu_buddy_block_offset(block);
998ba110db8SJoel Fernandes 		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
9994a9671a0SJoel Fernandes 
10004a9671a0SJoel Fernandes 		if (!overlaps(start, end, block_start, block_end))
10014a9671a0SJoel Fernandes 			continue;
10024a9671a0SJoel Fernandes 
1003ba110db8SJoel Fernandes 		if (gpu_buddy_block_is_allocated(block)) {
10044a9671a0SJoel Fernandes 			err = -ENOSPC;
10054a9671a0SJoel Fernandes 			goto err_free;
10064a9671a0SJoel Fernandes 		}
10074a9671a0SJoel Fernandes 
10084a9671a0SJoel Fernandes 		if (contains(start, end, block_start, block_end)) {
1009ba110db8SJoel Fernandes 			if (gpu_buddy_block_is_free(block)) {
10104a9671a0SJoel Fernandes 				mark_allocated(mm, block);
1011ba110db8SJoel Fernandes 				total_allocated += gpu_buddy_block_size(mm, block);
1012ba110db8SJoel Fernandes 				mm->avail -= gpu_buddy_block_size(mm, block);
1013ba110db8SJoel Fernandes 				if (gpu_buddy_block_is_clear(block))
1014ba110db8SJoel Fernandes 					mm->clear_avail -= gpu_buddy_block_size(mm, block);
10154a9671a0SJoel Fernandes 				list_add_tail(&block->link, &allocated);
10164a9671a0SJoel Fernandes 				continue;
10174a9671a0SJoel Fernandes 			} else if (!mm->clear_avail) {
10184a9671a0SJoel Fernandes 				err = -ENOSPC;
10194a9671a0SJoel Fernandes 				goto err_free;
10204a9671a0SJoel Fernandes 			}
10214a9671a0SJoel Fernandes 		}
10224a9671a0SJoel Fernandes 
1023ba110db8SJoel Fernandes 		if (!gpu_buddy_block_is_split(block)) {
10244a9671a0SJoel Fernandes 			err = split_block(mm, block);
10254a9671a0SJoel Fernandes 			if (unlikely(err))
10264a9671a0SJoel Fernandes 				goto err_undo;
10274a9671a0SJoel Fernandes 		}
10284a9671a0SJoel Fernandes 
10294a9671a0SJoel Fernandes 		list_add(&block->right->tmp_link, dfs);
10304a9671a0SJoel Fernandes 		list_add(&block->left->tmp_link, dfs);
10314a9671a0SJoel Fernandes 	} while (1);
10324a9671a0SJoel Fernandes 
10334a9671a0SJoel Fernandes 	if (total_allocated < size) {
10344a9671a0SJoel Fernandes 		err = -ENOSPC;
10354a9671a0SJoel Fernandes 		goto err_free;
10364a9671a0SJoel Fernandes 	}
10374a9671a0SJoel Fernandes 
10384a9671a0SJoel Fernandes 	list_splice_tail(&allocated, blocks);
10394a9671a0SJoel Fernandes 
10404a9671a0SJoel Fernandes 	return 0;
10414a9671a0SJoel Fernandes 
10424a9671a0SJoel Fernandes err_undo:
10434a9671a0SJoel Fernandes 	/*
10444a9671a0SJoel Fernandes 	 * We really don't want to leave around a bunch of split blocks, since
10454a9671a0SJoel Fernandes 	 * bigger is better, so make sure we merge everything back before we
10464a9671a0SJoel Fernandes 	 * free the allocated blocks.
10474a9671a0SJoel Fernandes 	 */
10484a9671a0SJoel Fernandes 	buddy = __get_buddy(block);
10494a9671a0SJoel Fernandes 	if (buddy &&
1050ba110db8SJoel Fernandes 	    (gpu_buddy_block_is_free(block) &&
1051ba110db8SJoel Fernandes 	     gpu_buddy_block_is_free(buddy)))
1052ba110db8SJoel Fernandes 		__gpu_buddy_free(mm, block, false);
10534a9671a0SJoel Fernandes 
10544a9671a0SJoel Fernandes err_free:
10554a9671a0SJoel Fernandes 	if (err == -ENOSPC && total_allocated_on_err) {
10564a9671a0SJoel Fernandes 		list_splice_tail(&allocated, blocks);
10574a9671a0SJoel Fernandes 		*total_allocated_on_err = total_allocated;
10584a9671a0SJoel Fernandes 	} else {
1059ba110db8SJoel Fernandes 		gpu_buddy_free_list_internal(mm, &allocated);
10604a9671a0SJoel Fernandes 	}
10614a9671a0SJoel Fernandes 
10624a9671a0SJoel Fernandes 	return err;
10634a9671a0SJoel Fernandes }
10644a9671a0SJoel Fernandes 
1065ba110db8SJoel Fernandes static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
10664a9671a0SJoel Fernandes 				   u64 start,
10674a9671a0SJoel Fernandes 				   u64 size,
10684a9671a0SJoel Fernandes 				   u64 *total_allocated_on_err,
10694a9671a0SJoel Fernandes 				   struct list_head *blocks)
10704a9671a0SJoel Fernandes {
10714a9671a0SJoel Fernandes 	LIST_HEAD(dfs);
10724a9671a0SJoel Fernandes 	int i;
10734a9671a0SJoel Fernandes 
10744a9671a0SJoel Fernandes 	for (i = 0; i < mm->n_roots; ++i)
10754a9671a0SJoel Fernandes 		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
10764a9671a0SJoel Fernandes 
10774a9671a0SJoel Fernandes 	return __alloc_range(mm, &dfs, start, size,
10784a9671a0SJoel Fernandes 			     blocks, total_allocated_on_err);
10794a9671a0SJoel Fernandes }
10804a9671a0SJoel Fernandes 
1081ba110db8SJoel Fernandes static int __alloc_contig_try_harder(struct gpu_buddy *mm,
10824a9671a0SJoel Fernandes 				     u64 size,
10834a9671a0SJoel Fernandes 				     u64 min_block_size,
10844a9671a0SJoel Fernandes 				     struct list_head *blocks)
10854a9671a0SJoel Fernandes {
10864a9671a0SJoel Fernandes 	u64 rhs_offset, lhs_offset, lhs_size, filled;
1087ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
10884a9671a0SJoel Fernandes 	unsigned int tree, order;
10894a9671a0SJoel Fernandes 	LIST_HEAD(blocks_lhs);
10904a9671a0SJoel Fernandes 	unsigned long pages;
10914a9671a0SJoel Fernandes 	u64 modify_size;
10924a9671a0SJoel Fernandes 	int err;
10934a9671a0SJoel Fernandes 
10944a9671a0SJoel Fernandes 	modify_size = rounddown_pow_of_two(size);
10954a9671a0SJoel Fernandes 	pages = modify_size >> ilog2(mm->chunk_size);
10964a9671a0SJoel Fernandes 	order = fls(pages) - 1;
10974a9671a0SJoel Fernandes 	if (order == 0)
10984a9671a0SJoel Fernandes 		return -ENOSPC;
10994a9671a0SJoel Fernandes 
11004a9671a0SJoel Fernandes 	for_each_free_tree(tree) {
11014a9671a0SJoel Fernandes 		struct rb_root *root;
11024a9671a0SJoel Fernandes 		struct rb_node *iter;
11034a9671a0SJoel Fernandes 
11044a9671a0SJoel Fernandes 		root = &mm->free_trees[tree][order];
11054a9671a0SJoel Fernandes 		if (rbtree_is_empty(root))
11064a9671a0SJoel Fernandes 			continue;
11074a9671a0SJoel Fernandes 
11084a9671a0SJoel Fernandes 		iter = rb_last(root);
11094a9671a0SJoel Fernandes 		while (iter) {
11104a9671a0SJoel Fernandes 			block = rbtree_get_free_block(iter);
11114a9671a0SJoel Fernandes 
11124a9671a0SJoel Fernandes 			/* Allocate blocks traversing RHS */
1113ba110db8SJoel Fernandes 			rhs_offset = gpu_buddy_block_offset(block);
1114ba110db8SJoel Fernandes 			err =  __gpu_buddy_alloc_range(mm, rhs_offset, size,
11154a9671a0SJoel Fernandes 						       &filled, blocks);
11164a9671a0SJoel Fernandes 			if (!err || err != -ENOSPC)
11174a9671a0SJoel Fernandes 				return err;
11184a9671a0SJoel Fernandes 
11194a9671a0SJoel Fernandes 			lhs_size = max((size - filled), min_block_size);
11204a9671a0SJoel Fernandes 			if (!IS_ALIGNED(lhs_size, min_block_size))
11214a9671a0SJoel Fernandes 				lhs_size = round_up(lhs_size, min_block_size);
11224a9671a0SJoel Fernandes 
11234a9671a0SJoel Fernandes 			/* Allocate blocks traversing LHS */
1124ba110db8SJoel Fernandes 			lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
1125ba110db8SJoel Fernandes 			err =  __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
11264a9671a0SJoel Fernandes 						       NULL, &blocks_lhs);
11274a9671a0SJoel Fernandes 			if (!err) {
11284a9671a0SJoel Fernandes 				list_splice(&blocks_lhs, blocks);
11294a9671a0SJoel Fernandes 				return 0;
11304a9671a0SJoel Fernandes 			} else if (err != -ENOSPC) {
1131ba110db8SJoel Fernandes 				gpu_buddy_free_list_internal(mm, blocks);
11324a9671a0SJoel Fernandes 				return err;
11334a9671a0SJoel Fernandes 			}
11344a9671a0SJoel Fernandes 			/* Free blocks for the next iteration */
1135ba110db8SJoel Fernandes 			gpu_buddy_free_list_internal(mm, blocks);
11364a9671a0SJoel Fernandes 
11374a9671a0SJoel Fernandes 			iter = rb_prev(iter);
11384a9671a0SJoel Fernandes 		}
11394a9671a0SJoel Fernandes 	}
11404a9671a0SJoel Fernandes 
11414a9671a0SJoel Fernandes 	return -ENOSPC;
11424a9671a0SJoel Fernandes }
11434a9671a0SJoel Fernandes 
11444a9671a0SJoel Fernandes /**
1145ba110db8SJoel Fernandes  * gpu_buddy_block_trim - free unused pages
11464a9671a0SJoel Fernandes  *
1147ba110db8SJoel Fernandes  * @mm: GPU buddy manager
11484a9671a0SJoel Fernandes  * @start: start address to begin the trimming.
11494a9671a0SJoel Fernandes  * @new_size: original size requested
11504a9671a0SJoel Fernandes  * @blocks: Input and output list of allocated blocks.
11514a9671a0SJoel Fernandes  * MUST contain single block as input to be trimmed.
11524a9671a0SJoel Fernandes  * On success will contain the newly allocated blocks
11534a9671a0SJoel Fernandes  * making up the @new_size. Blocks always appear in
11544a9671a0SJoel Fernandes  * ascending order
11554a9671a0SJoel Fernandes  *
11564a9671a0SJoel Fernandes  * For contiguous allocation, we round up the size to the nearest
11574a9671a0SJoel Fernandes  * power of two value, drivers consume *actual* size, so remaining
11584a9671a0SJoel Fernandes  * portions are unused and can be optionally freed with this function
11594a9671a0SJoel Fernandes  *
11604a9671a0SJoel Fernandes  * Returns:
11614a9671a0SJoel Fernandes  * 0 on success, error code on failure.
11624a9671a0SJoel Fernandes  */
1163ba110db8SJoel Fernandes int gpu_buddy_block_trim(struct gpu_buddy *mm,
11644a9671a0SJoel Fernandes 			 u64 *start,
11654a9671a0SJoel Fernandes 			 u64 new_size,
11664a9671a0SJoel Fernandes 			 struct list_head *blocks)
11674a9671a0SJoel Fernandes {
1168ba110db8SJoel Fernandes 	struct gpu_buddy_block *parent;
1169ba110db8SJoel Fernandes 	struct gpu_buddy_block *block;
11704a9671a0SJoel Fernandes 	u64 block_start, block_end;
11714a9671a0SJoel Fernandes 	LIST_HEAD(dfs);
11724a9671a0SJoel Fernandes 	u64 new_start;
11734a9671a0SJoel Fernandes 	int err;
11744a9671a0SJoel Fernandes 
11754a9671a0SJoel Fernandes 	if (!list_is_singular(blocks))
11764a9671a0SJoel Fernandes 		return -EINVAL;
11774a9671a0SJoel Fernandes 
11784a9671a0SJoel Fernandes 	block = list_first_entry(blocks,
1179ba110db8SJoel Fernandes 				 struct gpu_buddy_block,
11804a9671a0SJoel Fernandes 				 link);
11814a9671a0SJoel Fernandes 
1182ba110db8SJoel Fernandes 	block_start = gpu_buddy_block_offset(block);
1183ba110db8SJoel Fernandes 	block_end = block_start + gpu_buddy_block_size(mm, block);
11844a9671a0SJoel Fernandes 
1185ba110db8SJoel Fernandes 	if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
11864a9671a0SJoel Fernandes 		return -EINVAL;
11874a9671a0SJoel Fernandes 
1188ba110db8SJoel Fernandes 	if (new_size > gpu_buddy_block_size(mm, block))
11894a9671a0SJoel Fernandes 		return -EINVAL;
11904a9671a0SJoel Fernandes 
11914a9671a0SJoel Fernandes 	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
11924a9671a0SJoel Fernandes 		return -EINVAL;
11934a9671a0SJoel Fernandes 
1194ba110db8SJoel Fernandes 	if (new_size == gpu_buddy_block_size(mm, block))
11954a9671a0SJoel Fernandes 		return 0;
11964a9671a0SJoel Fernandes 
11974a9671a0SJoel Fernandes 	new_start = block_start;
11984a9671a0SJoel Fernandes 	if (start) {
11994a9671a0SJoel Fernandes 		new_start = *start;
12004a9671a0SJoel Fernandes 
12014a9671a0SJoel Fernandes 		if (new_start < block_start)
12024a9671a0SJoel Fernandes 			return -EINVAL;
12034a9671a0SJoel Fernandes 
12044a9671a0SJoel Fernandes 		if (!IS_ALIGNED(new_start, mm->chunk_size))
12054a9671a0SJoel Fernandes 			return -EINVAL;
12064a9671a0SJoel Fernandes 
12074a9671a0SJoel Fernandes 		if (range_overflows(new_start, new_size, block_end))
12084a9671a0SJoel Fernandes 			return -EINVAL;
12094a9671a0SJoel Fernandes 	}
12104a9671a0SJoel Fernandes 
12114a9671a0SJoel Fernandes 	list_del(&block->link);
12124a9671a0SJoel Fernandes 	mark_free(mm, block);
1213ba110db8SJoel Fernandes 	mm->avail += gpu_buddy_block_size(mm, block);
1214ba110db8SJoel Fernandes 	if (gpu_buddy_block_is_clear(block))
1215ba110db8SJoel Fernandes 		mm->clear_avail += gpu_buddy_block_size(mm, block);
12164a9671a0SJoel Fernandes 
12174a9671a0SJoel Fernandes 	/* Prevent recursively freeing this node */
12184a9671a0SJoel Fernandes 	parent = block->parent;
12194a9671a0SJoel Fernandes 	block->parent = NULL;
12204a9671a0SJoel Fernandes 
12214a9671a0SJoel Fernandes 	list_add(&block->tmp_link, &dfs);
12224a9671a0SJoel Fernandes 	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
12234a9671a0SJoel Fernandes 	if (err) {
12244a9671a0SJoel Fernandes 		mark_allocated(mm, block);
1225ba110db8SJoel Fernandes 		mm->avail -= gpu_buddy_block_size(mm, block);
1226ba110db8SJoel Fernandes 		if (gpu_buddy_block_is_clear(block))
1227ba110db8SJoel Fernandes 			mm->clear_avail -= gpu_buddy_block_size(mm, block);
12284a9671a0SJoel Fernandes 		list_add(&block->link, blocks);
12294a9671a0SJoel Fernandes 	}
12304a9671a0SJoel Fernandes 
12314a9671a0SJoel Fernandes 	block->parent = parent;
12324a9671a0SJoel Fernandes 	return err;
12334a9671a0SJoel Fernandes }
1234ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_block_trim);
12354a9671a0SJoel Fernandes 
1236ba110db8SJoel Fernandes static struct gpu_buddy_block *
1237ba110db8SJoel Fernandes __gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
12384a9671a0SJoel Fernandes 			 u64 start, u64 end,
1239*493740d7SArunpravin Paneer Selvam 			 u64 size, u64 min_block_size,
12404a9671a0SJoel Fernandes 			 unsigned int order,
12414a9671a0SJoel Fernandes 			 unsigned long flags)
12424a9671a0SJoel Fernandes {
1243ba110db8SJoel Fernandes 	if (flags & GPU_BUDDY_RANGE_ALLOCATION)
12444a9671a0SJoel Fernandes 		/* Allocate traversing within the range */
1245ba110db8SJoel Fernandes 		return  __gpu_buddy_alloc_range_bias(mm, start, end,
12464a9671a0SJoel Fernandes 						     order, flags);
1247*493740d7SArunpravin Paneer Selvam 	else if (size < min_block_size)
1248*493740d7SArunpravin Paneer Selvam 		/* Allocate from an offset-aligned region without size rounding */
1249*493740d7SArunpravin Paneer Selvam 		return gpu_buddy_offset_aligned_allocation(mm, size,
1250*493740d7SArunpravin Paneer Selvam 							   min_block_size,
1251*493740d7SArunpravin Paneer Selvam 							   flags);
12524a9671a0SJoel Fernandes 	else
12534a9671a0SJoel Fernandes 		/* Allocate from freetree */
12544a9671a0SJoel Fernandes 		return alloc_from_freetree(mm, order, flags);
12554a9671a0SJoel Fernandes }
12564a9671a0SJoel Fernandes 
12574a9671a0SJoel Fernandes /**
1258ba110db8SJoel Fernandes  * gpu_buddy_alloc_blocks - allocate power-of-two blocks
12594a9671a0SJoel Fernandes  *
1260ba110db8SJoel Fernandes  * @mm: GPU buddy manager to allocate from
12614a9671a0SJoel Fernandes  * @start: start of the allowed range for this block
12624a9671a0SJoel Fernandes  * @end: end of the allowed range for this block
12634a9671a0SJoel Fernandes  * @size: size of the allocation in bytes
12644a9671a0SJoel Fernandes  * @min_block_size: alignment of the allocation
12654a9671a0SJoel Fernandes  * @blocks: output list head to add allocated blocks
1266ba110db8SJoel Fernandes  * @flags: GPU_BUDDY_*_ALLOCATION flags
12674a9671a0SJoel Fernandes  *
12684a9671a0SJoel Fernandes  * alloc_range_bias() called on range limitations, which traverses
12694a9671a0SJoel Fernandes  * the tree and returns the desired block.
12704a9671a0SJoel Fernandes  *
12714a9671a0SJoel Fernandes  * alloc_from_freetree() called when *no* range restrictions
12724a9671a0SJoel Fernandes  * are enforced, which picks the block from the freetree.
12734a9671a0SJoel Fernandes  *
12744a9671a0SJoel Fernandes  * Returns:
12754a9671a0SJoel Fernandes  * 0 on success, error code on failure.
12764a9671a0SJoel Fernandes  */
1277ba110db8SJoel Fernandes int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
12784a9671a0SJoel Fernandes 			   u64 start, u64 end, u64 size,
12794a9671a0SJoel Fernandes 			   u64 min_block_size,
12804a9671a0SJoel Fernandes 			   struct list_head *blocks,
12814a9671a0SJoel Fernandes 			   unsigned long flags)
12824a9671a0SJoel Fernandes {
1283ba110db8SJoel Fernandes 	struct gpu_buddy_block *block = NULL;
12844a9671a0SJoel Fernandes 	u64 original_size, original_min_size;
12854a9671a0SJoel Fernandes 	unsigned int min_order, order;
12864a9671a0SJoel Fernandes 	LIST_HEAD(allocated);
12874a9671a0SJoel Fernandes 	unsigned long pages;
12884a9671a0SJoel Fernandes 	int err;
12894a9671a0SJoel Fernandes 
12904a9671a0SJoel Fernandes 	if (size < mm->chunk_size)
12914a9671a0SJoel Fernandes 		return -EINVAL;
12924a9671a0SJoel Fernandes 
12934a9671a0SJoel Fernandes 	if (min_block_size < mm->chunk_size)
12944a9671a0SJoel Fernandes 		return -EINVAL;
12954a9671a0SJoel Fernandes 
12964a9671a0SJoel Fernandes 	if (!is_power_of_2(min_block_size))
12974a9671a0SJoel Fernandes 		return -EINVAL;
12984a9671a0SJoel Fernandes 
12994a9671a0SJoel Fernandes 	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
13004a9671a0SJoel Fernandes 		return -EINVAL;
13014a9671a0SJoel Fernandes 
13024a9671a0SJoel Fernandes 	if (end > mm->size)
13034a9671a0SJoel Fernandes 		return -EINVAL;
13044a9671a0SJoel Fernandes 
13054a9671a0SJoel Fernandes 	if (range_overflows(start, size, mm->size))
13064a9671a0SJoel Fernandes 		return -EINVAL;
13074a9671a0SJoel Fernandes 
13084a9671a0SJoel Fernandes 	/* Actual range allocation */
13094a9671a0SJoel Fernandes 	if (start + size == end) {
13104a9671a0SJoel Fernandes 		if (!IS_ALIGNED(start | end, min_block_size))
13114a9671a0SJoel Fernandes 			return -EINVAL;
13124a9671a0SJoel Fernandes 
1313ba110db8SJoel Fernandes 		return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
13144a9671a0SJoel Fernandes 	}
13154a9671a0SJoel Fernandes 
13164a9671a0SJoel Fernandes 	original_size = size;
13174a9671a0SJoel Fernandes 	original_min_size = min_block_size;
13184a9671a0SJoel Fernandes 
13194a9671a0SJoel Fernandes 	/* Roundup the size to power of 2 */
1320ba110db8SJoel Fernandes 	if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
13214a9671a0SJoel Fernandes 		size = roundup_pow_of_two(size);
13224a9671a0SJoel Fernandes 		min_block_size = size;
1323*493740d7SArunpravin Paneer Selvam 		/*
1324*493740d7SArunpravin Paneer Selvam 		 * Normalize the requested size to min_block_size for regular allocations.
1325*493740d7SArunpravin Paneer Selvam 		 * Offset-aligned allocations intentionally skip size rounding.
1326*493740d7SArunpravin Paneer Selvam 		 */
1327*493740d7SArunpravin Paneer Selvam 	} else if (!gpu_buddy_can_offset_align(size, min_block_size)) {
13284a9671a0SJoel Fernandes 		size = round_up(size, min_block_size);
13294a9671a0SJoel Fernandes 	}
13304a9671a0SJoel Fernandes 
13314a9671a0SJoel Fernandes 	pages = size >> ilog2(mm->chunk_size);
13324a9671a0SJoel Fernandes 	order = fls(pages) - 1;
13334a9671a0SJoel Fernandes 	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
13344a9671a0SJoel Fernandes 
13354a9671a0SJoel Fernandes 	if (order > mm->max_order || size > mm->size) {
1336ba110db8SJoel Fernandes 		if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
1337ba110db8SJoel Fernandes 		    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
13384a9671a0SJoel Fernandes 			return __alloc_contig_try_harder(mm, original_size,
13394a9671a0SJoel Fernandes 							 original_min_size, blocks);
13404a9671a0SJoel Fernandes 
13414a9671a0SJoel Fernandes 		return -EINVAL;
13424a9671a0SJoel Fernandes 	}
13434a9671a0SJoel Fernandes 
13444a9671a0SJoel Fernandes 	do {
13454a9671a0SJoel Fernandes 		order = min(order, (unsigned int)fls(pages) - 1);
13464a9671a0SJoel Fernandes 		BUG_ON(order > mm->max_order);
1347*493740d7SArunpravin Paneer Selvam 		/*
1348*493740d7SArunpravin Paneer Selvam 		 * Regular allocations must not allocate blocks smaller than min_block_size.
1349*493740d7SArunpravin Paneer Selvam 		 * Offset-aligned allocations deliberately bypass this constraint.
1350*493740d7SArunpravin Paneer Selvam 		 */
1351*493740d7SArunpravin Paneer Selvam 		BUG_ON(size >= min_block_size && order < min_order);
13524a9671a0SJoel Fernandes 
13534a9671a0SJoel Fernandes 		do {
1354*493740d7SArunpravin Paneer Selvam 			unsigned int fallback_order;
1355*493740d7SArunpravin Paneer Selvam 
1356ba110db8SJoel Fernandes 			block = __gpu_buddy_alloc_blocks(mm, start,
13574a9671a0SJoel Fernandes 							 end,
1358*493740d7SArunpravin Paneer Selvam 							 size,
1359*493740d7SArunpravin Paneer Selvam 							 min_block_size,
13604a9671a0SJoel Fernandes 							 order,
13614a9671a0SJoel Fernandes 							 flags);
13624a9671a0SJoel Fernandes 			if (!IS_ERR(block))
13634a9671a0SJoel Fernandes 				break;
13644a9671a0SJoel Fernandes 
1365*493740d7SArunpravin Paneer Selvam 			if (size < min_block_size) {
1366*493740d7SArunpravin Paneer Selvam 				fallback_order = order;
1367*493740d7SArunpravin Paneer Selvam 			} else if (order == min_order) {
1368*493740d7SArunpravin Paneer Selvam 				fallback_order = min_order;
1369*493740d7SArunpravin Paneer Selvam 			} else {
1370*493740d7SArunpravin Paneer Selvam 				order--;
1371*493740d7SArunpravin Paneer Selvam 				continue;
1372*493740d7SArunpravin Paneer Selvam 			}
1373*493740d7SArunpravin Paneer Selvam 
13744a9671a0SJoel Fernandes 			/* Try allocation through force merge method */
13754a9671a0SJoel Fernandes 			if (mm->clear_avail &&
1376*493740d7SArunpravin Paneer Selvam 			    !__force_merge(mm, start, end, fallback_order)) {
1377ba110db8SJoel Fernandes 				block = __gpu_buddy_alloc_blocks(mm, start,
13784a9671a0SJoel Fernandes 								 end,
1379*493740d7SArunpravin Paneer Selvam 								 size,
1380*493740d7SArunpravin Paneer Selvam 								 min_block_size,
1381*493740d7SArunpravin Paneer Selvam 								 fallback_order,
13824a9671a0SJoel Fernandes 								 flags);
13834a9671a0SJoel Fernandes 				if (!IS_ERR(block)) {
1384*493740d7SArunpravin Paneer Selvam 					order = fallback_order;
13854a9671a0SJoel Fernandes 					break;
13864a9671a0SJoel Fernandes 				}
13874a9671a0SJoel Fernandes 			}
13884a9671a0SJoel Fernandes 
13894a9671a0SJoel Fernandes 			/*
13904a9671a0SJoel Fernandes 			 * Try contiguous block allocation through
13914a9671a0SJoel Fernandes 			 * try harder method.
13924a9671a0SJoel Fernandes 			 */
1393ba110db8SJoel Fernandes 			if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
1394ba110db8SJoel Fernandes 			    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
13954a9671a0SJoel Fernandes 				return __alloc_contig_try_harder(mm,
13964a9671a0SJoel Fernandes 								 original_size,
13974a9671a0SJoel Fernandes 								 original_min_size,
13984a9671a0SJoel Fernandes 								 blocks);
13994a9671a0SJoel Fernandes 			err = -ENOSPC;
14004a9671a0SJoel Fernandes 			goto err_free;
14014a9671a0SJoel Fernandes 		} while (1);
14024a9671a0SJoel Fernandes 
14034a9671a0SJoel Fernandes 		mark_allocated(mm, block);
1404ba110db8SJoel Fernandes 		mm->avail -= gpu_buddy_block_size(mm, block);
1405ba110db8SJoel Fernandes 		if (gpu_buddy_block_is_clear(block))
1406ba110db8SJoel Fernandes 			mm->clear_avail -= gpu_buddy_block_size(mm, block);
14074a9671a0SJoel Fernandes 		kmemleak_update_trace(block);
14084a9671a0SJoel Fernandes 		list_add_tail(&block->link, &allocated);
14094a9671a0SJoel Fernandes 
14104a9671a0SJoel Fernandes 		pages -= BIT(order);
14114a9671a0SJoel Fernandes 
14124a9671a0SJoel Fernandes 		if (!pages)
14134a9671a0SJoel Fernandes 			break;
14144a9671a0SJoel Fernandes 	} while (1);
14154a9671a0SJoel Fernandes 
14164a9671a0SJoel Fernandes 	/* Trim the allocated block to the required size */
1417ba110db8SJoel Fernandes 	if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
14184a9671a0SJoel Fernandes 	    original_size != size) {
14194a9671a0SJoel Fernandes 		struct list_head *trim_list;
14204a9671a0SJoel Fernandes 		LIST_HEAD(temp);
14214a9671a0SJoel Fernandes 		u64 trim_size;
14224a9671a0SJoel Fernandes 
14234a9671a0SJoel Fernandes 		trim_list = &allocated;
14244a9671a0SJoel Fernandes 		trim_size = original_size;
14254a9671a0SJoel Fernandes 
14264a9671a0SJoel Fernandes 		if (!list_is_singular(&allocated)) {
14274a9671a0SJoel Fernandes 			block = list_last_entry(&allocated, typeof(*block), link);
14284a9671a0SJoel Fernandes 			list_move(&block->link, &temp);
14294a9671a0SJoel Fernandes 			trim_list = &temp;
1430ba110db8SJoel Fernandes 			trim_size = gpu_buddy_block_size(mm, block) -
14314a9671a0SJoel Fernandes 				(size - original_size);
14324a9671a0SJoel Fernandes 		}
14334a9671a0SJoel Fernandes 
1434ba110db8SJoel Fernandes 		gpu_buddy_block_trim(mm,
14354a9671a0SJoel Fernandes 				     NULL,
14364a9671a0SJoel Fernandes 				     trim_size,
14374a9671a0SJoel Fernandes 				     trim_list);
14384a9671a0SJoel Fernandes 
14394a9671a0SJoel Fernandes 		if (!list_empty(&temp))
14404a9671a0SJoel Fernandes 			list_splice_tail(trim_list, &allocated);
14414a9671a0SJoel Fernandes 	}
14424a9671a0SJoel Fernandes 
14434a9671a0SJoel Fernandes 	list_splice_tail(&allocated, blocks);
14444a9671a0SJoel Fernandes 	return 0;
14454a9671a0SJoel Fernandes 
14464a9671a0SJoel Fernandes err_free:
1447ba110db8SJoel Fernandes 	gpu_buddy_free_list_internal(mm, &allocated);
14484a9671a0SJoel Fernandes 	return err;
14494a9671a0SJoel Fernandes }
1450ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_alloc_blocks);
14514a9671a0SJoel Fernandes 
14524a9671a0SJoel Fernandes /**
1453ba110db8SJoel Fernandes  * gpu_buddy_block_print - print block information
14544a9671a0SJoel Fernandes  *
1455ba110db8SJoel Fernandes  * @mm: GPU buddy manager
1456ba110db8SJoel Fernandes  * @block: GPU buddy block
14574a9671a0SJoel Fernandes  */
1458ba110db8SJoel Fernandes void gpu_buddy_block_print(struct gpu_buddy *mm,
1459ba110db8SJoel Fernandes 			   struct gpu_buddy_block *block)
14604a9671a0SJoel Fernandes {
1461ba110db8SJoel Fernandes 	u64 start = gpu_buddy_block_offset(block);
1462ba110db8SJoel Fernandes 	u64 size = gpu_buddy_block_size(mm, block);
14634a9671a0SJoel Fernandes 
1464ba110db8SJoel Fernandes 	pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
14654a9671a0SJoel Fernandes }
1466ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_block_print);
14674a9671a0SJoel Fernandes 
14684a9671a0SJoel Fernandes /**
1469ba110db8SJoel Fernandes  * gpu_buddy_print - print allocator state
14704a9671a0SJoel Fernandes  *
1471ba110db8SJoel Fernandes  * @mm: GPU buddy manager
1472ba110db8SJoel Fernandes  * @p: GPU printer to use
14734a9671a0SJoel Fernandes  */
1474ba110db8SJoel Fernandes void gpu_buddy_print(struct gpu_buddy *mm)
14754a9671a0SJoel Fernandes {
14764a9671a0SJoel Fernandes 	int order;
14774a9671a0SJoel Fernandes 
1478ba110db8SJoel Fernandes 	pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
14794a9671a0SJoel Fernandes 		mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
14804a9671a0SJoel Fernandes 
14814a9671a0SJoel Fernandes 	for (order = mm->max_order; order >= 0; order--) {
1482ba110db8SJoel Fernandes 		struct gpu_buddy_block *block, *tmp;
14834a9671a0SJoel Fernandes 		struct rb_root *root;
14844a9671a0SJoel Fernandes 		u64 count = 0, free;
14854a9671a0SJoel Fernandes 		unsigned int tree;
14864a9671a0SJoel Fernandes 
14874a9671a0SJoel Fernandes 		for_each_free_tree(tree) {
14884a9671a0SJoel Fernandes 			root = &mm->free_trees[tree][order];
14894a9671a0SJoel Fernandes 
14904a9671a0SJoel Fernandes 			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
1491ba110db8SJoel Fernandes 				BUG_ON(!gpu_buddy_block_is_free(block));
14924a9671a0SJoel Fernandes 				count++;
14934a9671a0SJoel Fernandes 			}
14944a9671a0SJoel Fernandes 		}
14954a9671a0SJoel Fernandes 
14964a9671a0SJoel Fernandes 		free = count * (mm->chunk_size << order);
14974a9671a0SJoel Fernandes 		if (free < SZ_1M)
1498ba110db8SJoel Fernandes 			pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
1499ba110db8SJoel Fernandes 				order, free >> 10, count);
15004a9671a0SJoel Fernandes 		else
1501ba110db8SJoel Fernandes 			pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
1502ba110db8SJoel Fernandes 				order, free >> 20, count);
15034a9671a0SJoel Fernandes 	}
15044a9671a0SJoel Fernandes }
1505ba110db8SJoel Fernandes EXPORT_SYMBOL(gpu_buddy_print);
15064a9671a0SJoel Fernandes 
1507ba110db8SJoel Fernandes static void gpu_buddy_module_exit(void)
15084a9671a0SJoel Fernandes {
15094a9671a0SJoel Fernandes 	kmem_cache_destroy(slab_blocks);
15104a9671a0SJoel Fernandes }
15114a9671a0SJoel Fernandes 
1512ba110db8SJoel Fernandes static int __init gpu_buddy_module_init(void)
15134a9671a0SJoel Fernandes {
1514ba110db8SJoel Fernandes 	slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
15154a9671a0SJoel Fernandes 	if (!slab_blocks)
15164a9671a0SJoel Fernandes 		return -ENOMEM;
15174a9671a0SJoel Fernandes 
15184a9671a0SJoel Fernandes 	return 0;
15194a9671a0SJoel Fernandes }
15204a9671a0SJoel Fernandes 
1521ba110db8SJoel Fernandes module_init(gpu_buddy_module_init);
1522ba110db8SJoel Fernandes module_exit(gpu_buddy_module_exit);
15234a9671a0SJoel Fernandes 
1524ba110db8SJoel Fernandes MODULE_DESCRIPTION("GPU Buddy Allocator");
15254a9671a0SJoel Fernandes MODULE_LICENSE("Dual MIT/GPL");
1526