xref: /linux/drivers/gpu/buddy.c (revision 99676aed1fec109d62822e21a06760eb098dc5f4)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include <linux/bug.h>
7 #include <linux/export.h>
8 #include <linux/kmemleak.h>
9 #include <linux/module.h>
10 #include <linux/sizes.h>
11 
12 #include <linux/gpu_buddy.h>
13 
14 /**
15  * gpu_buddy_assert - assert a condition in the buddy allocator
16  * @condition: condition expected to be true
17  *
18  * When CONFIG_KUNIT is enabled, evaluates @condition and, if false, triggers
19  * a WARN_ON() and also calls kunit_fail_current_test() so that any running
20  * kunit test is properly marked as failed. The stringified condition is
21  * included in the failure message for easy identification.
22  *
23  * When CONFIG_KUNIT is not enabled, this reduces to WARN_ON() so production
24  * builds retain the same warning semantics as before.
25  */
26 #if IS_ENABLED(CONFIG_KUNIT)
27 #include <kunit/test-bug.h>
28 #define gpu_buddy_assert(condition) do {						\
29 	if (WARN_ON(!(condition)))						\
30 		kunit_fail_current_test("gpu_buddy_assert(" #condition ")");	\
31 } while (0)
32 #else
33 #define gpu_buddy_assert(condition) WARN_ON(!(condition))
34 #endif
35 
36 static struct kmem_cache *slab_blocks;
37 
38 static unsigned int
39 gpu_buddy_block_state(struct gpu_buddy_block *block)
40 {
41 	return block->header & GPU_BUDDY_HEADER_STATE;
42 }
43 
44 static bool
45 gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
46 {
47 	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
48 }
49 
50 static bool
51 gpu_buddy_block_is_split(struct gpu_buddy_block *block)
52 {
53 	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
54 }
55 
56 static unsigned int gpu_buddy_block_offset_alignment(struct gpu_buddy_block *block)
57 {
58 	u64 offset = gpu_buddy_block_offset(block);
59 
60 	if (!offset)
61 		/*
62 		 * __ffs64(0) is undefined; offset 0 is maximally aligned, so return
63 		 * a value greater than any possible alignment.
64 		 */
65 		return 64 + 1;
66 
67 	return __ffs64(offset);
68 }
69 
70 RB_DECLARE_CALLBACKS_MAX(static, gpu_buddy_augment_cb,
71 			 struct gpu_buddy_block, rb,
72 			 unsigned int, subtree_max_alignment,
73 			 gpu_buddy_block_offset_alignment);
74 
75 static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
76 					       struct gpu_buddy_block *parent,
77 					       unsigned int order,
78 					       u64 offset)
79 {
80 	struct gpu_buddy_block *block;
81 
82 	BUG_ON(order > GPU_BUDDY_MAX_ORDER);
83 
84 	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
85 	if (!block)
86 		return NULL;
87 
88 	block->header = offset;
89 	block->header |= order;
90 	block->parent = parent;
91 
92 	RB_CLEAR_NODE(&block->rb);
93 
94 	BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
95 	return block;
96 }
97 
98 static void gpu_block_free(struct gpu_buddy *mm,
99 			   struct gpu_buddy_block *block)
100 {
101 	kmem_cache_free(slab_blocks, block);
102 }
103 
104 static enum gpu_buddy_free_tree
105 get_block_tree(struct gpu_buddy_block *block)
106 {
107 	return gpu_buddy_block_is_clear(block) ?
108 	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
109 }
110 
111 static struct gpu_buddy_block *
112 rbtree_get_free_block(const struct rb_node *node)
113 {
114 	return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
115 }
116 
117 static struct gpu_buddy_block *
118 rbtree_last_free_block(struct rb_root *root)
119 {
120 	return rbtree_get_free_block(rb_last(root));
121 }
122 
123 static bool rbtree_is_empty(struct rb_root *root)
124 {
125 	return RB_EMPTY_ROOT(root);
126 }
127 
128 static void rbtree_insert(struct gpu_buddy *mm,
129 			  struct gpu_buddy_block *block,
130 			  enum gpu_buddy_free_tree tree)
131 {
132 	struct rb_node **link, *parent = NULL;
133 	unsigned int block_alignment, order;
134 	struct gpu_buddy_block *node;
135 	struct rb_root *root;
136 
137 	order = gpu_buddy_block_order(block);
138 	block_alignment = gpu_buddy_block_offset_alignment(block);
139 
140 	root = &mm->free_trees[tree][order];
141 	link = &root->rb_node;
142 
143 	while (*link) {
144 		parent = *link;
145 		node = rbtree_get_free_block(parent);
146 		/*
147 		 * Manual augmentation update during insertion traversal. Required
148 		 * because rb_insert_augmented() only calls rotate callback during
149 		 * rotations. This ensures all ancestors on the insertion path have
150 		 * correct subtree_max_alignment values.
151 		 */
152 		if (node->subtree_max_alignment < block_alignment)
153 			node->subtree_max_alignment = block_alignment;
154 
155 		if (gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node))
156 			link = &parent->rb_left;
157 		else
158 			link = &parent->rb_right;
159 	}
160 
161 	block->subtree_max_alignment = block_alignment;
162 	rb_link_node(&block->rb, parent, link);
163 	rb_insert_augmented(&block->rb, root, &gpu_buddy_augment_cb);
164 }
165 
166 static void rbtree_remove(struct gpu_buddy *mm,
167 			  struct gpu_buddy_block *block)
168 {
169 	unsigned int order = gpu_buddy_block_order(block);
170 	enum gpu_buddy_free_tree tree;
171 	struct rb_root *root;
172 
173 	tree = get_block_tree(block);
174 	root = &mm->free_trees[tree][order];
175 
176 	rb_erase_augmented(&block->rb, root, &gpu_buddy_augment_cb);
177 	RB_CLEAR_NODE(&block->rb);
178 }
179 
180 static void clear_reset(struct gpu_buddy_block *block)
181 {
182 	block->header &= ~GPU_BUDDY_HEADER_CLEAR;
183 }
184 
185 static void mark_cleared(struct gpu_buddy_block *block)
186 {
187 	block->header |= GPU_BUDDY_HEADER_CLEAR;
188 }
189 
190 static void mark_allocated(struct gpu_buddy *mm,
191 			   struct gpu_buddy_block *block)
192 {
193 	block->header &= ~GPU_BUDDY_HEADER_STATE;
194 	block->header |= GPU_BUDDY_ALLOCATED;
195 
196 	rbtree_remove(mm, block);
197 }
198 
199 static void mark_free(struct gpu_buddy *mm,
200 		      struct gpu_buddy_block *block)
201 {
202 	enum gpu_buddy_free_tree tree;
203 
204 	block->header &= ~GPU_BUDDY_HEADER_STATE;
205 	block->header |= GPU_BUDDY_FREE;
206 
207 	tree = get_block_tree(block);
208 	rbtree_insert(mm, block, tree);
209 }
210 
211 static void mark_split(struct gpu_buddy *mm,
212 		       struct gpu_buddy_block *block)
213 {
214 	block->header &= ~GPU_BUDDY_HEADER_STATE;
215 	block->header |= GPU_BUDDY_SPLIT;
216 
217 	rbtree_remove(mm, block);
218 }
219 
220 static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
221 {
222 	return s1 <= e2 && e1 >= s2;
223 }
224 
225 static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
226 {
227 	return s1 <= s2 && e1 >= e2;
228 }
229 
230 static struct gpu_buddy_block *
231 __get_buddy(struct gpu_buddy_block *block)
232 {
233 	struct gpu_buddy_block *parent;
234 
235 	parent = block->parent;
236 	if (!parent)
237 		return NULL;
238 
239 	if (parent->left == block)
240 		return parent->right;
241 
242 	return parent->left;
243 }
244 
245 static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
246 				     struct gpu_buddy_block *block,
247 				     bool force_merge)
248 {
249 	struct gpu_buddy_block *parent;
250 	unsigned int order;
251 
252 	while ((parent = block->parent)) {
253 		struct gpu_buddy_block *buddy;
254 
255 		buddy = __get_buddy(block);
256 
257 		if (!gpu_buddy_block_is_free(buddy))
258 			break;
259 
260 		if (!force_merge) {
261 			/*
262 			 * Check the block and its buddy clear state and exit
263 			 * the loop if they both have the dissimilar state.
264 			 */
265 			if (gpu_buddy_block_is_clear(block) !=
266 			    gpu_buddy_block_is_clear(buddy))
267 				break;
268 
269 			if (gpu_buddy_block_is_clear(block))
270 				mark_cleared(parent);
271 		}
272 
273 		rbtree_remove(mm, buddy);
274 		if (force_merge && gpu_buddy_block_is_clear(buddy))
275 			mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
276 
277 		gpu_block_free(mm, block);
278 		gpu_block_free(mm, buddy);
279 
280 		block = parent;
281 	}
282 
283 	order = gpu_buddy_block_order(block);
284 	mark_free(mm, block);
285 
286 	return order;
287 }
288 
289 static int __force_merge(struct gpu_buddy *mm,
290 			 u64 start,
291 			 u64 end,
292 			 unsigned int min_order)
293 {
294 	unsigned int tree, order;
295 	int i;
296 
297 	if (!min_order)
298 		return -ENOMEM;
299 
300 	if (min_order > mm->max_order)
301 		return -EINVAL;
302 
303 	for_each_free_tree(tree) {
304 		for (i = min_order - 1; i >= 0; i--) {
305 			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
306 
307 			while (iter) {
308 				struct gpu_buddy_block *block, *buddy;
309 				u64 block_start, block_end;
310 
311 				block = rbtree_get_free_block(iter);
312 				iter = rb_prev(iter);
313 
314 				if (!block || !block->parent)
315 					continue;
316 
317 				block_start = gpu_buddy_block_offset(block);
318 				block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
319 
320 				if (!contains(start, end, block_start, block_end))
321 					continue;
322 
323 				buddy = __get_buddy(block);
324 				if (!gpu_buddy_block_is_free(buddy))
325 					continue;
326 
327 				gpu_buddy_assert(gpu_buddy_block_is_clear(block) !=
328 						 gpu_buddy_block_is_clear(buddy));
329 
330 				/*
331 				 * Advance to the next node when the current node is the buddy,
332 				 * as freeing the block will also remove its buddy from the tree.
333 				 */
334 				if (iter == &buddy->rb)
335 					iter = rb_prev(iter);
336 
337 				rbtree_remove(mm, block);
338 				if (gpu_buddy_block_is_clear(block))
339 					mm->clear_avail -= gpu_buddy_block_size(mm, block);
340 
341 				order = __gpu_buddy_free(mm, block, true);
342 				if (order >= min_order)
343 					return 0;
344 			}
345 		}
346 	}
347 
348 	return -ENOMEM;
349 }
350 
351 /**
352  * gpu_buddy_init - init memory manager
353  *
354  * @mm: GPU buddy manager to initialize
355  * @size: size in bytes to manage
356  * @chunk_size: minimum page size in bytes for our allocations
357  *
358  * Initializes the memory manager and its resources.
359  *
360  * Returns:
361  * 0 on success, error code on failure.
362  */
363 int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
364 {
365 	unsigned int i, j, root_count = 0;
366 	u64 offset = 0;
367 
368 	if (size < chunk_size)
369 		return -EINVAL;
370 
371 	if (chunk_size < SZ_4K)
372 		return -EINVAL;
373 
374 	if (!is_power_of_2(chunk_size))
375 		return -EINVAL;
376 
377 	size = round_down(size, chunk_size);
378 
379 	mm->size = size;
380 	mm->avail = size;
381 	mm->clear_avail = 0;
382 	mm->chunk_size = chunk_size;
383 	mm->max_order = ilog2(size) - ilog2(chunk_size);
384 
385 	BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);
386 
387 	mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
388 				       sizeof(*mm->free_trees),
389 				       GFP_KERNEL);
390 	if (!mm->free_trees)
391 		return -ENOMEM;
392 
393 	for_each_free_tree(i) {
394 		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
395 						  sizeof(struct rb_root),
396 						  GFP_KERNEL);
397 		if (!mm->free_trees[i])
398 			goto out_free_tree;
399 
400 		for (j = 0; j <= mm->max_order; ++j)
401 			mm->free_trees[i][j] = RB_ROOT;
402 	}
403 
404 	mm->n_roots = hweight64(size);
405 
406 	mm->roots = kmalloc_array(mm->n_roots,
407 				  sizeof(struct gpu_buddy_block *),
408 				  GFP_KERNEL);
409 	if (!mm->roots)
410 		goto out_free_tree;
411 
412 	/*
413 	 * Split into power-of-two blocks, in case we are given a size that is
414 	 * not itself a power-of-two.
415 	 */
416 	do {
417 		struct gpu_buddy_block *root;
418 		unsigned int order;
419 		u64 root_size;
420 
421 		order = ilog2(size) - ilog2(chunk_size);
422 		root_size = chunk_size << order;
423 
424 		root = gpu_block_alloc(mm, NULL, order, offset);
425 		if (!root)
426 			goto out_free_roots;
427 
428 		mark_free(mm, root);
429 
430 		BUG_ON(root_count > mm->max_order);
431 		BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);
432 
433 		mm->roots[root_count] = root;
434 
435 		offset += root_size;
436 		size -= root_size;
437 		root_count++;
438 	} while (size);
439 
440 #ifdef CONFIG_LOCKDEP
441 	mm->lock_dep_map = NULL;
442 #endif
443 	return 0;
444 
445 out_free_roots:
446 	while (root_count--)
447 		gpu_block_free(mm, mm->roots[root_count]);
448 	kfree(mm->roots);
449 out_free_tree:
450 	while (i--)
451 		kfree(mm->free_trees[i]);
452 	kfree(mm->free_trees);
453 	return -ENOMEM;
454 }
455 EXPORT_SYMBOL(gpu_buddy_init);
456 
457 /**
458  * gpu_buddy_fini - tear down the memory manager
459  *
460  * @mm: GPU buddy manager to free
461  *
462  * Cleanup memory manager resources and the freetree
463  */
464 void gpu_buddy_fini(struct gpu_buddy *mm)
465 {
466 	u64 root_size, size, start;
467 	unsigned int order;
468 	int i;
469 
470 	size = mm->size;
471 
472 	for (i = 0; i < mm->n_roots; ++i) {
473 		order = ilog2(size) - ilog2(mm->chunk_size);
474 		start = gpu_buddy_block_offset(mm->roots[i]);
475 		__force_merge(mm, start, start + size, order);
476 
477 		gpu_buddy_assert(gpu_buddy_block_is_free(mm->roots[i]));
478 
479 		gpu_block_free(mm, mm->roots[i]);
480 
481 		root_size = mm->chunk_size << order;
482 		size -= root_size;
483 	}
484 
485 	gpu_buddy_assert(mm->avail == mm->size);
486 
487 	for_each_free_tree(i)
488 		kfree(mm->free_trees[i]);
489 	kfree(mm->free_trees);
490 	kfree(mm->roots);
491 }
492 EXPORT_SYMBOL(gpu_buddy_fini);
493 
494 static int split_block(struct gpu_buddy *mm,
495 		       struct gpu_buddy_block *block)
496 {
497 	unsigned int block_order = gpu_buddy_block_order(block) - 1;
498 	u64 offset = gpu_buddy_block_offset(block);
499 
500 	BUG_ON(!gpu_buddy_block_is_free(block));
501 	BUG_ON(!gpu_buddy_block_order(block));
502 
503 	block->left = gpu_block_alloc(mm, block, block_order, offset);
504 	if (!block->left)
505 		return -ENOMEM;
506 
507 	block->right = gpu_block_alloc(mm, block, block_order,
508 				       offset + (mm->chunk_size << block_order));
509 	if (!block->right) {
510 		gpu_block_free(mm, block->left);
511 		return -ENOMEM;
512 	}
513 
514 	mark_split(mm, block);
515 
516 	if (gpu_buddy_block_is_clear(block)) {
517 		mark_cleared(block->left);
518 		mark_cleared(block->right);
519 		clear_reset(block);
520 	}
521 
522 	mark_free(mm, block->left);
523 	mark_free(mm, block->right);
524 
525 	return 0;
526 }
527 
528 /**
529  * gpu_buddy_reset_clear - reset blocks clear state
530  *
531  * @mm: GPU buddy manager
532  * @is_clear: blocks clear state
533  *
534  * Reset the clear state based on @is_clear value for each block
535  * in the freetree.
536  */
537 void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
538 {
539 	enum gpu_buddy_free_tree src_tree, dst_tree;
540 	u64 root_size, size, start;
541 	unsigned int order;
542 	int i;
543 
544 	gpu_buddy_driver_lock_held(mm);
545 	size = mm->size;
546 	for (i = 0; i < mm->n_roots; ++i) {
547 		order = ilog2(size) - ilog2(mm->chunk_size);
548 		start = gpu_buddy_block_offset(mm->roots[i]);
549 		__force_merge(mm, start, start + size, order);
550 
551 		root_size = mm->chunk_size << order;
552 		size -= root_size;
553 	}
554 
555 	src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
556 	dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
557 
558 	for (i = 0; i <= mm->max_order; ++i) {
559 		struct rb_root *root = &mm->free_trees[src_tree][i];
560 		struct gpu_buddy_block *block, *tmp;
561 
562 		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
563 			rbtree_remove(mm, block);
564 			if (is_clear) {
565 				mark_cleared(block);
566 				mm->clear_avail += gpu_buddy_block_size(mm, block);
567 			} else {
568 				clear_reset(block);
569 				mm->clear_avail -= gpu_buddy_block_size(mm, block);
570 			}
571 
572 			rbtree_insert(mm, block, dst_tree);
573 		}
574 	}
575 }
576 EXPORT_SYMBOL(gpu_buddy_reset_clear);
577 
578 /**
579  * gpu_buddy_free_block - free a block
580  *
581  * @mm: GPU buddy manager
582  * @block: block to be freed
583  */
584 void gpu_buddy_free_block(struct gpu_buddy *mm,
585 			  struct gpu_buddy_block *block)
586 {
587 	gpu_buddy_driver_lock_held(mm);
588 	BUG_ON(!gpu_buddy_block_is_allocated(block));
589 	mm->avail += gpu_buddy_block_size(mm, block);
590 	if (gpu_buddy_block_is_clear(block))
591 		mm->clear_avail += gpu_buddy_block_size(mm, block);
592 
593 	__gpu_buddy_free(mm, block, false);
594 }
595 EXPORT_SYMBOL(gpu_buddy_free_block);
596 
597 static void __gpu_buddy_free_list(struct gpu_buddy *mm,
598 				  struct list_head *objects,
599 				  bool mark_clear,
600 				  bool mark_dirty)
601 {
602 	struct gpu_buddy_block *block, *on;
603 
604 	gpu_buddy_assert(!(mark_dirty && mark_clear));
605 
606 	list_for_each_entry_safe(block, on, objects, link) {
607 		if (mark_clear)
608 			mark_cleared(block);
609 		else if (mark_dirty)
610 			clear_reset(block);
611 		gpu_buddy_free_block(mm, block);
612 		cond_resched();
613 	}
614 	INIT_LIST_HEAD(objects);
615 }
616 
617 static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
618 					 struct list_head *objects)
619 {
620 	/*
621 	 * Don't touch the clear/dirty bit, since allocation is still internal
622 	 * at this point. For example we might have just failed part of the
623 	 * allocation.
624 	 */
625 	__gpu_buddy_free_list(mm, objects, false, false);
626 }
627 
628 /**
629  * gpu_buddy_free_list - free blocks
630  *
631  * @mm: GPU buddy manager
632  * @objects: input list head to free blocks
633  * @flags: optional flags like GPU_BUDDY_CLEARED
634  */
635 void gpu_buddy_free_list(struct gpu_buddy *mm,
636 			 struct list_head *objects,
637 			 unsigned int flags)
638 {
639 	bool mark_clear = flags & GPU_BUDDY_CLEARED;
640 
641 	gpu_buddy_driver_lock_held(mm);
642 	__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
643 }
644 EXPORT_SYMBOL(gpu_buddy_free_list);
645 
646 static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
647 {
648 	bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;
649 
650 	return needs_clear != gpu_buddy_block_is_clear(block);
651 }
652 
653 static struct gpu_buddy_block *
654 __alloc_range_bias(struct gpu_buddy *mm,
655 		   u64 start, u64 end,
656 		   unsigned int order,
657 		   unsigned long flags,
658 		   bool fallback)
659 {
660 	u64 req_size = mm->chunk_size << order;
661 	struct gpu_buddy_block *block;
662 	struct gpu_buddy_block *buddy;
663 	LIST_HEAD(dfs);
664 	int err;
665 	int i;
666 
667 	end = end - 1;
668 
669 	for (i = 0; i < mm->n_roots; ++i)
670 		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
671 
672 	do {
673 		u64 block_start;
674 		u64 block_end;
675 
676 		block = list_first_entry_or_null(&dfs,
677 						 struct gpu_buddy_block,
678 						 tmp_link);
679 		if (!block)
680 			break;
681 
682 		list_del(&block->tmp_link);
683 
684 		if (gpu_buddy_block_order(block) < order)
685 			continue;
686 
687 		block_start = gpu_buddy_block_offset(block);
688 		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
689 
690 		if (!overlaps(start, end, block_start, block_end))
691 			continue;
692 
693 		if (gpu_buddy_block_is_allocated(block))
694 			continue;
695 
696 		if (block_start < start || block_end > end) {
697 			u64 adjusted_start = max(block_start, start);
698 			u64 adjusted_end = min(block_end, end);
699 
700 			if (round_down(adjusted_end + 1, req_size) <=
701 			    round_up(adjusted_start, req_size))
702 				continue;
703 		}
704 
705 		if (!fallback && block_incompatible(block, flags))
706 			continue;
707 
708 		if (contains(start, end, block_start, block_end) &&
709 		    order == gpu_buddy_block_order(block)) {
710 			/*
711 			 * Find the free block within the range.
712 			 */
713 			if (gpu_buddy_block_is_free(block))
714 				return block;
715 
716 			continue;
717 		}
718 
719 		if (!gpu_buddy_block_is_split(block)) {
720 			err = split_block(mm, block);
721 			if (unlikely(err))
722 				goto err_undo;
723 		}
724 
725 		list_add(&block->right->tmp_link, &dfs);
726 		list_add(&block->left->tmp_link, &dfs);
727 	} while (1);
728 
729 	return ERR_PTR(-ENOSPC);
730 
731 err_undo:
732 	/*
733 	 * We really don't want to leave around a bunch of split blocks, since
734 	 * bigger is better, so make sure we merge everything back before we
735 	 * free the allocated blocks.
736 	 */
737 	buddy = __get_buddy(block);
738 	if (buddy &&
739 	    (gpu_buddy_block_is_free(block) &&
740 	     gpu_buddy_block_is_free(buddy)))
741 		__gpu_buddy_free(mm, block, false);
742 	return ERR_PTR(err);
743 }
744 
745 static struct gpu_buddy_block *
746 __gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
747 			     u64 start, u64 end,
748 			     unsigned int order,
749 			     unsigned long flags)
750 {
751 	struct gpu_buddy_block *block;
752 	bool fallback = false;
753 
754 	block = __alloc_range_bias(mm, start, end, order,
755 				   flags, fallback);
756 	if (IS_ERR(block))
757 		return __alloc_range_bias(mm, start, end, order,
758 					  flags, !fallback);
759 
760 	return block;
761 }
762 
763 static struct gpu_buddy_block *
764 get_maxblock(struct gpu_buddy *mm,
765 	     unsigned int order,
766 	     enum gpu_buddy_free_tree tree)
767 {
768 	struct gpu_buddy_block *max_block = NULL, *block = NULL;
769 	struct rb_root *root;
770 	unsigned int i;
771 
772 	for (i = order; i <= mm->max_order; ++i) {
773 		root = &mm->free_trees[tree][i];
774 		block = rbtree_last_free_block(root);
775 		if (!block)
776 			continue;
777 
778 		if (!max_block) {
779 			max_block = block;
780 			continue;
781 		}
782 
783 		if (gpu_buddy_block_offset(block) >
784 		    gpu_buddy_block_offset(max_block)) {
785 			max_block = block;
786 		}
787 	}
788 
789 	return max_block;
790 }
791 
792 static struct gpu_buddy_block *
793 alloc_from_freetree(struct gpu_buddy *mm,
794 		    unsigned int order,
795 		    unsigned long flags)
796 {
797 	struct gpu_buddy_block *block = NULL;
798 	struct rb_root *root;
799 	enum gpu_buddy_free_tree tree;
800 	unsigned int tmp;
801 	int err;
802 
803 	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
804 		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
805 
806 	if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
807 		block = get_maxblock(mm, order, tree);
808 		if (block)
809 			/* Store the obtained block order */
810 			tmp = gpu_buddy_block_order(block);
811 	} else {
812 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
813 			/* Get RB tree root for this order and tree */
814 			root = &mm->free_trees[tree][tmp];
815 			block = rbtree_last_free_block(root);
816 			if (block)
817 				break;
818 		}
819 	}
820 
821 	if (!block) {
822 		/* Try allocating from the other tree */
823 		tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
824 			GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
825 
826 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
827 			root = &mm->free_trees[tree][tmp];
828 			block = rbtree_last_free_block(root);
829 			if (block)
830 				break;
831 		}
832 
833 		if (!block)
834 			return ERR_PTR(-ENOSPC);
835 	}
836 
837 	BUG_ON(!gpu_buddy_block_is_free(block));
838 
839 	while (tmp != order) {
840 		err = split_block(mm, block);
841 		if (unlikely(err))
842 			goto err_undo;
843 
844 		block = block->right;
845 		tmp--;
846 	}
847 	return block;
848 
849 err_undo:
850 	if (tmp != order)
851 		__gpu_buddy_free(mm, block, false);
852 	return ERR_PTR(err);
853 }
854 
855 static bool
856 gpu_buddy_can_offset_align(u64 size, u64 min_block_size)
857 {
858 	return size < min_block_size && is_power_of_2(size);
859 }
860 
861 static bool gpu_buddy_subtree_can_satisfy(struct rb_node *node,
862 					  unsigned int alignment)
863 {
864 	struct gpu_buddy_block *block;
865 
866 	block = rbtree_get_free_block(node);
867 	return block->subtree_max_alignment >= alignment;
868 }
869 
870 static struct gpu_buddy_block *
871 gpu_buddy_find_block_aligned(struct gpu_buddy *mm,
872 			     enum gpu_buddy_free_tree tree,
873 			     unsigned int order,
874 			     unsigned int alignment,
875 			     unsigned long flags)
876 {
877 	struct rb_root *root = &mm->free_trees[tree][order];
878 	struct rb_node *rb = root->rb_node;
879 
880 	while (rb) {
881 		struct gpu_buddy_block *block = rbtree_get_free_block(rb);
882 		struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
883 
884 		if (right_node) {
885 			if (gpu_buddy_subtree_can_satisfy(right_node, alignment)) {
886 				rb = right_node;
887 				continue;
888 			}
889 		}
890 
891 		if (gpu_buddy_block_offset_alignment(block) >= alignment)
892 			return block;
893 
894 		if (left_node) {
895 			if (gpu_buddy_subtree_can_satisfy(left_node, alignment)) {
896 				rb = left_node;
897 				continue;
898 			}
899 		}
900 
901 		break;
902 	}
903 
904 	return NULL;
905 }
906 
907 static struct gpu_buddy_block *
908 gpu_buddy_offset_aligned_allocation(struct gpu_buddy *mm,
909 				    u64 size,
910 				    u64 min_block_size,
911 				    unsigned long flags)
912 {
913 	struct gpu_buddy_block *block = NULL;
914 	unsigned int order, tmp, alignment;
915 	struct gpu_buddy_block *buddy;
916 	enum gpu_buddy_free_tree tree;
917 	unsigned long pages;
918 	int err;
919 
920 	alignment = ilog2(min_block_size);
921 	pages = size >> ilog2(mm->chunk_size);
922 	order = fls(pages) - 1;
923 
924 	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
925 		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
926 
927 	for (tmp = order; tmp <= mm->max_order; ++tmp) {
928 		block = gpu_buddy_find_block_aligned(mm, tree, tmp,
929 						     alignment, flags);
930 		if (!block) {
931 			tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
932 				GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
933 			block = gpu_buddy_find_block_aligned(mm, tree, tmp,
934 							     alignment, flags);
935 		}
936 
937 		if (block)
938 			break;
939 	}
940 
941 	if (!block)
942 		return ERR_PTR(-ENOSPC);
943 
944 	while (gpu_buddy_block_order(block) > order) {
945 		struct gpu_buddy_block *left, *right;
946 
947 		err = split_block(mm, block);
948 		if (unlikely(err))
949 			goto err_undo;
950 
951 		left  = block->left;
952 		right = block->right;
953 
954 		if (gpu_buddy_block_offset_alignment(right) >= alignment)
955 			block = right;
956 		else
957 			block = left;
958 	}
959 
960 	return block;
961 
962 err_undo:
963 	/*
964 	 * We really don't want to leave around a bunch of split blocks, since
965 	 * bigger is better, so make sure we merge everything back before we
966 	 * free the allocated blocks.
967 	 */
968 	buddy = __get_buddy(block);
969 	if (buddy &&
970 	    (gpu_buddy_block_is_free(block) &&
971 	     gpu_buddy_block_is_free(buddy)))
972 		__gpu_buddy_free(mm, block, false);
973 	return ERR_PTR(err);
974 }
975 
976 static int __alloc_range(struct gpu_buddy *mm,
977 			 struct list_head *dfs,
978 			 u64 start, u64 size,
979 			 struct list_head *blocks,
980 			 u64 *total_allocated_on_err)
981 {
982 	struct gpu_buddy_block *block;
983 	struct gpu_buddy_block *buddy;
984 	u64 total_allocated = 0;
985 	LIST_HEAD(allocated);
986 	u64 end;
987 	int err;
988 
989 	end = start + size - 1;
990 
991 	do {
992 		u64 block_start;
993 		u64 block_end;
994 
995 		block = list_first_entry_or_null(dfs,
996 						 struct gpu_buddy_block,
997 						 tmp_link);
998 		if (!block)
999 			break;
1000 
1001 		list_del(&block->tmp_link);
1002 
1003 		block_start = gpu_buddy_block_offset(block);
1004 		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
1005 
1006 		if (!overlaps(start, end, block_start, block_end))
1007 			continue;
1008 
1009 		if (gpu_buddy_block_is_allocated(block)) {
1010 			err = -ENOSPC;
1011 			goto err_free;
1012 		}
1013 
1014 		if (contains(start, end, block_start, block_end)) {
1015 			if (gpu_buddy_block_is_free(block)) {
1016 				mark_allocated(mm, block);
1017 				total_allocated += gpu_buddy_block_size(mm, block);
1018 				mm->avail -= gpu_buddy_block_size(mm, block);
1019 				if (gpu_buddy_block_is_clear(block))
1020 					mm->clear_avail -= gpu_buddy_block_size(mm, block);
1021 				list_add_tail(&block->link, &allocated);
1022 				continue;
1023 			} else if (!mm->clear_avail) {
1024 				err = -ENOSPC;
1025 				goto err_free;
1026 			}
1027 		}
1028 
1029 		if (!gpu_buddy_block_is_split(block)) {
1030 			err = split_block(mm, block);
1031 			if (unlikely(err))
1032 				goto err_undo;
1033 		}
1034 
1035 		list_add(&block->right->tmp_link, dfs);
1036 		list_add(&block->left->tmp_link, dfs);
1037 	} while (1);
1038 
1039 	if (total_allocated < size) {
1040 		err = -ENOSPC;
1041 		goto err_free;
1042 	}
1043 
1044 	list_splice_tail(&allocated, blocks);
1045 
1046 	return 0;
1047 
1048 err_undo:
1049 	/*
1050 	 * We really don't want to leave around a bunch of split blocks, since
1051 	 * bigger is better, so make sure we merge everything back before we
1052 	 * free the allocated blocks.
1053 	 */
1054 	buddy = __get_buddy(block);
1055 	if (buddy &&
1056 	    (gpu_buddy_block_is_free(block) &&
1057 	     gpu_buddy_block_is_free(buddy)))
1058 		__gpu_buddy_free(mm, block, false);
1059 
1060 err_free:
1061 	if (err == -ENOSPC && total_allocated_on_err) {
1062 		list_splice_tail(&allocated, blocks);
1063 		*total_allocated_on_err = total_allocated;
1064 	} else {
1065 		gpu_buddy_free_list_internal(mm, &allocated);
1066 	}
1067 
1068 	return err;
1069 }
1070 
1071 static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
1072 				   u64 start,
1073 				   u64 size,
1074 				   u64 *total_allocated_on_err,
1075 				   struct list_head *blocks)
1076 {
1077 	LIST_HEAD(dfs);
1078 	int i;
1079 
1080 	for (i = 0; i < mm->n_roots; ++i)
1081 		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
1082 
1083 	return __alloc_range(mm, &dfs, start, size,
1084 			     blocks, total_allocated_on_err);
1085 }
1086 
1087 static int __alloc_contig_try_harder(struct gpu_buddy *mm,
1088 				     u64 size,
1089 				     u64 min_block_size,
1090 				     struct list_head *blocks)
1091 {
1092 	u64 rhs_offset, lhs_offset, lhs_size, filled;
1093 	struct gpu_buddy_block *block;
1094 	unsigned int tree, order;
1095 	LIST_HEAD(blocks_lhs);
1096 	unsigned long pages;
1097 	u64 modify_size;
1098 	int err;
1099 
1100 	modify_size = rounddown_pow_of_two(size);
1101 	pages = modify_size >> ilog2(mm->chunk_size);
1102 	order = fls(pages) - 1;
1103 	if (order == 0)
1104 		return -ENOSPC;
1105 
1106 	for_each_free_tree(tree) {
1107 		struct rb_root *root;
1108 		struct rb_node *iter;
1109 
1110 		root = &mm->free_trees[tree][order];
1111 		if (rbtree_is_empty(root))
1112 			continue;
1113 
1114 		iter = rb_last(root);
1115 		while (iter) {
1116 			block = rbtree_get_free_block(iter);
1117 
1118 			/* Allocate blocks traversing RHS */
1119 			rhs_offset = gpu_buddy_block_offset(block);
1120 			err =  __gpu_buddy_alloc_range(mm, rhs_offset, size,
1121 						       &filled, blocks);
1122 			if (!err || err != -ENOSPC)
1123 				return err;
1124 
1125 			lhs_size = max((size - filled), min_block_size);
1126 			if (!IS_ALIGNED(lhs_size, min_block_size))
1127 				lhs_size = round_up(lhs_size, min_block_size);
1128 
1129 			/* Allocate blocks traversing LHS */
1130 			lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
1131 			err =  __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
1132 						       NULL, &blocks_lhs);
1133 			if (!err) {
1134 				list_splice(&blocks_lhs, blocks);
1135 				return 0;
1136 			} else if (err != -ENOSPC) {
1137 				gpu_buddy_free_list_internal(mm, blocks);
1138 				return err;
1139 			}
1140 			/* Free blocks for the next iteration */
1141 			gpu_buddy_free_list_internal(mm, blocks);
1142 
1143 			iter = rb_prev(iter);
1144 		}
1145 	}
1146 
1147 	return -ENOSPC;
1148 }
1149 
1150 /**
1151  * gpu_buddy_block_trim - free unused pages
1152  *
1153  * @mm: GPU buddy manager
1154  * @start: start address to begin the trimming.
1155  * @new_size: original size requested
1156  * @blocks: Input and output list of allocated blocks.
1157  * MUST contain single block as input to be trimmed.
1158  * On success will contain the newly allocated blocks
1159  * making up the @new_size. Blocks always appear in
1160  * ascending order
1161  *
1162  * For contiguous allocation, we round up the size to the nearest
1163  * power of two value, drivers consume *actual* size, so remaining
1164  * portions are unused and can be optionally freed with this function
1165  *
1166  * Returns:
1167  * 0 on success, error code on failure.
1168  */
1169 int gpu_buddy_block_trim(struct gpu_buddy *mm,
1170 			 u64 *start,
1171 			 u64 new_size,
1172 			 struct list_head *blocks)
1173 {
1174 	struct gpu_buddy_block *parent;
1175 	struct gpu_buddy_block *block;
1176 	u64 block_start, block_end;
1177 	LIST_HEAD(dfs);
1178 	u64 new_start;
1179 	int err;
1180 
1181 	gpu_buddy_driver_lock_held(mm);
1182 
1183 	if (!list_is_singular(blocks))
1184 		return -EINVAL;
1185 
1186 	block = list_first_entry(blocks,
1187 				 struct gpu_buddy_block,
1188 				 link);
1189 
1190 	block_start = gpu_buddy_block_offset(block);
1191 	block_end = block_start + gpu_buddy_block_size(mm, block);
1192 
1193 	if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
1194 		return -EINVAL;
1195 
1196 	if (new_size > gpu_buddy_block_size(mm, block))
1197 		return -EINVAL;
1198 
1199 	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
1200 		return -EINVAL;
1201 
1202 	if (new_size == gpu_buddy_block_size(mm, block))
1203 		return 0;
1204 
1205 	new_start = block_start;
1206 	if (start) {
1207 		new_start = *start;
1208 
1209 		if (new_start < block_start)
1210 			return -EINVAL;
1211 
1212 		if (!IS_ALIGNED(new_start, mm->chunk_size))
1213 			return -EINVAL;
1214 
1215 		if (range_overflows(new_start, new_size, block_end))
1216 			return -EINVAL;
1217 	}
1218 
1219 	list_del(&block->link);
1220 	mark_free(mm, block);
1221 	mm->avail += gpu_buddy_block_size(mm, block);
1222 	if (gpu_buddy_block_is_clear(block))
1223 		mm->clear_avail += gpu_buddy_block_size(mm, block);
1224 
1225 	/* Prevent recursively freeing this node */
1226 	parent = block->parent;
1227 	block->parent = NULL;
1228 
1229 	list_add(&block->tmp_link, &dfs);
1230 	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
1231 	if (err) {
1232 		mark_allocated(mm, block);
1233 		mm->avail -= gpu_buddy_block_size(mm, block);
1234 		if (gpu_buddy_block_is_clear(block))
1235 			mm->clear_avail -= gpu_buddy_block_size(mm, block);
1236 		list_add(&block->link, blocks);
1237 	}
1238 
1239 	block->parent = parent;
1240 	return err;
1241 }
1242 EXPORT_SYMBOL(gpu_buddy_block_trim);
1243 
1244 static struct gpu_buddy_block *
1245 __gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
1246 			 u64 start, u64 end,
1247 			 u64 size, u64 min_block_size,
1248 			 unsigned int order,
1249 			 unsigned long flags)
1250 {
1251 	if (flags & GPU_BUDDY_RANGE_ALLOCATION)
1252 		/* Allocate traversing within the range */
1253 		return  __gpu_buddy_alloc_range_bias(mm, start, end,
1254 						     order, flags);
1255 	else if (size < min_block_size)
1256 		/* Allocate from an offset-aligned region without size rounding */
1257 		return gpu_buddy_offset_aligned_allocation(mm, size,
1258 							   min_block_size,
1259 							   flags);
1260 	else
1261 		/* Allocate from freetree */
1262 		return alloc_from_freetree(mm, order, flags);
1263 }
1264 
1265 /**
1266  * gpu_buddy_alloc_blocks - allocate power-of-two blocks
1267  *
1268  * @mm: GPU buddy manager to allocate from
1269  * @start: start of the allowed range for this block
1270  * @end: end of the allowed range for this block
1271  * @size: size of the allocation in bytes
1272  * @min_block_size: alignment of the allocation
1273  * @blocks: output list head to add allocated blocks
1274  * @flags: GPU_BUDDY_*_ALLOCATION flags
1275  *
1276  * alloc_range_bias() called on range limitations, which traverses
1277  * the tree and returns the desired block.
1278  *
1279  * alloc_from_freetree() called when *no* range restrictions
1280  * are enforced, which picks the block from the freetree.
1281  *
1282  * Returns:
1283  * 0 on success, error code on failure.
1284  */
1285 int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
1286 			   u64 start, u64 end, u64 size,
1287 			   u64 min_block_size,
1288 			   struct list_head *blocks,
1289 			   unsigned long flags)
1290 {
1291 	struct gpu_buddy_block *block = NULL;
1292 	u64 original_size, original_min_size;
1293 	unsigned int min_order, order;
1294 	LIST_HEAD(allocated);
1295 	unsigned long pages;
1296 	int err;
1297 
1298 	gpu_buddy_driver_lock_held(mm);
1299 
1300 	if (size < mm->chunk_size)
1301 		return -EINVAL;
1302 
1303 	if (min_block_size < mm->chunk_size)
1304 		return -EINVAL;
1305 
1306 	if (!is_power_of_2(min_block_size))
1307 		return -EINVAL;
1308 
1309 	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
1310 		return -EINVAL;
1311 
1312 	if (end > mm->size)
1313 		return -EINVAL;
1314 
1315 	if (range_overflows(start, size, mm->size))
1316 		return -EINVAL;
1317 
1318 	/* Actual range allocation */
1319 	if (start + size == end) {
1320 		if (!IS_ALIGNED(start | end, min_block_size))
1321 			return -EINVAL;
1322 
1323 		return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
1324 	}
1325 
1326 	original_size = size;
1327 	original_min_size = min_block_size;
1328 
1329 	/* Roundup the size to power of 2 */
1330 	if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
1331 		size = roundup_pow_of_two(size);
1332 		min_block_size = size;
1333 		/*
1334 		 * Normalize the requested size to min_block_size for regular allocations.
1335 		 * Offset-aligned allocations intentionally skip size rounding.
1336 		 */
1337 	} else if (!gpu_buddy_can_offset_align(size, min_block_size)) {
1338 		size = round_up(size, min_block_size);
1339 	}
1340 
1341 	pages = size >> ilog2(mm->chunk_size);
1342 	order = fls(pages) - 1;
1343 	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
1344 
1345 	if (order > mm->max_order || size > mm->size) {
1346 		if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
1347 		    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
1348 			return __alloc_contig_try_harder(mm, original_size,
1349 							 original_min_size, blocks);
1350 
1351 		return -EINVAL;
1352 	}
1353 
1354 	do {
1355 		order = min(order, (unsigned int)fls(pages) - 1);
1356 		BUG_ON(order > mm->max_order);
1357 		/*
1358 		 * Regular allocations must not allocate blocks smaller than min_block_size.
1359 		 * Offset-aligned allocations deliberately bypass this constraint.
1360 		 */
1361 		BUG_ON(size >= min_block_size && order < min_order);
1362 
1363 		do {
1364 			unsigned int fallback_order;
1365 
1366 			block = __gpu_buddy_alloc_blocks(mm, start,
1367 							 end,
1368 							 size,
1369 							 min_block_size,
1370 							 order,
1371 							 flags);
1372 			if (!IS_ERR(block))
1373 				break;
1374 
1375 			if (size < min_block_size) {
1376 				fallback_order = order;
1377 			} else if (order == min_order) {
1378 				fallback_order = min_order;
1379 			} else {
1380 				order--;
1381 				continue;
1382 			}
1383 
1384 			/* Try allocation through force merge method */
1385 			if (mm->clear_avail &&
1386 			    !__force_merge(mm, start, end, fallback_order)) {
1387 				block = __gpu_buddy_alloc_blocks(mm, start,
1388 								 end,
1389 								 size,
1390 								 min_block_size,
1391 								 fallback_order,
1392 								 flags);
1393 				if (!IS_ERR(block)) {
1394 					order = fallback_order;
1395 					break;
1396 				}
1397 			}
1398 
1399 			/*
1400 			 * Try contiguous block allocation through
1401 			 * try harder method.
1402 			 */
1403 			if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
1404 			    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
1405 				return __alloc_contig_try_harder(mm,
1406 								 original_size,
1407 								 original_min_size,
1408 								 blocks);
1409 			err = -ENOSPC;
1410 			goto err_free;
1411 		} while (1);
1412 
1413 		mark_allocated(mm, block);
1414 		mm->avail -= gpu_buddy_block_size(mm, block);
1415 		if (gpu_buddy_block_is_clear(block))
1416 			mm->clear_avail -= gpu_buddy_block_size(mm, block);
1417 		kmemleak_update_trace(block);
1418 		list_add_tail(&block->link, &allocated);
1419 
1420 		pages -= BIT(order);
1421 
1422 		if (!pages)
1423 			break;
1424 	} while (1);
1425 
1426 	/* Trim the allocated block to the required size */
1427 	if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
1428 	    original_size != size) {
1429 		struct list_head *trim_list;
1430 		LIST_HEAD(temp);
1431 		u64 trim_size;
1432 
1433 		trim_list = &allocated;
1434 		trim_size = original_size;
1435 
1436 		if (!list_is_singular(&allocated)) {
1437 			block = list_last_entry(&allocated, typeof(*block), link);
1438 			list_move(&block->link, &temp);
1439 			trim_list = &temp;
1440 			trim_size = gpu_buddy_block_size(mm, block) -
1441 				(size - original_size);
1442 		}
1443 
1444 		gpu_buddy_block_trim(mm,
1445 				     NULL,
1446 				     trim_size,
1447 				     trim_list);
1448 
1449 		if (!list_empty(&temp))
1450 			list_splice_tail(trim_list, &allocated);
1451 	}
1452 
1453 	list_splice_tail(&allocated, blocks);
1454 	return 0;
1455 
1456 err_free:
1457 	gpu_buddy_free_list_internal(mm, &allocated);
1458 	return err;
1459 }
1460 EXPORT_SYMBOL(gpu_buddy_alloc_blocks);
1461 
1462 /**
1463  * gpu_buddy_block_print - print block information
1464  *
1465  * @mm: GPU buddy manager
1466  * @block: GPU buddy block
1467  */
1468 void gpu_buddy_block_print(struct gpu_buddy *mm,
1469 			   struct gpu_buddy_block *block)
1470 {
1471 	u64 start = gpu_buddy_block_offset(block);
1472 	u64 size = gpu_buddy_block_size(mm, block);
1473 
1474 	pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
1475 }
1476 EXPORT_SYMBOL(gpu_buddy_block_print);
1477 
1478 /**
1479  * gpu_buddy_print - print allocator state
1480  *
1481  * @mm: GPU buddy manager
1482  * @p: GPU printer to use
1483  */
1484 void gpu_buddy_print(struct gpu_buddy *mm)
1485 {
1486 	int order;
1487 
1488 	gpu_buddy_driver_lock_held(mm);
1489 	pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
1490 		mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
1491 
1492 	for (order = mm->max_order; order >= 0; order--) {
1493 		struct gpu_buddy_block *block, *tmp;
1494 		struct rb_root *root;
1495 		u64 count = 0, free;
1496 		unsigned int tree;
1497 
1498 		for_each_free_tree(tree) {
1499 			root = &mm->free_trees[tree][order];
1500 
1501 			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
1502 				BUG_ON(!gpu_buddy_block_is_free(block));
1503 				count++;
1504 			}
1505 		}
1506 
1507 		free = count * (mm->chunk_size << order);
1508 		if (free < SZ_1M)
1509 			pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
1510 				order, free >> 10, count);
1511 		else
1512 			pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
1513 				order, free >> 20, count);
1514 	}
1515 }
1516 EXPORT_SYMBOL(gpu_buddy_print);
1517 
1518 static void gpu_buddy_module_exit(void)
1519 {
1520 	kmem_cache_destroy(slab_blocks);
1521 }
1522 
1523 static int __init gpu_buddy_module_init(void)
1524 {
1525 	slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
1526 	if (!slab_blocks)
1527 		return -ENOMEM;
1528 
1529 	return 0;
1530 }
1531 
1532 module_init(gpu_buddy_module_init);
1533 module_exit(gpu_buddy_module_exit);
1534 
1535 MODULE_DESCRIPTION("GPU Buddy Allocator");
1536 MODULE_LICENSE("Dual MIT/GPL");
1537