// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <linux/bug.h>
#include <linux/export.h>
#include <linux/kmemleak.h>
#include <linux/module.h>
#include <linux/sizes.h>

#include <linux/gpu_buddy.h>

/**
 * gpu_buddy_assert - assert a condition in the buddy allocator
 * @condition: condition expected to be true
 *
 * When CONFIG_KUNIT is enabled, evaluates @condition and, if false, triggers
 * a WARN_ON() and also calls kunit_fail_current_test() so that any running
 * kunit test is properly marked as failed. The stringified condition is
 * included in the failure message for easy identification.
 *
 * When CONFIG_KUNIT is not enabled, this reduces to WARN_ON() so production
 * builds retain the same warning semantics as before.
 */
#if IS_ENABLED(CONFIG_KUNIT)
#include <kunit/test-bug.h>
#define gpu_buddy_assert(condition) do {					\
	if (WARN_ON(!(condition)))						\
		kunit_fail_current_test("gpu_buddy_assert(" #condition ")");	\
} while (0)
#else
#define gpu_buddy_assert(condition) WARN_ON(!(condition))
#endif

static struct kmem_cache *slab_blocks;

static unsigned int
gpu_buddy_block_state(struct gpu_buddy_block *block)
{
	return block->header & GPU_BUDDY_HEADER_STATE;
}

static bool
gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
}

static bool
gpu_buddy_block_is_split(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
}

static unsigned int gpu_buddy_block_offset_alignment(struct gpu_buddy_block *block)
{
	u64 offset = gpu_buddy_block_offset(block);

	if (!offset)
		/*
		 * __ffs64(0) is undefined; offset 0 is maximally aligned, so return
		 * a value greater than any possible alignment.
		 */
		return 64 + 1;

	return __ffs64(offset);
}

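/*
 * Augmented rbtree callbacks: each node caches in ->subtree_max_alignment
 * the maximum gpu_buddy_block_offset_alignment() found in its subtree, so
 * aligned lookups can skip entire subtrees that cannot satisfy a request.
 * For example, a block at offset 0x6000 (3 << 13) reports an alignment of
 * 13, i.e. naturally aligned to 8 KiB but not to 16 KiB.
 */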
RB_DECLARE_CALLBACKS_MAX(static, gpu_buddy_augment_cb,
			 struct gpu_buddy_block, rb,
			 unsigned int, subtree_max_alignment,
			 gpu_buddy_block_offset_alignment);

static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
					       struct gpu_buddy_block *parent,
					       unsigned int order,
					       u64 offset)
{
	struct gpu_buddy_block *block;

	BUG_ON(order > GPU_BUDDY_MAX_ORDER);

	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
	if (!block)
		return NULL;

	block->header = offset;
	block->header |= order;
	block->parent = parent;

	RB_CLEAR_NODE(&block->rb);

	BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
	return block;
}

static void gpu_block_free(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	kmem_cache_free(slab_blocks, block);
}

static enum gpu_buddy_free_tree
get_block_tree(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_is_clear(block) ?
	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
}

static struct gpu_buddy_block *
rbtree_get_free_block(const struct rb_node *node)
{
	return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
}

static struct gpu_buddy_block *
rbtree_last_free_block(struct rb_root *root)
{
	return rbtree_get_free_block(rb_last(root));
}

static bool rbtree_is_empty(struct rb_root *root)
{
	return RB_EMPTY_ROOT(root);
}

static void rbtree_insert(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block,
			  enum gpu_buddy_free_tree tree)
{
	struct rb_node **link, *parent = NULL;
	unsigned int block_alignment, order;
	struct gpu_buddy_block *node;
	struct rb_root *root;

	order = gpu_buddy_block_order(block);
	block_alignment = gpu_buddy_block_offset_alignment(block);

	root = &mm->free_trees[tree][order];
	link = &root->rb_node;

	while (*link) {
		parent = *link;
		node = rbtree_get_free_block(parent);
		/*
		 * Manually update the augmented value while walking the
		 * insertion path: rb_insert_augmented() only invokes the
		 * rotate callback during rotations, so this is what keeps
		 * subtree_max_alignment correct for every ancestor of the
		 * new node.
		 */
		if (node->subtree_max_alignment < block_alignment)
			node->subtree_max_alignment = block_alignment;

		if (gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node))
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	block->subtree_max_alignment = block_alignment;
	rb_link_node(&block->rb, parent, link);
	rb_insert_augmented(&block->rb, root, &gpu_buddy_augment_cb);
}

static void rbtree_remove(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block)
{
	unsigned int order = gpu_buddy_block_order(block);
	enum gpu_buddy_free_tree tree;
	struct rb_root *root;

	tree = get_block_tree(block);
	root = &mm->free_trees[tree][order];

	rb_erase_augmented(&block->rb, root, &gpu_buddy_augment_cb);
	RB_CLEAR_NODE(&block->rb);
}

static void clear_reset(struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_CLEAR;
}

static void mark_cleared(struct gpu_buddy_block *block)
{
	block->header |= GPU_BUDDY_HEADER_CLEAR;
}

static void mark_allocated(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_ALLOCATED;

	rbtree_remove(mm, block);
}

static void mark_free(struct gpu_buddy *mm,
		      struct gpu_buddy_block *block)
{
	enum gpu_buddy_free_tree tree;

	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_FREE;

	tree = get_block_tree(block);
	rbtree_insert(mm, block, tree);
}

static void mark_split(struct gpu_buddy *mm,
		       struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_SPLIT;

	rbtree_remove(mm, block);
}

static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
{
	return s1 <= e2 && e1 >= s2;
}

static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
{
	return s1 <= s2 && e1 >= e2;
}

static struct gpu_buddy_block *
__get_buddy(struct gpu_buddy_block *block)
{
	struct gpu_buddy_block *parent;

	parent = block->parent;
	if (!parent)
		return NULL;

	if (parent->left == block)
		return parent->right;

	return parent->left;
}

static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
				     struct gpu_buddy_block *block,
				     bool force_merge)
{
	struct gpu_buddy_block *parent;
	unsigned int order;

	while ((parent = block->parent)) {
		struct gpu_buddy_block *buddy;

		buddy = __get_buddy(block);

		if (!gpu_buddy_block_is_free(buddy))
			break;

		if (!force_merge) {
			/*
			 * Compare the clear state of the block and its
			 * buddy, and stop merging if the two differ.
			 */
			if (gpu_buddy_block_is_clear(block) !=
			    gpu_buddy_block_is_clear(buddy))
				break;

			if (gpu_buddy_block_is_clear(block))
				mark_cleared(parent);
		}

		rbtree_remove(mm, buddy);
		if (force_merge && gpu_buddy_block_is_clear(buddy))
			mm->clear_avail -= gpu_buddy_block_size(mm, buddy);

		gpu_block_free(mm, block);
		gpu_block_free(mm, buddy);

		block = parent;
	}

	order = gpu_buddy_block_order(block);
	mark_free(mm, block);

	return order;
}

static int __force_merge(struct gpu_buddy *mm,
			 u64 start,
			 u64 end,
			 unsigned int min_order)
{
	unsigned int tree, order;
	int i;

	if (!min_order)
		return -ENOMEM;

	if (min_order > mm->max_order)
		return -EINVAL;

	for_each_free_tree(tree) {
		for (i = min_order - 1; i >= 0; i--) {
			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);

			while (iter) {
				struct gpu_buddy_block *block, *buddy;
				u64 block_start, block_end;

				block = rbtree_get_free_block(iter);
				iter = rb_prev(iter);

				if (!block || !block->parent)
					continue;

				block_start = gpu_buddy_block_offset(block);
				block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

				if (!contains(start, end, block_start, block_end))
					continue;

				buddy = __get_buddy(block);
				if (!gpu_buddy_block_is_free(buddy))
					continue;

				gpu_buddy_assert(gpu_buddy_block_is_clear(block) !=
						 gpu_buddy_block_is_clear(buddy));

				/*
				 * Advance to the next node when the current node is the buddy,
				 * as freeing the block will also remove its buddy from the tree.
				 */
				if (iter == &buddy->rb)
					iter = rb_prev(iter);

				rbtree_remove(mm, block);
				if (gpu_buddy_block_is_clear(block))
					mm->clear_avail -= gpu_buddy_block_size(mm, block);

				order = __gpu_buddy_free(mm, block, true);
				if (order >= min_order)
					return 0;
			}
		}
	}

	return -ENOMEM;
}

/**
 * gpu_buddy_init - init memory manager
 *
 * @mm: GPU buddy manager to initialize
 * @size: size in bytes to manage
 * @chunk_size: minimum page size in bytes for our allocations
 *
 * Initializes the memory manager and its resources.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
{
	unsigned int i, j, root_count = 0;
	u64 offset = 0;

	if (size < chunk_size)
		return -EINVAL;

	if (chunk_size < SZ_4K)
		return -EINVAL;

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	size = round_down(size, chunk_size);

	mm->size = size;
	mm->avail = size;
	mm->clear_avail = 0;
	mm->chunk_size = chunk_size;
	mm->max_order = ilog2(size) - ilog2(chunk_size);

	BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);

	mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
				       sizeof(*mm->free_trees),
				       GFP_KERNEL);
	if (!mm->free_trees)
		return -ENOMEM;

	for_each_free_tree(i) {
		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
						  sizeof(struct rb_root),
						  GFP_KERNEL);
		if (!mm->free_trees[i])
			goto out_free_tree;

		for (j = 0; j <= mm->max_order; ++j)
			mm->free_trees[i][j] = RB_ROOT;
	}

	mm->n_roots = hweight64(size);

	mm->roots = kmalloc_array(mm->n_roots,
				  sizeof(struct gpu_buddy_block *),
				  GFP_KERNEL);
	if (!mm->roots)
		goto out_free_tree;

	/*
	 * Split into power-of-two blocks, in case we are given a size that is
	 * not itself a power-of-two.
	 */
	do {
		struct gpu_buddy_block *root;
		unsigned int order;
		u64 root_size;

		order = ilog2(size) - ilog2(chunk_size);
		root_size = chunk_size << order;

		root = gpu_block_alloc(mm, NULL, order, offset);
		if (!root)
			goto out_free_roots;

		mark_free(mm, root);

		BUG_ON(root_count > mm->max_order);
		BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);

		mm->roots[root_count] = root;

		offset += root_size;
		size -= root_size;
		root_count++;
	} while (size);

	return 0;

out_free_roots:
	while (root_count--)
		gpu_block_free(mm, mm->roots[root_count]);
	kfree(mm->roots);
out_free_tree:
	while (i--)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	return -ENOMEM;
}
EXPORT_SYMBOL(gpu_buddy_init);
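
/*
 * Minimal usage sketch (illustrative only; the 1 GiB size and 4 KiB chunk
 * size are hypothetical, not requirements):
 *
 *	struct gpu_buddy mm;
 *	int err;
 *
 *	err = gpu_buddy_init(&mm, SZ_1G, SZ_4K);
 *	if (err)
 *		return err;
 *	...
 *	gpu_buddy_fini(&mm);
 */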

/**
 * gpu_buddy_fini - tear down the memory manager
 *
 * @mm: GPU buddy manager to free
 *
 * Clean up the memory manager resources and the freetree.
 */
void gpu_buddy_fini(struct gpu_buddy *mm)
{
	u64 root_size, size, start;
	unsigned int order;
	int i;

	size = mm->size;

	for (i = 0; i < mm->n_roots; ++i) {
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		__force_merge(mm, start, start + size, order);

		gpu_buddy_assert(gpu_buddy_block_is_free(mm->roots[i]));

		gpu_block_free(mm, mm->roots[i]);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	gpu_buddy_assert(mm->avail == mm->size);

	for_each_free_tree(i)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	kfree(mm->roots);
}
EXPORT_SYMBOL(gpu_buddy_fini);

static int split_block(struct gpu_buddy *mm,
		       struct gpu_buddy_block *block)
{
	unsigned int block_order = gpu_buddy_block_order(block) - 1;
	u64 offset = gpu_buddy_block_offset(block);

	BUG_ON(!gpu_buddy_block_is_free(block));
	BUG_ON(!gpu_buddy_block_order(block));

	block->left = gpu_block_alloc(mm, block, block_order, offset);
	if (!block->left)
		return -ENOMEM;

	block->right = gpu_block_alloc(mm, block, block_order,
				       offset + (mm->chunk_size << block_order));
	if (!block->right) {
		gpu_block_free(mm, block->left);
		return -ENOMEM;
	}

	mark_split(mm, block);

	if (gpu_buddy_block_is_clear(block)) {
		mark_cleared(block->left);
		mark_cleared(block->right);
		clear_reset(block);
	}

	mark_free(mm, block->left);
	mark_free(mm, block->right);

	return 0;
}

/**
 * gpu_buddy_reset_clear - reset blocks clear state
 *
 * @mm: GPU buddy manager
 * @is_clear: blocks clear state
 *
 * Reset the clear state based on @is_clear value for each block
 * in the freetree.
 */
void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
{
	enum gpu_buddy_free_tree src_tree, dst_tree;
	u64 root_size, size, start;
	unsigned int order;
	int i;

	size = mm->size;
	for (i = 0; i < mm->n_roots; ++i) {
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		__force_merge(mm, start, start + size, order);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
	dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	for (i = 0; i <= mm->max_order; ++i) {
		struct rb_root *root = &mm->free_trees[src_tree][i];
		struct gpu_buddy_block *block, *tmp;

		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
			rbtree_remove(mm, block);
			if (is_clear) {
				mark_cleared(block);
				mm->clear_avail += gpu_buddy_block_size(mm, block);
			} else {
				clear_reset(block);
				mm->clear_avail -= gpu_buddy_block_size(mm, block);
			}

			rbtree_insert(mm, block, dst_tree);
		}
	}
}
EXPORT_SYMBOL(gpu_buddy_reset_clear);
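
/*
 * Illustrative policy note: a driver might call
 * gpu_buddy_reset_clear(mm, false) when free memory can no longer be
 * assumed zeroed (e.g. after resume from suspend), and
 * gpu_buddy_reset_clear(mm, true) after scrubbing the whole managed
 * region. When to do so is driver policy, not mandated here.
 */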

/**
 * gpu_buddy_free_block - free a block
 *
 * @mm: GPU buddy manager
 * @block: block to be freed
 */
void gpu_buddy_free_block(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block)
{
	BUG_ON(!gpu_buddy_block_is_allocated(block));
	mm->avail += gpu_buddy_block_size(mm, block);
	if (gpu_buddy_block_is_clear(block))
		mm->clear_avail += gpu_buddy_block_size(mm, block);

	__gpu_buddy_free(mm, block, false);
}
EXPORT_SYMBOL(gpu_buddy_free_block);

static void __gpu_buddy_free_list(struct gpu_buddy *mm,
				  struct list_head *objects,
				  bool mark_clear,
				  bool mark_dirty)
{
	struct gpu_buddy_block *block, *on;

	gpu_buddy_assert(!(mark_dirty && mark_clear));

	list_for_each_entry_safe(block, on, objects, link) {
		if (mark_clear)
			mark_cleared(block);
		else if (mark_dirty)
			clear_reset(block);
		gpu_buddy_free_block(mm, block);
		cond_resched();
	}
	INIT_LIST_HEAD(objects);
}

static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
					 struct list_head *objects)
{
	/*
	 * Don't touch the clear/dirty bit, since allocation is still internal
	 * at this point. For example we might have just failed part of the
	 * allocation.
	 */
	__gpu_buddy_free_list(mm, objects, false, false);
}

/**
 * gpu_buddy_free_list - free blocks
 *
 * @mm: GPU buddy manager
 * @objects: input list head to free blocks
 * @flags: optional flags like GPU_BUDDY_CLEARED
 */
void gpu_buddy_free_list(struct gpu_buddy *mm,
			 struct list_head *objects,
			 unsigned int flags)
{
	bool mark_clear = flags & GPU_BUDDY_CLEARED;

	__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
}
EXPORT_SYMBOL(gpu_buddy_free_list);

static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
{
	bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;

	return needs_clear != gpu_buddy_block_is_clear(block);
}

static struct gpu_buddy_block *
__alloc_range_bias(struct gpu_buddy *mm,
		   u64 start, u64 end,
		   unsigned int order,
		   unsigned long flags,
		   bool fallback)
{
	u64 req_size = mm->chunk_size << order;
	struct gpu_buddy_block *block;
	struct gpu_buddy_block *buddy;
	LIST_HEAD(dfs);
	int err;
	int i;

	end = end - 1;

	for (i = 0; i < mm->n_roots; ++i)
		list_add_tail(&mm->roots[i]->tmp_link, &dfs);

	do {
		u64 block_start;
		u64 block_end;

		block = list_first_entry_or_null(&dfs,
						 struct gpu_buddy_block,
						 tmp_link);
		if (!block)
			break;

		list_del(&block->tmp_link);

		if (gpu_buddy_block_order(block) < order)
			continue;

		block_start = gpu_buddy_block_offset(block);
		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

		if (!overlaps(start, end, block_start, block_end))
			continue;

		if (gpu_buddy_block_is_allocated(block))
			continue;

		if (block_start < start || block_end > end) {
			u64 adjusted_start = max(block_start, start);
			u64 adjusted_end = min(block_end, end);

			if (round_down(adjusted_end + 1, req_size) <=
			    round_up(adjusted_start, req_size))
				continue;
		}

		if (!fallback && block_incompatible(block, flags))
			continue;

		if (contains(start, end, block_start, block_end) &&
		    order == gpu_buddy_block_order(block)) {
			/*
			 * Find the free block within the range.
			 */
			if (gpu_buddy_block_is_free(block))
				return block;

			continue;
		}

		if (!gpu_buddy_block_is_split(block)) {
			err = split_block(mm, block);
			if (unlikely(err))
				goto err_undo;
		}

		list_add(&block->right->tmp_link, &dfs);
		list_add(&block->left->tmp_link, &dfs);
	} while (1);

	return ERR_PTR(-ENOSPC);

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}

static struct gpu_buddy_block *
__gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
			     u64 start, u64 end,
			     unsigned int order,
			     unsigned long flags)
{
	struct gpu_buddy_block *block;
	bool fallback = false;

	block = __alloc_range_bias(mm, start, end, order,
				   flags, fallback);
	if (IS_ERR(block))
		return __alloc_range_bias(mm, start, end, order,
					  flags, !fallback);

	return block;
}

static struct gpu_buddy_block *
get_maxblock(struct gpu_buddy *mm,
	     unsigned int order,
	     enum gpu_buddy_free_tree tree)
{
	struct gpu_buddy_block *max_block = NULL, *block = NULL;
	struct rb_root *root;
	unsigned int i;

	for (i = order; i <= mm->max_order; ++i) {
		root = &mm->free_trees[tree][i];
		block = rbtree_last_free_block(root);
		if (!block)
			continue;

		if (!max_block) {
			max_block = block;
			continue;
		}

		if (gpu_buddy_block_offset(block) >
		    gpu_buddy_block_offset(max_block)) {
			max_block = block;
		}
	}

	return max_block;
}

static struct gpu_buddy_block *
alloc_from_freetree(struct gpu_buddy *mm,
		    unsigned int order,
		    unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	struct rb_root *root;
	enum gpu_buddy_free_tree tree;
	unsigned int tmp;
	int err;

	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
		block = get_maxblock(mm, order, tree);
		if (block)
			/* Store the obtained block order */
			tmp = gpu_buddy_block_order(block);
	} else {
		for (tmp = order; tmp <= mm->max_order; ++tmp) {
			/* Get RB tree root for this order and tree */
			root = &mm->free_trees[tree][tmp];
			block = rbtree_last_free_block(root);
			if (block)
				break;
		}
	}

	if (!block) {
		/* Try allocating from the other tree */
		tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
			GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;

		for (tmp = order; tmp <= mm->max_order; ++tmp) {
			root = &mm->free_trees[tree][tmp];
			block = rbtree_last_free_block(root);
			if (block)
				break;
		}

		if (!block)
			return ERR_PTR(-ENOSPC);
	}

	BUG_ON(!gpu_buddy_block_is_free(block));

	while (tmp != order) {
		err = split_block(mm, block);
		if (unlikely(err))
			goto err_undo;

		block = block->right;
		tmp--;
	}
	return block;

err_undo:
	if (tmp != order)
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}

static bool
gpu_buddy_can_offset_align(u64 size, u64 min_block_size)
{
	return size < min_block_size && is_power_of_2(size);
}
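
/*
 * E.g. size == SZ_4K with min_block_size == SZ_64K qualifies: the caller
 * wants a small power-of-two block placed at a larger alignment, which
 * gpu_buddy_offset_aligned_allocation() serves without rounding the size
 * up to min_block_size.
 */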

static bool gpu_buddy_subtree_can_satisfy(struct rb_node *node,
					  unsigned int alignment)
{
	struct gpu_buddy_block *block;

	block = rbtree_get_free_block(node);
	return block->subtree_max_alignment >= alignment;
}

static struct gpu_buddy_block *
gpu_buddy_find_block_aligned(struct gpu_buddy *mm,
			     enum gpu_buddy_free_tree tree,
			     unsigned int order,
			     unsigned int alignment,
			     unsigned long flags)
{
	struct rb_root *root = &mm->free_trees[tree][order];
	struct rb_node *rb = root->rb_node;

	while (rb) {
		struct gpu_buddy_block *block = rbtree_get_free_block(rb);
		struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;

		if (right_node) {
			if (gpu_buddy_subtree_can_satisfy(right_node, alignment)) {
				rb = right_node;
				continue;
			}
		}

		if (gpu_buddy_block_offset_alignment(block) >= alignment)
			return block;

		if (left_node) {
			if (gpu_buddy_subtree_can_satisfy(left_node, alignment)) {
				rb = left_node;
				continue;
			}
		}

		break;
	}

	return NULL;
}

static struct gpu_buddy_block *
gpu_buddy_offset_aligned_allocation(struct gpu_buddy *mm,
				    u64 size,
				    u64 min_block_size,
				    unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	unsigned int order, tmp, alignment;
	struct gpu_buddy_block *buddy;
	enum gpu_buddy_free_tree tree;
	unsigned long pages;
	int err;

	alignment = ilog2(min_block_size);
	pages = size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;

	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	for (tmp = order; tmp <= mm->max_order; ++tmp) {
		block = gpu_buddy_find_block_aligned(mm, tree, tmp,
						     alignment, flags);
		if (!block) {
			tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
				GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
			block = gpu_buddy_find_block_aligned(mm, tree, tmp,
							     alignment, flags);
		}

		if (block)
			break;
	}

	if (!block)
		return ERR_PTR(-ENOSPC);

	while (gpu_buddy_block_order(block) > order) {
		struct gpu_buddy_block *left, *right;

		err = split_block(mm, block);
		if (unlikely(err))
			goto err_undo;

		left  = block->left;
		right = block->right;

		if (gpu_buddy_block_offset_alignment(right) >= alignment)
			block = right;
		else
			block = left;
	}

	return block;

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}

static int __alloc_range(struct gpu_buddy *mm,
			 struct list_head *dfs,
			 u64 start, u64 size,
			 struct list_head *blocks,
			 u64 *total_allocated_on_err)
{
	struct gpu_buddy_block *block;
	struct gpu_buddy_block *buddy;
	u64 total_allocated = 0;
	LIST_HEAD(allocated);
	u64 end;
	int err;

	end = start + size - 1;

	do {
		u64 block_start;
		u64 block_end;

		block = list_first_entry_or_null(dfs,
						 struct gpu_buddy_block,
						 tmp_link);
		if (!block)
			break;

		list_del(&block->tmp_link);

		block_start = gpu_buddy_block_offset(block);
		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

		if (!overlaps(start, end, block_start, block_end))
			continue;

		if (gpu_buddy_block_is_allocated(block)) {
			err = -ENOSPC;
			goto err_free;
		}

		if (contains(start, end, block_start, block_end)) {
			if (gpu_buddy_block_is_free(block)) {
				mark_allocated(mm, block);
				total_allocated += gpu_buddy_block_size(mm, block);
				mm->avail -= gpu_buddy_block_size(mm, block);
				if (gpu_buddy_block_is_clear(block))
					mm->clear_avail -= gpu_buddy_block_size(mm, block);
				list_add_tail(&block->link, &allocated);
				continue;
			} else if (!mm->clear_avail) {
				err = -ENOSPC;
				goto err_free;
			}
		}

		if (!gpu_buddy_block_is_split(block)) {
			err = split_block(mm, block);
			if (unlikely(err))
				goto err_undo;
		}

		list_add(&block->right->tmp_link, dfs);
		list_add(&block->left->tmp_link, dfs);
	} while (1);

	if (total_allocated < size) {
		err = -ENOSPC;
		goto err_free;
	}

	list_splice_tail(&allocated, blocks);

	return 0;

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);

err_free:
	if (err == -ENOSPC && total_allocated_on_err) {
		list_splice_tail(&allocated, blocks);
		*total_allocated_on_err = total_allocated;
	} else {
		gpu_buddy_free_list_internal(mm, &allocated);
	}

	return err;
}

static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
				   u64 start,
				   u64 size,
				   u64 *total_allocated_on_err,
				   struct list_head *blocks)
{
	LIST_HEAD(dfs);
	int i;

	for (i = 0; i < mm->n_roots; ++i)
		list_add_tail(&mm->roots[i]->tmp_link, &dfs);

	return __alloc_range(mm, &dfs, start, size,
			     blocks, total_allocated_on_err);
}

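/*
 * Try-harder fallback for contiguous allocations: anchor on each free block
 * of the largest power-of-two order not exceeding @size, allocate the
 * request starting at that block's offset (RHS), then extend leftwards (LHS)
 * by whatever remains. Moves on to the next candidate block on -ENOSPC.
 */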
static int __alloc_contig_try_harder(struct gpu_buddy *mm,
				     u64 size,
				     u64 min_block_size,
				     struct list_head *blocks)
{
	u64 rhs_offset, lhs_offset, lhs_size, filled;
	struct gpu_buddy_block *block;
	unsigned int tree, order;
	LIST_HEAD(blocks_lhs);
	unsigned long pages;
	u64 modify_size;
	int err;

	modify_size = rounddown_pow_of_two(size);
	pages = modify_size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;
	if (order == 0)
		return -ENOSPC;

	for_each_free_tree(tree) {
		struct rb_root *root;
		struct rb_node *iter;

		root = &mm->free_trees[tree][order];
		if (rbtree_is_empty(root))
			continue;

		iter = rb_last(root);
		while (iter) {
			block = rbtree_get_free_block(iter);

			/* Allocate blocks traversing RHS */
			rhs_offset = gpu_buddy_block_offset(block);
			err =  __gpu_buddy_alloc_range(mm, rhs_offset, size,
						       &filled, blocks);
			if (!err || err != -ENOSPC)
				return err;

			lhs_size = max((size - filled), min_block_size);
			if (!IS_ALIGNED(lhs_size, min_block_size))
				lhs_size = round_up(lhs_size, min_block_size);

			/* Allocate blocks traversing LHS */
			lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
			err =  __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
						       NULL, &blocks_lhs);
			if (!err) {
				list_splice(&blocks_lhs, blocks);
				return 0;
			} else if (err != -ENOSPC) {
				gpu_buddy_free_list_internal(mm, blocks);
				return err;
			}
			/* Free blocks for the next iteration */
			gpu_buddy_free_list_internal(mm, blocks);

			iter = rb_prev(iter);
		}
	}

	return -ENOSPC;
}

/**
 * gpu_buddy_block_trim - free unused pages
 *
 * @mm: GPU buddy manager
 * @start: start address to begin the trimming.
 * @new_size: original size requested
 * @blocks: Input and output list of allocated blocks.
 * MUST contain a single block as input to be trimmed.
 * On success it will contain the newly allocated blocks
 * making up the @new_size. Blocks always appear in
 * ascending order.
 *
 * For contiguous allocation, we round up the size to the nearest
 * power-of-two value; drivers consume the *actual* size, so the remaining
 * portions are unused and can optionally be freed with this function.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int gpu_buddy_block_trim(struct gpu_buddy *mm,
			 u64 *start,
			 u64 new_size,
			 struct list_head *blocks)
{
	struct gpu_buddy_block *parent;
	struct gpu_buddy_block *block;
	u64 block_start, block_end;
	LIST_HEAD(dfs);
	u64 new_start;
	int err;

	if (!list_is_singular(blocks))
		return -EINVAL;

	block = list_first_entry(blocks,
				 struct gpu_buddy_block,
				 link);

	block_start = gpu_buddy_block_offset(block);
	block_end = block_start + gpu_buddy_block_size(mm, block);

	if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
		return -EINVAL;

	if (new_size > gpu_buddy_block_size(mm, block))
		return -EINVAL;

	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
		return -EINVAL;

	if (new_size == gpu_buddy_block_size(mm, block))
		return 0;

	new_start = block_start;
	if (start) {
		new_start = *start;

		if (new_start < block_start)
			return -EINVAL;

		if (!IS_ALIGNED(new_start, mm->chunk_size))
			return -EINVAL;

		if (range_overflows(new_start, new_size, block_end))
			return -EINVAL;
	}

	list_del(&block->link);
	mark_free(mm, block);
	mm->avail += gpu_buddy_block_size(mm, block);
	if (gpu_buddy_block_is_clear(block))
		mm->clear_avail += gpu_buddy_block_size(mm, block);

	/* Prevent recursively freeing this node */
	parent = block->parent;
	block->parent = NULL;

	list_add(&block->tmp_link, &dfs);
	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
	if (err) {
		mark_allocated(mm, block);
		mm->avail -= gpu_buddy_block_size(mm, block);
		if (gpu_buddy_block_is_clear(block))
			mm->clear_avail -= gpu_buddy_block_size(mm, block);
		list_add(&block->link, blocks);
	}

	block->parent = parent;
	return err;
}
EXPORT_SYMBOL(gpu_buddy_block_trim);
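
/*
 * Sketch of a manual trim (sizes are hypothetical): with trimming disabled,
 * a contiguous 768 KiB request is rounded up to one 1 MiB block, whose
 * unused 256 KiB tail can later be returned to the freetree:
 *
 *	LIST_HEAD(blocks);
 *
 *	gpu_buddy_alloc_blocks(mm, 0, mm->size, 768 * SZ_1K, SZ_4K, &blocks,
 *			       GPU_BUDDY_CONTIGUOUS_ALLOCATION |
 *			       GPU_BUDDY_TRIM_DISABLE);
 *	gpu_buddy_block_trim(mm, NULL, 768 * SZ_1K, &blocks);
 */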

static struct gpu_buddy_block *
__gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
			 u64 start, u64 end,
			 u64 size, u64 min_block_size,
			 unsigned int order,
			 unsigned long flags)
{
	if (flags & GPU_BUDDY_RANGE_ALLOCATION)
		/* Allocate traversing within the range */
		return  __gpu_buddy_alloc_range_bias(mm, start, end,
						     order, flags);
	else if (size < min_block_size)
		/* Allocate from an offset-aligned region without size rounding */
		return gpu_buddy_offset_aligned_allocation(mm, size,
							   min_block_size,
							   flags);
	else
		/* Allocate from freetree */
		return alloc_from_freetree(mm, order, flags);
}

/**
 * gpu_buddy_alloc_blocks - allocate power-of-two blocks
 *
 * @mm: GPU buddy manager to allocate from
 * @start: start of the allowed range for this block
 * @end: end of the allowed range for this block
 * @size: size of the allocation in bytes
 * @min_block_size: alignment of the allocation
 * @blocks: output list head to add allocated blocks
 * @flags: GPU_BUDDY_*_ALLOCATION flags
 *
 * alloc_range_bias() is called when range limitations are given;
 * it traverses the tree and returns the desired block.
 *
 * alloc_from_freetree() is called when *no* range restrictions
 * are enforced; it picks a block from the freetree.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
			   u64 start, u64 end, u64 size,
			   u64 min_block_size,
			   struct list_head *blocks,
			   unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	u64 original_size, original_min_size;
	unsigned int min_order, order;
	LIST_HEAD(allocated);
	unsigned long pages;
	int err;

	if (size < mm->chunk_size)
		return -EINVAL;

	if (min_block_size < mm->chunk_size)
		return -EINVAL;

	if (!is_power_of_2(min_block_size))
		return -EINVAL;

	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
		return -EINVAL;

	if (end > mm->size)
		return -EINVAL;

	if (range_overflows(start, size, mm->size))
		return -EINVAL;

	/* Actual range allocation */
	if (start + size == end) {
		if (!IS_ALIGNED(start | end, min_block_size))
			return -EINVAL;

		return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
	}

	original_size = size;
	original_min_size = min_block_size;

	/* Round up the size to a power of two */
	if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
		size = roundup_pow_of_two(size);
		min_block_size = size;
		/*
		 * Normalize the requested size to min_block_size for regular allocations.
		 * Offset-aligned allocations intentionally skip size rounding.
		 */
	} else if (!gpu_buddy_can_offset_align(size, min_block_size)) {
		size = round_up(size, min_block_size);
	}

	pages = size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;
	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);

	if (order > mm->max_order || size > mm->size) {
		if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
		    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
			return __alloc_contig_try_harder(mm, original_size,
							 original_min_size, blocks);

		return -EINVAL;
	}

	do {
		order = min(order, (unsigned int)fls(pages) - 1);
		BUG_ON(order > mm->max_order);
		/*
		 * Regular allocations must not allocate blocks smaller than min_block_size.
		 * Offset-aligned allocations deliberately bypass this constraint.
		 */
		BUG_ON(size >= min_block_size && order < min_order);

		do {
			unsigned int fallback_order;

			block = __gpu_buddy_alloc_blocks(mm, start,
							 end,
							 size,
							 min_block_size,
							 order,
							 flags);
			if (!IS_ERR(block))
				break;

			if (size < min_block_size) {
				fallback_order = order;
			} else if (order == min_order) {
				fallback_order = min_order;
			} else {
				order--;
				continue;
			}

			/* Try allocation through force merge method */
			if (mm->clear_avail &&
			    !__force_merge(mm, start, end, fallback_order)) {
				block = __gpu_buddy_alloc_blocks(mm, start,
								 end,
								 size,
								 min_block_size,
								 fallback_order,
								 flags);
				if (!IS_ERR(block)) {
					order = fallback_order;
					break;
				}
			}

			/*
			 * Try contiguous block allocation through
			 * try harder method.
			 */
			if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
			    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
				return __alloc_contig_try_harder(mm,
								 original_size,
								 original_min_size,
								 blocks);
			err = -ENOSPC;
			goto err_free;
		} while (1);

		mark_allocated(mm, block);
		mm->avail -= gpu_buddy_block_size(mm, block);
		if (gpu_buddy_block_is_clear(block))
			mm->clear_avail -= gpu_buddy_block_size(mm, block);
		kmemleak_update_trace(block);
		list_add_tail(&block->link, &allocated);

		pages -= BIT(order);

		if (!pages)
			break;
	} while (1);

	/* Trim the allocated block to the required size */
	if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
	    original_size != size) {
		struct list_head *trim_list;
		LIST_HEAD(temp);
		u64 trim_size;

		trim_list = &allocated;
		trim_size = original_size;

		if (!list_is_singular(&allocated)) {
			block = list_last_entry(&allocated, typeof(*block), link);
			list_move(&block->link, &temp);
			trim_list = &temp;
			trim_size = gpu_buddy_block_size(mm, block) -
				(size - original_size);
		}

		gpu_buddy_block_trim(mm,
				     NULL,
				     trim_size,
				     trim_list);

		if (!list_empty(&temp))
			list_splice_tail(trim_list, &allocated);
	}

	list_splice_tail(&allocated, blocks);
	return 0;

err_free:
	gpu_buddy_free_list_internal(mm, &allocated);
	return err;
}
EXPORT_SYMBOL(gpu_buddy_alloc_blocks);
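
/*
 * Allocation sketch (illustrative values): request 8 MiB anywhere in the
 * managed range, preferring already-cleared blocks, then hand the memory
 * back as dirty once consumed:
 *
 *	LIST_HEAD(blocks);
 *	int err;
 *
 *	err = gpu_buddy_alloc_blocks(mm, 0, mm->size, SZ_8M, SZ_4K, &blocks,
 *				     GPU_BUDDY_CLEAR_ALLOCATION);
 *	if (err)
 *		return err;
 *	...
 *	gpu_buddy_free_list(mm, &blocks, 0);
 */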

/**
 * gpu_buddy_block_print - print block information
 *
 * @mm: GPU buddy manager
 * @block: GPU buddy block
 */
void gpu_buddy_block_print(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	u64 start = gpu_buddy_block_offset(block);
	u64 size = gpu_buddy_block_size(mm, block);

	pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
}
EXPORT_SYMBOL(gpu_buddy_block_print);

/**
 * gpu_buddy_print - print allocator state
 *
 * @mm: GPU buddy manager
 */
void gpu_buddy_print(struct gpu_buddy *mm)
{
	int order;

	pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
		mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);

	for (order = mm->max_order; order >= 0; order--) {
		struct gpu_buddy_block *block, *tmp;
		struct rb_root *root;
		u64 count = 0, free;
		unsigned int tree;

		for_each_free_tree(tree) {
			root = &mm->free_trees[tree][order];

			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
				BUG_ON(!gpu_buddy_block_is_free(block));
				count++;
			}
		}

		free = count * (mm->chunk_size << order);
		if (free < SZ_1M)
			pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
				order, free >> 10, count);
		else
			pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
				order, free >> 20, count);
	}
}
EXPORT_SYMBOL(gpu_buddy_print);

static void gpu_buddy_module_exit(void)
{
	kmem_cache_destroy(slab_blocks);
}

static int __init gpu_buddy_module_init(void)
{
	slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
	if (!slab_blocks)
		return -ENOMEM;

	return 0;
}

module_init(gpu_buddy_module_init);
module_exit(gpu_buddy_module_exit);

MODULE_DESCRIPTION("GPU Buddy Allocator");
MODULE_LICENSE("Dual MIT/GPL");