xref: /linux/drivers/gpu/buddy.c (revision ba110db8e1bc206c13fd7d985e79b033f53bfdea)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include <kunit/test-bug.h>
7 
8 #include <linux/export.h>
9 #include <linux/kmemleak.h>
10 #include <linux/module.h>
11 #include <linux/sizes.h>
12 
13 #include <linux/gpu_buddy.h>
14 
15 static struct kmem_cache *slab_blocks;
16 
17 static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
18 					       struct gpu_buddy_block *parent,
19 					       unsigned int order,
20 					       u64 offset)
21 {
22 	struct gpu_buddy_block *block;
23 
24 	BUG_ON(order > GPU_BUDDY_MAX_ORDER);
25 
26 	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
27 	if (!block)
28 		return NULL;
29 
30 	block->header = offset;
31 	block->header |= order;
32 	block->parent = parent;
33 
34 	RB_CLEAR_NODE(&block->rb);
35 
36 	BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
37 	return block;
38 }
39 
40 static void gpu_block_free(struct gpu_buddy *mm,
41 			   struct gpu_buddy_block *block)
42 {
43 	kmem_cache_free(slab_blocks, block);
44 }
45 
46 static enum gpu_buddy_free_tree
47 get_block_tree(struct gpu_buddy_block *block)
48 {
49 	return gpu_buddy_block_is_clear(block) ?
50 	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
51 }
52 
53 static struct gpu_buddy_block *
54 rbtree_get_free_block(const struct rb_node *node)
55 {
56 	return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
57 }
58 
59 static struct gpu_buddy_block *
60 rbtree_last_free_block(struct rb_root *root)
61 {
62 	return rbtree_get_free_block(rb_last(root));
63 }
64 
65 static bool rbtree_is_empty(struct rb_root *root)
66 {
67 	return RB_EMPTY_ROOT(root);
68 }
69 
70 static bool gpu_buddy_block_offset_less(const struct gpu_buddy_block *block,
71 					const struct gpu_buddy_block *node)
72 {
73 	return gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node);
74 }
75 
76 static bool rbtree_block_offset_less(struct rb_node *block,
77 				     const struct rb_node *node)
78 {
79 	return gpu_buddy_block_offset_less(rbtree_get_free_block(block),
80 					   rbtree_get_free_block(node));
81 }
82 
83 static void rbtree_insert(struct gpu_buddy *mm,
84 			  struct gpu_buddy_block *block,
85 			  enum gpu_buddy_free_tree tree)
86 {
87 	rb_add(&block->rb,
88 	       &mm->free_trees[tree][gpu_buddy_block_order(block)],
89 	       rbtree_block_offset_less);
90 }
91 
92 static void rbtree_remove(struct gpu_buddy *mm,
93 			  struct gpu_buddy_block *block)
94 {
95 	unsigned int order = gpu_buddy_block_order(block);
96 	enum gpu_buddy_free_tree tree;
97 	struct rb_root *root;
98 
99 	tree = get_block_tree(block);
100 	root = &mm->free_trees[tree][order];
101 
102 	rb_erase(&block->rb, root);
103 	RB_CLEAR_NODE(&block->rb);
104 }
105 
106 static void clear_reset(struct gpu_buddy_block *block)
107 {
108 	block->header &= ~GPU_BUDDY_HEADER_CLEAR;
109 }
110 
111 static void mark_cleared(struct gpu_buddy_block *block)
112 {
113 	block->header |= GPU_BUDDY_HEADER_CLEAR;
114 }
115 
116 static void mark_allocated(struct gpu_buddy *mm,
117 			   struct gpu_buddy_block *block)
118 {
119 	block->header &= ~GPU_BUDDY_HEADER_STATE;
120 	block->header |= GPU_BUDDY_ALLOCATED;
121 
122 	rbtree_remove(mm, block);
123 }
124 
125 static void mark_free(struct gpu_buddy *mm,
126 		      struct gpu_buddy_block *block)
127 {
128 	enum gpu_buddy_free_tree tree;
129 
130 	block->header &= ~GPU_BUDDY_HEADER_STATE;
131 	block->header |= GPU_BUDDY_FREE;
132 
133 	tree = get_block_tree(block);
134 	rbtree_insert(mm, block, tree);
135 }
136 
137 static void mark_split(struct gpu_buddy *mm,
138 		       struct gpu_buddy_block *block)
139 {
140 	block->header &= ~GPU_BUDDY_HEADER_STATE;
141 	block->header |= GPU_BUDDY_SPLIT;
142 
143 	rbtree_remove(mm, block);
144 }
145 
146 static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
147 {
148 	return s1 <= e2 && e1 >= s2;
149 }
150 
151 static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
152 {
153 	return s1 <= s2 && e1 >= e2;
154 }
155 
156 static struct gpu_buddy_block *
157 __get_buddy(struct gpu_buddy_block *block)
158 {
159 	struct gpu_buddy_block *parent;
160 
161 	parent = block->parent;
162 	if (!parent)
163 		return NULL;
164 
165 	if (parent->left == block)
166 		return parent->right;
167 
168 	return parent->left;
169 }
170 
171 static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
172 				     struct gpu_buddy_block *block,
173 				     bool force_merge)
174 {
175 	struct gpu_buddy_block *parent;
176 	unsigned int order;
177 
178 	while ((parent = block->parent)) {
179 		struct gpu_buddy_block *buddy;
180 
181 		buddy = __get_buddy(block);
182 
183 		if (!gpu_buddy_block_is_free(buddy))
184 			break;
185 
186 		if (!force_merge) {
187 			/*
188 			 * Check the block and its buddy clear state and exit
189 			 * the loop if they both have the dissimilar state.
190 			 */
191 			if (gpu_buddy_block_is_clear(block) !=
192 			    gpu_buddy_block_is_clear(buddy))
193 				break;
194 
195 			if (gpu_buddy_block_is_clear(block))
196 				mark_cleared(parent);
197 		}
198 
199 		rbtree_remove(mm, buddy);
200 		if (force_merge && gpu_buddy_block_is_clear(buddy))
201 			mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
202 
203 		gpu_block_free(mm, block);
204 		gpu_block_free(mm, buddy);
205 
206 		block = parent;
207 	}
208 
209 	order = gpu_buddy_block_order(block);
210 	mark_free(mm, block);
211 
212 	return order;
213 }
214 
215 static int __force_merge(struct gpu_buddy *mm,
216 			 u64 start,
217 			 u64 end,
218 			 unsigned int min_order)
219 {
220 	unsigned int tree, order;
221 	int i;
222 
223 	if (!min_order)
224 		return -ENOMEM;
225 
226 	if (min_order > mm->max_order)
227 		return -EINVAL;
228 
229 	for_each_free_tree(tree) {
230 		for (i = min_order - 1; i >= 0; i--) {
231 			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
232 
233 			while (iter) {
234 				struct gpu_buddy_block *block, *buddy;
235 				u64 block_start, block_end;
236 
237 				block = rbtree_get_free_block(iter);
238 				iter = rb_prev(iter);
239 
240 				if (!block || !block->parent)
241 					continue;
242 
243 				block_start = gpu_buddy_block_offset(block);
244 				block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
245 
246 				if (!contains(start, end, block_start, block_end))
247 					continue;
248 
249 				buddy = __get_buddy(block);
250 				if (!gpu_buddy_block_is_free(buddy))
251 					continue;
252 
253 				WARN_ON(gpu_buddy_block_is_clear(block) ==
254 					gpu_buddy_block_is_clear(buddy));
255 
256 				/*
257 				 * Advance to the next node when the current node is the buddy,
258 				 * as freeing the block will also remove its buddy from the tree.
259 				 */
260 				if (iter == &buddy->rb)
261 					iter = rb_prev(iter);
262 
263 				rbtree_remove(mm, block);
264 				if (gpu_buddy_block_is_clear(block))
265 					mm->clear_avail -= gpu_buddy_block_size(mm, block);
266 
267 				order = __gpu_buddy_free(mm, block, true);
268 				if (order >= min_order)
269 					return 0;
270 			}
271 		}
272 	}
273 
274 	return -ENOMEM;
275 }
276 
277 /**
278  * gpu_buddy_init - init memory manager
279  *
280  * @mm: GPU buddy manager to initialize
281  * @size: size in bytes to manage
282  * @chunk_size: minimum page size in bytes for our allocations
283  *
284  * Initializes the memory manager and its resources.
285  *
286  * Returns:
287  * 0 on success, error code on failure.
288  */
289 int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
290 {
291 	unsigned int i, j, root_count = 0;
292 	u64 offset = 0;
293 
294 	if (size < chunk_size)
295 		return -EINVAL;
296 
297 	if (chunk_size < SZ_4K)
298 		return -EINVAL;
299 
300 	if (!is_power_of_2(chunk_size))
301 		return -EINVAL;
302 
303 	size = round_down(size, chunk_size);
304 
305 	mm->size = size;
306 	mm->avail = size;
307 	mm->clear_avail = 0;
308 	mm->chunk_size = chunk_size;
309 	mm->max_order = ilog2(size) - ilog2(chunk_size);
310 
311 	BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);
312 
313 	mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
314 				       sizeof(*mm->free_trees),
315 				       GFP_KERNEL);
316 	if (!mm->free_trees)
317 		return -ENOMEM;
318 
319 	for_each_free_tree(i) {
320 		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
321 						  sizeof(struct rb_root),
322 						  GFP_KERNEL);
323 		if (!mm->free_trees[i])
324 			goto out_free_tree;
325 
326 		for (j = 0; j <= mm->max_order; ++j)
327 			mm->free_trees[i][j] = RB_ROOT;
328 	}
329 
330 	mm->n_roots = hweight64(size);
331 
332 	mm->roots = kmalloc_array(mm->n_roots,
333 				  sizeof(struct gpu_buddy_block *),
334 				  GFP_KERNEL);
335 	if (!mm->roots)
336 		goto out_free_tree;
337 
338 	/*
339 	 * Split into power-of-two blocks, in case we are given a size that is
340 	 * not itself a power-of-two.
341 	 */
342 	do {
343 		struct gpu_buddy_block *root;
344 		unsigned int order;
345 		u64 root_size;
346 
347 		order = ilog2(size) - ilog2(chunk_size);
348 		root_size = chunk_size << order;
349 
350 		root = gpu_block_alloc(mm, NULL, order, offset);
351 		if (!root)
352 			goto out_free_roots;
353 
354 		mark_free(mm, root);
355 
356 		BUG_ON(root_count > mm->max_order);
357 		BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);
358 
359 		mm->roots[root_count] = root;
360 
361 		offset += root_size;
362 		size -= root_size;
363 		root_count++;
364 	} while (size);
365 
366 	return 0;
367 
368 out_free_roots:
369 	while (root_count--)
370 		gpu_block_free(mm, mm->roots[root_count]);
371 	kfree(mm->roots);
372 out_free_tree:
373 	while (i--)
374 		kfree(mm->free_trees[i]);
375 	kfree(mm->free_trees);
376 	return -ENOMEM;
377 }
378 EXPORT_SYMBOL(gpu_buddy_init);
379 
380 /**
381  * gpu_buddy_fini - tear down the memory manager
382  *
383  * @mm: GPU buddy manager to free
384  *
385  * Cleanup memory manager resources and the freetree
386  */
387 void gpu_buddy_fini(struct gpu_buddy *mm)
388 {
389 	u64 root_size, size, start;
390 	unsigned int order;
391 	int i;
392 
393 	size = mm->size;
394 
395 	for (i = 0; i < mm->n_roots; ++i) {
396 		order = ilog2(size) - ilog2(mm->chunk_size);
397 		start = gpu_buddy_block_offset(mm->roots[i]);
398 		__force_merge(mm, start, start + size, order);
399 
400 		if (WARN_ON(!gpu_buddy_block_is_free(mm->roots[i])))
401 			kunit_fail_current_test("buddy_fini() root");
402 
403 		gpu_block_free(mm, mm->roots[i]);
404 
405 		root_size = mm->chunk_size << order;
406 		size -= root_size;
407 	}
408 
409 	WARN_ON(mm->avail != mm->size);
410 
411 	for_each_free_tree(i)
412 		kfree(mm->free_trees[i]);
413 	kfree(mm->free_trees);
414 	kfree(mm->roots);
415 }
416 EXPORT_SYMBOL(gpu_buddy_fini);
417 
418 static int split_block(struct gpu_buddy *mm,
419 		       struct gpu_buddy_block *block)
420 {
421 	unsigned int block_order = gpu_buddy_block_order(block) - 1;
422 	u64 offset = gpu_buddy_block_offset(block);
423 
424 	BUG_ON(!gpu_buddy_block_is_free(block));
425 	BUG_ON(!gpu_buddy_block_order(block));
426 
427 	block->left = gpu_block_alloc(mm, block, block_order, offset);
428 	if (!block->left)
429 		return -ENOMEM;
430 
431 	block->right = gpu_block_alloc(mm, block, block_order,
432 				       offset + (mm->chunk_size << block_order));
433 	if (!block->right) {
434 		gpu_block_free(mm, block->left);
435 		return -ENOMEM;
436 	}
437 
438 	mark_split(mm, block);
439 
440 	if (gpu_buddy_block_is_clear(block)) {
441 		mark_cleared(block->left);
442 		mark_cleared(block->right);
443 		clear_reset(block);
444 	}
445 
446 	mark_free(mm, block->left);
447 	mark_free(mm, block->right);
448 
449 	return 0;
450 }
451 
452 /**
453  * gpu_get_buddy - get buddy address
454  *
455  * @block: GPU buddy block
456  *
457  * Returns the corresponding buddy block for @block, or NULL
458  * if this is a root block and can't be merged further.
459  * Requires some kind of locking to protect against
460  * any concurrent allocate and free operations.
461  */
462 struct gpu_buddy_block *
463 gpu_get_buddy(struct gpu_buddy_block *block)
464 {
465 	return __get_buddy(block);
466 }
467 EXPORT_SYMBOL(gpu_get_buddy);
468 
469 /**
470  * gpu_buddy_reset_clear - reset blocks clear state
471  *
472  * @mm: GPU buddy manager
473  * @is_clear: blocks clear state
474  *
475  * Reset the clear state based on @is_clear value for each block
476  * in the freetree.
477  */
478 void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
479 {
480 	enum gpu_buddy_free_tree src_tree, dst_tree;
481 	u64 root_size, size, start;
482 	unsigned int order;
483 	int i;
484 
485 	size = mm->size;
486 	for (i = 0; i < mm->n_roots; ++i) {
487 		order = ilog2(size) - ilog2(mm->chunk_size);
488 		start = gpu_buddy_block_offset(mm->roots[i]);
489 		__force_merge(mm, start, start + size, order);
490 
491 		root_size = mm->chunk_size << order;
492 		size -= root_size;
493 	}
494 
495 	src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
496 	dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
497 
498 	for (i = 0; i <= mm->max_order; ++i) {
499 		struct rb_root *root = &mm->free_trees[src_tree][i];
500 		struct gpu_buddy_block *block, *tmp;
501 
502 		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
503 			rbtree_remove(mm, block);
504 			if (is_clear) {
505 				mark_cleared(block);
506 				mm->clear_avail += gpu_buddy_block_size(mm, block);
507 			} else {
508 				clear_reset(block);
509 				mm->clear_avail -= gpu_buddy_block_size(mm, block);
510 			}
511 
512 			rbtree_insert(mm, block, dst_tree);
513 		}
514 	}
515 }
516 EXPORT_SYMBOL(gpu_buddy_reset_clear);
517 
518 /**
519  * gpu_buddy_free_block - free a block
520  *
521  * @mm: GPU buddy manager
522  * @block: block to be freed
523  */
524 void gpu_buddy_free_block(struct gpu_buddy *mm,
525 			  struct gpu_buddy_block *block)
526 {
527 	BUG_ON(!gpu_buddy_block_is_allocated(block));
528 	mm->avail += gpu_buddy_block_size(mm, block);
529 	if (gpu_buddy_block_is_clear(block))
530 		mm->clear_avail += gpu_buddy_block_size(mm, block);
531 
532 	__gpu_buddy_free(mm, block, false);
533 }
534 EXPORT_SYMBOL(gpu_buddy_free_block);
535 
536 static void __gpu_buddy_free_list(struct gpu_buddy *mm,
537 				  struct list_head *objects,
538 				  bool mark_clear,
539 				  bool mark_dirty)
540 {
541 	struct gpu_buddy_block *block, *on;
542 
543 	WARN_ON(mark_dirty && mark_clear);
544 
545 	list_for_each_entry_safe(block, on, objects, link) {
546 		if (mark_clear)
547 			mark_cleared(block);
548 		else if (mark_dirty)
549 			clear_reset(block);
550 		gpu_buddy_free_block(mm, block);
551 		cond_resched();
552 	}
553 	INIT_LIST_HEAD(objects);
554 }
555 
556 static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
557 					 struct list_head *objects)
558 {
559 	/*
560 	 * Don't touch the clear/dirty bit, since allocation is still internal
561 	 * at this point. For example we might have just failed part of the
562 	 * allocation.
563 	 */
564 	__gpu_buddy_free_list(mm, objects, false, false);
565 }
566 
567 /**
568  * gpu_buddy_free_list - free blocks
569  *
570  * @mm: GPU buddy manager
571  * @objects: input list head to free blocks
572  * @flags: optional flags like GPU_BUDDY_CLEARED
573  */
574 void gpu_buddy_free_list(struct gpu_buddy *mm,
575 			 struct list_head *objects,
576 			 unsigned int flags)
577 {
578 	bool mark_clear = flags & GPU_BUDDY_CLEARED;
579 
580 	__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
581 }
582 EXPORT_SYMBOL(gpu_buddy_free_list);
583 
584 static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
585 {
586 	bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;
587 
588 	return needs_clear != gpu_buddy_block_is_clear(block);
589 }
590 
591 static struct gpu_buddy_block *
592 __alloc_range_bias(struct gpu_buddy *mm,
593 		   u64 start, u64 end,
594 		   unsigned int order,
595 		   unsigned long flags,
596 		   bool fallback)
597 {
598 	u64 req_size = mm->chunk_size << order;
599 	struct gpu_buddy_block *block;
600 	struct gpu_buddy_block *buddy;
601 	LIST_HEAD(dfs);
602 	int err;
603 	int i;
604 
605 	end = end - 1;
606 
607 	for (i = 0; i < mm->n_roots; ++i)
608 		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
609 
610 	do {
611 		u64 block_start;
612 		u64 block_end;
613 
614 		block = list_first_entry_or_null(&dfs,
615 						 struct gpu_buddy_block,
616 						 tmp_link);
617 		if (!block)
618 			break;
619 
620 		list_del(&block->tmp_link);
621 
622 		if (gpu_buddy_block_order(block) < order)
623 			continue;
624 
625 		block_start = gpu_buddy_block_offset(block);
626 		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
627 
628 		if (!overlaps(start, end, block_start, block_end))
629 			continue;
630 
631 		if (gpu_buddy_block_is_allocated(block))
632 			continue;
633 
634 		if (block_start < start || block_end > end) {
635 			u64 adjusted_start = max(block_start, start);
636 			u64 adjusted_end = min(block_end, end);
637 
638 			if (round_down(adjusted_end + 1, req_size) <=
639 			    round_up(adjusted_start, req_size))
640 				continue;
641 		}
642 
643 		if (!fallback && block_incompatible(block, flags))
644 			continue;
645 
646 		if (contains(start, end, block_start, block_end) &&
647 		    order == gpu_buddy_block_order(block)) {
648 			/*
649 			 * Find the free block within the range.
650 			 */
651 			if (gpu_buddy_block_is_free(block))
652 				return block;
653 
654 			continue;
655 		}
656 
657 		if (!gpu_buddy_block_is_split(block)) {
658 			err = split_block(mm, block);
659 			if (unlikely(err))
660 				goto err_undo;
661 		}
662 
663 		list_add(&block->right->tmp_link, &dfs);
664 		list_add(&block->left->tmp_link, &dfs);
665 	} while (1);
666 
667 	return ERR_PTR(-ENOSPC);
668 
669 err_undo:
670 	/*
671 	 * We really don't want to leave around a bunch of split blocks, since
672 	 * bigger is better, so make sure we merge everything back before we
673 	 * free the allocated blocks.
674 	 */
675 	buddy = __get_buddy(block);
676 	if (buddy &&
677 	    (gpu_buddy_block_is_free(block) &&
678 	     gpu_buddy_block_is_free(buddy)))
679 		__gpu_buddy_free(mm, block, false);
680 	return ERR_PTR(err);
681 }
682 
683 static struct gpu_buddy_block *
684 __gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
685 			     u64 start, u64 end,
686 			     unsigned int order,
687 			     unsigned long flags)
688 {
689 	struct gpu_buddy_block *block;
690 	bool fallback = false;
691 
692 	block = __alloc_range_bias(mm, start, end, order,
693 				   flags, fallback);
694 	if (IS_ERR(block))
695 		return __alloc_range_bias(mm, start, end, order,
696 					  flags, !fallback);
697 
698 	return block;
699 }
700 
701 static struct gpu_buddy_block *
702 get_maxblock(struct gpu_buddy *mm,
703 	     unsigned int order,
704 	     enum gpu_buddy_free_tree tree)
705 {
706 	struct gpu_buddy_block *max_block = NULL, *block = NULL;
707 	struct rb_root *root;
708 	unsigned int i;
709 
710 	for (i = order; i <= mm->max_order; ++i) {
711 		root = &mm->free_trees[tree][i];
712 		block = rbtree_last_free_block(root);
713 		if (!block)
714 			continue;
715 
716 		if (!max_block) {
717 			max_block = block;
718 			continue;
719 		}
720 
721 		if (gpu_buddy_block_offset(block) >
722 		    gpu_buddy_block_offset(max_block)) {
723 			max_block = block;
724 		}
725 	}
726 
727 	return max_block;
728 }
729 
730 static struct gpu_buddy_block *
731 alloc_from_freetree(struct gpu_buddy *mm,
732 		    unsigned int order,
733 		    unsigned long flags)
734 {
735 	struct gpu_buddy_block *block = NULL;
736 	struct rb_root *root;
737 	enum gpu_buddy_free_tree tree;
738 	unsigned int tmp;
739 	int err;
740 
741 	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
742 		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
743 
744 	if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
745 		block = get_maxblock(mm, order, tree);
746 		if (block)
747 			/* Store the obtained block order */
748 			tmp = gpu_buddy_block_order(block);
749 	} else {
750 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
751 			/* Get RB tree root for this order and tree */
752 			root = &mm->free_trees[tree][tmp];
753 			block = rbtree_last_free_block(root);
754 			if (block)
755 				break;
756 		}
757 	}
758 
759 	if (!block) {
760 		/* Try allocating from the other tree */
761 		tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
762 			GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
763 
764 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
765 			root = &mm->free_trees[tree][tmp];
766 			block = rbtree_last_free_block(root);
767 			if (block)
768 				break;
769 		}
770 
771 		if (!block)
772 			return ERR_PTR(-ENOSPC);
773 	}
774 
775 	BUG_ON(!gpu_buddy_block_is_free(block));
776 
777 	while (tmp != order) {
778 		err = split_block(mm, block);
779 		if (unlikely(err))
780 			goto err_undo;
781 
782 		block = block->right;
783 		tmp--;
784 	}
785 	return block;
786 
787 err_undo:
788 	if (tmp != order)
789 		__gpu_buddy_free(mm, block, false);
790 	return ERR_PTR(err);
791 }
792 
793 static int __alloc_range(struct gpu_buddy *mm,
794 			 struct list_head *dfs,
795 			 u64 start, u64 size,
796 			 struct list_head *blocks,
797 			 u64 *total_allocated_on_err)
798 {
799 	struct gpu_buddy_block *block;
800 	struct gpu_buddy_block *buddy;
801 	u64 total_allocated = 0;
802 	LIST_HEAD(allocated);
803 	u64 end;
804 	int err;
805 
806 	end = start + size - 1;
807 
808 	do {
809 		u64 block_start;
810 		u64 block_end;
811 
812 		block = list_first_entry_or_null(dfs,
813 						 struct gpu_buddy_block,
814 						 tmp_link);
815 		if (!block)
816 			break;
817 
818 		list_del(&block->tmp_link);
819 
820 		block_start = gpu_buddy_block_offset(block);
821 		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
822 
823 		if (!overlaps(start, end, block_start, block_end))
824 			continue;
825 
826 		if (gpu_buddy_block_is_allocated(block)) {
827 			err = -ENOSPC;
828 			goto err_free;
829 		}
830 
831 		if (contains(start, end, block_start, block_end)) {
832 			if (gpu_buddy_block_is_free(block)) {
833 				mark_allocated(mm, block);
834 				total_allocated += gpu_buddy_block_size(mm, block);
835 				mm->avail -= gpu_buddy_block_size(mm, block);
836 				if (gpu_buddy_block_is_clear(block))
837 					mm->clear_avail -= gpu_buddy_block_size(mm, block);
838 				list_add_tail(&block->link, &allocated);
839 				continue;
840 			} else if (!mm->clear_avail) {
841 				err = -ENOSPC;
842 				goto err_free;
843 			}
844 		}
845 
846 		if (!gpu_buddy_block_is_split(block)) {
847 			err = split_block(mm, block);
848 			if (unlikely(err))
849 				goto err_undo;
850 		}
851 
852 		list_add(&block->right->tmp_link, dfs);
853 		list_add(&block->left->tmp_link, dfs);
854 	} while (1);
855 
856 	if (total_allocated < size) {
857 		err = -ENOSPC;
858 		goto err_free;
859 	}
860 
861 	list_splice_tail(&allocated, blocks);
862 
863 	return 0;
864 
865 err_undo:
866 	/*
867 	 * We really don't want to leave around a bunch of split blocks, since
868 	 * bigger is better, so make sure we merge everything back before we
869 	 * free the allocated blocks.
870 	 */
871 	buddy = __get_buddy(block);
872 	if (buddy &&
873 	    (gpu_buddy_block_is_free(block) &&
874 	     gpu_buddy_block_is_free(buddy)))
875 		__gpu_buddy_free(mm, block, false);
876 
877 err_free:
878 	if (err == -ENOSPC && total_allocated_on_err) {
879 		list_splice_tail(&allocated, blocks);
880 		*total_allocated_on_err = total_allocated;
881 	} else {
882 		gpu_buddy_free_list_internal(mm, &allocated);
883 	}
884 
885 	return err;
886 }
887 
888 static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
889 				   u64 start,
890 				   u64 size,
891 				   u64 *total_allocated_on_err,
892 				   struct list_head *blocks)
893 {
894 	LIST_HEAD(dfs);
895 	int i;
896 
897 	for (i = 0; i < mm->n_roots; ++i)
898 		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
899 
900 	return __alloc_range(mm, &dfs, start, size,
901 			     blocks, total_allocated_on_err);
902 }
903 
904 static int __alloc_contig_try_harder(struct gpu_buddy *mm,
905 				     u64 size,
906 				     u64 min_block_size,
907 				     struct list_head *blocks)
908 {
909 	u64 rhs_offset, lhs_offset, lhs_size, filled;
910 	struct gpu_buddy_block *block;
911 	unsigned int tree, order;
912 	LIST_HEAD(blocks_lhs);
913 	unsigned long pages;
914 	u64 modify_size;
915 	int err;
916 
917 	modify_size = rounddown_pow_of_two(size);
918 	pages = modify_size >> ilog2(mm->chunk_size);
919 	order = fls(pages) - 1;
920 	if (order == 0)
921 		return -ENOSPC;
922 
923 	for_each_free_tree(tree) {
924 		struct rb_root *root;
925 		struct rb_node *iter;
926 
927 		root = &mm->free_trees[tree][order];
928 		if (rbtree_is_empty(root))
929 			continue;
930 
931 		iter = rb_last(root);
932 		while (iter) {
933 			block = rbtree_get_free_block(iter);
934 
935 			/* Allocate blocks traversing RHS */
936 			rhs_offset = gpu_buddy_block_offset(block);
937 			err =  __gpu_buddy_alloc_range(mm, rhs_offset, size,
938 						       &filled, blocks);
939 			if (!err || err != -ENOSPC)
940 				return err;
941 
942 			lhs_size = max((size - filled), min_block_size);
943 			if (!IS_ALIGNED(lhs_size, min_block_size))
944 				lhs_size = round_up(lhs_size, min_block_size);
945 
946 			/* Allocate blocks traversing LHS */
947 			lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
948 			err =  __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
949 						       NULL, &blocks_lhs);
950 			if (!err) {
951 				list_splice(&blocks_lhs, blocks);
952 				return 0;
953 			} else if (err != -ENOSPC) {
954 				gpu_buddy_free_list_internal(mm, blocks);
955 				return err;
956 			}
957 			/* Free blocks for the next iteration */
958 			gpu_buddy_free_list_internal(mm, blocks);
959 
960 			iter = rb_prev(iter);
961 		}
962 	}
963 
964 	return -ENOSPC;
965 }
966 
967 /**
968  * gpu_buddy_block_trim - free unused pages
969  *
970  * @mm: GPU buddy manager
971  * @start: start address to begin the trimming.
972  * @new_size: original size requested
973  * @blocks: Input and output list of allocated blocks.
974  * MUST contain single block as input to be trimmed.
975  * On success will contain the newly allocated blocks
976  * making up the @new_size. Blocks always appear in
977  * ascending order
978  *
979  * For contiguous allocation, we round up the size to the nearest
980  * power of two value, drivers consume *actual* size, so remaining
981  * portions are unused and can be optionally freed with this function
982  *
983  * Returns:
984  * 0 on success, error code on failure.
985  */
986 int gpu_buddy_block_trim(struct gpu_buddy *mm,
987 			 u64 *start,
988 			 u64 new_size,
989 			 struct list_head *blocks)
990 {
991 	struct gpu_buddy_block *parent;
992 	struct gpu_buddy_block *block;
993 	u64 block_start, block_end;
994 	LIST_HEAD(dfs);
995 	u64 new_start;
996 	int err;
997 
998 	if (!list_is_singular(blocks))
999 		return -EINVAL;
1000 
1001 	block = list_first_entry(blocks,
1002 				 struct gpu_buddy_block,
1003 				 link);
1004 
1005 	block_start = gpu_buddy_block_offset(block);
1006 	block_end = block_start + gpu_buddy_block_size(mm, block);
1007 
1008 	if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
1009 		return -EINVAL;
1010 
1011 	if (new_size > gpu_buddy_block_size(mm, block))
1012 		return -EINVAL;
1013 
1014 	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
1015 		return -EINVAL;
1016 
1017 	if (new_size == gpu_buddy_block_size(mm, block))
1018 		return 0;
1019 
1020 	new_start = block_start;
1021 	if (start) {
1022 		new_start = *start;
1023 
1024 		if (new_start < block_start)
1025 			return -EINVAL;
1026 
1027 		if (!IS_ALIGNED(new_start, mm->chunk_size))
1028 			return -EINVAL;
1029 
1030 		if (range_overflows(new_start, new_size, block_end))
1031 			return -EINVAL;
1032 	}
1033 
1034 	list_del(&block->link);
1035 	mark_free(mm, block);
1036 	mm->avail += gpu_buddy_block_size(mm, block);
1037 	if (gpu_buddy_block_is_clear(block))
1038 		mm->clear_avail += gpu_buddy_block_size(mm, block);
1039 
1040 	/* Prevent recursively freeing this node */
1041 	parent = block->parent;
1042 	block->parent = NULL;
1043 
1044 	list_add(&block->tmp_link, &dfs);
1045 	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
1046 	if (err) {
1047 		mark_allocated(mm, block);
1048 		mm->avail -= gpu_buddy_block_size(mm, block);
1049 		if (gpu_buddy_block_is_clear(block))
1050 			mm->clear_avail -= gpu_buddy_block_size(mm, block);
1051 		list_add(&block->link, blocks);
1052 	}
1053 
1054 	block->parent = parent;
1055 	return err;
1056 }
1057 EXPORT_SYMBOL(gpu_buddy_block_trim);
1058 
1059 static struct gpu_buddy_block *
1060 __gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
1061 			 u64 start, u64 end,
1062 			 unsigned int order,
1063 			 unsigned long flags)
1064 {
1065 	if (flags & GPU_BUDDY_RANGE_ALLOCATION)
1066 		/* Allocate traversing within the range */
1067 		return  __gpu_buddy_alloc_range_bias(mm, start, end,
1068 						     order, flags);
1069 	else
1070 		/* Allocate from freetree */
1071 		return alloc_from_freetree(mm, order, flags);
1072 }
1073 
1074 /**
1075  * gpu_buddy_alloc_blocks - allocate power-of-two blocks
1076  *
1077  * @mm: GPU buddy manager to allocate from
1078  * @start: start of the allowed range for this block
1079  * @end: end of the allowed range for this block
1080  * @size: size of the allocation in bytes
1081  * @min_block_size: alignment of the allocation
1082  * @blocks: output list head to add allocated blocks
1083  * @flags: GPU_BUDDY_*_ALLOCATION flags
1084  *
1085  * alloc_range_bias() called on range limitations, which traverses
1086  * the tree and returns the desired block.
1087  *
1088  * alloc_from_freetree() called when *no* range restrictions
1089  * are enforced, which picks the block from the freetree.
1090  *
1091  * Returns:
1092  * 0 on success, error code on failure.
1093  */
1094 int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
1095 			   u64 start, u64 end, u64 size,
1096 			   u64 min_block_size,
1097 			   struct list_head *blocks,
1098 			   unsigned long flags)
1099 {
1100 	struct gpu_buddy_block *block = NULL;
1101 	u64 original_size, original_min_size;
1102 	unsigned int min_order, order;
1103 	LIST_HEAD(allocated);
1104 	unsigned long pages;
1105 	int err;
1106 
1107 	if (size < mm->chunk_size)
1108 		return -EINVAL;
1109 
1110 	if (min_block_size < mm->chunk_size)
1111 		return -EINVAL;
1112 
1113 	if (!is_power_of_2(min_block_size))
1114 		return -EINVAL;
1115 
1116 	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
1117 		return -EINVAL;
1118 
1119 	if (end > mm->size)
1120 		return -EINVAL;
1121 
1122 	if (range_overflows(start, size, mm->size))
1123 		return -EINVAL;
1124 
1125 	/* Actual range allocation */
1126 	if (start + size == end) {
1127 		if (!IS_ALIGNED(start | end, min_block_size))
1128 			return -EINVAL;
1129 
1130 		return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
1131 	}
1132 
1133 	original_size = size;
1134 	original_min_size = min_block_size;
1135 
1136 	/* Roundup the size to power of 2 */
1137 	if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
1138 		size = roundup_pow_of_two(size);
1139 		min_block_size = size;
1140 	/* Align size value to min_block_size */
1141 	} else if (!IS_ALIGNED(size, min_block_size)) {
1142 		size = round_up(size, min_block_size);
1143 	}
1144 
1145 	pages = size >> ilog2(mm->chunk_size);
1146 	order = fls(pages) - 1;
1147 	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
1148 
1149 	if (order > mm->max_order || size > mm->size) {
1150 		if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
1151 		    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
1152 			return __alloc_contig_try_harder(mm, original_size,
1153 							 original_min_size, blocks);
1154 
1155 		return -EINVAL;
1156 	}
1157 
1158 	do {
1159 		order = min(order, (unsigned int)fls(pages) - 1);
1160 		BUG_ON(order > mm->max_order);
1161 		BUG_ON(order < min_order);
1162 
1163 		do {
1164 			block = __gpu_buddy_alloc_blocks(mm, start,
1165 							 end,
1166 							 order,
1167 							 flags);
1168 			if (!IS_ERR(block))
1169 				break;
1170 
1171 			if (order-- == min_order) {
1172 				/* Try allocation through force merge method */
1173 				if (mm->clear_avail &&
1174 				    !__force_merge(mm, start, end, min_order)) {
1175 					block = __gpu_buddy_alloc_blocks(mm, start,
1176 									 end,
1177 									 min_order,
1178 									 flags);
1179 					if (!IS_ERR(block)) {
1180 						order = min_order;
1181 						break;
1182 					}
1183 				}
1184 
1185 				/*
1186 				 * Try contiguous block allocation through
1187 				 * try harder method.
1188 				 */
1189 				if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
1190 				    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
1191 					return __alloc_contig_try_harder(mm,
1192 									 original_size,
1193 									 original_min_size,
1194 									 blocks);
1195 				err = -ENOSPC;
1196 				goto err_free;
1197 			}
1198 		} while (1);
1199 
1200 		mark_allocated(mm, block);
1201 		mm->avail -= gpu_buddy_block_size(mm, block);
1202 		if (gpu_buddy_block_is_clear(block))
1203 			mm->clear_avail -= gpu_buddy_block_size(mm, block);
1204 		kmemleak_update_trace(block);
1205 		list_add_tail(&block->link, &allocated);
1206 
1207 		pages -= BIT(order);
1208 
1209 		if (!pages)
1210 			break;
1211 	} while (1);
1212 
1213 	/* Trim the allocated block to the required size */
1214 	if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
1215 	    original_size != size) {
1216 		struct list_head *trim_list;
1217 		LIST_HEAD(temp);
1218 		u64 trim_size;
1219 
1220 		trim_list = &allocated;
1221 		trim_size = original_size;
1222 
1223 		if (!list_is_singular(&allocated)) {
1224 			block = list_last_entry(&allocated, typeof(*block), link);
1225 			list_move(&block->link, &temp);
1226 			trim_list = &temp;
1227 			trim_size = gpu_buddy_block_size(mm, block) -
1228 				(size - original_size);
1229 		}
1230 
1231 		gpu_buddy_block_trim(mm,
1232 				     NULL,
1233 				     trim_size,
1234 				     trim_list);
1235 
1236 		if (!list_empty(&temp))
1237 			list_splice_tail(trim_list, &allocated);
1238 	}
1239 
1240 	list_splice_tail(&allocated, blocks);
1241 	return 0;
1242 
1243 err_free:
1244 	gpu_buddy_free_list_internal(mm, &allocated);
1245 	return err;
1246 }
1247 EXPORT_SYMBOL(gpu_buddy_alloc_blocks);
1248 
1249 /**
1250  * gpu_buddy_block_print - print block information
1251  *
1252  * @mm: GPU buddy manager
1253  * @block: GPU buddy block
1254  */
1255 void gpu_buddy_block_print(struct gpu_buddy *mm,
1256 			   struct gpu_buddy_block *block)
1257 {
1258 	u64 start = gpu_buddy_block_offset(block);
1259 	u64 size = gpu_buddy_block_size(mm, block);
1260 
1261 	pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
1262 }
1263 EXPORT_SYMBOL(gpu_buddy_block_print);
1264 
1265 /**
1266  * gpu_buddy_print - print allocator state
1267  *
1268  * @mm: GPU buddy manager
1269  * @p: GPU printer to use
1270  */
1271 void gpu_buddy_print(struct gpu_buddy *mm)
1272 {
1273 	int order;
1274 
1275 	pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
1276 		mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
1277 
1278 	for (order = mm->max_order; order >= 0; order--) {
1279 		struct gpu_buddy_block *block, *tmp;
1280 		struct rb_root *root;
1281 		u64 count = 0, free;
1282 		unsigned int tree;
1283 
1284 		for_each_free_tree(tree) {
1285 			root = &mm->free_trees[tree][order];
1286 
1287 			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
1288 				BUG_ON(!gpu_buddy_block_is_free(block));
1289 				count++;
1290 			}
1291 		}
1292 
1293 		free = count * (mm->chunk_size << order);
1294 		if (free < SZ_1M)
1295 			pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
1296 				order, free >> 10, count);
1297 		else
1298 			pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
1299 				order, free >> 20, count);
1300 	}
1301 }
1302 EXPORT_SYMBOL(gpu_buddy_print);
1303 
1304 static void gpu_buddy_module_exit(void)
1305 {
1306 	kmem_cache_destroy(slab_blocks);
1307 }
1308 
1309 static int __init gpu_buddy_module_init(void)
1310 {
1311 	slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
1312 	if (!slab_blocks)
1313 		return -ENOMEM;
1314 
1315 	return 0;
1316 }
1317 
1318 module_init(gpu_buddy_module_init);
1319 module_exit(gpu_buddy_module_exit);
1320 
1321 MODULE_DESCRIPTION("GPU Buddy Allocator");
1322 MODULE_LICENSE("Dual MIT/GPL");
1323