// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <kunit/test-bug.h>

#include <linux/export.h>
#include <linux/kmemleak.h>
#include <linux/module.h>
#include <linux/sizes.h>

#include <linux/gpu_buddy.h>

static struct kmem_cache *slab_blocks;

static unsigned int
gpu_buddy_block_state(struct gpu_buddy_block *block)
{
	return block->header & GPU_BUDDY_HEADER_STATE;
}

static bool
gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
}

static bool
gpu_buddy_block_is_split(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
}

static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
					       struct gpu_buddy_block *parent,
					       unsigned int order,
					       u64 offset)
{
	struct gpu_buddy_block *block;

	BUG_ON(order > GPU_BUDDY_MAX_ORDER);

	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
	if (!block)
		return NULL;

	block->header = offset;
	block->header |= order;
	block->parent = parent;

	RB_CLEAR_NODE(&block->rb);

	BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
	return block;
}

static void gpu_block_free(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	kmem_cache_free(slab_blocks, block);
}

static enum gpu_buddy_free_tree
get_block_tree(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_is_clear(block) ?
	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
}

static struct gpu_buddy_block *
rbtree_get_free_block(const struct rb_node *node)
{
	return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
}

static struct gpu_buddy_block *
rbtree_last_free_block(struct rb_root *root)
{
	return rbtree_get_free_block(rb_last(root));
}

static bool rbtree_is_empty(struct rb_root *root)
{
	return RB_EMPTY_ROOT(root);
}

static bool gpu_buddy_block_offset_less(const struct gpu_buddy_block *block,
					const struct gpu_buddy_block *node)
{
	return gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node);
}

static bool rbtree_block_offset_less(struct rb_node *block,
				     const struct rb_node *node)
{
	return gpu_buddy_block_offset_less(rbtree_get_free_block(block),
					   rbtree_get_free_block(node));
}

static void rbtree_insert(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block,
			  enum gpu_buddy_free_tree tree)
{
	rb_add(&block->rb,
	       &mm->free_trees[tree][gpu_buddy_block_order(block)],
	       rbtree_block_offset_less);
}

static void rbtree_remove(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block)
{
	unsigned int order = gpu_buddy_block_order(block);
	enum gpu_buddy_free_tree tree;
	struct rb_root *root;

	tree = get_block_tree(block);
	root = &mm->free_trees[tree][order];

	rb_erase(&block->rb, root);
	RB_CLEAR_NODE(&block->rb);
}

static void clear_reset(struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_CLEAR;
}

static void mark_cleared(struct gpu_buddy_block *block)
{
	block->header |= GPU_BUDDY_HEADER_CLEAR;
}

static void mark_allocated(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_ALLOCATED;

	rbtree_remove(mm, block);
}

static void mark_free(struct gpu_buddy *mm,
		      struct gpu_buddy_block *block)
{
	enum gpu_buddy_free_tree tree;

	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_FREE;

	tree = get_block_tree(block);
	rbtree_insert(mm, block, tree);
}

static void mark_split(struct gpu_buddy *mm,
		       struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_SPLIT;

	rbtree_remove(mm, block);
}

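/*
 * Note: the interval helpers below treat both ranges as inclusive, i.e.
 * [s1, e1] and [s2, e2], so callers pass end offsets computed as
 * "start + size - 1".
 */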
static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
{
	return s1 <= e2 && e1 >= s2;
}

static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
{
	return s1 <= s2 && e1 >= e2;
}

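/*
 * The buddy of a block is its sibling under the same parent: the right
 * child for a left block and vice versa. Two free buddies can be merged
 * back into their parent.
 */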
static struct gpu_buddy_block *
__get_buddy(struct gpu_buddy_block *block)
{
	struct gpu_buddy_block *parent;

	parent = block->parent;
	if (!parent)
		return NULL;

	if (parent->left == block)
		return parent->right;

	return parent->left;
}

static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
				     struct gpu_buddy_block *block,
				     bool force_merge)
{
	struct gpu_buddy_block *parent;
	unsigned int order;

	while ((parent = block->parent)) {
		struct gpu_buddy_block *buddy;

		buddy = __get_buddy(block);

		if (!gpu_buddy_block_is_free(buddy))
			break;

		if (!force_merge) {
			/*
			 * Compare the clear state of the block and its
			 * buddy, and exit the loop if they differ.
			 */
			if (gpu_buddy_block_is_clear(block) !=
			    gpu_buddy_block_is_clear(buddy))
				break;

			if (gpu_buddy_block_is_clear(block))
				mark_cleared(parent);
		}

		rbtree_remove(mm, buddy);
		if (force_merge && gpu_buddy_block_is_clear(buddy))
			mm->clear_avail -= gpu_buddy_block_size(mm, buddy);

		gpu_block_free(mm, block);
		gpu_block_free(mm, buddy);

		block = parent;
	}

	order = gpu_buddy_block_order(block);
	mark_free(mm, block);

	return order;
}

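/*
 * Scan the free trees within [start, end] and force-merge buddies upwards
 * until a free block of at least @min_order emerges. Unlike the regular
 * free path, this merges buddies with differing clear states; the merged
 * parent ends up dirty, so clear_avail is adjusted accordingly.
 */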
static int __force_merge(struct gpu_buddy *mm,
			 u64 start,
			 u64 end,
			 unsigned int min_order)
{
	unsigned int tree, order;
	int i;

	if (!min_order)
		return -ENOMEM;

	if (min_order > mm->max_order)
		return -EINVAL;

	for_each_free_tree(tree) {
		for (i = min_order - 1; i >= 0; i--) {
			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);

			while (iter) {
				struct gpu_buddy_block *block, *buddy;
				u64 block_start, block_end;

				block = rbtree_get_free_block(iter);
				iter = rb_prev(iter);

				if (!block || !block->parent)
					continue;

				block_start = gpu_buddy_block_offset(block);
				block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

				if (!contains(start, end, block_start, block_end))
					continue;

				buddy = __get_buddy(block);
				if (!gpu_buddy_block_is_free(buddy))
					continue;

				WARN_ON(gpu_buddy_block_is_clear(block) ==
					gpu_buddy_block_is_clear(buddy));

				/*
				 * Advance to the next node when the current node is the buddy,
				 * as freeing the block will also remove its buddy from the tree.
				 */
				if (iter == &buddy->rb)
					iter = rb_prev(iter);

				rbtree_remove(mm, block);
				if (gpu_buddy_block_is_clear(block))
					mm->clear_avail -= gpu_buddy_block_size(mm, block);

				order = __gpu_buddy_free(mm, block, true);
				if (order >= min_order)
					return 0;
			}
		}
	}

	return -ENOMEM;
}

/**
 * gpu_buddy_init - init memory manager
 *
 * @mm: GPU buddy manager to initialize
 * @size: size in bytes to manage
 * @chunk_size: minimum page size in bytes for our allocations
 *
 * Initializes the memory manager and its resources.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
{
	unsigned int i, j, root_count = 0;
	u64 offset = 0;

	if (size < chunk_size)
		return -EINVAL;

	if (chunk_size < SZ_4K)
		return -EINVAL;

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	size = round_down(size, chunk_size);

	mm->size = size;
	mm->avail = size;
	mm->clear_avail = 0;
	mm->chunk_size = chunk_size;
	mm->max_order = ilog2(size) - ilog2(chunk_size);

	BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);

	mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
				       sizeof(*mm->free_trees),
				       GFP_KERNEL);
	if (!mm->free_trees)
		return -ENOMEM;

	for_each_free_tree(i) {
		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
						  sizeof(struct rb_root),
						  GFP_KERNEL);
		if (!mm->free_trees[i])
			goto out_free_tree;

		for (j = 0; j <= mm->max_order; ++j)
			mm->free_trees[i][j] = RB_ROOT;
	}

	mm->n_roots = hweight64(size);

	mm->roots = kmalloc_array(mm->n_roots,
				  sizeof(struct gpu_buddy_block *),
				  GFP_KERNEL);
	if (!mm->roots)
		goto out_free_tree;

	/*
	 * Split into power-of-two blocks, in case we are given a size that is
	 * not itself a power-of-two.
	 */
	do {
		struct gpu_buddy_block *root;
		unsigned int order;
		u64 root_size;

		order = ilog2(size) - ilog2(chunk_size);
		root_size = chunk_size << order;

		root = gpu_block_alloc(mm, NULL, order, offset);
		if (!root)
			goto out_free_roots;

		mark_free(mm, root);

		BUG_ON(root_count > mm->max_order);
		BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);

		mm->roots[root_count] = root;

		offset += root_size;
		size -= root_size;
		root_count++;
	} while (size);

	return 0;

out_free_roots:
	while (root_count--)
		gpu_block_free(mm, mm->roots[root_count]);
	kfree(mm->roots);
out_free_tree:
	while (i--)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	return -ENOMEM;
}
EXPORT_SYMBOL(gpu_buddy_init);
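
/*
 * Usage sketch (illustrative only, not taken from a real driver): a driver
 * managing a 64 MiB region with a 4 KiB minimum chunk size might do the
 * following; the variable name `mm` and both sizes are assumptions made
 * for this example.
 *
 *	struct gpu_buddy mm;
 *	int err;
 *
 *	err = gpu_buddy_init(&mm, SZ_64M, SZ_4K);
 *	if (err)
 *		return err;
 *	...
 *	gpu_buddy_fini(&mm);
 */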

/**
 * gpu_buddy_fini - tear down the memory manager
 *
 * @mm: GPU buddy manager to free
 *
 * Clean up the memory manager resources and the free trees.
 */
void gpu_buddy_fini(struct gpu_buddy *mm)
{
	u64 root_size, size, start;
	unsigned int order;
	int i;

	size = mm->size;

	for (i = 0; i < mm->n_roots; ++i) {
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		__force_merge(mm, start, start + size, order);

		if (WARN_ON(!gpu_buddy_block_is_free(mm->roots[i])))
			kunit_fail_current_test("buddy_fini() root");

		gpu_block_free(mm, mm->roots[i]);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	WARN_ON(mm->avail != mm->size);

	for_each_free_tree(i)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	kfree(mm->roots);
}
EXPORT_SYMBOL(gpu_buddy_fini);

static int split_block(struct gpu_buddy *mm,
		       struct gpu_buddy_block *block)
{
	unsigned int block_order = gpu_buddy_block_order(block) - 1;
	u64 offset = gpu_buddy_block_offset(block);

	BUG_ON(!gpu_buddy_block_is_free(block));
	BUG_ON(!gpu_buddy_block_order(block));

	block->left = gpu_block_alloc(mm, block, block_order, offset);
	if (!block->left)
		return -ENOMEM;

	block->right = gpu_block_alloc(mm, block, block_order,
				       offset + (mm->chunk_size << block_order));
	if (!block->right) {
		gpu_block_free(mm, block->left);
		return -ENOMEM;
	}

	mark_split(mm, block);

	if (gpu_buddy_block_is_clear(block)) {
		mark_cleared(block->left);
		mark_cleared(block->right);
		clear_reset(block);
	}

	mark_free(mm, block->left);
	mark_free(mm, block->right);

	return 0;
}

/**
 * gpu_buddy_reset_clear - reset blocks clear state
 *
 * @mm: GPU buddy manager
 * @is_clear: blocks clear state
 *
 * Reset the clear state of every block in the freetree according to the
 * @is_clear value.
 */
void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
{
	enum gpu_buddy_free_tree src_tree, dst_tree;
	u64 root_size, size, start;
	unsigned int order;
	int i;

	size = mm->size;
	for (i = 0; i < mm->n_roots; ++i) {
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		__force_merge(mm, start, start + size, order);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
	dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	for (i = 0; i <= mm->max_order; ++i) {
		struct rb_root *root = &mm->free_trees[src_tree][i];
		struct gpu_buddy_block *block, *tmp;

		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
			rbtree_remove(mm, block);
			if (is_clear) {
				mark_cleared(block);
				mm->clear_avail += gpu_buddy_block_size(mm, block);
			} else {
				clear_reset(block);
				mm->clear_avail -= gpu_buddy_block_size(mm, block);
			}

			rbtree_insert(mm, block, dst_tree);
		}
	}
}
EXPORT_SYMBOL(gpu_buddy_reset_clear);
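
/*
 * Usage sketch (illustrative only): after the driver has scrubbed all free
 * memory, e.g. with a device-wide clear-engine pass, it can mark every
 * free block as cleared in one call; `mm` is assumed to be an initialized
 * manager.
 *
 *	gpu_buddy_reset_clear(&mm, true);
 */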

/**
 * gpu_buddy_free_block - free a block
 *
 * @mm: GPU buddy manager
 * @block: block to be freed
 */
void gpu_buddy_free_block(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block)
{
	BUG_ON(!gpu_buddy_block_is_allocated(block));
	mm->avail += gpu_buddy_block_size(mm, block);
	if (gpu_buddy_block_is_clear(block))
		mm->clear_avail += gpu_buddy_block_size(mm, block);

	__gpu_buddy_free(mm, block, false);
}
EXPORT_SYMBOL(gpu_buddy_free_block);

static void __gpu_buddy_free_list(struct gpu_buddy *mm,
				  struct list_head *objects,
				  bool mark_clear,
				  bool mark_dirty)
{
	struct gpu_buddy_block *block, *on;

	WARN_ON(mark_dirty && mark_clear);

	list_for_each_entry_safe(block, on, objects, link) {
		if (mark_clear)
			mark_cleared(block);
		else if (mark_dirty)
			clear_reset(block);
		gpu_buddy_free_block(mm, block);
		cond_resched();
	}
	INIT_LIST_HEAD(objects);
}

static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
					 struct list_head *objects)
{
	/*
	 * Don't touch the clear/dirty bit, since allocation is still internal
	 * at this point. For example we might have just failed part of the
	 * allocation.
	 */
	__gpu_buddy_free_list(mm, objects, false, false);
}

/**
 * gpu_buddy_free_list - free blocks
 *
 * @mm: GPU buddy manager
 * @objects: input list head to free blocks
 * @flags: optional flags like GPU_BUDDY_CLEARED
 */
void gpu_buddy_free_list(struct gpu_buddy *mm,
			 struct list_head *objects,
			 unsigned int flags)
{
	bool mark_clear = flags & GPU_BUDDY_CLEARED;

	__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
}
EXPORT_SYMBOL(gpu_buddy_free_list);
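
/*
 * Usage sketch (illustrative only): returning blocks whose backing memory
 * the driver has already scrubbed, so they are inserted into the clear
 * tree; `mm` and `blocks` are assumed to come from a prior
 * gpu_buddy_alloc_blocks() call.
 *
 *	gpu_buddy_free_list(&mm, &blocks, GPU_BUDDY_CLEARED);
 */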

static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
{
	bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;

	return needs_clear != gpu_buddy_block_is_clear(block);
}

static struct gpu_buddy_block *
__alloc_range_bias(struct gpu_buddy *mm,
		   u64 start, u64 end,
		   unsigned int order,
		   unsigned long flags,
		   bool fallback)
{
	u64 req_size = mm->chunk_size << order;
	struct gpu_buddy_block *block;
	struct gpu_buddy_block *buddy;
	LIST_HEAD(dfs);
	int err;
	int i;

	end = end - 1;

	for (i = 0; i < mm->n_roots; ++i)
		list_add_tail(&mm->roots[i]->tmp_link, &dfs);

	do {
		u64 block_start;
		u64 block_end;

		block = list_first_entry_or_null(&dfs,
						 struct gpu_buddy_block,
						 tmp_link);
		if (!block)
			break;

		list_del(&block->tmp_link);

		if (gpu_buddy_block_order(block) < order)
			continue;

		block_start = gpu_buddy_block_offset(block);
		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

		if (!overlaps(start, end, block_start, block_end))
			continue;

		if (gpu_buddy_block_is_allocated(block))
			continue;

		if (block_start < start || block_end > end) {
			u64 adjusted_start = max(block_start, start);
			u64 adjusted_end = min(block_end, end);

			if (round_down(adjusted_end + 1, req_size) <=
			    round_up(adjusted_start, req_size))
				continue;
		}

		if (!fallback && block_incompatible(block, flags))
			continue;

		if (contains(start, end, block_start, block_end) &&
		    order == gpu_buddy_block_order(block)) {
			/*
			 * Find the free block within the range.
			 */
			if (gpu_buddy_block_is_free(block))
				return block;

			continue;
		}

		if (!gpu_buddy_block_is_split(block)) {
			err = split_block(mm, block);
			if (unlikely(err))
				goto err_undo;
		}

		list_add(&block->right->tmp_link, &dfs);
		list_add(&block->left->tmp_link, &dfs);
	} while (1);

	return ERR_PTR(-ENOSPC);

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}

static struct gpu_buddy_block *
__gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
			     u64 start, u64 end,
			     unsigned int order,
			     unsigned long flags)
{
	struct gpu_buddy_block *block;
	bool fallback = false;

	block = __alloc_range_bias(mm, start, end, order,
				   flags, fallback);
	if (IS_ERR(block))
		return __alloc_range_bias(mm, start, end, order,
					  flags, !fallback);

	return block;
}

static struct gpu_buddy_block *
get_maxblock(struct gpu_buddy *mm,
	     unsigned int order,
	     enum gpu_buddy_free_tree tree)
{
	struct gpu_buddy_block *max_block = NULL, *block = NULL;
	struct rb_root *root;
	unsigned int i;

	for (i = order; i <= mm->max_order; ++i) {
		root = &mm->free_trees[tree][i];
		block = rbtree_last_free_block(root);
		if (!block)
			continue;

		if (!max_block) {
			max_block = block;
			continue;
		}

		if (gpu_buddy_block_offset(block) >
		    gpu_buddy_block_offset(max_block)) {
			max_block = block;
		}
	}

	return max_block;
}

static struct gpu_buddy_block *
alloc_from_freetree(struct gpu_buddy *mm,
		    unsigned int order,
		    unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	struct rb_root *root;
	enum gpu_buddy_free_tree tree;
	unsigned int tmp;
	int err;

	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
		block = get_maxblock(mm, order, tree);
		if (block)
			/* Store the obtained block order */
			tmp = gpu_buddy_block_order(block);
	} else {
		for (tmp = order; tmp <= mm->max_order; ++tmp) {
			/* Get RB tree root for this order and tree */
			root = &mm->free_trees[tree][tmp];
			block = rbtree_last_free_block(root);
			if (block)
				break;
		}
	}

	if (!block) {
		/* Try allocating from the other tree */
		tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
			GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;

		for (tmp = order; tmp <= mm->max_order; ++tmp) {
			root = &mm->free_trees[tree][tmp];
			block = rbtree_last_free_block(root);
			if (block)
				break;
		}

		if (!block)
			return ERR_PTR(-ENOSPC);
	}

	BUG_ON(!gpu_buddy_block_is_free(block));

	while (tmp != order) {
		err = split_block(mm, block);
		if (unlikely(err))
			goto err_undo;

		block = block->right;
		tmp--;
	}
	return block;

err_undo:
	if (tmp != order)
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}

static int __alloc_range(struct gpu_buddy *mm,
			 struct list_head *dfs,
			 u64 start, u64 size,
			 struct list_head *blocks,
			 u64 *total_allocated_on_err)
{
	struct gpu_buddy_block *block;
	struct gpu_buddy_block *buddy;
	u64 total_allocated = 0;
	LIST_HEAD(allocated);
	u64 end;
	int err;

	end = start + size - 1;

	do {
		u64 block_start;
		u64 block_end;

		block = list_first_entry_or_null(dfs,
						 struct gpu_buddy_block,
						 tmp_link);
		if (!block)
			break;

		list_del(&block->tmp_link);

		block_start = gpu_buddy_block_offset(block);
		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

		if (!overlaps(start, end, block_start, block_end))
			continue;

		if (gpu_buddy_block_is_allocated(block)) {
			err = -ENOSPC;
			goto err_free;
		}

		if (contains(start, end, block_start, block_end)) {
			if (gpu_buddy_block_is_free(block)) {
				mark_allocated(mm, block);
				total_allocated += gpu_buddy_block_size(mm, block);
				mm->avail -= gpu_buddy_block_size(mm, block);
				if (gpu_buddy_block_is_clear(block))
					mm->clear_avail -= gpu_buddy_block_size(mm, block);
				list_add_tail(&block->link, &allocated);
				continue;
			} else if (!mm->clear_avail) {
				err = -ENOSPC;
				goto err_free;
			}
		}

		if (!gpu_buddy_block_is_split(block)) {
			err = split_block(mm, block);
			if (unlikely(err))
				goto err_undo;
		}

		list_add(&block->right->tmp_link, dfs);
		list_add(&block->left->tmp_link, dfs);
	} while (1);

	if (total_allocated < size) {
		err = -ENOSPC;
		goto err_free;
	}

	list_splice_tail(&allocated, blocks);

	return 0;

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);

err_free:
	if (err == -ENOSPC && total_allocated_on_err) {
		list_splice_tail(&allocated, blocks);
		*total_allocated_on_err = total_allocated;
	} else {
		gpu_buddy_free_list_internal(mm, &allocated);
	}

	return err;
}

static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
				   u64 start,
				   u64 size,
				   u64 *total_allocated_on_err,
				   struct list_head *blocks)
{
	LIST_HEAD(dfs);
	int i;

	for (i = 0; i < mm->n_roots; ++i)
		list_add_tail(&mm->roots[i]->tmp_link, &dfs);

	return __alloc_range(mm, &dfs, start, size,
			     blocks, total_allocated_on_err);
}

static int __alloc_contig_try_harder(struct gpu_buddy *mm,
				     u64 size,
				     u64 min_block_size,
				     struct list_head *blocks)
{
	u64 rhs_offset, lhs_offset, lhs_size, filled;
	struct gpu_buddy_block *block;
	unsigned int tree, order;
	LIST_HEAD(blocks_lhs);
	unsigned long pages;
	u64 modify_size;
	int err;

	modify_size = rounddown_pow_of_two(size);
	pages = modify_size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;
	if (order == 0)
		return -ENOSPC;

	for_each_free_tree(tree) {
		struct rb_root *root;
		struct rb_node *iter;

		root = &mm->free_trees[tree][order];
		if (rbtree_is_empty(root))
			continue;

		iter = rb_last(root);
		while (iter) {
			block = rbtree_get_free_block(iter);

			/* Allocate blocks traversing RHS */
			rhs_offset = gpu_buddy_block_offset(block);
			err = __gpu_buddy_alloc_range(mm, rhs_offset, size,
						      &filled, blocks);
			if (!err || err != -ENOSPC)
				return err;

			lhs_size = max((size - filled), min_block_size);
			if (!IS_ALIGNED(lhs_size, min_block_size))
				lhs_size = round_up(lhs_size, min_block_size);

			/* Allocate blocks traversing LHS */
			lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
			err = __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
						      NULL, &blocks_lhs);
			if (!err) {
				list_splice(&blocks_lhs, blocks);
				return 0;
			} else if (err != -ENOSPC) {
				gpu_buddy_free_list_internal(mm, blocks);
				return err;
			}
			/* Free blocks for the next iteration */
			gpu_buddy_free_list_internal(mm, blocks);

			iter = rb_prev(iter);
		}
	}

	return -ENOSPC;
}

/**
 * gpu_buddy_block_trim - free unused pages
 *
 * @mm: GPU buddy manager
 * @start: start address to begin the trimming.
 * @new_size: original size requested
 * @blocks: Input and output list of allocated blocks.
 * MUST contain a single block as input to be trimmed.
 * On success it will contain the newly allocated blocks
 * making up the @new_size. Blocks always appear in
 * ascending order
 *
 * For contiguous allocations, the size is rounded up to the nearest
 * power-of-two value; drivers consume the *actual* size, so the remaining
 * portion is unused and can optionally be freed with this function.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int gpu_buddy_block_trim(struct gpu_buddy *mm,
			 u64 *start,
			 u64 new_size,
			 struct list_head *blocks)
{
	struct gpu_buddy_block *parent;
	struct gpu_buddy_block *block;
	u64 block_start, block_end;
	LIST_HEAD(dfs);
	u64 new_start;
	int err;

	if (!list_is_singular(blocks))
		return -EINVAL;

	block = list_first_entry(blocks,
				 struct gpu_buddy_block,
				 link);

	block_start = gpu_buddy_block_offset(block);
	block_end = block_start + gpu_buddy_block_size(mm, block);

	if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
		return -EINVAL;

	if (new_size > gpu_buddy_block_size(mm, block))
		return -EINVAL;

	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
		return -EINVAL;

	if (new_size == gpu_buddy_block_size(mm, block))
		return 0;

	new_start = block_start;
	if (start) {
		new_start = *start;

		if (new_start < block_start)
			return -EINVAL;

		if (!IS_ALIGNED(new_start, mm->chunk_size))
			return -EINVAL;

		if (range_overflows(new_start, new_size, block_end))
			return -EINVAL;
	}

	list_del(&block->link);
	mark_free(mm, block);
	mm->avail += gpu_buddy_block_size(mm, block);
	if (gpu_buddy_block_is_clear(block))
		mm->clear_avail += gpu_buddy_block_size(mm, block);

	/* Prevent recursively freeing this node */
	parent = block->parent;
	block->parent = NULL;

	list_add(&block->tmp_link, &dfs);
	err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
	if (err) {
		mark_allocated(mm, block);
		mm->avail -= gpu_buddy_block_size(mm, block);
		if (gpu_buddy_block_is_clear(block))
			mm->clear_avail -= gpu_buddy_block_size(mm, block);
		list_add(&block->link, blocks);
	}

	block->parent = parent;
	return err;
}
EXPORT_SYMBOL(gpu_buddy_block_trim);
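
/*
 * Usage sketch (illustrative only): a contiguous request for 5 MiB gets
 * rounded up to an 8 MiB block; trimming releases the unused 3 MiB tail
 * back to the allocator. `mm` and `blocks` are assumed to come from a
 * prior contiguous allocation.
 *
 *	err = gpu_buddy_block_trim(&mm, NULL, SZ_1M * 5, &blocks);
 */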

static struct gpu_buddy_block *
__gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
			 u64 start, u64 end,
			 unsigned int order,
			 unsigned long flags)
{
	if (flags & GPU_BUDDY_RANGE_ALLOCATION)
		/* Allocate traversing within the range */
		return __gpu_buddy_alloc_range_bias(mm, start, end,
						    order, flags);
	else
		/* Allocate from freetree */
		return alloc_from_freetree(mm, order, flags);
}

/**
 * gpu_buddy_alloc_blocks - allocate power-of-two blocks
 *
 * @mm: GPU buddy manager to allocate from
 * @start: start of the allowed range for this block
 * @end: end of the allowed range for this block
 * @size: size of the allocation in bytes
 * @min_block_size: alignment of the allocation
 * @blocks: output list head to add allocated blocks
 * @flags: GPU_BUDDY_*_ALLOCATION flags
 *
 * alloc_range_bias() is called when range limitations apply; it traverses
 * the tree and returns the desired block.
 *
 * alloc_from_freetree() is called when *no* range restrictions are
 * enforced; it picks the block from the freetree.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
			   u64 start, u64 end, u64 size,
			   u64 min_block_size,
			   struct list_head *blocks,
			   unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	u64 original_size, original_min_size;
	unsigned int min_order, order;
	LIST_HEAD(allocated);
	unsigned long pages;
	int err;

	if (size < mm->chunk_size)
		return -EINVAL;

	if (min_block_size < mm->chunk_size)
		return -EINVAL;

	if (!is_power_of_2(min_block_size))
		return -EINVAL;

	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
		return -EINVAL;

	if (end > mm->size)
		return -EINVAL;

	if (range_overflows(start, size, mm->size))
		return -EINVAL;

	/* Actual range allocation */
	if (start + size == end) {
		if (!IS_ALIGNED(start | end, min_block_size))
			return -EINVAL;

		return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
	}

	original_size = size;
	original_min_size = min_block_size;

	/* Round up the size to a power of two */
	if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
		size = roundup_pow_of_two(size);
		min_block_size = size;
	/* Align the size to min_block_size */
	} else if (!IS_ALIGNED(size, min_block_size)) {
		size = round_up(size, min_block_size);
	}

	pages = size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;
	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);

	if (order > mm->max_order || size > mm->size) {
		if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
		    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
			return __alloc_contig_try_harder(mm, original_size,
							 original_min_size, blocks);

		return -EINVAL;
	}

	do {
		order = min(order, (unsigned int)fls(pages) - 1);
		BUG_ON(order > mm->max_order);
		BUG_ON(order < min_order);

		do {
			block = __gpu_buddy_alloc_blocks(mm, start,
							 end,
							 order,
							 flags);
			if (!IS_ERR(block))
				break;

			if (order-- == min_order) {
				/* Try allocation through force merge method */
				if (mm->clear_avail &&
				    !__force_merge(mm, start, end, min_order)) {
					block = __gpu_buddy_alloc_blocks(mm, start,
									 end,
									 min_order,
									 flags);
					if (!IS_ERR(block)) {
						order = min_order;
						break;
					}
				}

				/*
				 * Try a contiguous block allocation through
				 * the try-harder method.
				 */
				if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
				    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
					return __alloc_contig_try_harder(mm,
									 original_size,
									 original_min_size,
									 blocks);
				err = -ENOSPC;
				goto err_free;
			}
		} while (1);

		mark_allocated(mm, block);
		mm->avail -= gpu_buddy_block_size(mm, block);
		if (gpu_buddy_block_is_clear(block))
			mm->clear_avail -= gpu_buddy_block_size(mm, block);
		kmemleak_update_trace(block);
		list_add_tail(&block->link, &allocated);

		pages -= BIT(order);

		if (!pages)
			break;
	} while (1);

	/* Trim the allocated block to the required size */
	if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
	    original_size != size) {
		struct list_head *trim_list;
		LIST_HEAD(temp);
		u64 trim_size;

		trim_list = &allocated;
		trim_size = original_size;

		if (!list_is_singular(&allocated)) {
			block = list_last_entry(&allocated, typeof(*block), link);
			list_move(&block->link, &temp);
			trim_list = &temp;
			trim_size = gpu_buddy_block_size(mm, block) -
				(size - original_size);
		}

		gpu_buddy_block_trim(mm,
				     NULL,
				     trim_size,
				     trim_list);

		if (!list_empty(&temp))
			list_splice_tail(trim_list, &allocated);
	}

	list_splice_tail(&allocated, blocks);
	return 0;

err_free:
	gpu_buddy_free_list_internal(mm, &allocated);
	return err;
}
EXPORT_SYMBOL(gpu_buddy_alloc_blocks);
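
/*
 * Usage sketch (illustrative only): a 16 MiB allocation from anywhere in
 * the managed range, preferring pre-cleared memory; the names and sizes
 * are assumptions made for this example.
 *
 *	LIST_HEAD(blocks);
 *	int err;
 *
 *	err = gpu_buddy_alloc_blocks(&mm, 0, mm.size, SZ_16M, SZ_4K,
 *				     &blocks, GPU_BUDDY_CLEAR_ALLOCATION);
 *	if (err)
 *		return err;
 */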

/**
 * gpu_buddy_block_print - print block information
 *
 * @mm: GPU buddy manager
 * @block: GPU buddy block
 */
void gpu_buddy_block_print(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	u64 start = gpu_buddy_block_offset(block);
	u64 size = gpu_buddy_block_size(mm, block);

	pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
}
EXPORT_SYMBOL(gpu_buddy_block_print);

/**
 * gpu_buddy_print - print allocator state
 *
 * @mm: GPU buddy manager
 */
void gpu_buddy_print(struct gpu_buddy *mm)
{
	int order;

	pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
		mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);

	for (order = mm->max_order; order >= 0; order--) {
		struct gpu_buddy_block *block, *tmp;
		struct rb_root *root;
		u64 count = 0, free;
		unsigned int tree;

		for_each_free_tree(tree) {
			root = &mm->free_trees[tree][order];

			rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
				BUG_ON(!gpu_buddy_block_is_free(block));
				count++;
			}
		}

		free = count * (mm->chunk_size << order);
		if (free < SZ_1M)
			pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
				order, free >> 10, count);
		else
			pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
				order, free >> 20, count);
	}
}
EXPORT_SYMBOL(gpu_buddy_print);

static void gpu_buddy_module_exit(void)
{
	kmem_cache_destroy(slab_blocks);
}

static int __init gpu_buddy_module_init(void)
{
	slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
	if (!slab_blocks)
		return -ENOMEM;

	return 0;
}

module_init(gpu_buddy_module_init);
module_exit(gpu_buddy_module_exit);

MODULE_DESCRIPTION("GPU Buddy Allocator");
MODULE_LICENSE("Dual MIT/GPL");