xref: /linux/drivers/gpu/drm/panthor/panthor_heap.c (revision 815e260a18a3af4dab59025ee99a7156c0e8b5e0)
// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#include <linux/iosys-map.h>
#include <linux/rwsem.h>

#include <drm/drm_print.h>
#include <drm/panthor_drm.h>

#include "panthor_device.h"
#include "panthor_gem.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"

/*
 * The GPU heap context is an opaque structure used by the GPU to track the
 * heap allocations. The driver should only touch it to initialize it (zero all
 * fields). Because the CPU and GPU can both access this structure it is
 * required to be GPU cache line aligned.
 */
#define HEAP_CONTEXT_SIZE	32

/**
 * struct panthor_heap_chunk_header - Heap chunk header
 */
struct panthor_heap_chunk_header {
	/**
	 * @next: Next heap chunk in the list.
	 *
	 * This is a GPU VA.
	 */
	u64 next;

	/** @unknown: MBZ. */
	u32 unknown[14];
};

/**
 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
 */
struct panthor_heap_chunk {
	/** @node: Used to insert the heap chunk in panthor_heap::chunks. */
	struct list_head node;

	/** @bo: Buffer object backing the heap chunk. */
	struct panthor_kernel_bo *bo;
};

/**
 * struct panthor_heap - Structure used to manage tiler heap contexts.
 */
struct panthor_heap {
	/** @chunks: List containing all heap chunks allocated so far. */
	struct list_head chunks;

	/** @lock: Lock protecting insertion/removal in the chunks list. */
	struct mutex lock;

	/** @chunk_size: Size of each chunk. */
	u32 chunk_size;

	/** @max_chunks: Maximum number of chunks. */
	u32 max_chunks;

	/**
	 * @target_in_flight: Number of in-flight render passes after which
	 * we'd let the FW wait for fragment jobs to finish instead of
	 * allocating new chunks.
	 */
	u32 target_in_flight;

	/** @chunk_count: Number of heap chunks currently allocated. */
	u32 chunk_count;
};

#define MAX_HEAPS_PER_POOL    128

/**
 * struct panthor_heap_pool - Pool of heap contexts
 *
 * The pool is attached to a panthor_file and can't be shared across processes.
 */
struct panthor_heap_pool {
	/** @refcount: Reference count. */
	struct kref refcount;

	/** @ptdev: Device. */
	struct panthor_device *ptdev;

	/** @vm: VM this pool is bound to. */
	struct panthor_vm *vm;

	/** @lock: Lock protecting access to @xa. */
	struct rw_semaphore lock;

	/** @xa: Array storing panthor_heap objects. */
	struct xarray xa;

	/** @gpu_contexts: Buffer object containing the GPU heap contexts. */
	struct panthor_kernel_bo *gpu_contexts;

	/**
	 * @size: Size of all chunks across all heaps in the pool, plus the
	 * size of the @gpu_contexts buffer.
	 */
	atomic_t size;
};

static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
{
	u32 l2_features = ptdev->gpu_info.l2_features;
	u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);

	return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
}
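
/*
 * Worked example (illustrative only, not from the driver), assuming a GPU
 * whose L2 features report a 64-byte cache line: the stride is
 * ALIGN(HEAP_CONTEXT_SIZE, 64) = ALIGN(32, 64) = 64 bytes, so the
 * MAX_HEAPS_PER_POOL (128) contexts occupy 128 * 64 = 8192 bytes, i.e. the
 * 4k-aligned buffer allocated in panthor_heap_pool_create() spans two pages.
 */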

static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
{
	return panthor_heap_ctx_stride(pool->ptdev) * id;
}

static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
{
	return pool->gpu_contexts->kmap +
	       panthor_get_heap_ctx_offset(pool, id);
}

static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
				    struct panthor_heap *heap,
				    struct panthor_heap_chunk *chunk)
{
	mutex_lock(&heap->lock);
	list_del(&chunk->node);
	heap->chunk_count--;
	mutex_unlock(&heap->lock);

	atomic_sub(heap->chunk_size, &pool->size);

	panthor_kernel_bo_destroy(chunk->bo);
	kfree(chunk);
}

static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
				    struct panthor_heap *heap,
				    bool initial_chunk)
{
	struct panthor_heap_chunk *chunk;
	struct panthor_heap_chunk_header *hdr;
	int ret;

	chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
	if (!chunk)
		return -ENOMEM;

	chunk->bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, heap->chunk_size,
					     DRM_PANTHOR_BO_NO_MMAP,
					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					     PANTHOR_VM_KERNEL_AUTO_VA,
					     "Tiler heap chunk");
	if (IS_ERR(chunk->bo)) {
		ret = PTR_ERR(chunk->bo);
		goto err_free_chunk;
	}

	ret = panthor_kernel_bo_vmap(chunk->bo);
	if (ret)
		goto err_destroy_bo;

	hdr = chunk->bo->kmap;
	memset(hdr, 0, sizeof(*hdr));

	if (initial_chunk && !list_empty(&heap->chunks)) {
		struct panthor_heap_chunk *prev_chunk;
		u64 prev_gpuva;

		prev_chunk = list_first_entry(&heap->chunks,
					      struct panthor_heap_chunk,
					      node);

		prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
		hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	}

	panthor_kernel_bo_vunmap(chunk->bo);

	mutex_lock(&heap->lock);
	list_add(&chunk->node, &heap->chunks);
	heap->chunk_count++;
	mutex_unlock(&heap->lock);

	atomic_add(heap->chunk_size, &pool->size);

	return 0;

err_destroy_bo:
	panthor_kernel_bo_destroy(chunk->bo);

err_free_chunk:
	kfree(chunk);

	return ret;
}
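
/*
 * Illustrative sketch, not part of the driver: panthor_alloc_heap_chunk() and
 * panthor_heap_grow() both encode a chunk reference as the 4k-aligned chunk
 * GPU VA in bits [63:12] and the chunk size in 4k units in bits [11:0]. A
 * hypothetical decoder for that packed value could look like this.
 */
static inline void example_decode_chunk_ref(u64 chunk_ref, u64 *chunk_gpu_va,
					    u32 *chunk_size)
{
	*chunk_gpu_va = chunk_ref & GENMASK_ULL(63, 12);	/* address bits */
	*chunk_size = (chunk_ref & GENMASK_ULL(11, 0)) << 12;	/* size in bytes */
}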

static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
				     struct panthor_heap *heap)
{
	struct panthor_heap_chunk *chunk, *tmp;

	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
		panthor_free_heap_chunk(pool, heap, chunk);
}

static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
				     struct panthor_heap *heap,
				     u32 chunk_count)
{
	int ret;
	u32 i;

	for (i = 0; i < chunk_count; i++) {
		ret = panthor_alloc_heap_chunk(pool, heap, true);
		if (ret)
			return ret;
	}

	return 0;
}

static int
panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
{
	struct panthor_heap *heap;

	heap = xa_erase(&pool->xa, handle);
	if (!heap)
		return -EINVAL;

	panthor_free_heap_chunks(pool, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);
	return 0;
}

/**
 * panthor_heap_destroy() - Destroy a heap context
 * @pool: Pool this context belongs to.
 * @handle: Handle returned by panthor_heap_create().
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
{
	int ret;

	down_write(&pool->lock);
	ret = panthor_heap_destroy_locked(pool, handle);
	up_write(&pool->lock);

	return ret;
}

/**
 * panthor_heap_create() - Create a heap context
 * @pool: Pool to instantiate the heap context from.
 * @initial_chunk_count: Number of chunks allocated at initialization time.
 * Must be at least 1.
 * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
 * [128k:8M] range.
 * @max_chunks: Maximum number of chunks that can be allocated.
 * @target_in_flight: Maximum number of in-flight render passes.
 * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
 * context.
 * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
 * assigned to the heap context.
 *
 * Return: a heap context handle (>= 0) on success, a negative error code
 * otherwise.
 */
int panthor_heap_create(struct panthor_heap_pool *pool,
			u32 initial_chunk_count,
			u32 chunk_size,
			u32 max_chunks,
			u32 target_in_flight,
			u64 *heap_ctx_gpu_va,
			u64 *first_chunk_gpu_va)
{
	struct panthor_heap *heap;
	struct panthor_heap_chunk *first_chunk;
	struct panthor_vm *vm;
	int ret = 0;
	u32 id;

	if (initial_chunk_count == 0)
		return -EINVAL;

	if (initial_chunk_count > max_chunks)
		return -EINVAL;

	if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
	    chunk_size < SZ_128K || chunk_size > SZ_8M)
		return -EINVAL;

	down_read(&pool->lock);
	vm = panthor_vm_get(pool->vm);
	up_read(&pool->lock);

	/* The pool has been destroyed; we can't create a new heap. */
	if (!vm)
		return -EINVAL;

	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
	if (!heap) {
		ret = -ENOMEM;
		goto err_put_vm;
	}

	mutex_init(&heap->lock);
	INIT_LIST_HEAD(&heap->chunks);
	heap->chunk_size = chunk_size;
	heap->max_chunks = max_chunks;
	heap->target_in_flight = target_in_flight;

	ret = panthor_alloc_heap_chunks(pool, heap, initial_chunk_count);
	if (ret)
		goto err_free_heap;

	first_chunk = list_first_entry(&heap->chunks,
				       struct panthor_heap_chunk,
				       node);
	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);

	down_write(&pool->lock);
	/* The pool has been destroyed; we can't create a new heap. */
	if (!pool->vm) {
		ret = -EINVAL;
	} else {
		ret = xa_alloc(&pool->xa, &id, heap,
			       XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
		if (!ret) {
			void *gpu_ctx = panthor_get_heap_ctx(pool, id);

			memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
			*heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
					   panthor_get_heap_ctx_offset(pool, id);
		}
	}
	up_write(&pool->lock);

	if (ret)
		goto err_free_heap;

	panthor_vm_put(vm);
	return id;

err_free_heap:
	panthor_free_heap_chunks(pool, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);

err_put_vm:
	panthor_vm_put(vm);
	return ret;
}
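
/*
 * Illustrative sketch, not part of the driver: how a caller holding a heap
 * pool reference might instantiate a tiler heap. The chunk geometry below is
 * made up for the example; the real caller passes values supplied by
 * userspace through the tiler heap creation ioctl. On success the returned
 * handle identifies the heap for panthor_heap_destroy()/panthor_heap_grow().
 */
static inline int example_create_2m_heap(struct panthor_heap_pool *pool,
					 u64 *heap_ctx_gpu_va,
					 u64 *first_chunk_gpu_va)
{
	/* 4 initial chunks of 2MiB, at most 64 chunks, 8 render passes in flight. */
	return panthor_heap_create(pool, 4, SZ_2M, 64, 8,
				   heap_ctx_gpu_va, first_chunk_gpu_va);
}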

/**
 * panthor_heap_return_chunk() - Return an unused heap chunk
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @chunk_gpu_va: The chunk VA to return.
 *
 * This function is used when a chunk allocated with panthor_heap_grow()
 * couldn't be linked to the heap context through the FW interface because
 * the group requesting the allocation was scheduled out in the meantime.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
			      u64 heap_gpu_va,
			      u64 chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	chunk_gpu_va &= GENMASK_ULL(63, 12);

	mutex_lock(&heap->lock);
	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
		if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
			removed = chunk;
			list_del(&chunk->node);
			heap->chunk_count--;
			atomic_sub(heap->chunk_size, &pool->size);
			break;
		}
	}
	mutex_unlock(&heap->lock);

	if (removed) {
		panthor_kernel_bo_destroy(chunk->bo);
		kfree(chunk);
		ret = 0;
	} else {
		ret = -EINVAL;
	}

out_unlock:
	up_read(&pool->lock);
	return ret;
}

/**
 * panthor_heap_grow() - Make a heap context grow.
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @renderpasses_in_flight: Number of render passes currently in-flight.
 * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
 * @new_chunk_gpu_va: Pointer used to return the chunk VA.
 *
 * Return:
 * - 0 if a new chunk was allocated
 * - -ENOMEM if the tiler context reached the maximum number of chunks,
 *   if too many render passes are in-flight,
 *   or if the allocation failed
 * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
 */
int panthor_heap_grow(struct panthor_heap_pool *pool,
		      u64 heap_gpu_va,
		      u32 renderpasses_in_flight,
		      u32 pending_frag_count,
		      u64 *new_chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* If we reached the target number of in-flight render passes, or if we
	 * reached the maximum number of chunks, let the FW figure out another
	 * way to find some memory (wait for render passes to finish, or call
	 * the exception handler provided by the userspace driver, if any).
	 */
	if (renderpasses_in_flight > heap->target_in_flight ||
	    heap->chunk_count >= heap->max_chunks) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	/* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
	 * which goes through the blocking allocation path. Ultimately, we
	 * want a non-blocking allocation, so we can immediately report to the
	 * FW when the system is running out of memory. In that case, the FW
	 * can call a user-provided exception handler, which might try to free
	 * some tiler memory by issuing an intermediate fragment job. If the
	 * exception handler can't do anything, it will flag the queue as
	 * faulty so the job that triggered this tiler chunk allocation and all
	 * further jobs in this queue fail immediately instead of having to
	 * wait for the job timeout.
	 */
	ret = panthor_alloc_heap_chunk(pool, heap, false);
	if (ret)
		goto out_unlock;

	chunk = list_first_entry(&heap->chunks,
				 struct panthor_heap_chunk,
				 node);
	*new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	ret = 0;

out_unlock:
	up_read(&pool->lock);
	return ret;
}
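
/*
 * Illustrative sketch, not part of the driver: the expected pairing between
 * panthor_heap_grow() and panthor_heap_return_chunk(). The caller shape and
 * the group_still_scheduled condition are hypothetical; they stand in for the
 * scheduler code that forwards the new chunk to the FW heap context. If that
 * hand-over can't happen (e.g. the group was scheduled out in the meantime),
 * the chunk is given back so it isn't leaked.
 */
static inline int example_handle_tiler_oom(struct panthor_heap_pool *pool,
					   u64 heap_gpu_va,
					   u32 renderpasses_in_flight,
					   u32 pending_frag_count,
					   bool group_still_scheduled)
{
	u64 new_chunk_gpu_va;
	int ret;

	ret = panthor_heap_grow(pool, heap_gpu_va, renderpasses_in_flight,
				pending_frag_count, &new_chunk_gpu_va);
	if (ret)
		return ret;

	if (!group_still_scheduled) {
		/* Couldn't pass the chunk to the FW: return it to the heap. */
		panthor_heap_return_chunk(pool, heap_gpu_va, new_chunk_gpu_va);
		return -EAGAIN;
	}

	/* Here the real driver would hand new_chunk_gpu_va to the FW heap
	 * context (hypothetical step, performed by the scheduler code).
	 */
	return 0;
}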

static void panthor_heap_pool_release(struct kref *refcount)
{
	struct panthor_heap_pool *pool =
		container_of(refcount, struct panthor_heap_pool, refcount);

	xa_destroy(&pool->xa);
	kfree(pool);
}

/**
 * panthor_heap_pool_put() - Release a heap pool reference
 * @pool: Pool to release the reference on. Can be NULL.
 */
void panthor_heap_pool_put(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_put(&pool->refcount, panthor_heap_pool_release);
}

/**
 * panthor_heap_pool_get() - Get a heap pool reference
 * @pool: Pool to get the reference on. Can be NULL.
 *
 * Return: @pool.
 */
struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_get(&pool->refcount);

	return pool;
}

/**
 * panthor_heap_pool_create() - Create a heap pool
 * @ptdev: Device.
 * @vm: The VM this heap pool will be attached to.
 *
 * Heap pools might contain up to 128 heap contexts, and are per-VM.
 *
 * Return: A valid pointer on success, an ERR_PTR()-encoded error otherwise.
 */
struct panthor_heap_pool *
panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
{
	size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
			      panthor_heap_ctx_stride(ptdev),
			      4096);
	struct panthor_heap_pool *pool;
	int ret = 0;

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	/* We want a weak ref here: the heap pool belongs to the VM, so we're
	 * sure that, as long as the heap pool exists, the VM exists too.
	 */
	pool->vm = vm;
	pool->ptdev = ptdev;
	init_rwsem(&pool->lock);
	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
	kref_init(&pool->refcount);

	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
						      DRM_PANTHOR_BO_NO_MMAP,
						      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
						      PANTHOR_VM_KERNEL_AUTO_VA,
						      "Heap pool");
	if (IS_ERR(pool->gpu_contexts)) {
		ret = PTR_ERR(pool->gpu_contexts);
		goto err_destroy_pool;
	}

	ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
	if (ret)
		goto err_destroy_pool;

	atomic_add(pool->gpu_contexts->obj->size, &pool->size);

	return pool;

err_destroy_pool:
	panthor_heap_pool_destroy(pool);
	return ERR_PTR(ret);
}

/**
 * panthor_heap_pool_destroy() - Destroy a heap pool.
 * @pool: Pool to destroy.
 *
 * This function destroys all heap contexts and their resources, thus
 * preventing any use of the heap contexts or the chunks attached to them
 * after that point.
 *
 * If the GPU still has access to some heap contexts, a fault should be
 * triggered, which should flag the command stream groups using these
 * contexts as faulty.
 *
 * The heap pool object is only released when all references to this pool
 * are released.
 */
void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
{
	struct panthor_heap *heap;
	unsigned long i;

	if (!pool)
		return;

	down_write(&pool->lock);
	xa_for_each(&pool->xa, i, heap)
		drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));

	if (!IS_ERR_OR_NULL(pool->gpu_contexts)) {
		atomic_sub(pool->gpu_contexts->obj->size, &pool->size);
		panthor_kernel_bo_destroy(pool->gpu_contexts);
	}

	/* Reflect the fact that the pool has been destroyed. */
	pool->vm = NULL;
	up_write(&pool->lock);

	panthor_heap_pool_put(pool);
}
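
/*
 * Illustrative sketch, not part of the driver: the intended pool life cycle.
 * The owning side (in practice the panthor_file the pool is attached to) is
 * only hinted at here; what matters is that panthor_heap_pool_create() hands
 * back the initial reference, panthor_heap_pool_destroy() tears down the
 * heaps and drops that reference, and any extra reference taken with
 * panthor_heap_pool_get() must be balanced by panthor_heap_pool_put().
 */
static inline int example_pool_lifecycle(struct panthor_device *ptdev,
					 struct panthor_vm *vm)
{
	struct panthor_heap_pool *pool;

	pool = panthor_heap_pool_create(ptdev, vm);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	/* ... create heaps, let the FW grow them, destroy them ... */

	/* Tears down all remaining heaps and drops the initial reference. */
	panthor_heap_pool_destroy(pool);
	return 0;
}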

/**
 * panthor_heap_pool_size() - Get a heap pool's total size
 * @pool: Pool whose total chunk size should be returned.
 *
 * Return: The aggregated size of all chunks across all heaps in the pool,
 * or 0 if @pool is NULL.
 */
size_t panthor_heap_pool_size(struct panthor_heap_pool *pool)
{
	if (!pool)
		return 0;

	return atomic_read(&pool->size);
}
633