xref: /linux/drivers/gpu/drm/panthor/panthor_heap.c (revision c2aa3089ad7e7fec3ec4a58d8d0904b5e9b392a1)
1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
3 
4 #include <linux/iosys-map.h>
5 #include <linux/rwsem.h>
6 
7 #include <drm/panthor_drm.h>
8 
9 #include "panthor_device.h"
10 #include "panthor_gem.h"
11 #include "panthor_heap.h"
12 #include "panthor_mmu.h"
13 #include "panthor_regs.h"
14 
/*
 * The GPU heap context is an opaque structure used by the GPU to track the
 * heap allocations. The driver should only touch it to initialize it (zero all
 * fields). Because the CPU and GPU can both access this structure, it is
 * required to be GPU cache line aligned.
 *
 * HEAP_CONTEXT_SIZE is the unaligned size of one context; the per-context
 * stride actually used in the context buffer is this value rounded up to the
 * GPU L2 cache line size.
 */
#define HEAP_CONTEXT_SIZE	32
22 
/**
 * struct panthor_heap_chunk_header - Heap chunk header
 *
 * Header placed at the start of every heap chunk buffer. The driver zeroes
 * it when the chunk is allocated and, for initial chunks, fills @next to
 * chain the chunk to the one previously allocated.
 */
struct panthor_heap_chunk_header {
	/**
	 * @next: Next heap chunk in the list.
	 *
	 * This is a GPU VA. When set by the driver, bits [63:12] hold the
	 * address of the next chunk and the low bits hold the chunk size
	 * shifted right by 12 bits.
	 */
	u64 next;

	/** @unknown: MBZ. Zeroed by the driver at chunk allocation time. */
	u32 unknown[14];
};
37 
/**
 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
 *
 * One instance exists per chunk buffer object; it is freed together with
 * the buffer object when the chunk is released.
 */
struct panthor_heap_chunk {
	/** @node: Used to insert the heap chunk in panthor_heap::chunks. */
	struct list_head node;

	/** @bo: Buffer object backing the heap chunk. */
	struct panthor_kernel_bo *bo;
};
48 
/**
 * struct panthor_heap - Structure used to manage tiler heap contexts.
 */
struct panthor_heap {
	/**
	 * @chunks: List containing all heap chunks allocated so far.
	 *
	 * New chunks are added at the head of the list.
	 */
	struct list_head chunks;

	/**
	 * @lock: Lock protecting insertion in and removal from the chunks
	 * list, as well as updates to @chunk_count.
	 */
	struct mutex lock;

	/** @chunk_size: Size of each chunk. */
	u32 chunk_size;

	/** @max_chunks: Maximum number of chunks. */
	u32 max_chunks;

	/**
	 * @target_in_flight: Number of in-flight render passes after which
	 * we'd let the FW wait for fragment job to finish instead of allocating new chunks.
	 */
	u32 target_in_flight;

	/** @chunk_count: Number of heap chunks currently allocated. */
	u32 chunk_count;
};
74 
/*
 * Maximum number of heap contexts a pool can hold. Heap handles are
 * allocated in the [0, MAX_HEAPS_PER_POOL - 1] range, and the buffer holding
 * the GPU heap contexts is sized for this many contexts.
 */
#define MAX_HEAPS_PER_POOL    128
76 
/**
 * struct panthor_heap_pool - Pool of heap contexts
 *
 * The pool is attached to a panthor_file and can't be shared across processes.
 */
struct panthor_heap_pool {
	/** @refcount: Reference count. */
	struct kref refcount;

	/** @ptdev: Device. */
	struct panthor_device *ptdev;

	/**
	 * @vm: VM this pool is bound to.
	 *
	 * Set to NULL when the pool is destroyed, which is how a destroyed
	 * pool is detected.
	 */
	struct panthor_vm *vm;

	/** @lock: Lock protecting access to @xa. */
	struct rw_semaphore lock;

	/** @xa: Array storing panthor_heap objects. */
	struct xarray xa;

	/** @gpu_contexts: Buffer object containing the GPU heap contexts. */
	struct panthor_kernel_bo *gpu_contexts;

	/**
	 * @size: Size of all chunks across all heaps in the pool, plus the
	 * size of the @gpu_contexts buffer object while it exists.
	 */
	atomic_t size;
};
104 
105 static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
106 {
107 	u32 l2_features = ptdev->gpu_info.l2_features;
108 	u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);
109 
110 	return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
111 }
112 
113 static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
114 {
115 	return panthor_heap_ctx_stride(pool->ptdev) * id;
116 }
117 
118 static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
119 {
120 	return pool->gpu_contexts->kmap +
121 	       panthor_get_heap_ctx_offset(pool, id);
122 }
123 
124 static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
125 				    struct panthor_heap *heap,
126 				    struct panthor_heap_chunk *chunk)
127 {
128 	mutex_lock(&heap->lock);
129 	list_del(&chunk->node);
130 	heap->chunk_count--;
131 	mutex_unlock(&heap->lock);
132 
133 	atomic_sub(heap->chunk_size, &pool->size);
134 
135 	panthor_kernel_bo_destroy(chunk->bo);
136 	kfree(chunk);
137 }
138 
139 static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
140 				    struct panthor_heap *heap,
141 				    bool initial_chunk)
142 {
143 	struct panthor_heap_chunk *chunk;
144 	struct panthor_heap_chunk_header *hdr;
145 	int ret;
146 
147 	chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
148 	if (!chunk)
149 		return -ENOMEM;
150 
151 	chunk->bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, heap->chunk_size,
152 					     DRM_PANTHOR_BO_NO_MMAP,
153 					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
154 					     PANTHOR_VM_KERNEL_AUTO_VA,
155 					     "Tiler heap chunk");
156 	if (IS_ERR(chunk->bo)) {
157 		ret = PTR_ERR(chunk->bo);
158 		goto err_free_chunk;
159 	}
160 
161 	ret = panthor_kernel_bo_vmap(chunk->bo);
162 	if (ret)
163 		goto err_destroy_bo;
164 
165 	hdr = chunk->bo->kmap;
166 	memset(hdr, 0, sizeof(*hdr));
167 
168 	if (initial_chunk && !list_empty(&heap->chunks)) {
169 		struct panthor_heap_chunk *prev_chunk;
170 		u64 prev_gpuva;
171 
172 		prev_chunk = list_first_entry(&heap->chunks,
173 					      struct panthor_heap_chunk,
174 					      node);
175 
176 		prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
177 		hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
178 			    (heap->chunk_size >> 12);
179 	}
180 
181 	panthor_kernel_bo_vunmap(chunk->bo);
182 
183 	mutex_lock(&heap->lock);
184 	list_add(&chunk->node, &heap->chunks);
185 	heap->chunk_count++;
186 	mutex_unlock(&heap->lock);
187 
188 	atomic_add(heap->chunk_size, &pool->size);
189 
190 	return 0;
191 
192 err_destroy_bo:
193 	panthor_kernel_bo_destroy(chunk->bo);
194 
195 err_free_chunk:
196 	kfree(chunk);
197 
198 	return ret;
199 }
200 
201 static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
202 				     struct panthor_heap *heap)
203 {
204 	struct panthor_heap_chunk *chunk, *tmp;
205 
206 	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
207 		panthor_free_heap_chunk(pool, heap, chunk);
208 }
209 
210 static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
211 				     struct panthor_heap *heap,
212 				     u32 chunk_count)
213 {
214 	int ret;
215 	u32 i;
216 
217 	for (i = 0; i < chunk_count; i++) {
218 		ret = panthor_alloc_heap_chunk(pool, heap, true);
219 		if (ret)
220 			return ret;
221 	}
222 
223 	return 0;
224 }
225 
226 static int
227 panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
228 {
229 	struct panthor_heap *heap;
230 
231 	heap = xa_erase(&pool->xa, handle);
232 	if (!heap)
233 		return -EINVAL;
234 
235 	panthor_free_heap_chunks(pool, heap);
236 	mutex_destroy(&heap->lock);
237 	kfree(heap);
238 	return 0;
239 }
240 
241 /**
242  * panthor_heap_destroy() - Destroy a heap context
243  * @pool: Pool this context belongs to.
244  * @handle: Handle returned by panthor_heap_create().
245  */
246 int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
247 {
248 	int ret;
249 
250 	down_write(&pool->lock);
251 	ret = panthor_heap_destroy_locked(pool, handle);
252 	up_write(&pool->lock);
253 
254 	return ret;
255 }
256 
257 /**
258  * panthor_heap_create() - Create a heap context
259  * @pool: Pool to instantiate the heap context from.
260  * @initial_chunk_count: Number of chunk allocated at initialization time.
261  * Must be at least 1.
262  * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
263  * [128k:8M] range.
264  * @max_chunks: Maximum number of chunks that can be allocated.
265  * @target_in_flight: Maximum number of in-flight render passes.
266  * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
267  * context.
268  * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
269  * assigned to the heap context.
270  *
271  * Return: a positive handle on success, a negative error otherwise.
272  */
273 int panthor_heap_create(struct panthor_heap_pool *pool,
274 			u32 initial_chunk_count,
275 			u32 chunk_size,
276 			u32 max_chunks,
277 			u32 target_in_flight,
278 			u64 *heap_ctx_gpu_va,
279 			u64 *first_chunk_gpu_va)
280 {
281 	struct panthor_heap *heap;
282 	struct panthor_heap_chunk *first_chunk;
283 	struct panthor_vm *vm;
284 	int ret = 0;
285 	u32 id;
286 
287 	if (initial_chunk_count == 0)
288 		return -EINVAL;
289 
290 	if (initial_chunk_count > max_chunks)
291 		return -EINVAL;
292 
293 	if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
294 	    chunk_size < SZ_128K || chunk_size > SZ_8M)
295 		return -EINVAL;
296 
297 	down_read(&pool->lock);
298 	vm = panthor_vm_get(pool->vm);
299 	up_read(&pool->lock);
300 
301 	/* The pool has been destroyed, we can't create a new heap. */
302 	if (!vm)
303 		return -EINVAL;
304 
305 	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
306 	if (!heap) {
307 		ret = -ENOMEM;
308 		goto err_put_vm;
309 	}
310 
311 	mutex_init(&heap->lock);
312 	INIT_LIST_HEAD(&heap->chunks);
313 	heap->chunk_size = chunk_size;
314 	heap->max_chunks = max_chunks;
315 	heap->target_in_flight = target_in_flight;
316 
317 	ret = panthor_alloc_heap_chunks(pool, heap, initial_chunk_count);
318 	if (ret)
319 		goto err_free_heap;
320 
321 	first_chunk = list_first_entry(&heap->chunks,
322 				       struct panthor_heap_chunk,
323 				       node);
324 	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);
325 
326 	down_write(&pool->lock);
327 	/* The pool has been destroyed, we can't create a new heap. */
328 	if (!pool->vm) {
329 		ret = -EINVAL;
330 	} else {
331 		ret = xa_alloc(&pool->xa, &id, heap,
332 			       XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
333 		if (!ret) {
334 			void *gpu_ctx = panthor_get_heap_ctx(pool, id);
335 
336 			memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
337 			*heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
338 					   panthor_get_heap_ctx_offset(pool, id);
339 		}
340 	}
341 	up_write(&pool->lock);
342 
343 	if (ret)
344 		goto err_free_heap;
345 
346 	panthor_vm_put(vm);
347 	return id;
348 
349 err_free_heap:
350 	panthor_free_heap_chunks(pool, heap);
351 	mutex_destroy(&heap->lock);
352 	kfree(heap);
353 
354 err_put_vm:
355 	panthor_vm_put(vm);
356 	return ret;
357 }
358 
359 /**
360  * panthor_heap_return_chunk() - Return an unused heap chunk
361  * @pool: The pool this heap belongs to.
362  * @heap_gpu_va: The GPU address of the heap context.
363  * @chunk_gpu_va: The chunk VA to return.
364  *
365  * This function is used when a chunk allocated with panthor_heap_grow()
366  * couldn't be linked to the heap context through the FW interface because
367  * the group requesting the allocation was scheduled out in the meantime.
368  */
369 int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
370 			      u64 heap_gpu_va,
371 			      u64 chunk_gpu_va)
372 {
373 	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
374 	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
375 	struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
376 	struct panthor_heap *heap;
377 	int ret;
378 
379 	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
380 		return -EINVAL;
381 
382 	down_read(&pool->lock);
383 	heap = xa_load(&pool->xa, heap_id);
384 	if (!heap) {
385 		ret = -EINVAL;
386 		goto out_unlock;
387 	}
388 
389 	chunk_gpu_va &= GENMASK_ULL(63, 12);
390 
391 	mutex_lock(&heap->lock);
392 	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
393 		if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
394 			removed = chunk;
395 			list_del(&chunk->node);
396 			heap->chunk_count--;
397 			atomic_sub(heap->chunk_size, &pool->size);
398 			break;
399 		}
400 	}
401 	mutex_unlock(&heap->lock);
402 
403 	if (removed) {
404 		panthor_kernel_bo_destroy(chunk->bo);
405 		kfree(chunk);
406 		ret = 0;
407 	} else {
408 		ret = -EINVAL;
409 	}
410 
411 out_unlock:
412 	up_read(&pool->lock);
413 	return ret;
414 }
415 
416 /**
417  * panthor_heap_grow() - Make a heap context grow.
418  * @pool: The pool this heap belongs to.
419  * @heap_gpu_va: The GPU address of the heap context.
420  * @renderpasses_in_flight: Number of render passes currently in-flight.
421  * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
422  * @new_chunk_gpu_va: Pointer used to return the chunk VA.
423  *
424  * Return:
425  * - 0 if a new heap was allocated
426  * - -ENOMEM if the tiler context reached the maximum number of chunks
427  *   or if too many render passes are in-flight
428  *   or if the allocation failed
429  * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
430  */
431 int panthor_heap_grow(struct panthor_heap_pool *pool,
432 		      u64 heap_gpu_va,
433 		      u32 renderpasses_in_flight,
434 		      u32 pending_frag_count,
435 		      u64 *new_chunk_gpu_va)
436 {
437 	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
438 	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
439 	struct panthor_heap_chunk *chunk;
440 	struct panthor_heap *heap;
441 	int ret;
442 
443 	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
444 		return -EINVAL;
445 
446 	down_read(&pool->lock);
447 	heap = xa_load(&pool->xa, heap_id);
448 	if (!heap) {
449 		ret = -EINVAL;
450 		goto out_unlock;
451 	}
452 
453 	/* If we reached the target in-flight render passes, or if we
454 	 * reached the maximum number of chunks, let the FW figure another way to
455 	 * find some memory (wait for render passes to finish, or call the exception
456 	 * handler provided by the userspace driver, if any).
457 	 */
458 	if (renderpasses_in_flight > heap->target_in_flight ||
459 	    heap->chunk_count >= heap->max_chunks) {
460 		ret = -ENOMEM;
461 		goto out_unlock;
462 	}
463 
464 	/* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
465 	 * which goes through the blocking allocation path. Ultimately, we
466 	 * want a non-blocking allocation, so we can immediately report to the
467 	 * FW when the system is running out of memory. In that case, the FW
468 	 * can call a user-provided exception handler, which might try to free
469 	 * some tiler memory by issuing an intermediate fragment job. If the
470 	 * exception handler can't do anything, it will flag the queue as
471 	 * faulty so the job that triggered this tiler chunk allocation and all
472 	 * further jobs in this queue fail immediately instead of having to
473 	 * wait for the job timeout.
474 	 */
475 	ret = panthor_alloc_heap_chunk(pool, heap, false);
476 	if (ret)
477 		goto out_unlock;
478 
479 	chunk = list_first_entry(&heap->chunks,
480 				 struct panthor_heap_chunk,
481 				 node);
482 	*new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
483 			    (heap->chunk_size >> 12);
484 	ret = 0;
485 
486 out_unlock:
487 	up_read(&pool->lock);
488 	return ret;
489 }
490 
491 static void panthor_heap_pool_release(struct kref *refcount)
492 {
493 	struct panthor_heap_pool *pool =
494 		container_of(refcount, struct panthor_heap_pool, refcount);
495 
496 	xa_destroy(&pool->xa);
497 	kfree(pool);
498 }
499 
500 /**
501  * panthor_heap_pool_put() - Release a heap pool reference
502  * @pool: Pool to release the reference on. Can be NULL.
503  */
504 void panthor_heap_pool_put(struct panthor_heap_pool *pool)
505 {
506 	if (pool)
507 		kref_put(&pool->refcount, panthor_heap_pool_release);
508 }
509 
510 /**
511  * panthor_heap_pool_get() - Get a heap pool reference
512  * @pool: Pool to get the reference on. Can be NULL.
513  *
514  * Return: @pool.
515  */
516 struct panthor_heap_pool *
517 panthor_heap_pool_get(struct panthor_heap_pool *pool)
518 {
519 	if (pool)
520 		kref_get(&pool->refcount);
521 
522 	return pool;
523 }
524 
525 /**
526  * panthor_heap_pool_create() - Create a heap pool
527  * @ptdev: Device.
528  * @vm: The VM this heap pool will be attached to.
529  *
530  * Heap pools might contain up to 128 heap contexts, and are per-VM.
531  *
532  * Return: A valid pointer on success, a negative error code otherwise.
533  */
534 struct panthor_heap_pool *
535 panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
536 {
537 	size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
538 			      panthor_heap_ctx_stride(ptdev),
539 			      4096);
540 	struct panthor_heap_pool *pool;
541 	int ret = 0;
542 
543 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
544 	if (!pool)
545 		return ERR_PTR(-ENOMEM);
546 
547 	/* We want a weak ref here: the heap pool belongs to the VM, so we're
548 	 * sure that, as long as the heap pool exists, the VM exists too.
549 	 */
550 	pool->vm = vm;
551 	pool->ptdev = ptdev;
552 	init_rwsem(&pool->lock);
553 	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
554 	kref_init(&pool->refcount);
555 
556 	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
557 						      DRM_PANTHOR_BO_NO_MMAP,
558 						      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
559 						      PANTHOR_VM_KERNEL_AUTO_VA,
560 						      "Heap pool");
561 	if (IS_ERR(pool->gpu_contexts)) {
562 		ret = PTR_ERR(pool->gpu_contexts);
563 		goto err_destroy_pool;
564 	}
565 
566 	ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
567 	if (ret)
568 		goto err_destroy_pool;
569 
570 	atomic_add(pool->gpu_contexts->obj->size, &pool->size);
571 
572 	return pool;
573 
574 err_destroy_pool:
575 	panthor_heap_pool_destroy(pool);
576 	return ERR_PTR(ret);
577 }
578 
579 /**
580  * panthor_heap_pool_destroy() - Destroy a heap pool.
581  * @pool: Pool to destroy.
582  *
583  * This function destroys all heap contexts and their resources. Thus
584  * preventing any use of the heap context or the chunk attached to them
585  * after that point.
586  *
587  * If the GPU still has access to some heap contexts, a fault should be
588  * triggered, which should flag the command stream groups using these
589  * context as faulty.
590  *
591  * The heap pool object is only released when all references to this pool
592  * are released.
593  */
594 void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
595 {
596 	struct panthor_heap *heap;
597 	unsigned long i;
598 
599 	if (!pool)
600 		return;
601 
602 	down_write(&pool->lock);
603 	xa_for_each(&pool->xa, i, heap)
604 		drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));
605 
606 	if (!IS_ERR_OR_NULL(pool->gpu_contexts)) {
607 		atomic_sub(pool->gpu_contexts->obj->size, &pool->size);
608 		panthor_kernel_bo_destroy(pool->gpu_contexts);
609 	}
610 
611 	/* Reflects the fact the pool has been destroyed. */
612 	pool->vm = NULL;
613 	up_write(&pool->lock);
614 
615 	panthor_heap_pool_put(pool);
616 }
617 
618 /**
619  * panthor_heap_pool_size() - Get a heap pool's total size
620  * @pool: Pool whose total chunks size to return
621  *
622  * Returns the aggregated size of all chunks for all heaps in the pool
623  *
624  */
625 size_t panthor_heap_pool_size(struct panthor_heap_pool *pool)
626 {
627 	if (!pool)
628 		return 0;
629 
630 	return atomic_read(&pool->size);
631 }
632