drivers/gpu/drm/panthor/panthor_heap.c
// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#include <linux/iosys-map.h>
#include <linux/rwsem.h>

#include <drm/panthor_drm.h>

#include "panthor_device.h"
#include "panthor_gem.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"

/*
 * The GPU heap context is an opaque structure used by the GPU to track the
 * heap allocations. The driver should only touch it to initialize it (zero all
 * fields). Because the CPU and GPU can both access this structure it is
 * required to be GPU cache line aligned.
 */
#define HEAP_CONTEXT_SIZE	32

/**
 * struct panthor_heap_chunk_header - Heap chunk header
 */
struct panthor_heap_chunk_header {
	/**
	 * @next: Next heap chunk in the list.
	 *
	 * This is a GPU VA.
	 */
	u64 next;

	/** @unknown: MBZ. */
	u32 unknown[14];
};
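
/*
 * Illustrative sketch (not part of the driver): the @next field packs the
 * 4k-aligned GPU VA of the next chunk in bits [63:12] and the chunk size in
 * 4KiB units in bits [11:0], mirroring what panthor_alloc_heap_chunk() and
 * panthor_heap_grow() build below. For a hypothetical chunk located at GPU VA
 * 0x80000000 with a 2MiB chunk size:
 *
 *	u64 next = (0x80000000ULL & GENMASK_ULL(63, 12)) | (SZ_2M >> 12);
 *	// next == 0x80000200: VA in the upper bits, 512 * 4KiB in the low bits.
 */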

/**
 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
 */
struct panthor_heap_chunk {
	/** @node: Used to insert the heap chunk in panthor_heap::chunks. */
	struct list_head node;

	/** @bo: Buffer object backing the heap chunk. */
	struct panthor_kernel_bo *bo;
};

/**
 * struct panthor_heap - Structure used to manage tiler heap contexts.
 */
struct panthor_heap {
	/** @chunks: List containing all heap chunks allocated so far. */
	struct list_head chunks;

	/** @lock: Lock protecting insertion in the chunks list. */
	struct mutex lock;

	/** @chunk_size: Size of each chunk. */
	u32 chunk_size;

	/** @max_chunks: Maximum number of chunks. */
	u32 max_chunks;

	/**
	 * @target_in_flight: Number of in-flight render passes after which
	 * we'd let the FW wait for fragment jobs to finish instead of
	 * allocating new chunks.
	 */
	u32 target_in_flight;

	/** @chunk_count: Number of heap chunks currently allocated. */
	u32 chunk_count;
};

#define MAX_HEAPS_PER_POOL    128

/**
 * struct panthor_heap_pool - Pool of heap contexts
 *
 * The pool is attached to a panthor_file and can't be shared across processes.
 */
struct panthor_heap_pool {
	/** @refcount: Reference count. */
	struct kref refcount;

	/** @ptdev: Device. */
	struct panthor_device *ptdev;

	/** @vm: VM this pool is bound to. */
	struct panthor_vm *vm;

	/** @lock: Lock protecting access to @xa. */
	struct rw_semaphore lock;

	/** @xa: Array storing panthor_heap objects. */
	struct xarray xa;

	/** @gpu_contexts: Buffer object containing the GPU heap contexts. */
	struct panthor_kernel_bo *gpu_contexts;
};

static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
{
	u32 l2_features = ptdev->gpu_info.l2_features;
	u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);

	return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
}

static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
{
	return panthor_heap_ctx_stride(pool->ptdev) * id;
}

static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
{
	return pool->gpu_contexts->kmap +
	       panthor_get_heap_ctx_offset(pool, id);
}
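
/*
 * Illustrative sketch (not part of the driver): how a heap context is located
 * inside the pool->gpu_contexts buffer. Assuming a hypothetical 64-byte GPU L2
 * cache line (the real value comes from GPU_L2_FEATURES_LINE_SIZE()):
 *
 *	stride  = ALIGN(HEAP_CONTEXT_SIZE, 64);	// 64 bytes per context
 *	offset  = stride * id;			// heap id 3 -> byte offset 192
 *	ctx_cpu = pool->gpu_contexts->kmap + offset;
 *	ctx_gpu = panthor_kernel_bo_gpuva(pool->gpu_contexts) + offset;
 */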

static void panthor_free_heap_chunk(struct panthor_vm *vm,
				    struct panthor_heap *heap,
				    struct panthor_heap_chunk *chunk)
{
	mutex_lock(&heap->lock);
	list_del(&chunk->node);
	heap->chunk_count--;
	mutex_unlock(&heap->lock);

	panthor_kernel_bo_destroy(chunk->bo);
	kfree(chunk);
}

static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
				    struct panthor_vm *vm,
				    struct panthor_heap *heap,
				    bool initial_chunk)
{
	struct panthor_heap_chunk *chunk;
	struct panthor_heap_chunk_header *hdr;
	int ret;

	chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
	if (!chunk)
		return -ENOMEM;

	chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size,
					     DRM_PANTHOR_BO_NO_MMAP,
					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					     PANTHOR_VM_KERNEL_AUTO_VA);
	if (IS_ERR(chunk->bo)) {
		ret = PTR_ERR(chunk->bo);
		goto err_free_chunk;
	}

	ret = panthor_kernel_bo_vmap(chunk->bo);
	if (ret)
		goto err_destroy_bo;

	hdr = chunk->bo->kmap;
	memset(hdr, 0, sizeof(*hdr));

	if (initial_chunk && !list_empty(&heap->chunks)) {
		struct panthor_heap_chunk *prev_chunk;
		u64 prev_gpuva;

		/* Point the new chunk at the current list head so the initial
		 * chunks form a chain the GPU can walk.
		 */
		prev_chunk = list_first_entry(&heap->chunks,
					      struct panthor_heap_chunk,
					      node);

		prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
		hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	}

	panthor_kernel_bo_vunmap(chunk->bo);

	mutex_lock(&heap->lock);
	list_add(&chunk->node, &heap->chunks);
	heap->chunk_count++;
	mutex_unlock(&heap->lock);

	return 0;

err_destroy_bo:
	panthor_kernel_bo_destroy(chunk->bo);

err_free_chunk:
	kfree(chunk);

	return ret;
}
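
/*
 * Illustrative sketch (not part of the driver): after three initial chunks A,
 * B and C have been allocated (in that order), list_add() has placed each new
 * chunk at the head of heap->chunks, and each new header points at the
 * previous head, so the GPU-visible chain mirrors the CPU-side list:
 *
 *	heap->chunks: C -> B -> A
 *	C.hdr->next = (gpuva(B) & GENMASK_ULL(63, 12)) | (chunk_size >> 12)
 *	B.hdr->next = (gpuva(A) & GENMASK_ULL(63, 12)) | (chunk_size >> 12)
 *	A.hdr->next = 0
 */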

static void panthor_free_heap_chunks(struct panthor_vm *vm,
				     struct panthor_heap *heap)
{
	struct panthor_heap_chunk *chunk, *tmp;

	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
		panthor_free_heap_chunk(vm, heap, chunk);
}

static int panthor_alloc_heap_chunks(struct panthor_device *ptdev,
				     struct panthor_vm *vm,
				     struct panthor_heap *heap,
				     u32 chunk_count)
{
	int ret;
	u32 i;

	for (i = 0; i < chunk_count; i++) {
		ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true);
		if (ret)
			return ret;
	}

	return 0;
}

static int
panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
{
	struct panthor_heap *heap;

	heap = xa_erase(&pool->xa, handle);
	if (!heap)
		return -EINVAL;

	panthor_free_heap_chunks(pool->vm, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);
	return 0;
}

/**
 * panthor_heap_destroy() - Destroy a heap context
 * @pool: Pool this context belongs to.
 * @handle: Handle returned by panthor_heap_create().
 */
int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
{
	int ret;

	down_write(&pool->lock);
	ret = panthor_heap_destroy_locked(pool, handle);
	up_write(&pool->lock);

	return ret;
}

/**
 * panthor_heap_create() - Create a heap context
 * @pool: Pool to instantiate the heap context from.
 * @initial_chunk_count: Number of chunks allocated at initialization time.
 * Must be at least 1.
 * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
 * [128k:8M] range.
 * @max_chunks: Maximum number of chunks that can be allocated.
 * @target_in_flight: Maximum number of in-flight render passes.
 * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
 * context.
 * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
 * assigned to the heap context.
 *
 * Return: a non-negative handle on success, a negative error code otherwise.
 */
int panthor_heap_create(struct panthor_heap_pool *pool,
			u32 initial_chunk_count,
			u32 chunk_size,
			u32 max_chunks,
			u32 target_in_flight,
			u64 *heap_ctx_gpu_va,
			u64 *first_chunk_gpu_va)
{
	struct panthor_heap *heap;
	struct panthor_heap_chunk *first_chunk;
	struct panthor_vm *vm;
	int ret = 0;
	u32 id;

	if (initial_chunk_count == 0)
		return -EINVAL;

	if (initial_chunk_count > max_chunks)
		return -EINVAL;

	if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
	    chunk_size < SZ_128K || chunk_size > SZ_8M)
		return -EINVAL;

	down_read(&pool->lock);
	vm = panthor_vm_get(pool->vm);
	up_read(&pool->lock);

	/* The pool has been destroyed, we can't create a new heap. */
	if (!vm)
		return -EINVAL;

	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
	if (!heap) {
		ret = -ENOMEM;
		goto err_put_vm;
	}

	mutex_init(&heap->lock);
	INIT_LIST_HEAD(&heap->chunks);
	heap->chunk_size = chunk_size;
	heap->max_chunks = max_chunks;
	heap->target_in_flight = target_in_flight;

	ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap,
					initial_chunk_count);
	if (ret)
		goto err_free_heap;

	first_chunk = list_first_entry(&heap->chunks,
				       struct panthor_heap_chunk,
				       node);
	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);

	down_write(&pool->lock);
	/* The pool has been destroyed, we can't create a new heap. */
	if (!pool->vm) {
		ret = -EINVAL;
	} else {
		ret = xa_alloc(&pool->xa, &id, heap,
			       XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
		if (!ret) {
			void *gpu_ctx = panthor_get_heap_ctx(pool, id);

			memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
			*heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
					   panthor_get_heap_ctx_offset(pool, id);
		}
	}
	up_write(&pool->lock);

	if (ret)
		goto err_free_heap;

	panthor_vm_put(vm);
	return id;

err_free_heap:
	panthor_free_heap_chunks(pool->vm, heap);
	mutex_destroy(&heap->lock);
	kfree(heap);

err_put_vm:
	panthor_vm_put(vm);
	return ret;
}
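
/*
 * Illustrative usage sketch (not part of the driver): creating a heap context
 * from an existing pool. The pool pointer and the parameter values are
 * hypothetical and assumed to come from the caller; error handling is reduced
 * to the bare minimum.
 *
 *	u64 heap_ctx_gpu_va, first_chunk_gpu_va;
 *	int handle;
 *
 *	handle = panthor_heap_create(pool, 1, SZ_2M, 16, 4,
 *				     &heap_ctx_gpu_va, &first_chunk_gpu_va);
 *	if (handle < 0)
 *		return handle;
 *
 *	// ... heap_ctx_gpu_va/first_chunk_gpu_va are consumed by the GPU/FW ...
 *
 *	panthor_heap_destroy(pool, handle);
 */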

/**
 * panthor_heap_return_chunk() - Return an unused heap chunk
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @chunk_gpu_va: The chunk VA to return.
 *
 * This function is used when a chunk allocated with panthor_heap_grow()
 * couldn't be linked to the heap context through the FW interface because
 * the group requesting the allocation was scheduled out in the meantime.
 */
int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
			      u64 heap_gpu_va,
			      u64 chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	chunk_gpu_va &= GENMASK_ULL(63, 12);

	mutex_lock(&heap->lock);
	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
		if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
			removed = chunk;
			list_del(&chunk->node);
			heap->chunk_count--;
			break;
		}
	}
	mutex_unlock(&heap->lock);

	if (removed) {
		panthor_kernel_bo_destroy(chunk->bo);
		kfree(chunk);
		ret = 0;
	} else {
		ret = -EINVAL;
	}

out_unlock:
	up_read(&pool->lock);
	return ret;
}

/**
 * panthor_heap_grow() - Make a heap context grow.
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @renderpasses_in_flight: Number of render passes currently in-flight.
 * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
 * @new_chunk_gpu_va: Pointer used to return the chunk VA.
 *
 * Return:
 * - 0 if a new chunk was allocated
 * - -ENOMEM if the tiler context reached the maximum number of chunks
 *   or if too many render passes are in-flight
 *   or if the allocation failed
 * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
 */
int panthor_heap_grow(struct panthor_heap_pool *pool,
		      u64 heap_gpu_va,
		      u32 renderpasses_in_flight,
		      u32 pending_frag_count,
		      u64 *new_chunk_gpu_va)
{
	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
	struct panthor_heap_chunk *chunk;
	struct panthor_heap *heap;
	int ret;

	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
		return -EINVAL;

	down_read(&pool->lock);
	heap = xa_load(&pool->xa, heap_id);
	if (!heap) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* If we reached the target in-flight render passes, or if we
	 * reached the maximum number of chunks, let the FW figure out another
	 * way to find some memory (wait for render passes to finish, or call
	 * the exception handler provided by the userspace driver, if any).
	 */
	if (renderpasses_in_flight > heap->target_in_flight ||
	    heap->chunk_count >= heap->max_chunks) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	/* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
	 * which goes through the blocking allocation path. Ultimately, we
	 * want a non-blocking allocation, so we can immediately report to the
	 * FW when the system is running out of memory. In that case, the FW
	 * can call a user-provided exception handler, which might try to free
	 * some tiler memory by issuing an intermediate fragment job. If the
	 * exception handler can't do anything, it will flag the queue as
	 * faulty so the job that triggered this tiler chunk allocation and all
	 * further jobs in this queue fail immediately instead of having to
	 * wait for the job timeout.
	 */
	ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false);
	if (ret)
		goto out_unlock;

	chunk = list_first_entry(&heap->chunks,
				 struct panthor_heap_chunk,
				 node);
	*new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
			    (heap->chunk_size >> 12);
	ret = 0;

out_unlock:
	up_read(&pool->lock);
	return ret;
}
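
/*
 * Illustrative sketch (not part of the driver): how a caller might consume the
 * result of panthor_heap_grow(). heap_gpu_va and the in-flight counters are
 * assumed to come from the FW's tiler chunk allocation request; on -ENOMEM the
 * FW is left to find memory on its own (wait for render passes to finish or
 * run the userspace exception handler, as described above).
 *
 *	u64 new_chunk_gpu_va;
 *	int ret;
 *
 *	ret = panthor_heap_grow(pool, heap_gpu_va, renderpasses_in_flight,
 *				pending_frag_count, &new_chunk_gpu_va);
 *	if (!ret) {
 *		// new_chunk_gpu_va: chunk VA in bits [63:12],
 *		// chunk size in 4KiB units in bits [11:0].
 *	} else if (ret == -ENOMEM) {
 *		// Report "no chunk available" back to the FW.
 *	}
 */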

static void panthor_heap_pool_release(struct kref *refcount)
{
	struct panthor_heap_pool *pool =
		container_of(refcount, struct panthor_heap_pool, refcount);

	xa_destroy(&pool->xa);
	kfree(pool);
}

/**
 * panthor_heap_pool_put() - Release a heap pool reference
 * @pool: Pool to release the reference on. Can be NULL.
 */
void panthor_heap_pool_put(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_put(&pool->refcount, panthor_heap_pool_release);
}

/**
 * panthor_heap_pool_get() - Get a heap pool reference
 * @pool: Pool to get the reference on. Can be NULL.
 *
 * Return: @pool.
 */
struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool *pool)
{
	if (pool)
		kref_get(&pool->refcount);

	return pool;
}

/**
 * panthor_heap_pool_create() - Create a heap pool
 * @ptdev: Device.
 * @vm: The VM this heap pool will be attached to.
 *
 * Heap pools might contain up to 128 heap contexts, and are per-VM.
 *
 * Return: A valid pointer on success, an ERR_PTR()-encoded error otherwise.
 */
struct panthor_heap_pool *
panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
{
	size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
			      panthor_heap_ctx_stride(ptdev),
			      4096);
	struct panthor_heap_pool *pool;
	int ret = 0;

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	/* We want a weak ref here: the heap pool belongs to the VM, so we're
	 * sure that, as long as the heap pool exists, the VM exists too.
	 */
	pool->vm = vm;
	pool->ptdev = ptdev;
	init_rwsem(&pool->lock);
	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
	kref_init(&pool->refcount);

	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
						      DRM_PANTHOR_BO_NO_MMAP,
						      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
						      PANTHOR_VM_KERNEL_AUTO_VA);
	if (IS_ERR(pool->gpu_contexts)) {
		ret = PTR_ERR(pool->gpu_contexts);
		goto err_destroy_pool;
	}

	ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
	if (ret)
		goto err_destroy_pool;

	return pool;

err_destroy_pool:
	panthor_heap_pool_destroy(pool);
	return ERR_PTR(ret);
}

/**
 * panthor_heap_pool_destroy() - Destroy a heap pool.
 * @pool: Pool to destroy.
 *
 * This function destroys all heap contexts and their resources, thus
 * preventing any use of the heap contexts or the chunks attached to them
 * after that point.
 *
 * If the GPU still has access to some heap contexts, a fault should be
 * triggered, which should flag the command stream groups using these
 * contexts as faulty.
 *
 * The heap pool object is only released when all references to this pool
 * are released.
 */
void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
{
	struct panthor_heap *heap;
	unsigned long i;

	if (!pool)
		return;

	down_write(&pool->lock);
	xa_for_each(&pool->xa, i, heap)
		drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));

	if (!IS_ERR_OR_NULL(pool->gpu_contexts))
		panthor_kernel_bo_destroy(pool->gpu_contexts);

	/* Reflects the fact the pool has been destroyed. */
	pool->vm = NULL;
	up_write(&pool->lock);

	panthor_heap_pool_put(pool);
}
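
/*
 * Illustrative lifecycle sketch (not part of the driver): tying the pool API
 * together. The ptdev and vm pointers are assumed to come from the caller.
 *
 *	struct panthor_heap_pool *pool;
 *
 *	pool = panthor_heap_pool_create(ptdev, vm);
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 *
 *	// ... panthor_heap_create() / panthor_heap_grow() calls ...
 *
 *	// Destroys all heap contexts and drops the pool's initial reference;
 *	// the pool object itself is freed once the last reference is put.
 *	panthor_heap_pool_destroy(pool);
 */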
606