xref: /linux/drivers/gpu/drm/panthor/panthor_mmu.c (revision 815e260a18a3af4dab59025ee99a7156c0e8b5e0)
1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
3 /* Copyright 2023 Collabora ltd. */
4 
5 #include <drm/drm_debugfs.h>
6 #include <drm/drm_drv.h>
7 #include <drm/drm_exec.h>
8 #include <drm/drm_gpuvm.h>
9 #include <drm/drm_managed.h>
10 #include <drm/drm_print.h>
11 #include <drm/gpu_scheduler.h>
12 #include <drm/panthor_drm.h>
13 
14 #include <linux/atomic.h>
15 #include <linux/bitfield.h>
16 #include <linux/delay.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/interrupt.h>
19 #include <linux/io.h>
20 #include <linux/iopoll.h>
21 #include <linux/io-pgtable.h>
22 #include <linux/iommu.h>
23 #include <linux/kmemleak.h>
24 #include <linux/platform_device.h>
25 #include <linux/pm_runtime.h>
26 #include <linux/rwsem.h>
27 #include <linux/sched.h>
28 #include <linux/shmem_fs.h>
29 #include <linux/sizes.h>
30 
31 #include "panthor_device.h"
32 #include "panthor_gem.h"
33 #include "panthor_gpu.h"
34 #include "panthor_heap.h"
35 #include "panthor_mmu.h"
36 #include "panthor_regs.h"
37 #include "panthor_sched.h"
38 
39 #define MAX_AS_SLOTS			32
40 
41 struct panthor_vm;
42 
43 /**
44  * struct panthor_as_slot - Address space slot
45  */
46 struct panthor_as_slot {
48 	/** @vm: VM bound to this slot. NULL if no VM is bound. */
48 	struct panthor_vm *vm;
49 };
50 
51 /**
52  * struct panthor_mmu - MMU related data
53  */
54 struct panthor_mmu {
55 	/** @irq: The MMU irq. */
56 	struct panthor_irq irq;
57 
58 	/**
59 	 * @as: Address space related fields.
60 	 *
61 	 * The GPU has a limited number of address space (AS) slots, forcing
62 	 * us to re-assign them on demand.
63 	 */
64 	struct {
65 		/** @as.slots_lock: Lock protecting access to all other AS fields. */
66 		struct mutex slots_lock;
67 
68 		/** @as.alloc_mask: Bitmask encoding the allocated slots. */
69 		unsigned long alloc_mask;
70 
71 		/** @as.faulty_mask: Bitmask encoding the faulty slots. */
72 		unsigned long faulty_mask;
73 
74 		/** @as.slots: VMs currently bound to the AS slots. */
75 		struct panthor_as_slot slots[MAX_AS_SLOTS];
76 
77 		/**
78 		 * @as.lru_list: List of least recently used VMs.
79 		 *
80 		 * We use this list to pick a VM to evict when all slots are
81 		 * used.
82 		 *
83 		 * There should be no more active VMs than there are AS slots,
84 		 * so this LRU is just here to keep VMs bound until there's
85 		 * a need to release a slot, thus avoiding unnecessary TLB/cache
86 		 * flushes.
87 		 */
88 		struct list_head lru_list;
89 	} as;
90 
91 	/** @vm: VMs management fields */
92 	struct {
93 		/** @vm.lock: Lock protecting access to @vm.list. */
94 		struct mutex lock;
95 
96 		/** @vm.list: List containing all VMs. */
97 		struct list_head list;
98 
99 		/** @vm.reset_in_progress: True if a reset is in progress. */
100 		bool reset_in_progress;
101 
102 		/** @vm.wq: Workqueue used for the VM_BIND queues. */
103 		struct workqueue_struct *wq;
104 	} vm;
105 };
106 
107 /**
108  * struct panthor_vm_pool - VM pool object
109  */
110 struct panthor_vm_pool {
111 	/** @xa: Array used for VM handle tracking. */
112 	struct xarray xa;
113 };
114 
115 /**
116  * struct panthor_vma - GPU mapping object
117  *
118  * This is used to track GEM mappings in GPU space.
119  */
120 struct panthor_vma {
121 	/** @base: Inherits from drm_gpuva. */
122 	struct drm_gpuva base;
123 
124 	/** @node: Used to implement deferred release of VMAs. */
125 	struct list_head node;
126 
127 	/**
128 	 * @flags: Combination of drm_panthor_vm_bind_op_flags.
129 	 *
130 	 * Only map related flags are accepted.
131 	 */
132 	u32 flags;
133 };
134 
135 /**
136  * struct panthor_vm_op_ctx - VM operation context
137  *
138  * With VM operations potentially taking place in a dma-signaling path, we
139  * need to make sure everything that might require resource allocation is
140  * pre-allocated upfront. This is what this operation context is for.
141  *
142  * We also collect resources that have been freed, so we can release them
143  * asynchronously, and let the VM_BIND scheduler process the next VM_BIND
144  * request.
145  */
146 struct panthor_vm_op_ctx {
147 	/** @rsvd_page_tables: Pages reserved for the MMU page table update. */
148 	struct {
149 		/** @rsvd_page_tables.count: Number of pages reserved. */
150 		u32 count;
151 
152 		/** @rsvd_page_tables.ptr: Index of the first unused page in the @pages table. */
153 		u32 ptr;
154 
155 		/**
156 		 * @rsvd_page_tables.pages: Array of pages to be used for an MMU page table update.
157 		 *
158 		 * After a VM operation, there might be unused pages left in this array.
159 		 * They should be returned to the pt_cache as part of the op_ctx cleanup.
160 		 */
161 		void **pages;
162 	} rsvd_page_tables;
163 
164 	/**
165 	 * @preallocated_vmas: Pre-allocated VMAs to handle the remap case.
166 	 *
167 	 * Partial unmap requests or map requests overlapping existing mappings will
168 	 * trigger a remap call, which needs to register up to three panthor_vma objects
169 	 * (one for the new mapping, and two for the previous and next mappings).
170 	 */
171 	struct panthor_vma *preallocated_vmas[3];
172 
173 	/** @flags: Combination of drm_panthor_vm_bind_op_flags. */
174 	u32 flags;
175 
176 	/** @va: Virtual range targeted by the VM operation. */
177 	struct {
178 		/** @va.addr: Start address. */
179 		u64 addr;
180 
181 		/** @va.range: Range size. */
182 		u64 range;
183 	} va;
184 
185 	/**
186 	 * @returned_vmas: List of panthor_vma objects returned after a VM operation.
187 	 *
188 	 * For unmap operations, this will contain all VMAs that were covered by the
189 	 * specified VA range.
190 	 *
191 	 * For map operations, this will contain all VMAs that previously mapped to
192 	 * the specified VA range.
193 	 *
194 	 * Those VMAs, and the resources they point to will be released as part of
195 	 * the op_ctx cleanup operation.
196 	 */
197 	struct list_head returned_vmas;
198 
199 	/** @map: Fields specific to a map operation. */
200 	struct {
201 		/** @map.vm_bo: Buffer object to map. */
202 		struct drm_gpuvm_bo *vm_bo;
203 
204 		/** @map.bo_offset: Offset in the buffer object. */
205 		u64 bo_offset;
206 
207 		/**
208 		 * @map.sgt: sg-table pointing to pages backing the GEM object.
209 		 *
210 		 * This is gathered at job creation time, such that we don't have
211 		 * to allocate in ::run_job().
212 		 */
213 		struct sg_table *sgt;
214 
215 		/**
216 		 * @map.new_vma: The new VMA object that will be inserted to the VA tree.
217 		 */
218 		struct panthor_vma *new_vma;
219 	} map;
220 };
221 
222 /**
223  * struct panthor_vm - VM object
224  *
225  * A VM is an object representing a GPU (or MCU) virtual address space.
226  * It embeds the MMU page table for this address space, a tree containing
227  * all the virtual mappings of GEM objects, and other things needed to manage
228  * the VM.
229  *
230  * Except for the MCU VM, which is managed by the kernel, all other VMs are
231  * created by userspace and mostly managed by userspace, using the
232  * %DRM_IOCTL_PANTHOR_VM_BIND ioctl.
233  *
234  * A portion of the virtual address space is reserved for kernel objects,
235  * like heap chunks, and userspace gets to decide how much of the virtual
236  * address space is left to the kernel (half of the virtual address space
237  * by default).
238  */
239 struct panthor_vm {
240 	/**
241 	 * @base: Inherit from drm_gpuvm.
242 	 *
243 	 * We delegate all the VA management to the common drm_gpuvm framework
244 	 * and only implement hooks to update the MMU page table.
245 	 */
246 	struct drm_gpuvm base;
247 
248 	/**
249 	 * @sched: Scheduler used for asynchronous VM_BIND requests.
250 	 *
251 	 * We use a 1:1 scheduler here.
252 	 */
253 	struct drm_gpu_scheduler sched;
254 
255 	/**
256 	 * @entity: Scheduling entity representing the VM_BIND queue.
257 	 *
258 	 * There's currently one bind queue per VM. It doesn't make sense to
259 	 * allow more, given that VM operations are serialized anyway.
260 	 */
261 	struct drm_sched_entity entity;
262 
263 	/** @ptdev: Device. */
264 	struct panthor_device *ptdev;
265 
266 	/** @memattr: Value to program to the AS_MEMATTR register. */
267 	u64 memattr;
268 
269 	/** @pgtbl_ops: Page table operations. */
270 	struct io_pgtable_ops *pgtbl_ops;
271 
272 	/** @root_page_table: Stores the root page table pointer. */
273 	void *root_page_table;
274 
275 	/**
276 	 * @op_lock: Lock used to serialize operations on a VM.
277 	 *
278 	 * The serialization of jobs queued to the VM_BIND queue is already
279 	 * taken care of by drm_sched, but we need to serialize synchronous
280 	 * and asynchronous VM_BIND requests. This is what this lock is for.
281 	 */
282 	struct mutex op_lock;
283 
284 	/**
285 	 * @op_ctx: The context attached to the currently executing VM operation.
286 	 *
287 	 * NULL when no operation is in progress.
288 	 */
289 	struct panthor_vm_op_ctx *op_ctx;
290 
291 	/**
292 	 * @mm: Memory management object representing the auto-VA/kernel-VA.
293 	 *
294 	 * Used to auto-allocate VA space for kernel-managed objects (tiler
295 	 * heaps, ...).
296 	 *
297 	 * For the MCU VM, this is managing the VA range that's used to map
298 	 * all shared interfaces.
299 	 *
300 	 * For user VMs, the range is specified by userspace, and must not
301 	 * exceed half of the addressable VA space.
302 	 */
303 	struct drm_mm mm;
304 
305 	/** @mm_lock: Lock protecting the @mm field. */
306 	struct mutex mm_lock;
307 
308 	/** @kernel_auto_va: Automatic VA-range for kernel BOs. */
309 	struct {
310 		/** @kernel_auto_va.start: Start of the automatic VA-range for kernel BOs. */
311 		u64 start;
312 
313 		/** @kernel_auto_va.end: End of the automatic VA-range for kernel BOs. */
314 		u64 end;
315 	} kernel_auto_va;
316 
317 	/** @as: Address space related fields. */
318 	struct {
319 		/**
320 		 * @as.id: ID of the address space this VM is bound to.
321 		 *
322 		 * A value of -1 means the VM is inactive/not bound.
323 		 */
324 		int id;
325 
326 		/** @as.active_cnt: Number of active users of this VM. */
327 		refcount_t active_cnt;
328 
329 		/**
330 		 * @as.lru_node: Used to insert the VM in the panthor_mmu::as::lru_list.
331 		 *
332 		 * Active VMs should not be inserted in the LRU list.
333 		 */
334 		struct list_head lru_node;
335 	} as;
336 
337 	/**
338 	 * @heaps: Tiler heap related fields.
339 	 */
340 	struct {
341 		/**
342 		 * @heaps.pool: The heap pool attached to this VM.
343 		 *
344 		 * Will stay NULL until someone creates a heap context on this VM.
345 		 */
346 		struct panthor_heap_pool *pool;
347 
348 		/** @heaps.lock: Lock used to protect access to @pool. */
349 		struct mutex lock;
350 	} heaps;
351 
352 	/** @node: Used to insert the VM in the panthor_mmu::vm::list. */
353 	struct list_head node;
354 
355 	/** @for_mcu: True if this is the MCU VM. */
356 	bool for_mcu;
357 
358 	/**
359 	 * @destroyed: True if the VM was destroyed.
360 	 *
361 	 * No further bind requests should be queued to a destroyed VM.
362 	 */
363 	bool destroyed;
364 
365 	/**
366 	 * @unusable: True if the VM has turned unusable because something
367 	 * bad happened during an asynchronous request.
368 	 *
369 	 * We don't try to recover from such failures, because this implies
370 	 * informing userspace about the specific operation that failed, and
371 	 * hoping the userspace driver can replay things from there. This all
372 	 * sounds very complicated for little gain.
373 	 *
374 	 * Instead, we should just flag the VM as unusable, and fail any
375 	 * further requests targeting this VM.
376 	 *
377 	 * We also provide a way to query a VM state, so userspace can destroy
378 	 * it and create a new one.
379 	 *
380 	 * As an analogy, this would be mapped to a VK_ERROR_DEVICE_LOST
381 	 * situation, where the logical device needs to be re-created.
382 	 */
383 	bool unusable;
384 
385 	/**
386 	 * @unhandled_fault: Unhandled fault happened.
387 	 *
388 	 * This should be reported to the scheduler, and the queue/group be
389 	 * flagged as faulty as a result.
390 	 */
391 	bool unhandled_fault;
392 };
393 
394 /**
395  * struct panthor_vm_bind_job - VM bind job
396  */
397 struct panthor_vm_bind_job {
398 	/** @base: Inherit from drm_sched_job. */
399 	struct drm_sched_job base;
400 
401 	/** @refcount: Reference count. */
402 	struct kref refcount;
403 
404 	/** @cleanup_op_ctx_work: Work used to cleanup the VM operation context. */
405 	struct work_struct cleanup_op_ctx_work;
406 
407 	/** @vm: VM targeted by the VM operation. */
408 	struct panthor_vm *vm;
409 
410 	/** @ctx: Operation context. */
411 	struct panthor_vm_op_ctx ctx;
412 };
413 
414 /*
415  * @pt_cache: Cache used to allocate MMU page tables.
416  *
417  * The pre-allocation pattern forces us to over-allocate to plan for
418  * the worst case scenario, and return the pages we didn't use.
419  *
420  * Having a kmem_cache allows us to speed up allocations.
421  */
422 static struct kmem_cache *pt_cache;
423 
424 /**
425  * alloc_pt() - Custom page table allocator
426  * @cookie: Cookie passed at page table allocation time.
427  * @size: Size of the page table. This size should be fixed,
428  * and determined at creation time based on the granule size.
429  * @gfp: GFP flags.
430  *
431  * We want a custom allocator so we can use a cache for page table
432  * allocations and amortize the cost of the over-reservation that's
433  * done to allow asynchronous VM operations.
434  *
435  * Return: non-NULL on success, NULL if the allocation failed for any
436  * reason.
437  */
438 static void *alloc_pt(void *cookie, size_t size, gfp_t gfp)
439 {
440 	struct panthor_vm *vm = cookie;
441 	void *page;
442 
443 	/* Allocation of the root page table happens during init. */
444 	if (unlikely(!vm->root_page_table)) {
445 		struct page *p;
446 
447 		drm_WARN_ON(&vm->ptdev->base, vm->op_ctx);
448 		p = alloc_pages_node(dev_to_node(vm->ptdev->base.dev),
449 				     gfp | __GFP_ZERO, get_order(size));
450 		page = p ? page_address(p) : NULL;
451 		vm->root_page_table = page;
452 		return page;
453 	}
454 
455 	/* We're not supposed to have anything bigger than 4k here, because we picked a
456 	 * 4k granule size at init time.
457 	 */
458 	if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K))
459 		return NULL;
460 
461 	/* We must have some op_ctx attached to the VM and it must have at least one
462 	 * free page.
463 	 */
464 	if (drm_WARN_ON(&vm->ptdev->base, !vm->op_ctx) ||
465 	    drm_WARN_ON(&vm->ptdev->base,
466 			vm->op_ctx->rsvd_page_tables.ptr >= vm->op_ctx->rsvd_page_tables.count))
467 		return NULL;
468 
469 	page = vm->op_ctx->rsvd_page_tables.pages[vm->op_ctx->rsvd_page_tables.ptr++];
470 	memset(page, 0, SZ_4K);
471 
472 	/* Page table entries don't use virtual addresses, which trips up
473 	 * kmemleak. kmemleak_alloc_phys() might work, but physical addresses
474 	 * are mixed with other fields, and I fear kmemleak won't detect that
475 	 * either.
476 	 *
477 	 * Let's just ignore memory passed to the page-table driver for now.
478 	 */
479 	kmemleak_ignore(page);
480 	return page;
481 }
482 
483 /**
484  * free_pt() - Custom page table free function
485  * @cookie: Cookie passed at page table allocation time.
486  * @data: Page table to free.
487  * @size: Size of the page table. This size should be fixed,
488  * and determined at creation time based on the granule size.
489  */
490 static void free_pt(void *cookie, void *data, size_t size)
491 {
492 	struct panthor_vm *vm = cookie;
493 
494 	if (unlikely(vm->root_page_table == data)) {
495 		free_pages((unsigned long)data, get_order(size));
496 		vm->root_page_table = NULL;
497 		return;
498 	}
499 
500 	if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K))
501 		return;
502 
503 	/* Return the page to the pt_cache. */
504 	kmem_cache_free(pt_cache, data);
505 }
506 
507 static int wait_ready(struct panthor_device *ptdev, u32 as_nr)
508 {
509 	int ret;
510 	u32 val;
511 
512 	/* Wait for the MMU status to indicate there is no active command, in
513 	 * case one is pending.
514 	 */
515 	ret = gpu_read_relaxed_poll_timeout_atomic(ptdev, AS_STATUS(as_nr), val,
516 						   !(val & AS_STATUS_AS_ACTIVE),
517 						   10, 100000);
518 
519 	if (ret) {
520 		panthor_device_schedule_reset(ptdev);
521 		drm_err(&ptdev->base, "AS_ACTIVE bit stuck\n");
522 	}
523 
524 	return ret;
525 }
526 
527 static int write_cmd(struct panthor_device *ptdev, u32 as_nr, u32 cmd)
528 {
529 	int status;
530 
531 	/* write AS_COMMAND when MMU is ready to accept another command */
532 	status = wait_ready(ptdev, as_nr);
533 	if (!status)
534 		gpu_write(ptdev, AS_COMMAND(as_nr), cmd);
535 
536 	return status;
537 }
538 
539 static void lock_region(struct panthor_device *ptdev, u32 as_nr,
540 			u64 region_start, u64 size)
541 {
542 	u8 region_width;
543 	u64 region;
544 	u64 region_end = region_start + size;
545 
546 	if (!size)
547 		return;
548 
549 	/*
550 	 * The locked region is a naturally aligned power of 2 block encoded as
551 	 * log2(size) minus 1.
552 	 * Calculate the desired start/end and look for the highest bit which
553 	 * differs. The smallest naturally aligned block must include this bit
554 	 * change, the desired region starts with this bit (and subsequent bits)
555 	 * zeroed and ends with the bit (and subsequent bits) set to one.
556 	 */
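	/*
	 * Worked example (illustrative values only, assuming
	 * AS_LOCK_REGION_MIN_SIZE is not larger than 1MiB): region_start =
	 * 0x200000 and size = 0x100000 give region_end - 1 = 0x2fffff.
	 * region_start ^ (region_end - 1) = 0xfffff, fls64() returns 20, so
	 * region_width = 19, i.e. a naturally aligned 1MiB (2^20) lock region
	 * starting at 0x200000 once the low bits are masked off below.
	 */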
557 	region_width = max(fls64(region_start ^ (region_end - 1)),
558 			   const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1;
559 
560 	/*
561 	 * Mask off the low bits of region_start (which would be ignored by
562 	 * the hardware anyway)
563 	 */
564 	region_start &= GENMASK_ULL(63, region_width);
565 
566 	region = region_width | region_start;
567 
568 	/* Lock the region that needs to be updated */
569 	gpu_write64(ptdev, AS_LOCKADDR(as_nr), region);
570 	write_cmd(ptdev, as_nr, AS_COMMAND_LOCK);
571 }
572 
573 static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
574 				      u64 iova, u64 size, u32 op)
575 {
576 	const u32 l2_flush_op = CACHE_CLEAN | CACHE_INV;
577 	u32 lsc_flush_op;
578 	int ret;
579 
580 	lockdep_assert_held(&ptdev->mmu->as.slots_lock);
581 
582 	switch (op) {
583 	case AS_COMMAND_FLUSH_MEM:
584 		lsc_flush_op = CACHE_CLEAN | CACHE_INV;
585 		break;
586 	case AS_COMMAND_FLUSH_PT:
587 		lsc_flush_op = 0;
588 		break;
589 	default:
590 		drm_WARN(&ptdev->base, 1, "Unexpected AS_COMMAND: %d", op);
591 		return -EINVAL;
592 	}
593 
594 	if (as_nr < 0)
595 		return 0;
596 
597 	/*
598 	 * If the AS number is valid (>= 0), then we can be sure
599 	 * the device is up and running, so we don't need to explicitly
600 	 * power it up.
601 	 */
602 
603 	lock_region(ptdev, as_nr, iova, size);
604 
605 	ret = wait_ready(ptdev, as_nr);
606 	if (ret)
607 		return ret;
608 
609 	ret = panthor_gpu_flush_caches(ptdev, l2_flush_op, lsc_flush_op, 0);
610 	if (ret)
611 		return ret;
612 
613 	/*
614 	 * Explicitly unlock the region as the AS is not unlocked automatically
615 	 * at the end of the GPU_CONTROL cache flush command, unlike
616 	 * AS_COMMAND_FLUSH_MEM or AS_COMMAND_FLUSH_PT.
617 	 */
618 	write_cmd(ptdev, as_nr, AS_COMMAND_UNLOCK);
619 
620 	/* Wait for the unlock command to complete */
621 	return wait_ready(ptdev, as_nr);
622 }
623 
624 static int mmu_hw_do_operation(struct panthor_vm *vm,
625 			       u64 iova, u64 size, u32 op)
626 {
627 	struct panthor_device *ptdev = vm->ptdev;
628 	int ret;
629 
630 	mutex_lock(&ptdev->mmu->as.slots_lock);
631 	ret = mmu_hw_do_operation_locked(ptdev, vm->as.id, iova, size, op);
632 	mutex_unlock(&ptdev->mmu->as.slots_lock);
633 
634 	return ret;
635 }
636 
637 static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr,
638 				 u64 transtab, u64 transcfg, u64 memattr)
639 {
640 	int ret;
641 
642 	ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
643 	if (ret)
644 		return ret;
645 
646 	gpu_write64(ptdev, AS_TRANSTAB(as_nr), transtab);
647 	gpu_write64(ptdev, AS_MEMATTR(as_nr), memattr);
648 	gpu_write64(ptdev, AS_TRANSCFG(as_nr), transcfg);
649 
650 	return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
651 }
652 
653 static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr)
654 {
655 	int ret;
656 
657 	ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
658 	if (ret)
659 		return ret;
660 
661 	gpu_write64(ptdev, AS_TRANSTAB(as_nr), 0);
662 	gpu_write64(ptdev, AS_MEMATTR(as_nr), 0);
663 	gpu_write64(ptdev, AS_TRANSCFG(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED);
664 
665 	return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
666 }
667 
668 static u32 panthor_mmu_fault_mask(struct panthor_device *ptdev, u32 value)
669 {
670 	/* Bits 0 to 15 are the per-AS fault bits, bits 16 to 31 mean REQ_COMPLETE. */
671 	return value & GENMASK(15, 0);
672 }
673 
674 static u32 panthor_mmu_as_fault_mask(struct panthor_device *ptdev, u32 as)
675 {
676 	return BIT(as);
677 }
678 
679 /**
680  * panthor_vm_has_unhandled_faults() - Check if a VM has unhandled faults
681  * @vm: VM to check.
682  *
683  * Return: true if the VM has unhandled faults, false otherwise.
684  */
685 bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm)
686 {
687 	return vm->unhandled_fault;
688 }
689 
690 /**
691  * panthor_vm_is_unusable() - Check if the VM is still usable
692  * @vm: VM to check.
693  *
694  * Return: true if the VM is unusable, false otherwise.
695  */
696 bool panthor_vm_is_unusable(struct panthor_vm *vm)
697 {
698 	return vm->unusable;
699 }
700 
701 static void panthor_vm_release_as_locked(struct panthor_vm *vm)
702 {
703 	struct panthor_device *ptdev = vm->ptdev;
704 
705 	lockdep_assert_held(&ptdev->mmu->as.slots_lock);
706 
707 	if (drm_WARN_ON(&ptdev->base, vm->as.id < 0))
708 		return;
709 
710 	ptdev->mmu->as.slots[vm->as.id].vm = NULL;
711 	clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask);
712 	refcount_set(&vm->as.active_cnt, 0);
713 	list_del_init(&vm->as.lru_node);
714 	vm->as.id = -1;
715 }
716 
717 /**
718  * panthor_vm_active() - Flag a VM as active
719  * @vm: VM to flag as active.
720  *
721  * Assigns an address space to a VM so it can be used by the GPU/MCU.
722  *
723  * Return: 0 on success, a negative error code otherwise.
724  */
725 int panthor_vm_active(struct panthor_vm *vm)
726 {
727 	struct panthor_device *ptdev = vm->ptdev;
728 	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
729 	struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
730 	int ret = 0, as, cookie;
731 	u64 transtab, transcfg;
732 
733 	if (!drm_dev_enter(&ptdev->base, &cookie))
734 		return -ENODEV;
735 
736 	if (refcount_inc_not_zero(&vm->as.active_cnt))
737 		goto out_dev_exit;
738 
739 	mutex_lock(&ptdev->mmu->as.slots_lock);
740 
741 	if (refcount_inc_not_zero(&vm->as.active_cnt))
742 		goto out_unlock;
743 
744 	as = vm->as.id;
745 	if (as >= 0) {
746 		/* Unhandled pagefault on this AS, the MMU was disabled. We need to
747 		 * re-enable the MMU after clearing+unmasking the AS interrupts.
748 		 */
749 		if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as))
750 			goto out_enable_as;
751 
752 		goto out_make_active;
753 	}
754 
755 	/* Check for a free AS */
756 	if (vm->for_mcu) {
757 		drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0));
758 		as = 0;
759 	} else {
760 		as = ffz(ptdev->mmu->as.alloc_mask | BIT(0));
761 	}
762 
763 	if (!(BIT(as) & ptdev->gpu_info.as_present)) {
764 		struct panthor_vm *lru_vm;
765 
766 		lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list,
767 						  struct panthor_vm,
768 						  as.lru_node);
769 		if (drm_WARN_ON(&ptdev->base, !lru_vm)) {
770 			ret = -EBUSY;
771 			goto out_unlock;
772 		}
773 
774 		drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt));
775 		as = lru_vm->as.id;
776 		panthor_vm_release_as_locked(lru_vm);
777 	}
778 
779 	/* Assign the free or reclaimed AS to the VM */
780 	vm->as.id = as;
781 	set_bit(as, &ptdev->mmu->as.alloc_mask);
782 	ptdev->mmu->as.slots[as].vm = vm;
783 
784 out_enable_as:
785 	transtab = cfg->arm_lpae_s1_cfg.ttbr;
786 	transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
787 		   AS_TRANSCFG_PTW_RA |
788 		   AS_TRANSCFG_ADRMODE_AARCH64_4K |
789 		   AS_TRANSCFG_INA_BITS(55 - va_bits);
790 	if (ptdev->coherent)
791 		transcfg |= AS_TRANSCFG_PTW_SH_OS;
792 
793 	/* If the VM is re-activated, we clear the fault. */
794 	vm->unhandled_fault = false;
795 
796 	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
797 	 * before enabling the AS.
798 	 */
799 	if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
800 		gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
801 		ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
802 		ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
803 		gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
804 	}
805 
806 	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
807 
808 out_make_active:
809 	if (!ret) {
810 		refcount_set(&vm->as.active_cnt, 1);
811 		list_del_init(&vm->as.lru_node);
812 	}
813 
814 out_unlock:
815 	mutex_unlock(&ptdev->mmu->as.slots_lock);
816 
817 out_dev_exit:
818 	drm_dev_exit(cookie);
819 	return ret;
820 }
821 
822 /**
823  * panthor_vm_idle() - Flag a VM idle
824  * @vm: VM to flag as idle.
825  *
826  * When we know the GPU is done with the VM (no more jobs to process),
827  * we can relinquish the AS slot attached to this VM, if any.
828  *
829  * We don't release the slot immediately, but instead place the VM in
830  * the LRU list, so it can be evicted if another VM needs an AS slot.
831 	 * This way, VMs stay attached to the AS they were given until we run
832 	 * out of free slots, limiting the number of MMU operations (TLB flush
833  * and other AS updates).
834  */
835 void panthor_vm_idle(struct panthor_vm *vm)
836 {
837 	struct panthor_device *ptdev = vm->ptdev;
838 
839 	if (!refcount_dec_and_mutex_lock(&vm->as.active_cnt, &ptdev->mmu->as.slots_lock))
840 		return;
841 
842 	if (!drm_WARN_ON(&ptdev->base, vm->as.id == -1 || !list_empty(&vm->as.lru_node)))
843 		list_add_tail(&vm->as.lru_node, &ptdev->mmu->as.lru_list);
844 
845 	refcount_set(&vm->as.active_cnt, 0);
846 	mutex_unlock(&ptdev->mmu->as.slots_lock);
847 }
848 
849 u32 panthor_vm_page_size(struct panthor_vm *vm)
850 {
851 	const struct io_pgtable *pgt = io_pgtable_ops_to_pgtable(vm->pgtbl_ops);
852 	u32 pg_shift = ffs(pgt->cfg.pgsize_bitmap) - 1;
853 
854 	return 1u << pg_shift;
855 }
856 
857 static void panthor_vm_stop(struct panthor_vm *vm)
858 {
859 	drm_sched_stop(&vm->sched, NULL);
860 }
861 
862 static void panthor_vm_start(struct panthor_vm *vm)
863 {
864 	drm_sched_start(&vm->sched, 0);
865 }
866 
867 /**
868  * panthor_vm_as() - Get the AS slot attached to a VM
869  * @vm: VM to get the AS slot of.
870  *
871  * Return: -1 if the VM is not assigned an AS slot yet, >= 0 otherwise.
872  */
873 int panthor_vm_as(struct panthor_vm *vm)
874 {
875 	return vm->as.id;
876 }
877 
878 static size_t get_pgsize(u64 addr, size_t size, size_t *count)
879 {
880 	/*
881 	 * io-pgtable only operates on multiple pages within a single table
882 	 * entry, so we need to split at boundaries of the table size, i.e.
883 	 * the next block size up. The distance from address A to the next
884 	 * boundary of block size B is logically B - A % B, but in unsigned
885 	 * two's complement where B is a power of two we get the equivalence
886 	 * B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :)
887 	 */
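	/*
	 * Worked example (illustrative values only): addr = 0x201000 and
	 * size = 0x400000 give blk_offset = 0x1ff000, so this call returns
	 * SZ_4K with *count = 511 (the 4k pages up to the next 2M boundary).
	 * Once the caller reaches a 2M-aligned address with at least 2M left,
	 * the SZ_2M path below is taken instead.
	 */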
888 	size_t blk_offset = -addr % SZ_2M;
889 
890 	if (blk_offset || size < SZ_2M) {
891 		*count = min_not_zero(blk_offset, size) / SZ_4K;
892 		return SZ_4K;
893 	}
894 	blk_offset = -addr % SZ_1G ?: SZ_1G;
895 	*count = min(blk_offset, size) / SZ_2M;
896 	return SZ_2M;
897 }
898 
899 static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size)
900 {
901 	struct panthor_device *ptdev = vm->ptdev;
902 	int ret = 0, cookie;
903 
904 	if (vm->as.id < 0)
905 		return 0;
906 
907 	/* If the device is unplugged, we just silently skip the flush. */
908 	if (!drm_dev_enter(&ptdev->base, &cookie))
909 		return 0;
910 
911 	ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT);
912 
913 	drm_dev_exit(cookie);
914 	return ret;
915 }
916 
917 static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
918 {
919 	struct panthor_device *ptdev = vm->ptdev;
920 	struct io_pgtable_ops *ops = vm->pgtbl_ops;
921 	u64 offset = 0;
922 
923 	drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size);
924 
925 	while (offset < size) {
926 		size_t unmapped_sz = 0, pgcount;
927 		size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount);
928 
929 		unmapped_sz = ops->unmap_pages(ops, iova + offset, pgsize, pgcount, NULL);
930 
931 		if (drm_WARN_ON(&ptdev->base, unmapped_sz != pgsize * pgcount)) {
932 			drm_err(&ptdev->base, "failed to unmap range %llx-%llx (requested range %llx-%llx)\n",
933 				iova + offset + unmapped_sz,
934 				iova + offset + pgsize * pgcount,
935 				iova, iova + size);
936 			panthor_vm_flush_range(vm, iova, offset + unmapped_sz);
937 			return -EINVAL;
938 		}
939 		offset += unmapped_sz;
940 	}
941 
942 	return panthor_vm_flush_range(vm, iova, size);
943 }
944 
945 static int
946 panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
947 		     struct sg_table *sgt, u64 offset, u64 size)
948 {
949 	struct panthor_device *ptdev = vm->ptdev;
950 	unsigned int count;
951 	struct scatterlist *sgl;
952 	struct io_pgtable_ops *ops = vm->pgtbl_ops;
953 	u64 start_iova = iova;
954 	int ret;
955 
956 	if (!size)
957 		return 0;
958 
959 	for_each_sgtable_dma_sg(sgt, sgl, count) {
960 		dma_addr_t paddr = sg_dma_address(sgl);
961 		size_t len = sg_dma_len(sgl);
962 
963 		if (len <= offset) {
964 			offset -= len;
965 			continue;
966 		}
967 
968 		paddr += offset;
969 		len -= offset;
970 		len = min_t(size_t, len, size);
971 		size -= len;
972 
973 		drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx",
974 			vm->as.id, iova, &paddr, len);
975 
976 		while (len) {
977 			size_t pgcount, mapped = 0;
978 			size_t pgsize = get_pgsize(iova | paddr, len, &pgcount);
979 
980 			ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
981 					     GFP_KERNEL, &mapped);
982 			iova += mapped;
983 			paddr += mapped;
984 			len -= mapped;
985 
986 			if (drm_WARN_ON(&ptdev->base, !ret && !mapped))
987 				ret = -ENOMEM;
988 
989 			if (ret) {
990 				/* If something failed, unmap what we've already mapped before
991 				 * returning. The unmap call is not supposed to fail.
992 				 */
993 				drm_WARN_ON(&ptdev->base,
994 					    panthor_vm_unmap_pages(vm, start_iova,
995 								   iova - start_iova));
996 				return ret;
997 			}
998 		}
999 
1000 		if (!size)
1001 			break;
1002 
1003 		offset = 0;
1004 	}
1005 
1006 	return panthor_vm_flush_range(vm, start_iova, iova - start_iova);
1007 }
1008 
1009 static int flags_to_prot(u32 flags)
1010 {
1011 	int prot = 0;
1012 
1013 	if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC)
1014 		prot |= IOMMU_NOEXEC;
1015 
1016 	if (!(flags & DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED))
1017 		prot |= IOMMU_CACHE;
1018 
1019 	if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_READONLY)
1020 		prot |= IOMMU_READ;
1021 	else
1022 		prot |= IOMMU_READ | IOMMU_WRITE;
1023 
1024 	return prot;
1025 }
1026 
1027 /**
1028  * panthor_vm_alloc_va() - Allocate a region in the auto-va space
1029  * @vm: VM to allocate a region on.
1030  * @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user
1031  * wants the VA to be automatically allocated from the auto-VA range.
1032  * @size: size of the VA range.
1033  * @va_node: drm_mm_node to initialize. Must be zero-initialized.
1034  *
1035  * Some GPU objects, like heap chunks, are fully managed by the kernel and
1036  * need to be mapped to the userspace VM, in the region reserved for kernel
1037  * objects.
1038  *
1039  * This function takes care of allocating a region in the kernel auto-VA space.
1040  *
1041  * Return: 0 on success, an error code otherwise.
1042  */
1043 int
1044 panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size,
1045 		    struct drm_mm_node *va_node)
1046 {
1047 	ssize_t vm_pgsz = panthor_vm_page_size(vm);
1048 	int ret;
1049 
1050 	if (!size || !IS_ALIGNED(size, vm_pgsz))
1051 		return -EINVAL;
1052 
1053 	if (va != PANTHOR_VM_KERNEL_AUTO_VA && !IS_ALIGNED(va, vm_pgsz))
1054 		return -EINVAL;
1055 
1056 	mutex_lock(&vm->mm_lock);
1057 	if (va != PANTHOR_VM_KERNEL_AUTO_VA) {
1058 		va_node->start = va;
1059 		va_node->size = size;
1060 		ret = drm_mm_reserve_node(&vm->mm, va_node);
1061 	} else {
1062 		ret = drm_mm_insert_node_in_range(&vm->mm, va_node, size,
1063 						  size >= SZ_2M ? SZ_2M : SZ_4K,
1064 						  0, vm->kernel_auto_va.start,
1065 						  vm->kernel_auto_va.end,
1066 						  DRM_MM_INSERT_BEST);
1067 	}
1068 	mutex_unlock(&vm->mm_lock);
1069 
1070 	return ret;
1071 }
1072 
1073 /**
1074  * panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va()
1075  * @vm: VM to free the region on.
1076  * @va_node: Memory node representing the region to free.
1077  */
1078 void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node)
1079 {
1080 	mutex_lock(&vm->mm_lock);
1081 	drm_mm_remove_node(va_node);
1082 	mutex_unlock(&vm->mm_lock);
1083 }
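/*
 * Minimal usage sketch for the two helpers above (error handling and the
 * actual mapping of a kernel BO at va_node.start are left out):
 *
 *	struct drm_mm_node va_node = {};
 *	int err;
 *
 *	err = panthor_vm_alloc_va(vm, PANTHOR_VM_KERNEL_AUTO_VA, SZ_2M, &va_node);
 *	if (err)
 *		return err;
 *
 *	... map a kernel BO at va_node.start, use it, then unmap it ...
 *
 *	panthor_vm_free_va(vm, &va_node);
 */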
1084 
1085 static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo)
1086 {
1087 	struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj);
1088 	struct drm_gpuvm *vm = vm_bo->vm;
1089 	bool unpin;
1090 
1091 	/* We must retain the GEM before calling drm_gpuvm_bo_put(),
1092 	 * otherwise the mutex might be destroyed while we hold it.
1093 	 * Same goes for the VM, since we take the VM resv lock.
1094 	 */
1095 	drm_gem_object_get(&bo->base.base);
1096 	drm_gpuvm_get(vm);
1097 
1098 	/* We take the resv lock to protect against concurrent accesses to the
1099 	 * gpuvm evicted/extobj lists that are modified in
1100 	 * drm_gpuvm_bo_destroy(), which is called if drm_gpuvm_bo_put()
1101 	 * releases the last vm_bo reference.
1102 	 * We take the BO GPUVA list lock to protect the vm_bo removal from the
1103 	 * GEM vm_bo list.
1104 	 */
1105 	dma_resv_lock(drm_gpuvm_resv(vm), NULL);
1106 	mutex_lock(&bo->base.base.gpuva.lock);
1107 	unpin = drm_gpuvm_bo_put(vm_bo);
1108 	mutex_unlock(&bo->base.base.gpuva.lock);
1109 	dma_resv_unlock(drm_gpuvm_resv(vm));
1110 
1111 	/* If the vm_bo object was destroyed, release the pin reference that
1112 	 * was held by this object.
1113 	 */
1114 	if (unpin && !drm_gem_is_imported(&bo->base.base))
1115 		drm_gem_shmem_unpin(&bo->base);
1116 
1117 	drm_gpuvm_put(vm);
1118 	drm_gem_object_put(&bo->base.base);
1119 }
1120 
1121 static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx,
1122 				      struct panthor_vm *vm)
1123 {
1124 	struct panthor_vma *vma, *tmp_vma;
1125 
1126 	u32 remaining_pt_count = op_ctx->rsvd_page_tables.count -
1127 				 op_ctx->rsvd_page_tables.ptr;
1128 
1129 	if (remaining_pt_count) {
1130 		kmem_cache_free_bulk(pt_cache, remaining_pt_count,
1131 				     op_ctx->rsvd_page_tables.pages +
1132 				     op_ctx->rsvd_page_tables.ptr);
1133 	}
1134 
1135 	kfree(op_ctx->rsvd_page_tables.pages);
1136 
1137 	if (op_ctx->map.vm_bo)
1138 		panthor_vm_bo_put(op_ctx->map.vm_bo);
1139 
1140 	for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++)
1141 		kfree(op_ctx->preallocated_vmas[i]);
1142 
1143 	list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) {
1144 		list_del(&vma->node);
1145 		panthor_vm_bo_put(vma->base.vm_bo);
1146 		kfree(vma);
1147 	}
1148 }
1149 
1150 static struct panthor_vma *
1151 panthor_vm_op_ctx_get_vma(struct panthor_vm_op_ctx *op_ctx)
1152 {
1153 	for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) {
1154 		struct panthor_vma *vma = op_ctx->preallocated_vmas[i];
1155 
1156 		if (vma) {
1157 			op_ctx->preallocated_vmas[i] = NULL;
1158 			return vma;
1159 		}
1160 	}
1161 
1162 	return NULL;
1163 }
1164 
1165 static int
1166 panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx)
1167 {
1168 	u32 vma_count;
1169 
1170 	switch (op_ctx->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) {
1171 	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP:
1172 		/* One VMA for the new mapping, and two more VMAs for the remap case
1173 		 * which might contain both a prev and next VA.
1174 		 */
1175 		vma_count = 3;
1176 		break;
1177 
1178 	case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
1179 		/* Partial unmaps might trigger a remap with either a prev or a next VA,
1180 		 * but not both.
1181 		 */
1182 		vma_count = 1;
1183 		break;
1184 
1185 	default:
1186 		return 0;
1187 	}
1188 
1189 	for (u32 i = 0; i < vma_count; i++) {
1190 		struct panthor_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
1191 
1192 		if (!vma)
1193 			return -ENOMEM;
1194 
1195 		op_ctx->preallocated_vmas[i] = vma;
1196 	}
1197 
1198 	return 0;
1199 }
1200 
1201 #define PANTHOR_VM_BIND_OP_MAP_FLAGS \
1202 	(DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \
1203 	 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \
1204 	 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED | \
1205 	 DRM_PANTHOR_VM_BIND_OP_TYPE_MASK)
1206 
1207 static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
1208 					 struct panthor_vm *vm,
1209 					 struct panthor_gem_object *bo,
1210 					 u64 offset,
1211 					 u64 size, u64 va,
1212 					 u32 flags)
1213 {
1214 	struct drm_gpuvm_bo *preallocated_vm_bo;
1215 	struct sg_table *sgt = NULL;
1216 	u64 pt_count;
1217 	int ret;
1218 
1219 	if (!bo)
1220 		return -EINVAL;
1221 
1222 	if ((flags & ~PANTHOR_VM_BIND_OP_MAP_FLAGS) ||
1223 	    (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP)
1224 		return -EINVAL;
1225 
1226 	/* Make sure the requested offset and size are within the BO bounds. */
1227 	if (size > bo->base.base.size || offset > bo->base.base.size - size)
1228 		return -EINVAL;
1229 
1230 	/* If the BO has an exclusive VM attached, it can't be mapped to other VMs. */
1231 	if (bo->exclusive_vm_root_gem &&
1232 	    bo->exclusive_vm_root_gem != panthor_vm_root_gem(vm))
1233 		return -EINVAL;
1234 
1235 	memset(op_ctx, 0, sizeof(*op_ctx));
1236 	INIT_LIST_HEAD(&op_ctx->returned_vmas);
1237 	op_ctx->flags = flags;
1238 	op_ctx->va.range = size;
1239 	op_ctx->va.addr = va;
1240 
1241 	ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx);
1242 	if (ret)
1243 		goto err_cleanup;
1244 
1245 	if (!drm_gem_is_imported(&bo->base.base)) {
1246 		/* Pre-reserve the BO pages, so the map operation doesn't have to
1247 		 * allocate.
1248 		 */
1249 		ret = drm_gem_shmem_pin(&bo->base);
1250 		if (ret)
1251 			goto err_cleanup;
1252 	}
1253 
1254 	sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
1255 	if (IS_ERR(sgt)) {
1256 		if (!drm_gem_is_imported(&bo->base.base))
1257 			drm_gem_shmem_unpin(&bo->base);
1258 
1259 		ret = PTR_ERR(sgt);
1260 		goto err_cleanup;
1261 	}
1262 
1263 	op_ctx->map.sgt = sgt;
1264 
1265 	preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base);
1266 	if (!preallocated_vm_bo) {
1267 		if (!drm_gem_is_imported(&bo->base.base))
1268 			drm_gem_shmem_unpin(&bo->base);
1269 
1270 		ret = -ENOMEM;
1271 		goto err_cleanup;
1272 	}
1273 
1274 	/* drm_gpuvm_bo_obtain_prealloc() will call drm_gpuvm_bo_put() on our
1275 	 * pre-allocated BO if the <BO,VM> association exists. Given we
1276 	 * only have one ref on preallocated_vm_bo, drm_gpuvm_bo_destroy() will
1277 	 * be called immediately, and we have to hold the VM resv lock when
1278 	 * calling this function.
1279 	 */
1280 	dma_resv_lock(panthor_vm_resv(vm), NULL);
1281 	mutex_lock(&bo->base.base.gpuva.lock);
1282 	op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo);
1283 	mutex_unlock(&bo->base.base.gpuva.lock);
1284 	dma_resv_unlock(panthor_vm_resv(vm));
1285 
1286 	/* If a vm_bo for this <VM,BO> combination exists, it already
1287 	 * retains a pin ref, and we can release the one we took earlier.
1288 	 *
1289 	 * If our pre-allocated vm_bo is picked, it now retains the pin ref,
1290 	 * which will be released in panthor_vm_bo_put().
1291 	 */
1292 	if (preallocated_vm_bo != op_ctx->map.vm_bo &&
1293 	    !drm_gem_is_imported(&bo->base.base))
1294 		drm_gem_shmem_unpin(&bo->base);
1295 
1296 	op_ctx->map.bo_offset = offset;
1297 
1298 	/* L1, L2 and L3 page tables.
1299 	 * We could optimize L3 allocation by iterating over the sgt and merging
1300 	 * 2M contiguous blocks, but it's simpler to over-provision and return
1301 	 * the pages if they're not used.
1302 	 */
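	/*
	 * Worked example (illustrative values only): va = 1GiB and size = 4MiB
	 * span one 512GiB region (1 L1 table), one 1GiB region (1 L2 table)
	 * and two 2MiB regions (2 L3 tables), so pt_count = 4 pages get
	 * reserved; unused ones are returned to pt_cache at cleanup time.
	 */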
1303 	pt_count = ((ALIGN(va + size, 1ull << 39) - ALIGN_DOWN(va, 1ull << 39)) >> 39) +
1304 		   ((ALIGN(va + size, 1ull << 30) - ALIGN_DOWN(va, 1ull << 30)) >> 30) +
1305 		   ((ALIGN(va + size, 1ull << 21) - ALIGN_DOWN(va, 1ull << 21)) >> 21);
1306 
1307 	op_ctx->rsvd_page_tables.pages = kcalloc(pt_count,
1308 						 sizeof(*op_ctx->rsvd_page_tables.pages),
1309 						 GFP_KERNEL);
1310 	if (!op_ctx->rsvd_page_tables.pages) {
1311 		ret = -ENOMEM;
1312 		goto err_cleanup;
1313 	}
1314 
1315 	ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
1316 				    op_ctx->rsvd_page_tables.pages);
1317 	op_ctx->rsvd_page_tables.count = ret;
1318 	if (ret != pt_count) {
1319 		ret = -ENOMEM;
1320 		goto err_cleanup;
1321 	}
1322 
1323 	/* Insert BO into the extobj list last, when we know nothing can fail. */
1324 	dma_resv_lock(panthor_vm_resv(vm), NULL);
1325 	drm_gpuvm_bo_extobj_add(op_ctx->map.vm_bo);
1326 	dma_resv_unlock(panthor_vm_resv(vm));
1327 
1328 	return 0;
1329 
1330 err_cleanup:
1331 	panthor_vm_cleanup_op_ctx(op_ctx, vm);
1332 	return ret;
1333 }
1334 
1335 static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx,
1336 					   struct panthor_vm *vm,
1337 					   u64 va, u64 size)
1338 {
1339 	u32 pt_count = 0;
1340 	int ret;
1341 
1342 	memset(op_ctx, 0, sizeof(*op_ctx));
1343 	INIT_LIST_HEAD(&op_ctx->returned_vmas);
1344 	op_ctx->va.range = size;
1345 	op_ctx->va.addr = va;
1346 	op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP;
1347 
1348 	/* Pre-allocate L3 page tables to account for the split-2M-block
1349 	 * situation on unmap.
1350 	 */
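	/*
	 * For instance (illustrative values only), unmapping va = 0x201000
	 * with size = 0x3fe000 is unaligned on both ends and crosses a 2MiB
	 * boundary, so two pages are reserved in case both the first and the
	 * last 2MiB blocks were mapped with block entries and need to be
	 * split into 4k mappings.
	 */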
1351 	if (va != ALIGN(va, SZ_2M))
1352 		pt_count++;
1353 
1354 	if (va + size != ALIGN(va + size, SZ_2M) &&
1355 	    ALIGN(va + size, SZ_2M) != ALIGN(va, SZ_2M))
1356 		pt_count++;
1357 
1358 	ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx);
1359 	if (ret)
1360 		goto err_cleanup;
1361 
1362 	if (pt_count) {
1363 		op_ctx->rsvd_page_tables.pages = kcalloc(pt_count,
1364 							 sizeof(*op_ctx->rsvd_page_tables.pages),
1365 							 GFP_KERNEL);
1366 		if (!op_ctx->rsvd_page_tables.pages) {
1367 			ret = -ENOMEM;
1368 			goto err_cleanup;
1369 		}
1370 
1371 		ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
1372 					    op_ctx->rsvd_page_tables.pages);
1373 		if (ret != pt_count) {
1374 			ret = -ENOMEM;
1375 			goto err_cleanup;
1376 		}
1377 		op_ctx->rsvd_page_tables.count = pt_count;
1378 	}
1379 
1380 	return 0;
1381 
1382 err_cleanup:
1383 	panthor_vm_cleanup_op_ctx(op_ctx, vm);
1384 	return ret;
1385 }
1386 
1387 static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx,
1388 						struct panthor_vm *vm)
1389 {
1390 	memset(op_ctx, 0, sizeof(*op_ctx));
1391 	INIT_LIST_HEAD(&op_ctx->returned_vmas);
1392 	op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY;
1393 }
1394 
1395 /**
1396  * panthor_vm_get_bo_for_va() - Get the GEM object mapped at a virtual address
1397  * @vm: VM to look into.
1398  * @va: Virtual address to search for.
1399  * @bo_offset: Offset of the GEM object mapped at this virtual address.
1400  * Only valid on success.
1401  *
1402  * The object returned by this function might no longer be mapped when the
1403  * function returns. It's the caller's responsibility to ensure there are no
1404  * concurrent map/unmap operations making the returned value invalid, or
1405  * make sure it doesn't matter if the object is no longer mapped.
1406  *
1407  * Return: A valid pointer on success, an ERR_PTR() otherwise.
1408  */
1409 struct panthor_gem_object *
1410 panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset)
1411 {
1412 	struct panthor_gem_object *bo = ERR_PTR(-ENOENT);
1413 	struct drm_gpuva *gpuva;
1414 	struct panthor_vma *vma;
1415 
1416 	/* Take the VM lock to prevent concurrent map/unmap operations. */
1417 	mutex_lock(&vm->op_lock);
1418 	gpuva = drm_gpuva_find_first(&vm->base, va, 1);
1419 	vma = gpuva ? container_of(gpuva, struct panthor_vma, base) : NULL;
1420 	if (vma && vma->base.gem.obj) {
1421 		drm_gem_object_get(vma->base.gem.obj);
1422 		bo = to_panthor_bo(vma->base.gem.obj);
1423 		*bo_offset = vma->base.gem.offset + (va - vma->base.va.addr);
1424 	}
1425 	mutex_unlock(&vm->op_lock);
1426 
1427 	return bo;
1428 }
1429 
1430 #define PANTHOR_VM_MIN_KERNEL_VA_SIZE	SZ_256M
1431 
1432 static u64
1433 panthor_vm_create_get_user_va_range(const struct drm_panthor_vm_create *args,
1434 				    u64 full_va_range)
1435 {
1436 	u64 user_va_range;
1437 
1438 	/* Make sure we have a minimum amount of VA space for kernel objects. */
1439 	if (full_va_range < PANTHOR_VM_MIN_KERNEL_VA_SIZE)
1440 		return 0;
1441 
1442 	if (args->user_va_range) {
1443 		/* Use the user provided value if != 0. */
1444 		user_va_range = args->user_va_range;
1445 	} else if (TASK_SIZE_OF(current) < full_va_range) {
1446 		/* If the task VM size is smaller than the GPU VA range, pick this
1447 		 * as our default user VA range, so userspace can CPU/GPU map buffers
1448 		 * at the same address.
1449 		 */
1450 		user_va_range = TASK_SIZE_OF(current);
1451 	} else {
1452 		/* If the GPU VA range is smaller than the task VM size, we
1453 		 * just have to live with the fact we won't be able to map
1454 		 * all buffers at the same GPU/CPU address.
1455 		 *
1456 		 * If the GPU VA range is bigger than 4G (more than 32 bits of
1457 		 * VA), we split the range in two, and assign half of it to
1458 		 * the user and the other half to the kernel. If it's not, we
1459 		 * keep the kernel VA space as small as possible.
1460 		 */
1461 		user_va_range = full_va_range > SZ_4G ?
1462 				full_va_range / 2 :
1463 				full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE;
1464 	}
1465 
1466 	if (full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE < user_va_range)
1467 		user_va_range = full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE;
1468 
1469 	return user_va_range;
1470 }
1471 
1472 #define PANTHOR_VM_CREATE_FLAGS		0
1473 
1474 static int
1475 panthor_vm_create_check_args(const struct panthor_device *ptdev,
1476 			     const struct drm_panthor_vm_create *args,
1477 			     u64 *kernel_va_start, u64 *kernel_va_range)
1478 {
1479 	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
1480 	u64 full_va_range = 1ull << va_bits;
1481 	u64 user_va_range;
1482 
1483 	if (args->flags & ~PANTHOR_VM_CREATE_FLAGS)
1484 		return -EINVAL;
1485 
1486 	user_va_range = panthor_vm_create_get_user_va_range(args, full_va_range);
1487 	if (!user_va_range || (args->user_va_range && args->user_va_range > user_va_range))
1488 		return -EINVAL;
1489 
1490 	/* Pick a kernel VA range that's a power of two, to have a clear split. */
1491 	*kernel_va_range = rounddown_pow_of_two(full_va_range - user_va_range);
1492 	*kernel_va_start = full_va_range - *kernel_va_range;
1493 	return 0;
1494 }
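/*
 * Worked example (illustrative values only, assuming va_bits = 48 and a
 * 39-bit user-space task size): with args->user_va_range = 0, the user VA
 * range defaults to 1ull << 39, the kernel VA range gets rounded down to
 * 1ull << 47 and starts at 1ull << 47, leaving the space in between unused.
 */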
1495 
1496 /*
1497  * Only 32 VMs per open file. If that becomes a limiting factor, we can
1498  * increase this number.
1499  */
1500 #define PANTHOR_MAX_VMS_PER_FILE	32
1501 
1502 /**
1503  * panthor_vm_pool_create_vm() - Create a VM
1504  * @ptdev: The panthor device
1505  * @pool: The VM pool to create this VM in.
1506  * @args: VM creation args.
1507  *
1508  * Return: a positive VM ID on success, a negative error code otherwise.
1509  */
1510 int panthor_vm_pool_create_vm(struct panthor_device *ptdev,
1511 			      struct panthor_vm_pool *pool,
1512 			      struct drm_panthor_vm_create *args)
1513 {
1514 	u64 kernel_va_start, kernel_va_range;
1515 	struct panthor_vm *vm;
1516 	int ret;
1517 	u32 id;
1518 
1519 	ret = panthor_vm_create_check_args(ptdev, args, &kernel_va_start, &kernel_va_range);
1520 	if (ret)
1521 		return ret;
1522 
1523 	vm = panthor_vm_create(ptdev, false, kernel_va_start, kernel_va_range,
1524 			       kernel_va_start, kernel_va_range);
1525 	if (IS_ERR(vm))
1526 		return PTR_ERR(vm);
1527 
1528 	ret = xa_alloc(&pool->xa, &id, vm,
1529 		       XA_LIMIT(1, PANTHOR_MAX_VMS_PER_FILE), GFP_KERNEL);
1530 
1531 	if (ret) {
1532 		panthor_vm_put(vm);
1533 		return ret;
1534 	}
1535 
1536 	args->user_va_range = kernel_va_start;
1537 	return id;
1538 }
1539 
1540 static void panthor_vm_destroy(struct panthor_vm *vm)
1541 {
1542 	if (!vm)
1543 		return;
1544 
1545 	vm->destroyed = true;
1546 
1547 	mutex_lock(&vm->heaps.lock);
1548 	panthor_heap_pool_destroy(vm->heaps.pool);
1549 	vm->heaps.pool = NULL;
1550 	mutex_unlock(&vm->heaps.lock);
1551 
1552 	drm_WARN_ON(&vm->ptdev->base,
1553 		    panthor_vm_unmap_range(vm, vm->base.mm_start, vm->base.mm_range));
1554 	panthor_vm_put(vm);
1555 }
1556 
1557 /**
1558  * panthor_vm_pool_destroy_vm() - Destroy a VM.
1559  * @pool: VM pool.
1560  * @handle: VM handle.
1561  *
1562  * This function doesn't free the VM object or its resources, it just kills
1563  * all mappings, and makes sure nothing can be mapped after that point.
1564  *
1565  * If there were any active jobs at the time this function is called, these
1566  * jobs should experience page faults and be killed as a result.
1567  *
1568  * The VM resources are freed when the last reference on the VM object is
1569  * dropped.
1570  *
1571  * Return: %0 for success, negative errno value for failure
1572  */
1573 int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle)
1574 {
1575 	struct panthor_vm *vm;
1576 
1577 	vm = xa_erase(&pool->xa, handle);
1578 
1579 	panthor_vm_destroy(vm);
1580 
1581 	return vm ? 0 : -EINVAL;
1582 }
1583 
1584 /**
1585  * panthor_vm_pool_get_vm() - Retrieve VM object bound to a VM handle
1586  * @pool: VM pool to check.
1587  * @handle: Handle of the VM to retrieve.
1588  *
1589  * Return: A valid pointer if the VM exists, NULL otherwise.
1590  */
1591 struct panthor_vm *
1592 panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
1593 {
1594 	struct panthor_vm *vm;
1595 
1596 	xa_lock(&pool->xa);
1597 	vm = panthor_vm_get(xa_load(&pool->xa, handle));
1598 	xa_unlock(&pool->xa);
1599 
1600 	return vm;
1601 }
1602 
1603 /**
1604  * panthor_vm_pool_destroy() - Destroy a VM pool.
1605  * @pfile: File.
1606  *
1607  * Destroy all VMs in the pool, and release the pool resources.
1608  *
1609  * Note that VMs can outlive the pool they were created from if other
1610  * objects hold a reference to these VMs.
1611  */
1612 void panthor_vm_pool_destroy(struct panthor_file *pfile)
1613 {
1614 	struct panthor_vm *vm;
1615 	unsigned long i;
1616 
1617 	if (!pfile->vms)
1618 		return;
1619 
1620 	xa_for_each(&pfile->vms->xa, i, vm)
1621 		panthor_vm_destroy(vm);
1622 
1623 	xa_destroy(&pfile->vms->xa);
1624 	kfree(pfile->vms);
1625 }
1626 
1627 /**
1628  * panthor_vm_pool_create() - Create a VM pool
1629  * @pfile: File.
1630  *
1631  * Return: 0 on success, a negative error code otherwise.
1632  */
1633 int panthor_vm_pool_create(struct panthor_file *pfile)
1634 {
1635 	pfile->vms = kzalloc(sizeof(*pfile->vms), GFP_KERNEL);
1636 	if (!pfile->vms)
1637 		return -ENOMEM;
1638 
1639 	xa_init_flags(&pfile->vms->xa, XA_FLAGS_ALLOC1);
1640 	return 0;
1641 }
1642 
1643 /* dummy TLB ops, the real TLB flush happens in panthor_vm_flush_range() */
1644 static void mmu_tlb_flush_all(void *cookie)
1645 {
1646 }
1647 
1648 static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie)
1649 {
1650 }
1651 
1652 static const struct iommu_flush_ops mmu_tlb_ops = {
1653 	.tlb_flush_all = mmu_tlb_flush_all,
1654 	.tlb_flush_walk = mmu_tlb_flush_walk,
1655 };
1656 
1657 static const char *access_type_name(struct panthor_device *ptdev,
1658 				    u32 fault_status)
1659 {
1660 	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
1661 	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
1662 		return "ATOMIC";
1663 	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
1664 		return "READ";
1665 	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
1666 		return "WRITE";
1667 	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
1668 		return "EXECUTE";
1669 	default:
1670 		drm_WARN_ON(&ptdev->base, 1);
1671 		return NULL;
1672 	}
1673 }
1674 
1675 static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
1676 {
1677 	bool has_unhandled_faults = false;
1678 
1679 	status = panthor_mmu_fault_mask(ptdev, status);
1680 	while (status) {
1681 		u32 as = ffs(status | (status >> 16)) - 1;
1682 		u32 mask = panthor_mmu_as_fault_mask(ptdev, as);
1683 		u32 new_int_mask;
1684 		u64 addr;
1685 		u32 fault_status;
1686 		u32 exception_type;
1687 		u32 access_type;
1688 		u32 source_id;
1689 
1690 		fault_status = gpu_read(ptdev, AS_FAULTSTATUS(as));
1691 		addr = gpu_read64(ptdev, AS_FAULTADDRESS(as));
1692 
1693 		/* decode the fault status */
1694 		exception_type = fault_status & 0xFF;
1695 		access_type = (fault_status >> 8) & 0x3;
1696 		source_id = (fault_status >> 16);
1697 
1698 		mutex_lock(&ptdev->mmu->as.slots_lock);
1699 
1700 		ptdev->mmu->as.faulty_mask |= mask;
1701 		new_int_mask =
1702 			panthor_mmu_fault_mask(ptdev, ~ptdev->mmu->as.faulty_mask);
1703 
1704 		/* terminal fault, print info about the fault */
1705 		drm_err(&ptdev->base,
1706 			"Unhandled Page fault in AS%d at VA 0x%016llX\n"
1707 			"raw fault status: 0x%X\n"
1708 			"decoded fault status: %s\n"
1709 			"exception type 0x%X: %s\n"
1710 			"access type 0x%X: %s\n"
1711 			"source id 0x%X\n",
1712 			as, addr,
1713 			fault_status,
1714 			(fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
1715 			exception_type, panthor_exception_name(ptdev, exception_type),
1716 			access_type, access_type_name(ptdev, fault_status),
1717 			source_id);
1718 
1719 		/* We don't handle VM faults at the moment, so let's just clear the
1720 		 * interrupt and let the writer/reader crash.
1721 		 * Note that COMPLETED irqs are never cleared, but this is fine
1722 		 * because they are always masked.
1723 		 */
1724 		gpu_write(ptdev, MMU_INT_CLEAR, mask);
1725 
1726 		/* Ignore MMU interrupts on this AS until it's been
1727 		 * re-enabled.
1728 		 */
1729 		ptdev->mmu->irq.mask = new_int_mask;
1730 
1731 		if (ptdev->mmu->as.slots[as].vm)
1732 			ptdev->mmu->as.slots[as].vm->unhandled_fault = true;
1733 
1734 		/* Disable the MMU to kill jobs on this AS. */
1735 		panthor_mmu_as_disable(ptdev, as);
1736 		mutex_unlock(&ptdev->mmu->as.slots_lock);
1737 
1738 		status &= ~mask;
1739 		has_unhandled_faults = true;
1740 	}
1741 
1742 	if (has_unhandled_faults)
1743 		panthor_sched_report_mmu_fault(ptdev);
1744 }
1745 PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler);
1746 
1747 /**
1748  * panthor_mmu_suspend() - Suspend the MMU logic
1749  * @ptdev: Device.
1750  *
1751  * All we do here is de-assign the AS slots on all active VMs, so things
1752  * get flushed to the main memory, and no further access to these VMs is
1753  * possible.
1754  *
1755  * We also suspend the MMU IRQ.
1756  */
1757 void panthor_mmu_suspend(struct panthor_device *ptdev)
1758 {
1759 	mutex_lock(&ptdev->mmu->as.slots_lock);
1760 	for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) {
1761 		struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm;
1762 
1763 		if (vm) {
1764 			drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i));
1765 			panthor_vm_release_as_locked(vm);
1766 		}
1767 	}
1768 	mutex_unlock(&ptdev->mmu->as.slots_lock);
1769 
1770 	panthor_mmu_irq_suspend(&ptdev->mmu->irq);
1771 }
1772 
1773 /**
1774  * panthor_mmu_resume() - Resume the MMU logic
1775  * @ptdev: Device.
1776  *
1777  * Resume the IRQ.
1778  *
1779  * We don't re-enable previously active VMs. We assume other parts of the
1780  * driver will call panthor_vm_active() on the VMs they intend to use.
1781  */
1782 void panthor_mmu_resume(struct panthor_device *ptdev)
1783 {
1784 	mutex_lock(&ptdev->mmu->as.slots_lock);
1785 	ptdev->mmu->as.alloc_mask = 0;
1786 	ptdev->mmu->as.faulty_mask = 0;
1787 	mutex_unlock(&ptdev->mmu->as.slots_lock);
1788 
1789 	panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
1790 }
1791 
1792 /**
1793  * panthor_mmu_pre_reset() - Prepare for a reset
1794  * @ptdev: Device.
1795  *
1796  * Suspend the IRQ, and make sure all VM_BIND queues are stopped, so we
1797  * don't get asked to do a VM operation while the GPU is down.
1798  *
1799  * We don't cleanly shut down the AS slots here, because the reset might
1800  * come from a stuck AS_ACTIVE_BIT situation.
1801  */
1802 void panthor_mmu_pre_reset(struct panthor_device *ptdev)
1803 {
1804 	struct panthor_vm *vm;
1805 
1806 	panthor_mmu_irq_suspend(&ptdev->mmu->irq);
1807 
1808 	mutex_lock(&ptdev->mmu->vm.lock);
1809 	ptdev->mmu->vm.reset_in_progress = true;
1810 	list_for_each_entry(vm, &ptdev->mmu->vm.list, node)
1811 		panthor_vm_stop(vm);
1812 	mutex_unlock(&ptdev->mmu->vm.lock);
1813 }
1814 
1815 /**
1816  * panthor_mmu_post_reset() - Restore things after a reset
1817  * @ptdev: Device.
1818  *
1819  * Put the MMU logic back in action after a reset. That implies resuming the
1820  * IRQ and re-enabling the VM_BIND queues.
1821  */
1822 void panthor_mmu_post_reset(struct panthor_device *ptdev)
1823 {
1824 	struct panthor_vm *vm;
1825 
1826 	mutex_lock(&ptdev->mmu->as.slots_lock);
1827 
1828 	/* Now that the reset is effective, we can assume that none of the
1829 	 * AS slots are set up, and clear the faulty flags too.
1830 	 */
1831 	ptdev->mmu->as.alloc_mask = 0;
1832 	ptdev->mmu->as.faulty_mask = 0;
1833 
1834 	for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) {
1835 		struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm;
1836 
1837 		if (vm)
1838 			panthor_vm_release_as_locked(vm);
1839 	}
1840 
1841 	mutex_unlock(&ptdev->mmu->as.slots_lock);
1842 
1843 	panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
1844 
1845 	/* Restart the VM_BIND queues. */
1846 	mutex_lock(&ptdev->mmu->vm.lock);
1847 	list_for_each_entry(vm, &ptdev->mmu->vm.list, node) {
1848 		panthor_vm_start(vm);
1849 	}
1850 	ptdev->mmu->vm.reset_in_progress = false;
1851 	mutex_unlock(&ptdev->mmu->vm.lock);
1852 }
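
/*
 * Usage sketch (illustration only, compiled out): the expected ordering of
 * the reset helpers above. The do_hw_reset() step is a placeholder for
 * whatever actually resets the GPU; it is an assumption, not a driver
 * function.
 */
#if 0
static void panthor_reset_sequence_example(struct panthor_device *ptdev)
{
	/* Mask the MMU IRQ and stop all VM_BIND queues. */
	panthor_mmu_pre_reset(ptdev);

	do_hw_reset(ptdev);	/* placeholder for the actual GPU reset */

	/* Clear AS bookkeeping, re-arm the IRQ, restart VM_BIND queues. */
	panthor_mmu_post_reset(ptdev);
}
#endif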
1853 
1854 static void panthor_vm_free(struct drm_gpuvm *gpuvm)
1855 {
1856 	struct panthor_vm *vm = container_of(gpuvm, struct panthor_vm, base);
1857 	struct panthor_device *ptdev = vm->ptdev;
1858 
1859 	mutex_lock(&vm->heaps.lock);
1860 	if (drm_WARN_ON(&ptdev->base, vm->heaps.pool))
1861 		panthor_heap_pool_destroy(vm->heaps.pool);
1862 	mutex_unlock(&vm->heaps.lock);
1863 	mutex_destroy(&vm->heaps.lock);
1864 
1865 	mutex_lock(&ptdev->mmu->vm.lock);
1866 	list_del(&vm->node);
1867 	/* Restore the scheduler state so we can call drm_sched_entity_destroy()
1868 	 * and drm_sched_fini(). If we get here, it means we have no job left
1869 	 * and no new jobs can be queued, so we can start the scheduler without
1870 	 * risking interfering with the reset.
1871 	 */
1872 	if (ptdev->mmu->vm.reset_in_progress)
1873 		panthor_vm_start(vm);
1874 	mutex_unlock(&ptdev->mmu->vm.lock);
1875 
1876 	drm_sched_entity_destroy(&vm->entity);
1877 	drm_sched_fini(&vm->sched);
1878 
1879 	mutex_lock(&ptdev->mmu->as.slots_lock);
1880 	if (vm->as.id >= 0) {
1881 		int cookie;
1882 
1883 		if (drm_dev_enter(&ptdev->base, &cookie)) {
1884 			panthor_mmu_as_disable(ptdev, vm->as.id);
1885 			drm_dev_exit(cookie);
1886 		}
1887 
1888 		ptdev->mmu->as.slots[vm->as.id].vm = NULL;
1889 		clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask);
1890 		list_del(&vm->as.lru_node);
1891 	}
1892 	mutex_unlock(&ptdev->mmu->as.slots_lock);
1893 
1894 	free_io_pgtable_ops(vm->pgtbl_ops);
1895 
1896 	drm_mm_takedown(&vm->mm);
1897 	kfree(vm);
1898 }
1899 
1900 /**
1901  * panthor_vm_put() - Release a reference on a VM
1902  * @vm: VM to release the reference on. Can be NULL.
1903  */
1904 void panthor_vm_put(struct panthor_vm *vm)
1905 {
1906 	drm_gpuvm_put(vm ? &vm->base : NULL);
1907 }
1908 
1909 /**
1910  * panthor_vm_get() - Get a VM reference
1911  * @vm: VM to get the reference on. Can be NULL.
1912  *
1913  * Return: @vm value.
1914  */
1915 struct panthor_vm *panthor_vm_get(struct panthor_vm *vm)
1916 {
1917 	if (vm)
1918 		drm_gpuvm_get(&vm->base);
1919 
1920 	return vm;
1921 }
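
/*
 * Usage sketch (illustration only, compiled out): the get/put pattern used
 * by callers that need to keep a VM alive across an asynchronous operation,
 * mirroring what panthor_vm_bind_job_create()/panthor_vm_bind_job_release()
 * do further down in this file.
 */
#if 0
static void panthor_vm_ref_example(struct panthor_vm *vm)
{
	struct panthor_vm *ref = panthor_vm_get(vm);	/* returns @vm */

	/* ... use the VM while holding the reference ... */

	panthor_vm_put(ref);	/* NULL-safe, like panthor_vm_get() */
}
#endif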
1922 
1923 /**
1924  * panthor_vm_get_heap_pool() - Get the heap pool attached to a VM
1925  * @vm: VM to query the heap pool on.
1926  * @create: True if the heap pool should be created when it doesn't exist.
1927  *
1928  * Heap pools are per-VM. This function allows one to retrieve the heap pool
1929  * attached to a VM.
1930  *
1931  * If no heap pool exists yet, and @create is true, we create one.
1932  *
1933  * The returned panthor_heap_pool should be released with panthor_heap_pool_put().
1934  *
1935  * Return: A valid pointer on success, an ERR_PTR() otherwise.
1936  */
1937 struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create)
1938 {
1939 	struct panthor_heap_pool *pool;
1940 
1941 	mutex_lock(&vm->heaps.lock);
1942 	if (!vm->heaps.pool && create) {
1943 		if (vm->destroyed)
1944 			pool = ERR_PTR(-EINVAL);
1945 		else
1946 			pool = panthor_heap_pool_create(vm->ptdev, vm);
1947 
1948 		if (!IS_ERR(pool))
1949 			vm->heaps.pool = panthor_heap_pool_get(pool);
1950 	} else {
1951 		pool = panthor_heap_pool_get(vm->heaps.pool);
1952 		if (!pool)
1953 			pool = ERR_PTR(-ENOENT);
1954 	}
1955 	mutex_unlock(&vm->heaps.lock);
1956 
1957 	return pool;
1958 }
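
/*
 * Usage sketch (illustration only, compiled out): retrieving the heap pool
 * and releasing the reference with panthor_heap_pool_put(), as the
 * kernel-doc above prescribes.
 */
#if 0
static int panthor_heap_pool_usage_example(struct panthor_vm *vm)
{
	struct panthor_heap_pool *pool;

	pool = panthor_vm_get_heap_pool(vm, true);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	/* ... create/destroy heap contexts through the pool ... */

	panthor_heap_pool_put(pool);
	return 0;
}
#endif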
1959 
1960 /**
1961  * panthor_vm_heaps_sizes() - Calculate the size of all heap chunks across
1962  * all heap pools of all the VMs owned by a file
1963  * @pfile: File.
1964  * @stats: Memory stats to be updated.
1965  *
1966  * Sum the heap chunk sizes of every heap pool bound to the VMs of @pfile.
1967  * If a VM is currently bound to an AS slot, its size is also accounted as active.
1968  */
1969 void panthor_vm_heaps_sizes(struct panthor_file *pfile, struct drm_memory_stats *stats)
1970 {
1971 	struct panthor_vm *vm;
1972 	unsigned long i;
1973 
1974 	if (!pfile->vms)
1975 		return;
1976 
1977 	xa_lock(&pfile->vms->xa);
1978 	xa_for_each(&pfile->vms->xa, i, vm) {
1979 		size_t size = panthor_heap_pool_size(vm->heaps.pool);
1980 		stats->resident += size;
1981 		if (vm->as.id >= 0)
1982 			stats->active += size;
1983 	}
1984 	xa_unlock(&pfile->vms->xa);
1985 }
1986 
1987 static u64 mair_to_memattr(u64 mair, bool coherent)
1988 {
1989 	u64 memattr = 0;
1990 	u32 i;
1991 
1992 	for (i = 0; i < 8; i++) {
1993 		u8 in_attr = mair >> (8 * i), out_attr;
1994 		u8 outer = in_attr >> 4, inner = in_attr & 0xf;
1995 
1996 		/* For caching to be enabled, the inner and outer caching policies
1997 		 * both have to be write-back. If either of them is write-through
1998 		 * or non-cacheable, we just choose non-cacheable. Device
1999 		 * memory is also translated to non-cacheable.
2000 		 */
2001 		if (!(outer & 3) || !(outer & 4) || !(inner & 4)) {
2002 			out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_NC |
2003 				   AS_MEMATTR_AARCH64_SH_MIDGARD_INNER |
2004 				   AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false);
2005 		} else {
2006 			out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB |
2007 				   AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2);
2008 			/* Use SH_MIDGARD_INNER mode when device isn't coherent,
2009 			 * so SH_IS, which is used when IOMMU_CACHE is set, maps
2010 			 * to Mali's internal-shareable mode. As per the Mali
2011 			 * Spec, inner and outer-shareable modes aren't allowed
2012 			 * for WB memory when coherency is disabled.
2013 			 * Use SH_CPU_INNER mode when coherency is enabled, so
2014 			 * that SH_IS actually maps to the standard definition of
2015 			 * inner-shareable.
2016 			 */
2017 			if (!coherent)
2018 				out_attr |= AS_MEMATTR_AARCH64_SH_MIDGARD_INNER;
2019 			else
2020 				out_attr |= AS_MEMATTR_AARCH64_SH_CPU_INNER;
2021 		}
2022 
2023 		memattr |= (u64)out_attr << (8 * i);
2024 	}
2025 
2026 	return memattr;
2027 }
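
/*
 * Illustration only (compiled out): a standalone check mirroring the test
 * used in mair_to_memattr() above, applied to a single 8-bit MAIR
 * attribute. Caching only survives the translation when both the outer and
 * inner halves encode write-back.
 */
#if 0
static bool mair_attr_stays_cacheable_example(u8 in_attr)
{
	u8 outer = in_attr >> 4, inner = in_attr & 0xf;

	/* Same condition as mair_to_memattr(): anything that is not
	 * write-back on both levels is demoted to non-cacheable.
	 */
	return (outer & 3) && (outer & 4) && (inner & 4);
}
#endif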
2028 
2029 static void panthor_vma_link(struct panthor_vm *vm,
2030 			     struct panthor_vma *vma,
2031 			     struct drm_gpuvm_bo *vm_bo)
2032 {
2033 	struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj);
2034 
2035 	mutex_lock(&bo->base.base.gpuva.lock);
2036 	drm_gpuva_link(&vma->base, vm_bo);
2037 	drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo));
2038 	mutex_unlock(&bo->base.base.gpuva.lock);
2039 }
2040 
2041 static void panthor_vma_unlink(struct panthor_vm *vm,
2042 			       struct panthor_vma *vma)
2043 {
2044 	struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj);
2045 	struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo);
2046 
2047 	mutex_lock(&bo->base.base.gpuva.lock);
2048 	drm_gpuva_unlink(&vma->base);
2049 	mutex_unlock(&bo->base.base.gpuva.lock);
2050 
2051 	/* drm_gpuva_unlink() releases the vm_bo, but we manually retained it
2052 	 * when entering this function, so we can implement deferred VMA
2053 	 * destruction. Re-assign it here.
2054 	 */
2055 	vma->base.vm_bo = vm_bo;
2056 	list_add_tail(&vma->node, &vm->op_ctx->returned_vmas);
2057 }
2058 
2059 static void panthor_vma_init(struct panthor_vma *vma, u32 flags)
2060 {
2061 	INIT_LIST_HEAD(&vma->node);
2062 	vma->flags = flags;
2063 }
2064 
2065 #define PANTHOR_VM_MAP_FLAGS \
2066 	(DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \
2067 	 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \
2068 	 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED)
2069 
2070 static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv)
2071 {
2072 	struct panthor_vm *vm = priv;
2073 	struct panthor_vm_op_ctx *op_ctx = vm->op_ctx;
2074 	struct panthor_vma *vma = panthor_vm_op_ctx_get_vma(op_ctx);
2075 	int ret;
2076 
2077 	if (!vma)
2078 		return -EINVAL;
2079 
2080 	panthor_vma_init(vma, op_ctx->flags & PANTHOR_VM_MAP_FLAGS);
2081 
2082 	ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags),
2083 				   op_ctx->map.sgt, op->map.gem.offset,
2084 				   op->map.va.range);
2085 	if (ret)
2086 		return ret;
2087 
2088 	/* The ref is owned by the mapping now. Clear the vm_bo field so we don't
2089 	 * release the pinning/obj ref behind GPUVA's back.
2090 	 */
2091 	drm_gpuva_map(&vm->base, &vma->base, &op->map);
2092 	panthor_vma_link(vm, vma, op_ctx->map.vm_bo);
2093 	op_ctx->map.vm_bo = NULL;
2094 	return 0;
2095 }
2096 
2097 static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op,
2098 				       void *priv)
2099 {
2100 	struct panthor_vma *unmap_vma = container_of(op->remap.unmap->va, struct panthor_vma, base);
2101 	struct panthor_vm *vm = priv;
2102 	struct panthor_vm_op_ctx *op_ctx = vm->op_ctx;
2103 	struct panthor_vma *prev_vma = NULL, *next_vma = NULL;
2104 	u64 unmap_start, unmap_range;
2105 	int ret;
2106 
2107 	drm_gpuva_op_remap_to_unmap_range(&op->remap, &unmap_start, &unmap_range);
2108 	ret = panthor_vm_unmap_pages(vm, unmap_start, unmap_range);
2109 	if (ret)
2110 		return ret;
2111 
2112 	if (op->remap.prev) {
2113 		prev_vma = panthor_vm_op_ctx_get_vma(op_ctx);
2114 		panthor_vma_init(prev_vma, unmap_vma->flags);
2115 	}
2116 
2117 	if (op->remap.next) {
2118 		next_vma = panthor_vm_op_ctx_get_vma(op_ctx);
2119 		panthor_vma_init(next_vma, unmap_vma->flags);
2120 	}
2121 
2122 	drm_gpuva_remap(prev_vma ? &prev_vma->base : NULL,
2123 			next_vma ? &next_vma->base : NULL,
2124 			&op->remap);
2125 
2126 	if (prev_vma) {
2127 		/* panthor_vma_link() transfers the vm_bo ownership to
2128 		 * the VMA object. Since the vm_bo we're passing is still
2129 		 * owned by the old mapping which will be released when this
2130 		 * mapping is destroyed, we need to grab a ref here.
2131 		 */
2132 		panthor_vma_link(vm, prev_vma,
2133 				 drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo));
2134 	}
2135 
2136 	if (next_vma) {
2137 		panthor_vma_link(vm, next_vma,
2138 				 drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo));
2139 	}
2140 
2141 	panthor_vma_unlink(vm, unmap_vma);
2142 	return 0;
2143 }
2144 
2145 static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op,
2146 				       void *priv)
2147 {
2148 	struct panthor_vma *unmap_vma = container_of(op->unmap.va, struct panthor_vma, base);
2149 	struct panthor_vm *vm = priv;
2150 	int ret;
2151 
2152 	ret = panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr,
2153 				     unmap_vma->base.va.range);
2154 	if (drm_WARN_ON(&vm->ptdev->base, ret))
2155 		return ret;
2156 
2157 	drm_gpuva_unmap(&op->unmap);
2158 	panthor_vma_unlink(vm, unmap_vma);
2159 	return 0;
2160 }
2161 
2162 static const struct drm_gpuvm_ops panthor_gpuvm_ops = {
2163 	.vm_free = panthor_vm_free,
2164 	.sm_step_map = panthor_gpuva_sm_step_map,
2165 	.sm_step_remap = panthor_gpuva_sm_step_remap,
2166 	.sm_step_unmap = panthor_gpuva_sm_step_unmap,
2167 };
2168 
2169 /**
2170  * panthor_vm_resv() - Get the dma_resv object attached to a VM.
2171  * @vm: VM to get the dma_resv of.
2172  *
2173  * Return: A dma_resv object.
2174  */
2175 struct dma_resv *panthor_vm_resv(struct panthor_vm *vm)
2176 {
2177 	return drm_gpuvm_resv(&vm->base);
2178 }
2179 
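/**
 * panthor_vm_root_gem() - Get the dummy GEM backing the VM resv
 * @vm: VM to get the root GEM of. Can be NULL.
 *
 * Return: The GEM object whose resv is used as the VM resv, or NULL if
 * @vm is NULL.
 */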
2180 struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm)
2181 {
2182 	if (!vm)
2183 		return NULL;
2184 
2185 	return vm->base.r_obj;
2186 }
2187 
2188 static int
2189 panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
2190 		   bool flag_vm_unusable_on_failure)
2191 {
2192 	u32 op_type = op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK;
2193 	int ret;
2194 
2195 	if (op_type == DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY)
2196 		return 0;
2197 
2198 	mutex_lock(&vm->op_lock);
2199 	vm->op_ctx = op;
2200 	switch (op_type) {
2201 	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: {
2202 		const struct drm_gpuvm_map_req map_req = {
2203 			.map.va.addr = op->va.addr,
2204 			.map.va.range = op->va.range,
2205 			.map.gem.obj = op->map.vm_bo->obj,
2206 			.map.gem.offset = op->map.bo_offset,
2207 		};
2208 
2209 		if (vm->unusable) {
2210 			ret = -EINVAL;
2211 			break;
2212 		}
2213 
2214 		ret = drm_gpuvm_sm_map(&vm->base, vm, &map_req);
2215 		break;
2216 	}
2217 
2218 	case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
2219 		ret = drm_gpuvm_sm_unmap(&vm->base, vm, op->va.addr, op->va.range);
2220 		break;
2221 
2222 	default:
2223 		ret = -EINVAL;
2224 		break;
2225 	}
2226 
2227 	if (ret && flag_vm_unusable_on_failure)
2228 		vm->unusable = true;
2229 
2230 	vm->op_ctx = NULL;
2231 	mutex_unlock(&vm->op_lock);
2232 
2233 	return ret;
2234 }
2235 
2236 static struct dma_fence *
2237 panthor_vm_bind_run_job(struct drm_sched_job *sched_job)
2238 {
2239 	struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base);
2240 	bool cookie;
2241 	int ret;
2242 
2243 	/* Not only do we report an error that gets propagated to the
2244 	 * drm_sched finished fence, but we also flag the VM as unusable, because
2245 	 * a failure in an async VM_BIND leaves the VM in an inconsistent state.
2246 	 * The VM needs to be destroyed and recreated.
2247 	 */
2248 	cookie = dma_fence_begin_signalling();
2249 	ret = panthor_vm_exec_op(job->vm, &job->ctx, true);
2250 	dma_fence_end_signalling(cookie);
2251 
2252 	return ret ? ERR_PTR(ret) : NULL;
2253 }
2254 
2255 static void panthor_vm_bind_job_release(struct kref *kref)
2256 {
2257 	struct panthor_vm_bind_job *job = container_of(kref, struct panthor_vm_bind_job, refcount);
2258 
2259 	if (job->base.s_fence)
2260 		drm_sched_job_cleanup(&job->base);
2261 
2262 	panthor_vm_cleanup_op_ctx(&job->ctx, job->vm);
2263 	panthor_vm_put(job->vm);
2264 	kfree(job);
2265 }
2266 
2267 /**
2268  * panthor_vm_bind_job_put() - Release a VM_BIND job reference
2269  * @sched_job: Job to release the reference on.
2270  */
2271 void panthor_vm_bind_job_put(struct drm_sched_job *sched_job)
2272 {
2273 	struct panthor_vm_bind_job *job =
2274 		container_of(sched_job, struct panthor_vm_bind_job, base);
2275 
2276 	if (sched_job)
2277 		kref_put(&job->refcount, panthor_vm_bind_job_release);
2278 }
2279 
2280 static void
2281 panthor_vm_bind_free_job(struct drm_sched_job *sched_job)
2282 {
2283 	struct panthor_vm_bind_job *job =
2284 		container_of(sched_job, struct panthor_vm_bind_job, base);
2285 
2286 	drm_sched_job_cleanup(sched_job);
2287 
2288 	/* Do the heavy cleanups asynchronously, so we're out of the
2289 	 * dma-signaling path and can acquire dma-resv locks safely.
2290 	 */
2291 	queue_work(panthor_cleanup_wq, &job->cleanup_op_ctx_work);
2292 }
2293 
2294 static enum drm_gpu_sched_stat
2295 panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job)
2296 {
2297 	WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!");
2298 	return DRM_GPU_SCHED_STAT_RESET;
2299 }
2300 
2301 static const struct drm_sched_backend_ops panthor_vm_bind_ops = {
2302 	.run_job = panthor_vm_bind_run_job,
2303 	.free_job = panthor_vm_bind_free_job,
2304 	.timedout_job = panthor_vm_bind_timedout_job,
2305 };
2306 
2307 /**
2308  * panthor_vm_create() - Create a VM
2309  * @ptdev: Device.
2310  * @for_mcu: True if this is the FW MCU VM.
2311  * @kernel_va_start: Start of the range reserved for kernel BO mapping.
2312  * @kernel_va_size: Size of the range reserved for kernel BO mapping.
2313  * @auto_kernel_va_start: Start of the auto-VA kernel range.
2314  * @auto_kernel_va_size: Size of the auto-VA kernel range.
2315  *
2316  * Return: A valid pointer on success, an ERR_PTR() otherwise.
2317  */
2318 struct panthor_vm *
2319 panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
2320 		  u64 kernel_va_start, u64 kernel_va_size,
2321 		  u64 auto_kernel_va_start, u64 auto_kernel_va_size)
2322 {
2323 	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
2324 	u32 pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features);
2325 	u64 full_va_range = 1ull << va_bits;
2326 	struct drm_gem_object *dummy_gem;
2327 	struct drm_gpu_scheduler *sched;
2328 	const struct drm_sched_init_args sched_args = {
2329 		.ops = &panthor_vm_bind_ops,
2330 		.submit_wq = ptdev->mmu->vm.wq,
2331 		.num_rqs = 1,
2332 		.credit_limit = 1,
2333 		/* Bind operations are synchronous for now, no timeout needed. */
2334 		.timeout = MAX_SCHEDULE_TIMEOUT,
2335 		.name = "panthor-vm-bind",
2336 		.dev = ptdev->base.dev,
2337 	};
2338 	struct io_pgtable_cfg pgtbl_cfg;
2339 	u64 mair, min_va, va_range;
2340 	struct panthor_vm *vm;
2341 	int ret;
2342 
2343 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
2344 	if (!vm)
2345 		return ERR_PTR(-ENOMEM);
2346 
2347 	/* We allocate a dummy GEM for the VM. */
2348 	dummy_gem = drm_gpuvm_resv_object_alloc(&ptdev->base);
2349 	if (!dummy_gem) {
2350 		ret = -ENOMEM;
2351 		goto err_free_vm;
2352 	}
2353 
2354 	mutex_init(&vm->heaps.lock);
2355 	vm->for_mcu = for_mcu;
2356 	vm->ptdev = ptdev;
2357 	mutex_init(&vm->op_lock);
2358 
2359 	if (for_mcu) {
2360 		/* The CSF MCU is a Cortex-M7 and can only address 4G. */
2361 		min_va = 0;
2362 		va_range = SZ_4G;
2363 	} else {
2364 		min_va = 0;
2365 		va_range = full_va_range;
2366 	}
2367 
2368 	mutex_init(&vm->mm_lock);
2369 	drm_mm_init(&vm->mm, kernel_va_start, kernel_va_size);
2370 	vm->kernel_auto_va.start = auto_kernel_va_start;
2371 	vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size - 1;
2372 
2373 	INIT_LIST_HEAD(&vm->node);
2374 	INIT_LIST_HEAD(&vm->as.lru_node);
2375 	vm->as.id = -1;
2376 	refcount_set(&vm->as.active_cnt, 0);
2377 
2378 	pgtbl_cfg = (struct io_pgtable_cfg) {
2379 		.pgsize_bitmap	= SZ_4K | SZ_2M,
2380 		.ias		= va_bits,
2381 		.oas		= pa_bits,
2382 		.coherent_walk	= ptdev->coherent,
2383 		.tlb		= &mmu_tlb_ops,
2384 		.iommu_dev	= ptdev->base.dev,
2385 		.alloc		= alloc_pt,
2386 		.free		= free_pt,
2387 	};
2388 
2389 	vm->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pgtbl_cfg, vm);
2390 	if (!vm->pgtbl_ops) {
2391 		ret = -EINVAL;
2392 		goto err_mm_takedown;
2393 	}
2394 
2395 	ret = drm_sched_init(&vm->sched, &sched_args);
2396 	if (ret)
2397 		goto err_free_io_pgtable;
2398 
2399 	sched = &vm->sched;
2400 	ret = drm_sched_entity_init(&vm->entity, 0, &sched, 1, NULL);
2401 	if (ret)
2402 		goto err_sched_fini;
2403 
2404 	mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair;
2405 	vm->memattr = mair_to_memattr(mair, ptdev->coherent);
2406 
2407 	mutex_lock(&ptdev->mmu->vm.lock);
2408 	list_add_tail(&vm->node, &ptdev->mmu->vm.list);
2409 
2410 	/* If a reset is in progress, stop the scheduler. */
2411 	if (ptdev->mmu->vm.reset_in_progress)
2412 		panthor_vm_stop(vm);
2413 	mutex_unlock(&ptdev->mmu->vm.lock);
2414 
2415 	/* We intentionally leave the reserved range at zero, because we want kernel VMAs
2416 	 * to be handled the same way user VMAs are.
2417 	 */
2418 	drm_gpuvm_init(&vm->base, for_mcu ? "panthor-MCU-VM" : "panthor-GPU-VM",
2419 		       DRM_GPUVM_RESV_PROTECTED | DRM_GPUVM_IMMEDIATE_MODE,
2420 		       &ptdev->base, dummy_gem, min_va, va_range, 0, 0,
2421 		       &panthor_gpuvm_ops);
2422 	drm_gem_object_put(dummy_gem);
2423 	return vm;
2424 
2425 err_sched_fini:
2426 	drm_sched_fini(&vm->sched);
2427 
2428 err_free_io_pgtable:
2429 	free_io_pgtable_ops(vm->pgtbl_ops);
2430 
2431 err_mm_takedown:
2432 	drm_mm_takedown(&vm->mm);
2433 	drm_gem_object_put(dummy_gem);
2434 
2435 err_free_vm:
2436 	kfree(vm);
2437 	return ERR_PTR(ret);
2438 }
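
/*
 * Usage sketch (illustration only, compiled out): creating a regular
 * (non-MCU) VM. The VA split values are arbitrary assumptions picked for
 * the example; the real driver derives them from the GPU VA size and the
 * uAPI-visible kernel VA range.
 */
#if 0
static struct panthor_vm *panthor_vm_create_example(struct panthor_device *ptdev)
{
	u64 kernel_va_start = SZ_4G;	/* assumption */
	u64 kernel_va_size = SZ_4G;	/* assumption */

	/* Let the whole kernel range be auto-allocated in this example. */
	return panthor_vm_create(ptdev, false,
				 kernel_va_start, kernel_va_size,
				 kernel_va_start, kernel_va_size);
}
#endif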
2439 
2440 static int
2441 panthor_vm_bind_prepare_op_ctx(struct drm_file *file,
2442 			       struct panthor_vm *vm,
2443 			       const struct drm_panthor_vm_bind_op *op,
2444 			       struct panthor_vm_op_ctx *op_ctx)
2445 {
2446 	ssize_t vm_pgsz = panthor_vm_page_size(vm);
2447 	struct drm_gem_object *gem;
2448 	int ret;
2449 
2450 	/* va, size and bo_offset must all be aligned on the VM page size. */
2451 	if (!IS_ALIGNED(op->va | op->size | op->bo_offset, vm_pgsz))
2452 		return -EINVAL;
2453 
2454 	switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) {
2455 	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP:
2456 		gem = drm_gem_object_lookup(file, op->bo_handle);
2457 		ret = panthor_vm_prepare_map_op_ctx(op_ctx, vm,
2458 						    gem ? to_panthor_bo(gem) : NULL,
2459 						    op->bo_offset,
2460 						    op->size,
2461 						    op->va,
2462 						    op->flags);
2463 		drm_gem_object_put(gem);
2464 		return ret;
2465 
2466 	case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
2467 		if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK)
2468 			return -EINVAL;
2469 
2470 		if (op->bo_handle || op->bo_offset)
2471 			return -EINVAL;
2472 
2473 		return panthor_vm_prepare_unmap_op_ctx(op_ctx, vm, op->va, op->size);
2474 
2475 	case DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY:
2476 		if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK)
2477 			return -EINVAL;
2478 
2479 		if (op->bo_handle || op->bo_offset)
2480 			return -EINVAL;
2481 
2482 		if (op->va || op->size)
2483 			return -EINVAL;
2484 
2485 		if (!op->syncs.count)
2486 			return -EINVAL;
2487 
2488 		panthor_vm_prepare_sync_only_op_ctx(op_ctx, vm);
2489 		return 0;
2490 
2491 	default:
2492 		return -EINVAL;
2493 	}
2494 }
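
/*
 * Illustration only (compiled out): a drm_panthor_vm_bind_op describing a
 * read-only map, shaped to pass the checks in
 * panthor_vm_bind_prepare_op_ctx() above. The handle and addresses are
 * placeholder values.
 */
#if 0
static void panthor_vm_bind_op_example(void)
{
	struct drm_panthor_vm_bind_op op = {
		.flags = DRM_PANTHOR_VM_BIND_OP_TYPE_MAP |
			 DRM_PANTHOR_VM_BIND_OP_MAP_READONLY,
		.bo_handle = 1,		/* placeholder GEM handle */
		.bo_offset = 0,
		.va = 0x100000,		/* page-aligned, placeholder */
		.size = SZ_2M,		/* page-aligned */
	};

	/* op.syncs can stay empty for a synchronous bind
	 * (see panthor_vm_bind_exec_sync_op()).
	 */
	(void)op;
}
#endif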
2495 
2496 static void panthor_vm_bind_job_cleanup_op_ctx_work(struct work_struct *work)
2497 {
2498 	struct panthor_vm_bind_job *job =
2499 		container_of(work, struct panthor_vm_bind_job, cleanup_op_ctx_work);
2500 
2501 	panthor_vm_bind_job_put(&job->base);
2502 }
2503 
2504 /**
2505  * panthor_vm_bind_job_create() - Create a VM_BIND job
2506  * @file: File.
2507  * @vm: VM targeted by the VM_BIND job.
2508  * @op: VM operation data.
2509  *
2510  * Return: A valid pointer on success, an ERR_PTR() otherwise.
2511  */
2512 struct drm_sched_job *
2513 panthor_vm_bind_job_create(struct drm_file *file,
2514 			   struct panthor_vm *vm,
2515 			   const struct drm_panthor_vm_bind_op *op)
2516 {
2517 	struct panthor_vm_bind_job *job;
2518 	int ret;
2519 
2520 	if (!vm)
2521 		return ERR_PTR(-EINVAL);
2522 
2523 	if (vm->destroyed || vm->unusable)
2524 		return ERR_PTR(-EINVAL);
2525 
2526 	job = kzalloc(sizeof(*job), GFP_KERNEL);
2527 	if (!job)
2528 		return ERR_PTR(-ENOMEM);
2529 
2530 	ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &job->ctx);
2531 	if (ret) {
2532 		kfree(job);
2533 		return ERR_PTR(ret);
2534 	}
2535 
2536 	INIT_WORK(&job->cleanup_op_ctx_work, panthor_vm_bind_job_cleanup_op_ctx_work);
2537 	kref_init(&job->refcount);
2538 	job->vm = panthor_vm_get(vm);
2539 
2540 	ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm, file->client_id);
2541 	if (ret)
2542 		goto err_put_job;
2543 
2544 	return &job->base;
2545 
2546 err_put_job:
2547 	panthor_vm_bind_job_put(&job->base);
2548 	return ERR_PTR(ret);
2549 }
2550 
2551 /**
2552  * panthor_vm_bind_job_prepare_resvs() - Prepare VM_BIND job dma_resvs
2553  * @exec: The locking/preparation context.
2554  * @sched_job: The job to prepare resvs on.
2555  *
2556  * Locks and prepare the VM resv.
2557  *
2558  * If this is a map operation, locks and prepares the GEM resv.
2559  *
2560  * Return: 0 on success, a negative error code otherwise.
2561  */
2562 int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec,
2563 				      struct drm_sched_job *sched_job)
2564 {
2565 	struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base);
2566 	int ret;
2567 
2568 	/* Acquire the VM lock and reserve a slot for this VM bind job. */
2569 	ret = drm_gpuvm_prepare_vm(&job->vm->base, exec, 1);
2570 	if (ret)
2571 		return ret;
2572 
2573 	if (job->ctx.map.vm_bo) {
2574 		/* Lock/prepare the GEM being mapped. */
2575 		ret = drm_exec_prepare_obj(exec, job->ctx.map.vm_bo->obj, 1);
2576 		if (ret)
2577 			return ret;
2578 	}
2579 
2580 	return 0;
2581 }
2582 
2583 /**
2584  * panthor_vm_bind_job_update_resvs() - Update the resv objects touched by a job
2585  * @exec: drm_exec context.
2586  * @sched_job: Job to update the resvs on.
2587  */
2588 void panthor_vm_bind_job_update_resvs(struct drm_exec *exec,
2589 				      struct drm_sched_job *sched_job)
2590 {
2591 	struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base);
2592 
2593 	/* Explicit sync => we just register our job's finished fence with BOOKKEEP usage. */
2594 	drm_gpuvm_resv_add_fence(&job->vm->base, exec,
2595 				 &sched_job->s_fence->finished,
2596 				 DMA_RESV_USAGE_BOOKKEEP,
2597 				 DMA_RESV_USAGE_BOOKKEEP);
2598 }
2599 
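/**
 * panthor_vm_update_resvs() - Add a fence to all resvs touched by a GPU job
 * @vm: VM targeted by the job.
 * @exec: drm_exec context used to lock/prepare the resvs.
 * @fence: Fence to add.
 * @private_usage: Usage flags for BOs sharing the VM resv.
 * @extobj_usage: Usage flags for external (non VM-resv) BOs.
 */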
2600 void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec,
2601 			     struct dma_fence *fence,
2602 			     enum dma_resv_usage private_usage,
2603 			     enum dma_resv_usage extobj_usage)
2604 {
2605 	drm_gpuvm_resv_add_fence(&vm->base, exec, fence, private_usage, extobj_usage);
2606 }
2607 
2608 /**
2609  * panthor_vm_bind_exec_sync_op() - Execute a VM_BIND operation synchronously.
2610  * @file: File.
2611  * @vm: VM targeted by the VM operation.
2612  * @op: Data describing the VM operation.
2613  *
2614  * Return: 0 on success, a negative error code otherwise.
2615  */
2616 int panthor_vm_bind_exec_sync_op(struct drm_file *file,
2617 				 struct panthor_vm *vm,
2618 				 struct drm_panthor_vm_bind_op *op)
2619 {
2620 	struct panthor_vm_op_ctx op_ctx;
2621 	int ret;
2622 
2623 	/* No sync objects allowed on synchronous operations. */
2624 	if (op->syncs.count)
2625 		return -EINVAL;
2626 
2627 	if (!op->size)
2628 		return 0;
2629 
2630 	ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &op_ctx);
2631 	if (ret)
2632 		return ret;
2633 
2634 	ret = panthor_vm_exec_op(vm, &op_ctx, false);
2635 	panthor_vm_cleanup_op_ctx(&op_ctx, vm);
2636 
2637 	return ret;
2638 }
2639 
2640 /**
2641  * panthor_vm_map_bo_range() - Map a GEM object range to a VM
2642  * @vm: VM to map the GEM to.
2643  * @bo: GEM object to map.
2644  * @offset: Offset in the GEM object.
2645  * @size: Size to map.
2646  * @va: Virtual address to map the object to.
2647  * @flags: Combination of drm_panthor_vm_bind_op_flags flags.
2648  * Only map-related flags are valid.
2649  *
2650  * Internal use only. For userspace requests, use
2651  * panthor_vm_bind_exec_sync_op() instead.
2652  *
2653  * Return: 0 on success, a negative error code otherwise.
2654  */
2655 int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo,
2656 			    u64 offset, u64 size, u64 va, u32 flags)
2657 {
2658 	struct panthor_vm_op_ctx op_ctx;
2659 	int ret;
2660 
2661 	ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, size, va, flags);
2662 	if (ret)
2663 		return ret;
2664 
2665 	ret = panthor_vm_exec_op(vm, &op_ctx, false);
2666 	panthor_vm_cleanup_op_ctx(&op_ctx, vm);
2667 
2668 	return ret;
2669 }
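
/*
 * Usage sketch (illustration only, compiled out): pairing
 * panthor_vm_map_bo_range() with panthor_vm_unmap_range() for a
 * kernel-internal mapping. The VA, size and flags are placeholder values.
 */
#if 0
static int panthor_vm_kernel_map_example(struct panthor_vm *vm,
					 struct panthor_gem_object *bo,
					 u64 va, u64 size)
{
	int ret;

	ret = panthor_vm_map_bo_range(vm, bo, 0, size, va, 0);
	if (ret)
		return ret;

	/* ... GPU accesses [va, va + size) ... */

	return panthor_vm_unmap_range(vm, va, size);
}
#endif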
2670 
2671 /**
2672  * panthor_vm_unmap_range() - Unmap a portion of the VA space
2673  * @vm: VM to unmap the region from.
2674  * @va: Virtual address to unmap. Must be 4k aligned.
2675  * @size: Size of the region to unmap. Must be 4k aligned.
2676  *
2677  * Internal use only. For userspace requests, use
2678  * panthor_vm_bind_exec_sync_op() instead.
2679  *
2680  * Return: 0 on success, a negative error code otherwise.
2681  */
2682 int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size)
2683 {
2684 	struct panthor_vm_op_ctx op_ctx;
2685 	int ret;
2686 
2687 	ret = panthor_vm_prepare_unmap_op_ctx(&op_ctx, vm, va, size);
2688 	if (ret)
2689 		return ret;
2690 
2691 	ret = panthor_vm_exec_op(vm, &op_ctx, false);
2692 	panthor_vm_cleanup_op_ctx(&op_ctx, vm);
2693 
2694 	return ret;
2695 }
2696 
2697 /**
2698  * panthor_vm_prepare_mapped_bos_resvs() - Prepare resvs on VM BOs.
2699  * @exec: Locking/preparation context.
2700  * @vm: VM targeted by the GPU job.
2701  * @slot_count: Number of slots to reserve.
2702  *
2703  * GPU jobs assume all BOs bound to the VM at the time the job is submitted
2704  * are available when the job is executed. In order to guarantee that, we
2705  * need to reserve a slot on all BOs mapped to a VM and update this slot with
2706  * the job fence after its submission.
2707  *
2708  * Return: 0 on success, a negative error code otherwise.
2709  */
2710 int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm *vm,
2711 					u32 slot_count)
2712 {
2713 	int ret;
2714 
2715 	/* Acquire the VM lock and reserve a slot for this GPU job. */
2716 	ret = drm_gpuvm_prepare_vm(&vm->base, exec, slot_count);
2717 	if (ret)
2718 		return ret;
2719 
2720 	return drm_gpuvm_prepare_objects(&vm->base, exec, slot_count);
2721 }
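
/*
 * Usage sketch (illustration only, compiled out): how a GPU-job submission
 * path could drive the helper above from a drm_exec locking loop. The
 * surrounding submission logic is elided; only the lock/retry pattern is
 * shown.
 */
#if 0
static int panthor_job_lock_bos_example(struct panthor_vm *vm, u32 slot_count)
{
	struct drm_exec exec;
	int ret;

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
	drm_exec_until_all_locked(&exec) {
		ret = panthor_vm_prepare_mapped_bos_resvs(&exec, vm, slot_count);
		drm_exec_retry_on_contention(&exec);
		if (ret)
			break;
	}

	/* ... submit the job, then call panthor_vm_update_resvs() ... */

	drm_exec_fini(&exec);
	return ret;
}
#endif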
2722 
2723 /**
2724  * panthor_mmu_unplug() - Unplug the MMU logic
2725  * @ptdev: Device.
2726  *
2727  * No access to the MMU regs should be done after this function is called.
2728  * We suspend the IRQ and disable all VMs to guarantee that.
2729  */
2730 void panthor_mmu_unplug(struct panthor_device *ptdev)
2731 {
2732 	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
2733 		panthor_mmu_irq_suspend(&ptdev->mmu->irq);
2734 
2735 	mutex_lock(&ptdev->mmu->as.slots_lock);
2736 	for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) {
2737 		struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm;
2738 
2739 		if (vm) {
2740 			drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i));
2741 			panthor_vm_release_as_locked(vm);
2742 		}
2743 	}
2744 	mutex_unlock(&ptdev->mmu->as.slots_lock);
2745 }
2746 
2747 static void panthor_mmu_release_wq(struct drm_device *ddev, void *res)
2748 {
2749 	destroy_workqueue(res);
2750 }
2751 
2752 /**
2753  * panthor_mmu_init() - Initialize the MMU logic.
2754  * @ptdev: Device.
2755  *
2756  * Return: 0 on success, a negative error code otherwise.
2757  */
2758 int panthor_mmu_init(struct panthor_device *ptdev)
2759 {
2760 	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
2761 	struct panthor_mmu *mmu;
2762 	int ret, irq;
2763 
2764 	mmu = drmm_kzalloc(&ptdev->base, sizeof(*mmu), GFP_KERNEL);
2765 	if (!mmu)
2766 		return -ENOMEM;
2767 
2768 	INIT_LIST_HEAD(&mmu->as.lru_list);
2769 
2770 	ret = drmm_mutex_init(&ptdev->base, &mmu->as.slots_lock);
2771 	if (ret)
2772 		return ret;
2773 
2774 	INIT_LIST_HEAD(&mmu->vm.list);
2775 	ret = drmm_mutex_init(&ptdev->base, &mmu->vm.lock);
2776 	if (ret)
2777 		return ret;
2778 
2779 	ptdev->mmu = mmu;
2780 
2781 	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "mmu");
2782 	if (irq <= 0)
2783 		return -ENODEV;
2784 
2785 	ret = panthor_request_mmu_irq(ptdev, &mmu->irq, irq,
2786 				      panthor_mmu_fault_mask(ptdev, ~0));
2787 	if (ret)
2788 		return ret;
2789 
2790 	mmu->vm.wq = alloc_workqueue("panthor-vm-bind", WQ_UNBOUND, 0);
2791 	if (!mmu->vm.wq)
2792 		return -ENOMEM;
2793 
2794 	/* On 32-bit kernels, the VA space is limited by the io_pgtable_ops abstraction,
2795 	 * which passes iova as an unsigned long. Patch the mmu_features to reflect this
2796 	 * limitation.
2797 	 */
2798 	if (va_bits > BITS_PER_LONG) {
2799 		ptdev->gpu_info.mmu_features &= ~GENMASK(7, 0);
2800 		ptdev->gpu_info.mmu_features |= BITS_PER_LONG;
2801 	}
2802 
2803 	return drmm_add_action_or_reset(&ptdev->base, panthor_mmu_release_wq, mmu->vm.wq);
2804 }
2805 
2806 #ifdef CONFIG_DEBUG_FS
2807 static int show_vm_gpuvas(struct panthor_vm *vm, struct seq_file *m)
2808 {
2809 	int ret;
2810 
2811 	mutex_lock(&vm->op_lock);
2812 	ret = drm_debugfs_gpuva_info(m, &vm->base);
2813 	mutex_unlock(&vm->op_lock);
2814 
2815 	return ret;
2816 }
2817 
2818 static int show_each_vm(struct seq_file *m, void *arg)
2819 {
2820 	struct drm_info_node *node = (struct drm_info_node *)m->private;
2821 	struct drm_device *ddev = node->minor->dev;
2822 	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
2823 	int (*show)(struct panthor_vm *, struct seq_file *) = node->info_ent->data;
2824 	struct panthor_vm *vm;
2825 	int ret = 0;
2826 
2827 	mutex_lock(&ptdev->mmu->vm.lock);
2828 	list_for_each_entry(vm, &ptdev->mmu->vm.list, node) {
2829 		ret = show(vm, m);
2830 		if (ret < 0)
2831 			break;
2832 
2833 		seq_puts(m, "\n");
2834 	}
2835 	mutex_unlock(&ptdev->mmu->vm.lock);
2836 
2837 	return ret;
2838 }
2839 
2840 static struct drm_info_list panthor_mmu_debugfs_list[] = {
2841 	DRM_DEBUGFS_GPUVA_INFO(show_each_vm, show_vm_gpuvas),
2842 };
2843 
2844 /**
2845  * panthor_mmu_debugfs_init() - Initialize MMU debugfs entries
2846  * @minor: Minor.
2847  */
2848 void panthor_mmu_debugfs_init(struct drm_minor *minor)
2849 {
2850 	drm_debugfs_create_files(panthor_mmu_debugfs_list,
2851 				 ARRAY_SIZE(panthor_mmu_debugfs_list),
2852 				 minor->debugfs_root, minor);
2853 }
2854 #endif /* CONFIG_DEBUG_FS */
2855 
2856 /**
2857  * panthor_mmu_pt_cache_init() - Initialize the page table cache.
2858  *
2859  * Return: 0 on success, a negative error code otherwise.
2860  */
2861 int panthor_mmu_pt_cache_init(void)
2862 {
2863 	pt_cache = kmem_cache_create("panthor-mmu-pt", SZ_4K, SZ_4K, 0, NULL);
2864 	if (!pt_cache)
2865 		return -ENOMEM;
2866 
2867 	return 0;
2868 }
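
/*
 * Usage sketch (illustration only, compiled out): the page-table cache is
 * global, so it is expected to be created once at module load and destroyed
 * at module unload. The example function names are assumptions; the real
 * hook-up lives in the driver's module init/exit code.
 */
#if 0
static int __init panthor_module_init_example(void)
{
	return panthor_mmu_pt_cache_init();
}

static void __exit panthor_module_exit_example(void)
{
	panthor_mmu_pt_cache_fini();
}
#endif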
2869 
2870 /**
2871  * panthor_mmu_pt_cache_fini() - Destroy the page table cache.
2872  */
2873 void panthor_mmu_pt_cache_fini(void)
2874 {
2875 	kmem_cache_destroy(pt_cache);
2876 }
2877