xref: /linux/drivers/gpu/drm/panfrost/panfrost_mmu.c (revision f6e8dc9edf963dbc99085e54f6ced6da9daa6100)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
3 
4 #include <drm/panfrost_drm.h>
5 #include <drm/drm_print.h>
6 
7 #include <linux/atomic.h>
8 #include <linux/bitfield.h>
9 #include <linux/delay.h>
10 #include <linux/dma-mapping.h>
11 #include <linux/interrupt.h>
12 #include <linux/io.h>
13 #include <linux/iopoll.h>
14 #include <linux/io-pgtable.h>
15 #include <linux/iommu.h>
16 #include <linux/platform_device.h>
17 #include <linux/pm_runtime.h>
18 #include <linux/shmem_fs.h>
19 #include <linux/sizes.h>
20 
21 #include "panfrost_device.h"
22 #include "panfrost_mmu.h"
23 #include "panfrost_gem.h"
24 #include "panfrost_features.h"
25 #include "panfrost_regs.h"
26 
/*
 * MMIO accessors for the GPU register window mapped at dev->iomem.
 *
 * Every macro argument is parenthesized so that compound expressions passed
 * by a caller (e.g. "&devs[i]" as the device or "base | offset" as the
 * register) expand with the intended operator precedence.
 */
#define mmu_write(dev, reg, data) writel((data), (dev)->iomem + (reg))
#define mmu_read(dev, reg) readl((dev)->iomem + (reg))
29 
30 static u64 mair_to_memattr(u64 mair, bool coherent)
31 {
32 	u64 memattr = 0;
33 	u32 i;
34 
35 	for (i = 0; i < 8; i++) {
36 		u8 in_attr = mair >> (8 * i), out_attr;
37 		u8 outer = in_attr >> 4, inner = in_attr & 0xf;
38 
39 		/* For caching to be enabled, inner and outer caching policy
40 		 * have to be both write-back, if one of them is write-through
41 		 * or non-cacheable, we just choose non-cacheable. Device
42 		 * memory is also translated to non-cacheable.
43 		 */
44 		if (!(outer & 3) || !(outer & 4) || !(inner & 4)) {
45 			out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_NC |
46 				   AS_MEMATTR_AARCH64_SH_MIDGARD_INNER |
47 				   AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false);
48 		} else {
49 			out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB |
50 				   AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2);
51 			/* Use SH_MIDGARD_INNER mode when device isn't coherent,
52 			 * so SH_IS, which is used when IOMMU_CACHE is set, maps
53 			 * to Mali's internal-shareable mode. As per the Mali
54 			 * Spec, inner and outer-shareable modes aren't allowed
55 			 * for WB memory when coherency is disabled.
56 			 * Use SH_CPU_INNER mode when coherency is enabled, so
57 			 * that SH_IS actually maps to the standard definition of
58 			 * inner-shareable.
59 			 */
60 			if (!coherent)
61 				out_attr |= AS_MEMATTR_AARCH64_SH_MIDGARD_INNER;
62 			else
63 				out_attr |= AS_MEMATTR_AARCH64_SH_CPU_INNER;
64 		}
65 
66 		memattr |= (u64)out_attr << (8 * i);
67 	}
68 
69 	return memattr;
70 }
71 
72 static int wait_ready(struct panfrost_device *pfdev, u32 as_nr)
73 {
74 	int ret;
75 	u32 val;
76 
77 	/* Wait for the MMU status to indicate there is no active command, in
78 	 * case one is pending. */
79 	ret = readl_relaxed_poll_timeout_atomic(pfdev->iomem + AS_STATUS(as_nr),
80 		val, !(val & AS_STATUS_AS_ACTIVE), 10, 100000);
81 
82 	if (ret) {
83 		/* The GPU hung, let's trigger a reset */
84 		panfrost_device_schedule_reset(pfdev);
85 		dev_err(pfdev->base.dev, "AS_ACTIVE bit stuck\n");
86 	}
87 
88 	return ret;
89 }
90 
91 static int write_cmd(struct panfrost_device *pfdev, u32 as_nr, u32 cmd)
92 {
93 	int status;
94 
95 	/* write AS_COMMAND when MMU is ready to accept another command */
96 	status = wait_ready(pfdev, as_nr);
97 	if (!status)
98 		mmu_write(pfdev, AS_COMMAND(as_nr), cmd);
99 
100 	return status;
101 }
102 
103 static void lock_region(struct panfrost_device *pfdev, u32 as_nr,
104 			u64 region_start, u64 size)
105 {
106 	u8 region_width;
107 	u64 region;
108 	u64 region_end = region_start + size;
109 
110 	if (!size)
111 		return;
112 
113 	/*
114 	 * The locked region is a naturally aligned power of 2 block encoded as
115 	 * log2 minus(1).
116 	 * Calculate the desired start/end and look for the highest bit which
117 	 * differs. The smallest naturally aligned block must include this bit
118 	 * change, the desired region starts with this bit (and subsequent bits)
119 	 * zeroed and ends with the bit (and subsequent bits) set to one.
120 	 */
121 	region_width = max(fls64(region_start ^ (region_end - 1)),
122 			   const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1;
123 
124 	/*
125 	 * Mask off the low bits of region_start (which would be ignored by
126 	 * the hardware anyway)
127 	 */
128 	region_start &= GENMASK_ULL(63, region_width);
129 
130 	region = region_width | region_start;
131 
132 	/* Lock the region that needs to be updated */
133 	mmu_write(pfdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region));
134 	mmu_write(pfdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region));
135 	write_cmd(pfdev, as_nr, AS_COMMAND_LOCK);
136 }
137 
138 
139 static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr,
140 				      u64 iova, u64 size, u32 op)
141 {
142 	if (as_nr < 0)
143 		return 0;
144 
145 	if (op != AS_COMMAND_UNLOCK)
146 		lock_region(pfdev, as_nr, iova, size);
147 
148 	/* Run the MMU operation */
149 	write_cmd(pfdev, as_nr, op);
150 
151 	/* Wait for the flush to complete */
152 	return wait_ready(pfdev, as_nr);
153 }
154 
155 static int mmu_hw_do_operation(struct panfrost_device *pfdev,
156 			       struct panfrost_mmu *mmu,
157 			       u64 iova, u64 size, u32 op)
158 {
159 	int ret;
160 
161 	spin_lock(&pfdev->as_lock);
162 	ret = mmu_hw_do_operation_locked(pfdev, mmu->as, iova, size, op);
163 	spin_unlock(&pfdev->as_lock);
164 	return ret;
165 }
166 
167 static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
168 {
169 	int as_nr = mmu->as;
170 	u64 transtab = mmu->cfg.transtab;
171 	u64 memattr = mmu->cfg.memattr;
172 	u64 transcfg = mmu->cfg.transcfg;
173 
174 	mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
175 
176 	mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab));
177 	mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab));
178 
179 	/* Need to revisit mem attrs.
180 	 * NC is the default, Mali driver is inner WT.
181 	 */
182 	mmu_write(pfdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr));
183 	mmu_write(pfdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr));
184 
185 	mmu_write(pfdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg));
186 	mmu_write(pfdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg));
187 
188 	write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE);
189 }
190 
191 static void panfrost_mmu_disable(struct panfrost_device *pfdev, u32 as_nr)
192 {
193 	mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
194 
195 	mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0);
196 	mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0);
197 
198 	mmu_write(pfdev, AS_MEMATTR_LO(as_nr), 0);
199 	mmu_write(pfdev, AS_MEMATTR_HI(as_nr), 0);
200 
201 	mmu_write(pfdev, AS_TRANSCFG_LO(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED);
202 	mmu_write(pfdev, AS_TRANSCFG_HI(as_nr), 0);
203 
204 	write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE);
205 }
206 
207 static int mmu_cfg_init_mali_lpae(struct panfrost_mmu *mmu)
208 {
209 	struct io_pgtable_cfg *pgtbl_cfg = &mmu->pgtbl_cfg;
210 
211 	/* TODO: The following fields are duplicated between the MMU and Page
212 	 * Table config structs. Ideally, should be kept in one place.
213 	 */
214 	mmu->cfg.transtab = pgtbl_cfg->arm_mali_lpae_cfg.transtab;
215 	mmu->cfg.memattr = pgtbl_cfg->arm_mali_lpae_cfg.memattr;
216 	mmu->cfg.transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
217 
218 	return 0;
219 }
220 
221 static int mmu_cfg_init_aarch64_4k(struct panfrost_mmu *mmu)
222 {
223 	struct io_pgtable_cfg *pgtbl_cfg = &mmu->pgtbl_cfg;
224 	struct panfrost_device *pfdev = mmu->pfdev;
225 
226 	if (drm_WARN_ON(&pfdev->base, pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
227 				     ~AS_TRANSTAB_AARCH64_4K_ADDR_MASK))
228 		return -EINVAL;
229 
230 	mmu->cfg.transtab = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
231 
232 	mmu->cfg.memattr = mair_to_memattr(pgtbl_cfg->arm_lpae_s1_cfg.mair,
233 					   pgtbl_cfg->coherent_walk);
234 
235 	mmu->cfg.transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
236 			    AS_TRANSCFG_PTW_RA |
237 			    AS_TRANSCFG_ADRMODE_AARCH64_4K |
238 			    AS_TRANSCFG_INA_BITS(55 - pgtbl_cfg->ias);
239 	if (pgtbl_cfg->coherent_walk)
240 		mmu->cfg.transcfg |= AS_TRANSCFG_PTW_SH_OS;
241 
242 	return 0;
243 }
244 
245 static int panfrost_mmu_cfg_init(struct panfrost_mmu *mmu,
246 				 enum io_pgtable_fmt fmt)
247 {
248 	struct panfrost_device *pfdev = mmu->pfdev;
249 
250 	switch (fmt) {
251 	case ARM_64_LPAE_S1:
252 		return mmu_cfg_init_aarch64_4k(mmu);
253 	case ARM_MALI_LPAE:
254 		return mmu_cfg_init_mali_lpae(mmu);
255 	default:
256 		/* This should never happen */
257 		drm_WARN(&pfdev->base, 1, "Invalid pgtable format");
258 		return -EINVAL;
259 	}
260 }
261 
262 int panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
263 {
264 	int as;
265 
266 	spin_lock(&pfdev->as_lock);
267 
268 	as = mmu->as;
269 	if (as >= 0) {
270 		int en = atomic_inc_return(&mmu->as_count);
271 		u32 mask = BIT(as) | BIT(16 + as);
272 
273 		/*
274 		 * AS can be retained by active jobs or a perfcnt context,
275 		 * hence the '+ 1' here.
276 		 */
277 		WARN_ON(en >= (NUM_JOB_SLOTS + 1));
278 
279 		list_move(&mmu->list, &pfdev->as_lru_list);
280 
281 		if (pfdev->as_faulty_mask & mask) {
282 			/* Unhandled pagefault on this AS, the MMU was
283 			 * disabled. We need to re-enable the MMU after
284 			 * clearing+unmasking the AS interrupts.
285 			 */
286 			mmu_write(pfdev, MMU_INT_CLEAR, mask);
287 			mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask);
288 			pfdev->as_faulty_mask &= ~mask;
289 			panfrost_mmu_enable(pfdev, mmu);
290 		}
291 
292 		goto out;
293 	}
294 
295 	/* Check for a free AS */
296 	as = ffz(pfdev->as_alloc_mask);
297 	if (!(BIT(as) & pfdev->features.as_present)) {
298 		struct panfrost_mmu *lru_mmu;
299 
300 		list_for_each_entry_reverse(lru_mmu, &pfdev->as_lru_list, list) {
301 			if (!atomic_read(&lru_mmu->as_count))
302 				break;
303 		}
304 		if (WARN_ON(&lru_mmu->list == &pfdev->as_lru_list)) {
305 			as = -EBUSY;
306 			goto out;
307 		}
308 
309 		list_del_init(&lru_mmu->list);
310 		as = lru_mmu->as;
311 
312 		WARN_ON(as < 0);
313 		lru_mmu->as = -1;
314 	}
315 
316 	/* Assign the free or reclaimed AS to the FD */
317 	mmu->as = as;
318 	set_bit(as, &pfdev->as_alloc_mask);
319 	atomic_set(&mmu->as_count, 1);
320 	list_add(&mmu->list, &pfdev->as_lru_list);
321 
322 	dev_dbg(pfdev->base.dev,
323 		"Assigned AS%d to mmu %p, alloc_mask=%lx",
324 		as, mmu, pfdev->as_alloc_mask);
325 
326 	panfrost_mmu_enable(pfdev, mmu);
327 
328 out:
329 	spin_unlock(&pfdev->as_lock);
330 	return as;
331 }
332 
333 void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
334 {
335 	atomic_dec(&mmu->as_count);
336 	WARN_ON(atomic_read(&mmu->as_count) < 0);
337 }
338 
339 void panfrost_mmu_reset(struct panfrost_device *pfdev)
340 {
341 	struct panfrost_mmu *mmu, *mmu_tmp;
342 
343 	clear_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended);
344 
345 	spin_lock(&pfdev->as_lock);
346 
347 	pfdev->as_alloc_mask = 0;
348 	pfdev->as_faulty_mask = 0;
349 
350 	list_for_each_entry_safe(mmu, mmu_tmp, &pfdev->as_lru_list, list) {
351 		mmu->as = -1;
352 		atomic_set(&mmu->as_count, 0);
353 		list_del_init(&mmu->list);
354 	}
355 
356 	spin_unlock(&pfdev->as_lock);
357 
358 	mmu_write(pfdev, MMU_INT_CLEAR, ~0);
359 	mmu_write(pfdev, MMU_INT_MASK, ~0);
360 }
361 
362 static size_t get_pgsize(u64 addr, size_t size, size_t *count)
363 {
364 	/*
365 	 * io-pgtable only operates on multiple pages within a single table
366 	 * entry, so we need to split at boundaries of the table size, i.e.
367 	 * the next block size up. The distance from address A to the next
368 	 * boundary of block size B is logically B - A % B, but in unsigned
369 	 * two's complement where B is a power of two we get the equivalence
370 	 * B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :)
371 	 */
372 	size_t blk_offset = -addr % SZ_2M;
373 
374 	if (blk_offset || size < SZ_2M) {
375 		*count = min_not_zero(blk_offset, size) / SZ_4K;
376 		return SZ_4K;
377 	}
378 	blk_offset = -addr % SZ_1G ?: SZ_1G;
379 	*count = min(blk_offset, size) / SZ_2M;
380 	return SZ_2M;
381 }
382 
383 static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
384 				     struct panfrost_mmu *mmu,
385 				     u64 iova, u64 size)
386 {
387 	if (mmu->as < 0)
388 		return;
389 
390 	pm_runtime_get_noresume(pfdev->base.dev);
391 
392 	/* Flush the PTs only if we're already awake */
393 	if (pm_runtime_active(pfdev->base.dev))
394 		mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT);
395 
396 	pm_runtime_put_autosuspend(pfdev->base.dev);
397 }
398 
399 static void mmu_unmap_range(struct panfrost_mmu *mmu, u64 iova, size_t len)
400 {
401 	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
402 	size_t pgsize, unmapped_len = 0;
403 	size_t unmapped_page, pgcount;
404 
405 	while (unmapped_len < len) {
406 		pgsize = get_pgsize(iova, len - unmapped_len, &pgcount);
407 
408 		unmapped_page = ops->unmap_pages(ops, iova, pgsize, pgcount, NULL);
409 		WARN_ON(unmapped_page != pgsize * pgcount);
410 
411 		iova += pgsize * pgcount;
412 		unmapped_len += pgsize * pgcount;
413 	}
414 }
415 
416 static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
417 		      u64 iova, int prot, struct sg_table *sgt)
418 {
419 	unsigned int count;
420 	struct scatterlist *sgl;
421 	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
422 	size_t total_mapped = 0;
423 	u64 start_iova = iova;
424 	int ret;
425 
426 	for_each_sgtable_dma_sg(sgt, sgl, count) {
427 		unsigned long paddr = sg_dma_address(sgl);
428 		size_t len = sg_dma_len(sgl);
429 
430 		dev_dbg(pfdev->base.dev,
431 			"map: as=%d, iova=%llx, paddr=%lx, len=%zx",
432 			mmu->as, iova, paddr, len);
433 
434 		while (len) {
435 			size_t pgcount, mapped = 0;
436 			size_t pgsize = get_pgsize(iova | paddr, len, &pgcount);
437 
438 			ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
439 				       GFP_KERNEL, &mapped);
440 			if (ret)
441 				goto err_unmap_pages;
442 
443 			/* Don't get stuck if things have gone wrong */
444 			mapped = max(mapped, pgsize);
445 			total_mapped += mapped;
446 			iova += mapped;
447 			paddr += mapped;
448 			len -= mapped;
449 		}
450 	}
451 
452 	panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova);
453 
454 	return 0;
455 
456 err_unmap_pages:
457 	mmu_unmap_range(mmu, start_iova, total_mapped);
458 	return ret;
459 }
460 
461 int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
462 {
463 	struct panfrost_gem_object *bo = mapping->obj;
464 	struct drm_gem_shmem_object *shmem = &bo->base;
465 	struct drm_gem_object *obj = &shmem->base;
466 	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
467 	struct sg_table *sgt;
468 	int prot = IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE;
469 	int ret;
470 
471 	if (WARN_ON(mapping->active))
472 		return 0;
473 
474 	if (bo->noexec)
475 		prot |= IOMMU_NOEXEC;
476 
477 	sgt = drm_gem_shmem_get_pages_sgt(shmem);
478 	if (WARN_ON(IS_ERR(sgt)))
479 		return PTR_ERR(sgt);
480 
481 	ret = mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT,
482 			 prot, sgt);
483 	if (ret)
484 		goto err_put_pages;
485 
486 	mapping->active = true;
487 
488 	return 0;
489 
490 err_put_pages:
491 	drm_gem_shmem_put_pages_locked(shmem);
492 	return ret;
493 }
494 
495 void panfrost_mmu_unmap(struct panfrost_gem_mapping *mapping)
496 {
497 	struct panfrost_gem_object *bo = mapping->obj;
498 	struct drm_gem_object *obj = &bo->base.base;
499 	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
500 	struct io_pgtable_ops *ops = mapping->mmu->pgtbl_ops;
501 	u64 iova = mapping->mmnode.start << PAGE_SHIFT;
502 	size_t len = mapping->mmnode.size << PAGE_SHIFT;
503 	size_t unmapped_len = 0;
504 
505 	if (WARN_ON(!mapping->active))
506 		return;
507 
508 	dev_dbg(pfdev->base.dev, "unmap: as=%d, iova=%llx, len=%zx",
509 		mapping->mmu->as, iova, len);
510 
511 	while (unmapped_len < len) {
512 		size_t unmapped_page, pgcount;
513 		size_t pgsize = get_pgsize(iova, len - unmapped_len, &pgcount);
514 
515 		if (bo->is_heap)
516 			pgcount = 1;
517 		if (!bo->is_heap || ops->iova_to_phys(ops, iova)) {
518 			unmapped_page = ops->unmap_pages(ops, iova, pgsize, pgcount, NULL);
519 			WARN_ON(unmapped_page != pgsize * pgcount);
520 		}
521 		iova += pgsize * pgcount;
522 		unmapped_len += pgsize * pgcount;
523 	}
524 
525 	panfrost_mmu_flush_range(pfdev, mapping->mmu,
526 				 mapping->mmnode.start << PAGE_SHIFT, len);
527 	mapping->active = false;
528 }
529 
/* io-pgtable "flush all" callback: intentionally does nothing. */
static void mmu_tlb_inv_context_s1(void *cookie)
{
}
532 
/*
 * TLB sync hook, currently a no-op.
 *
 * TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X. Doing so will need the
 * context, which would be obtained with:
 *	struct panfrost_mmu *mmu = cookie;
 */
static void mmu_tlb_sync_context(void *cookie)
{
}
538 
539 static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule,
540 			       void *cookie)
541 {
542 	mmu_tlb_sync_context(cookie);
543 }
544 
545 static const struct iommu_flush_ops mmu_tlb_ops = {
546 	.tlb_flush_all	= mmu_tlb_inv_context_s1,
547 	.tlb_flush_walk = mmu_tlb_flush_walk,
548 };
549 
550 static struct panfrost_gem_mapping *
551 addr_to_mapping(struct panfrost_device *pfdev, int as, u64 addr)
552 {
553 	struct panfrost_gem_mapping *mapping = NULL;
554 	struct drm_mm_node *node;
555 	u64 offset = addr >> PAGE_SHIFT;
556 	struct panfrost_mmu *mmu;
557 
558 	spin_lock(&pfdev->as_lock);
559 	list_for_each_entry(mmu, &pfdev->as_lru_list, list) {
560 		if (as == mmu->as)
561 			goto found_mmu;
562 	}
563 	goto out;
564 
565 found_mmu:
566 
567 	spin_lock(&mmu->mm_lock);
568 
569 	drm_mm_for_each_node(node, &mmu->mm) {
570 		if (offset >= node->start &&
571 		    offset < (node->start + node->size)) {
572 			mapping = drm_mm_node_to_panfrost_mapping(node);
573 
574 			kref_get(&mapping->refcount);
575 			break;
576 		}
577 	}
578 
579 	spin_unlock(&mmu->mm_lock);
580 out:
581 	spin_unlock(&pfdev->as_lock);
582 	return mapping;
583 }
584 
585 #define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE)
586 
587 static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
588 				       u64 addr)
589 {
590 	int ret, i;
591 	struct panfrost_gem_mapping *bomapping;
592 	struct panfrost_gem_object *bo;
593 	struct address_space *mapping;
594 	struct drm_gem_object *obj;
595 	pgoff_t page_offset;
596 	struct sg_table *sgt;
597 	struct page **pages;
598 
599 	bomapping = addr_to_mapping(pfdev, as, addr);
600 	if (!bomapping)
601 		return -ENOENT;
602 
603 	bo = bomapping->obj;
604 	if (!bo->is_heap) {
605 		dev_WARN(pfdev->base.dev, "matching BO is not heap type (GPU VA = %llx)",
606 			 bomapping->mmnode.start << PAGE_SHIFT);
607 		ret = -EINVAL;
608 		goto err_bo;
609 	}
610 	WARN_ON(bomapping->mmu->as != as);
611 
612 	/* Assume 2MB alignment and size multiple */
613 	addr &= ~((u64)SZ_2M - 1);
614 	page_offset = addr >> PAGE_SHIFT;
615 	page_offset -= bomapping->mmnode.start;
616 
617 	obj = &bo->base.base;
618 
619 	dma_resv_lock(obj->resv, NULL);
620 
621 	if (!bo->base.pages) {
622 		bo->sgts = kvmalloc_array(bo->base.base.size / SZ_2M,
623 				     sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO);
624 		if (!bo->sgts) {
625 			ret = -ENOMEM;
626 			goto err_unlock;
627 		}
628 
629 		pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT,
630 				       sizeof(struct page *), GFP_KERNEL | __GFP_ZERO);
631 		if (!pages) {
632 			kvfree(bo->sgts);
633 			bo->sgts = NULL;
634 			ret = -ENOMEM;
635 			goto err_unlock;
636 		}
637 		bo->base.pages = pages;
638 		refcount_set(&bo->base.pages_use_count, 1);
639 	} else {
640 		pages = bo->base.pages;
641 	}
642 
643 	sgt = &bo->sgts[page_offset / (SZ_2M / PAGE_SIZE)];
644 	if (sgt->sgl) {
645 		/* Pages are already mapped, bail out. */
646 		goto out;
647 	}
648 
649 	mapping = bo->base.base.filp->f_mapping;
650 	mapping_set_unevictable(mapping);
651 
652 	for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
653 		/* Can happen if the last fault only partially filled this
654 		 * section of the pages array before failing. In that case
655 		 * we skip already filled pages.
656 		 */
657 		if (pages[i])
658 			continue;
659 
660 		pages[i] = shmem_read_mapping_page(mapping, i);
661 		if (IS_ERR(pages[i])) {
662 			ret = PTR_ERR(pages[i]);
663 			pages[i] = NULL;
664 			goto err_unlock;
665 		}
666 	}
667 
668 	ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
669 					NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL);
670 	if (ret)
671 		goto err_unlock;
672 
673 	ret = dma_map_sgtable(pfdev->base.dev, sgt, DMA_BIDIRECTIONAL, 0);
674 	if (ret)
675 		goto err_map;
676 
677 	ret = mmu_map_sg(pfdev, bomapping->mmu, addr,
678 			 IOMMU_WRITE | IOMMU_READ | IOMMU_CACHE | IOMMU_NOEXEC, sgt);
679 	if (ret)
680 		goto err_mmu_map_sg;
681 
682 	bomapping->active = true;
683 	bo->heap_rss_size += SZ_2M;
684 
685 	dev_dbg(pfdev->base.dev, "mapped page fault @ AS%d %llx", as, addr);
686 
687 out:
688 	dma_resv_unlock(obj->resv);
689 
690 	panfrost_gem_mapping_put(bomapping);
691 
692 	return 0;
693 
694 err_mmu_map_sg:
695 	dma_unmap_sgtable(pfdev->base.dev, sgt, DMA_BIDIRECTIONAL, 0);
696 err_map:
697 	sg_free_table(sgt);
698 err_unlock:
699 	dma_resv_unlock(obj->resv);
700 err_bo:
701 	panfrost_gem_mapping_put(bomapping);
702 	return ret;
703 }
704 
705 static void panfrost_mmu_release_ctx(struct kref *kref)
706 {
707 	struct panfrost_mmu *mmu = container_of(kref, struct panfrost_mmu,
708 						refcount);
709 	struct panfrost_device *pfdev = mmu->pfdev;
710 
711 	spin_lock(&pfdev->as_lock);
712 	if (mmu->as >= 0) {
713 		pm_runtime_get_noresume(pfdev->base.dev);
714 		if (pm_runtime_active(pfdev->base.dev))
715 			panfrost_mmu_disable(pfdev, mmu->as);
716 		pm_runtime_put_autosuspend(pfdev->base.dev);
717 
718 		clear_bit(mmu->as, &pfdev->as_alloc_mask);
719 		list_del(&mmu->list);
720 	}
721 	spin_unlock(&pfdev->as_lock);
722 
723 	free_io_pgtable_ops(mmu->pgtbl_ops);
724 	drm_mm_takedown(&mmu->mm);
725 	kfree(mmu);
726 }
727 
728 void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu)
729 {
730 	kref_put(&mmu->refcount, panfrost_mmu_release_ctx);
731 }
732 
733 struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu)
734 {
735 	kref_get(&mmu->refcount);
736 
737 	return mmu;
738 }
739 
740 #define PFN_4G		(SZ_4G >> PAGE_SHIFT)
741 #define PFN_4G_MASK	(PFN_4G - 1)
742 #define PFN_16M		(SZ_16M >> PAGE_SHIFT)
743 
744 static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node,
745 					 unsigned long color,
746 					 u64 *start, u64 *end)
747 {
748 	/* Executable buffers can't start or end on a 4GB boundary */
749 	if (!(color & PANFROST_BO_NOEXEC)) {
750 		u64 next_seg;
751 
752 		if ((*start & PFN_4G_MASK) == 0)
753 			(*start)++;
754 
755 		if ((*end & PFN_4G_MASK) == 0)
756 			(*end)--;
757 
758 		next_seg = ALIGN(*start, PFN_4G);
759 		if (next_seg - *start <= PFN_16M)
760 			*start = next_seg + 1;
761 
762 		*end = min(*end, ALIGN(*start, PFN_4G) - 1);
763 	}
764 }
765 
766 struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev)
767 {
768 	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(pfdev->features.mmu_features);
769 	u32 pa_bits = GPU_MMU_FEATURES_PA_BITS(pfdev->features.mmu_features);
770 	struct panfrost_mmu *mmu;
771 	enum io_pgtable_fmt fmt;
772 	int ret;
773 
774 	if (pfdev->comp->gpu_quirks & BIT(GPU_QUIRK_FORCE_AARCH64_PGTABLE)) {
775 		if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_AARCH64_MMU)) {
776 			dev_err_once(pfdev->base.dev,
777 				     "AARCH64_4K page table not supported\n");
778 			return ERR_PTR(-EINVAL);
779 		}
780 		fmt = ARM_64_LPAE_S1;
781 	} else {
782 		fmt = ARM_MALI_LPAE;
783 	}
784 
785 	mmu = kzalloc(sizeof(*mmu), GFP_KERNEL);
786 	if (!mmu)
787 		return ERR_PTR(-ENOMEM);
788 
789 	mmu->pfdev = pfdev;
790 	spin_lock_init(&mmu->mm_lock);
791 
792 	/* 4G enough for now. can be 48-bit */
793 	drm_mm_init(&mmu->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT);
794 	mmu->mm.color_adjust = panfrost_drm_mm_color_adjust;
795 
796 	INIT_LIST_HEAD(&mmu->list);
797 	mmu->as = -1;
798 
799 	mmu->pgtbl_cfg = (struct io_pgtable_cfg) {
800 		.pgsize_bitmap	= SZ_4K | SZ_2M,
801 		.ias		= va_bits,
802 		.oas		= pa_bits,
803 		.coherent_walk	= pfdev->coherent,
804 		.tlb		= &mmu_tlb_ops,
805 		.iommu_dev	= pfdev->base.dev,
806 	};
807 
808 	mmu->pgtbl_ops = alloc_io_pgtable_ops(fmt, &mmu->pgtbl_cfg, mmu);
809 	if (!mmu->pgtbl_ops) {
810 		ret = -EINVAL;
811 		goto err_free_mmu;
812 	}
813 
814 	ret = panfrost_mmu_cfg_init(mmu, fmt);
815 	if (ret)
816 		goto err_free_io_pgtable;
817 
818 	kref_init(&mmu->refcount);
819 
820 	return mmu;
821 
822 err_free_io_pgtable:
823 	free_io_pgtable_ops(mmu->pgtbl_ops);
824 
825 err_free_mmu:
826 	kfree(mmu);
827 	return ERR_PTR(ret);
828 }
829 
830 static const char *access_type_name(struct panfrost_device *pfdev,
831 		u32 fault_status)
832 {
833 	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
834 	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
835 		if (panfrost_has_hw_feature(pfdev, HW_FEATURE_AARCH64_MMU))
836 			return "ATOMIC";
837 		else
838 			return "UNKNOWN";
839 	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
840 		return "READ";
841 	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
842 		return "WRITE";
843 	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
844 		return "EXECUTE";
845 	default:
846 		WARN_ON(1);
847 		return NULL;
848 	}
849 }
850 
851 static irqreturn_t panfrost_mmu_irq_handler(int irq, void *data)
852 {
853 	struct panfrost_device *pfdev = data;
854 
855 	if (test_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended))
856 		return IRQ_NONE;
857 
858 	if (!mmu_read(pfdev, MMU_INT_STAT))
859 		return IRQ_NONE;
860 
861 	mmu_write(pfdev, MMU_INT_MASK, 0);
862 	return IRQ_WAKE_THREAD;
863 }
864 
865 static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data)
866 {
867 	struct panfrost_device *pfdev = data;
868 	u32 status = mmu_read(pfdev, MMU_INT_RAWSTAT);
869 	int ret;
870 
871 	while (status) {
872 		u32 as = ffs(status | (status >> 16)) - 1;
873 		u32 mask = BIT(as) | BIT(as + 16);
874 		u64 addr;
875 		u32 fault_status;
876 		u32 exception_type;
877 		u32 access_type;
878 		u32 source_id;
879 
880 		fault_status = mmu_read(pfdev, AS_FAULTSTATUS(as));
881 		addr = mmu_read(pfdev, AS_FAULTADDRESS_LO(as));
882 		addr |= (u64)mmu_read(pfdev, AS_FAULTADDRESS_HI(as)) << 32;
883 
884 		/* decode the fault status */
885 		exception_type = fault_status & 0xFF;
886 		access_type = (fault_status >> 8) & 0x3;
887 		source_id = (fault_status >> 16);
888 
889 		mmu_write(pfdev, MMU_INT_CLEAR, mask);
890 
891 		/* Page fault only */
892 		ret = -1;
893 		if ((status & mask) == BIT(as) && (exception_type & 0xF8) == 0xC0)
894 			ret = panfrost_mmu_map_fault_addr(pfdev, as, addr);
895 
896 		if (ret) {
897 			/* terminal fault, print info about the fault */
898 			dev_err(pfdev->base.dev,
899 				"Unhandled Page fault in AS%d at VA 0x%016llX\n"
900 				"Reason: %s\n"
901 				"raw fault status: 0x%X\n"
902 				"decoded fault status: %s\n"
903 				"exception type 0x%X: %s\n"
904 				"access type 0x%X: %s\n"
905 				"source id 0x%X\n",
906 				as, addr,
907 				"TODO",
908 				fault_status,
909 				(fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
910 				exception_type, panfrost_exception_name(exception_type),
911 				access_type, access_type_name(pfdev, fault_status),
912 				source_id);
913 
914 			spin_lock(&pfdev->as_lock);
915 			/* Ignore MMU interrupts on this AS until it's been
916 			 * re-enabled.
917 			 */
918 			pfdev->as_faulty_mask |= mask;
919 
920 			/* Disable the MMU to kill jobs on this AS. */
921 			panfrost_mmu_disable(pfdev, as);
922 			spin_unlock(&pfdev->as_lock);
923 		}
924 
925 		status &= ~mask;
926 
927 		/* If we received new MMU interrupts, process them before returning. */
928 		if (!status)
929 			status = mmu_read(pfdev, MMU_INT_RAWSTAT) & ~pfdev->as_faulty_mask;
930 	}
931 
932 	/* Enable interrupts only if we're not about to get suspended */
933 	if (!test_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended)) {
934 		spin_lock(&pfdev->as_lock);
935 		mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask);
936 		spin_unlock(&pfdev->as_lock);
937 	}
938 
939 	return IRQ_HANDLED;
940 };
941 
942 int panfrost_mmu_init(struct panfrost_device *pfdev)
943 {
944 	int err;
945 
946 	pfdev->mmu_irq = platform_get_irq_byname(to_platform_device(pfdev->base.dev), "mmu");
947 	if (pfdev->mmu_irq < 0)
948 		return pfdev->mmu_irq;
949 
950 	err = devm_request_threaded_irq(pfdev->base.dev, pfdev->mmu_irq,
951 					panfrost_mmu_irq_handler,
952 					panfrost_mmu_irq_handler_thread,
953 					IRQF_SHARED, KBUILD_MODNAME "-mmu",
954 					pfdev);
955 
956 	if (err) {
957 		dev_err(pfdev->base.dev, "failed to request mmu irq");
958 		return err;
959 	}
960 
961 	return 0;
962 }
963 
964 void panfrost_mmu_fini(struct panfrost_device *pfdev)
965 {
966 	mmu_write(pfdev, MMU_INT_MASK, 0);
967 }
968 
969 void panfrost_mmu_suspend_irq(struct panfrost_device *pfdev)
970 {
971 	set_bit(PANFROST_COMP_BIT_MMU, pfdev->is_suspended);
972 
973 	mmu_write(pfdev, MMU_INT_MASK, 0);
974 	synchronize_irq(pfdev->mmu_irq);
975 }
976