1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * CPU-agnostic ARM page table allocator.
4  *
5  * Copyright (C) 2014 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  */
9 
10 #define pr_fmt(fmt)	"arm-lpae io-pgtable: " fmt
11 
12 #include <linux/atomic.h>
13 #include <linux/bitops.h>
14 #include <linux/io-pgtable.h>
15 #include <linux/kernel.h>
16 #include <linux/sizes.h>
17 #include <linux/slab.h>
18 #include <linux/types.h>
19 #include <linux/dma-mapping.h>
20 
21 #include <asm/barrier.h>
22 
23 #include "io-pgtable-arm.h"
24 #include "iommu-pages.h"
25 
26 #define ARM_LPAE_MAX_ADDR_BITS		52
27 #define ARM_LPAE_S2_MAX_CONCAT_PAGES	16
28 #define ARM_LPAE_MAX_LEVELS		4
29 
30 /* Struct accessors */
31 #define io_pgtable_to_data(x)						\
32 	container_of((x), struct arm_lpae_io_pgtable, iop)
33 
34 #define io_pgtable_ops_to_data(x)					\
35 	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
36 
37 /*
38  * Calculate the right shift amount to get to the portion describing level l
39  * in a virtual address mapped by the pagetable in d.
40  */
41 #define ARM_LPAE_LVL_SHIFT(l,d)						\
42 	(((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) +		\
43 	ilog2(sizeof(arm_lpae_iopte)))
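/*
 * Illustrative numbers (4K granule): bits_per_level = 9 and
 * ilog2(sizeof(arm_lpae_iopte)) = 3, so levels 0-3 shift by 39, 30, 21
 * and 12 bits respectively, matching the usual AArch64 4K regime.
 */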
44 
45 #define ARM_LPAE_GRANULE(d)						\
46 	(sizeof(arm_lpae_iopte) << (d)->bits_per_level)
47 #define ARM_LPAE_PGD_SIZE(d)						\
48 	(sizeof(arm_lpae_iopte) << (d)->pgd_bits)
49 
50 #define ARM_LPAE_PTES_PER_TABLE(d)					\
51 	(ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte)))
52 
53 /*
54  * Calculate the index at level l used to map virtual address a using the
55  * pagetable in d.
56  */
57 #define ARM_LPAE_PGD_IDX(l,d)						\
58 	((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0)
59 
60 #define ARM_LPAE_LVL_IDX(a,l,d)						\
61 	(((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) &			\
62 	 ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1))
63 
64 /* Calculate the block/page mapping size at level l for pagetable in d. */
65 #define ARM_LPAE_BLOCK_SIZE(l,d)	(1ULL << ARM_LPAE_LVL_SHIFT(l,d))
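/*
 * With the same 4K-granule example, ARM_LPAE_BLOCK_SIZE() gives 1G, 2M
 * and 4K at levels 1, 2 and 3, and ARM_LPAE_LVL_IDX() selects IOVA bits
 * [38:30], [29:21] and [20:12] (plus any extra PGD bits at the start
 * level).
 */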
66 
67 /* Page table bits */
68 #define ARM_LPAE_PTE_TYPE_SHIFT		0
69 #define ARM_LPAE_PTE_TYPE_MASK		0x3
70 
71 #define ARM_LPAE_PTE_TYPE_BLOCK		1
72 #define ARM_LPAE_PTE_TYPE_TABLE		3
73 #define ARM_LPAE_PTE_TYPE_PAGE		3
74 
75 #define ARM_LPAE_PTE_ADDR_MASK		GENMASK_ULL(47,12)
76 
77 #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
78 #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
79 #define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
80 #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
81 #define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
82 #define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
83 #define ARM_LPAE_PTE_SH_IS		(((arm_lpae_iopte)3) << 8)
84 #define ARM_LPAE_PTE_NS			(((arm_lpae_iopte)1) << 5)
85 #define ARM_LPAE_PTE_VALID		(((arm_lpae_iopte)1) << 0)
86 
87 #define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
88 /* Ignore the contiguous bit for block splitting */
89 #define ARM_LPAE_PTE_ATTR_HI_MASK	(ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM)
90 #define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK |	\
91 					 ARM_LPAE_PTE_ATTR_HI_MASK)
92 /* Software bit for solving coherency races */
93 #define ARM_LPAE_PTE_SW_SYNC		(((arm_lpae_iopte)1) << 55)
94 
95 /* Stage-1 PTE */
96 #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
97 #define ARM_LPAE_PTE_AP_RDONLY_BIT	7
98 #define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)1) << \
99 					   ARM_LPAE_PTE_AP_RDONLY_BIT)
100 #define ARM_LPAE_PTE_AP_WR_CLEAN_MASK	(ARM_LPAE_PTE_AP_RDONLY | \
101 					 ARM_LPAE_PTE_DBM)
102 #define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
103 #define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)
104 
105 /* Stage-2 PTE */
106 #define ARM_LPAE_PTE_HAP_FAULT		(((arm_lpae_iopte)0) << 6)
107 #define ARM_LPAE_PTE_HAP_READ		(((arm_lpae_iopte)1) << 6)
108 #define ARM_LPAE_PTE_HAP_WRITE		(((arm_lpae_iopte)2) << 6)
109 /*
110  * For !FWB these encode to:
111  *  1111 = Normal outer write-back cacheable / Inner write-back cacheable
112  *         Permit S1 to override
113  *  0101 = Normal non-cacheable / Inner non-cacheable
114  *  0001 = Device / Device-nGnRE
115  * For S2FWB these encode to:
116  *  0110 = Force Normal write-back
117  *  0101 = Normal* is forced Normal-NC, Device unchanged
118  *  0001 = Force Device-nGnRE
119  */
120 #define ARM_LPAE_PTE_MEMATTR_FWB_WB	(((arm_lpae_iopte)0x6) << 2)
121 #define ARM_LPAE_PTE_MEMATTR_OIWB	(((arm_lpae_iopte)0xf) << 2)
122 #define ARM_LPAE_PTE_MEMATTR_NC		(((arm_lpae_iopte)0x5) << 2)
123 #define ARM_LPAE_PTE_MEMATTR_DEV	(((arm_lpae_iopte)0x1) << 2)
124 
125 /* Register bits */
126 #define ARM_LPAE_VTCR_SL0_MASK		0x3
127 
128 #define ARM_LPAE_TCR_T0SZ_SHIFT		0
129 
130 #define ARM_LPAE_VTCR_PS_SHIFT		16
131 #define ARM_LPAE_VTCR_PS_MASK		0x7
132 
133 #define ARM_LPAE_MAIR_ATTR_SHIFT(n)	((n) << 3)
134 #define ARM_LPAE_MAIR_ATTR_MASK		0xff
135 #define ARM_LPAE_MAIR_ATTR_DEVICE	0x04
136 #define ARM_LPAE_MAIR_ATTR_NC		0x44
137 #define ARM_LPAE_MAIR_ATTR_INC_OWBRWA	0xf4
138 #define ARM_LPAE_MAIR_ATTR_WBRWA	0xff
139 #define ARM_LPAE_MAIR_ATTR_IDX_NC	0
140 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE	1
141 #define ARM_LPAE_MAIR_ATTR_IDX_DEV	2
142 #define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE	3
143 
144 #define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
145 #define ARM_MALI_LPAE_TTBR_READ_INNER	BIT(2)
146 #define ARM_MALI_LPAE_TTBR_SHARE_OUTER	BIT(4)
147 
148 #define ARM_MALI_LPAE_MEMATTR_IMP_DEF	0x88ULL
149 #define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
150 
151 /* IOPTE accessors */
152 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
153 
154 #define iopte_type(pte)					\
155 	(((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)
156 
157 #define iopte_prot(pte)	((pte) & ARM_LPAE_PTE_ATTR_MASK)
158 
159 #define iopte_writeable_dirty(pte)				\
160 	(((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM)
161 
162 #define iopte_set_writeable_clean(ptep)				\
163 	set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)(ptep))
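/*
 * A sketch of the dirty-tracking scheme, assuming the usual Arm DBM/HTTU
 * behaviour: with ARM_LPAE_PTE_DBM set, hardware clears AP[2]
 * (ARM_LPAE_PTE_AP_RDONLY) on the first write, so "DBM set, RDONLY clear"
 * reads as writeable-and-dirty, and re-setting RDONLY marks the entry
 * clean again.
 */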
164 
165 struct arm_lpae_io_pgtable {
166 	struct io_pgtable	iop;
167 
168 	int			pgd_bits;
169 	int			start_level;
170 	int			bits_per_level;
171 
172 	void			*pgd;
173 };
174 
175 typedef u64 arm_lpae_iopte;
176 
177 static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
178 			      enum io_pgtable_fmt fmt)
179 {
180 	if (lvl == (ARM_LPAE_MAX_LEVELS - 1) && fmt != ARM_MALI_LPAE)
181 		return iopte_type(pte) == ARM_LPAE_PTE_TYPE_PAGE;
182 
183 	return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK;
184 }
185 
186 static inline bool iopte_table(arm_lpae_iopte pte, int lvl)
187 {
188 	if (lvl == (ARM_LPAE_MAX_LEVELS - 1))
189 		return false;
190 	return iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE;
191 }
192 
193 static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
194 				     struct arm_lpae_io_pgtable *data)
195 {
196 	arm_lpae_iopte pte = paddr;
197 
198 	/* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
199 	return (pte | (pte >> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK;
200 }
201 
202 static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
203 				  struct arm_lpae_io_pgtable *data)
204 {
205 	u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;
206 
207 	if (ARM_LPAE_GRANULE(data) < SZ_64K)
208 		return paddr;
209 
210 	/* Rotate the packed high-order bits back to the top */
211 	return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
212 }
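/*
 * Worked example for the 52-bit case: with a 64K granule the output
 * address is 64K-aligned, so PA[15:12] are guaranteed zero and
 * paddr_to_iopte() can park PA[51:48] there (shifted right by
 * 48 - 12 = 36); iopte_to_paddr() simply rotates them back up.
 */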
213 
214 static bool selftest_running = false;
215 
216 static dma_addr_t __arm_lpae_dma_addr(void *pages)
217 {
218 	return (dma_addr_t)virt_to_phys(pages);
219 }
220 
221 static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
222 				    struct io_pgtable_cfg *cfg,
223 				    void *cookie)
224 {
225 	struct device *dev = cfg->iommu_dev;
226 	int order = get_order(size);
227 	dma_addr_t dma;
228 	void *pages;
229 
230 	VM_BUG_ON((gfp & __GFP_HIGHMEM));
231 
232 	if (cfg->alloc)
233 		pages = cfg->alloc(cookie, size, gfp);
234 	else
235 		pages = iommu_alloc_pages_node(dev_to_node(dev), gfp, order);
236 
237 	if (!pages)
238 		return NULL;
239 
240 	if (!cfg->coherent_walk) {
241 		dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
242 		if (dma_mapping_error(dev, dma))
243 			goto out_free;
244 		/*
245 		 * We depend on the IOMMU being able to work with any physical
246 		 * address directly, so if the DMA layer suggests otherwise by
247 		 * translating or truncating them, that bodes very badly...
248 		 */
249 		if (dma != virt_to_phys(pages))
250 			goto out_unmap;
251 	}
252 
253 	return pages;
254 
255 out_unmap:
256 	dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
257 	dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
258 
259 out_free:
260 	if (cfg->free)
261 		cfg->free(cookie, pages, size);
262 	else
263 		iommu_free_pages(pages, order);
264 
265 	return NULL;
266 }
267 
268 static void __arm_lpae_free_pages(void *pages, size_t size,
269 				  struct io_pgtable_cfg *cfg,
270 				  void *cookie)
271 {
272 	if (!cfg->coherent_walk)
273 		dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
274 				 size, DMA_TO_DEVICE);
275 
276 	if (cfg->free)
277 		cfg->free(cookie, pages, size);
278 	else
279 		iommu_free_pages(pages, get_order(size));
280 }
281 
282 static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
283 				struct io_pgtable_cfg *cfg)
284 {
285 	dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
286 				   sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
287 }
288 
289 static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries)
290 {
291 	for (int i = 0; i < num_entries; i++)
292 		ptep[i] = 0;
293 
294 	if (!cfg->coherent_walk && num_entries)
295 		__arm_lpae_sync_pte(ptep, num_entries, cfg);
296 }
297 
298 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
299 			       struct iommu_iotlb_gather *gather,
300 			       unsigned long iova, size_t size, size_t pgcount,
301 			       int lvl, arm_lpae_iopte *ptep);
302 
303 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
304 				phys_addr_t paddr, arm_lpae_iopte prot,
305 				int lvl, int num_entries, arm_lpae_iopte *ptep)
306 {
307 	arm_lpae_iopte pte = prot;
308 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
309 	size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
310 	int i;
311 
312 	if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
313 		pte |= ARM_LPAE_PTE_TYPE_PAGE;
314 	else
315 		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
316 
317 	for (i = 0; i < num_entries; i++)
318 		ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);
319 
320 	if (!cfg->coherent_walk)
321 		__arm_lpae_sync_pte(ptep, num_entries, cfg);
322 }
323 
324 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
325 			     unsigned long iova, phys_addr_t paddr,
326 			     arm_lpae_iopte prot, int lvl, int num_entries,
327 			     arm_lpae_iopte *ptep)
328 {
329 	int i;
330 
331 	for (i = 0; i < num_entries; i++)
332 		if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
333 			/* We require an unmap first */
334 			WARN_ON(!selftest_running);
335 			return -EEXIST;
336 		} else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
337 			/*
338 			 * We need to unmap and free the old table before
339 			 * overwriting it with a block entry.
340 			 */
341 			arm_lpae_iopte *tblp;
342 			size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
343 
344 			tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
345 			if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
346 					     lvl, tblp) != sz) {
347 				WARN_ON(1);
348 				return -EINVAL;
349 			}
350 		}
351 
352 	__arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
353 	return 0;
354 }
355 
356 static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
357 					     arm_lpae_iopte *ptep,
358 					     arm_lpae_iopte curr,
359 					     struct arm_lpae_io_pgtable *data)
360 {
361 	arm_lpae_iopte old, new;
362 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
363 
364 	new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE;
365 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
366 		new |= ARM_LPAE_PTE_NSTABLE;
367 
368 	/*
369 	 * Ensure the table itself is visible before its PTE can be.
370 	 * Whilst we could get away with cmpxchg64_release below, this
371 	 * doesn't have any ordering semantics when !CONFIG_SMP.
372 	 */
373 	dma_wmb();
374 
375 	old = cmpxchg64_relaxed(ptep, curr, new);
376 
377 	if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC))
378 		return old;
379 
380 	/* Even if it's not ours, there's no point waiting; just kick it */
381 	__arm_lpae_sync_pte(ptep, 1, cfg);
382 	if (old == curr)
383 		WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);
384 
385 	return old;
386 }
387 
388 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
389 			  phys_addr_t paddr, size_t size, size_t pgcount,
390 			  arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
391 			  gfp_t gfp, size_t *mapped)
392 {
393 	arm_lpae_iopte *cptep, pte;
394 	size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
395 	size_t tblsz = ARM_LPAE_GRANULE(data);
396 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
397 	int ret = 0, num_entries, max_entries, map_idx_start;
398 
399 	/* Find our entry at the current level */
400 	map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
401 	ptep += map_idx_start;
402 
403 	/* If we can install a leaf entry at this level, then do so */
404 	if (size == block_size) {
405 		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
406 		num_entries = min_t(int, pgcount, max_entries);
407 		ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
408 		if (!ret)
409 			*mapped += num_entries * size;
410 
411 		return ret;
412 	}
413 
414 	/* We can't allocate tables at the final level */
415 	if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
416 		return -EINVAL;
417 
418 	/* Grab a pointer to the next level */
419 	pte = READ_ONCE(*ptep);
420 	if (!pte) {
421 		cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg, data->iop.cookie);
422 		if (!cptep)
423 			return -ENOMEM;
424 
425 		pte = arm_lpae_install_table(cptep, ptep, 0, data);
426 		if (pte)
427 			__arm_lpae_free_pages(cptep, tblsz, cfg, data->iop.cookie);
428 	} else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
429 		__arm_lpae_sync_pte(ptep, 1, cfg);
430 	}
431 
432 	if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
433 		cptep = iopte_deref(pte, data);
434 	} else if (pte) {
435 		/* We require an unmap first */
436 		WARN_ON(!selftest_running);
437 		return -EEXIST;
438 	}
439 
440 	/* Rinse, repeat */
441 	return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
442 			      cptep, gfp, mapped);
443 }
444 
445 static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
446 					   int prot)
447 {
448 	arm_lpae_iopte pte;
449 
450 	if (data->iop.fmt == ARM_64_LPAE_S1 ||
451 	    data->iop.fmt == ARM_32_LPAE_S1) {
452 		pte = ARM_LPAE_PTE_nG;
453 		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
454 			pte |= ARM_LPAE_PTE_AP_RDONLY;
455 		else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
456 			pte |= ARM_LPAE_PTE_DBM;
457 		if (!(prot & IOMMU_PRIV))
458 			pte |= ARM_LPAE_PTE_AP_UNPRIV;
459 	} else {
460 		pte = ARM_LPAE_PTE_HAP_FAULT;
461 		if (prot & IOMMU_READ)
462 			pte |= ARM_LPAE_PTE_HAP_READ;
463 		if (prot & IOMMU_WRITE)
464 			pte |= ARM_LPAE_PTE_HAP_WRITE;
465 	}
466 
467 	/*
468 	 * Note that this logic is structured to accommodate Mali LPAE
469 	 * having stage-1-like attributes but stage-2-like permissions.
470 	 */
471 	if (data->iop.fmt == ARM_64_LPAE_S2 ||
472 	    data->iop.fmt == ARM_32_LPAE_S2) {
473 		if (prot & IOMMU_MMIO) {
474 			pte |= ARM_LPAE_PTE_MEMATTR_DEV;
475 		} else if (prot & IOMMU_CACHE) {
476 			if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
477 				pte |= ARM_LPAE_PTE_MEMATTR_FWB_WB;
478 			else
479 				pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
480 		} else {
481 			pte |= ARM_LPAE_PTE_MEMATTR_NC;
482 		}
483 	} else {
484 		if (prot & IOMMU_MMIO)
485 			pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
486 				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
487 		else if (prot & IOMMU_CACHE)
488 			pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
489 				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
490 	}
491 
492 	/*
493 	 * Also Mali has its own notions of shareability wherein its Inner
494 	 * domain covers the cores within the GPU, and its Outer domain is
495 	 * "outside the GPU" (i.e. either the Inner or System domain in CPU
496 	 * terms, depending on coherency).
497 	 */
498 	if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE)
499 		pte |= ARM_LPAE_PTE_SH_IS;
500 	else
501 		pte |= ARM_LPAE_PTE_SH_OS;
502 
503 	if (prot & IOMMU_NOEXEC)
504 		pte |= ARM_LPAE_PTE_XN;
505 
506 	if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
507 		pte |= ARM_LPAE_PTE_NS;
508 
509 	if (data->iop.fmt != ARM_MALI_LPAE)
510 		pte |= ARM_LPAE_PTE_AF;
511 
512 	return pte;
513 }
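/*
 * As an illustration (no quirks assumed): a stage-1 request for
 * IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE comes out as
 * nG | AP_UNPRIV | (MAIR_ATTR_IDX_CACHE << ATTRINDX_SHIFT) | SH_IS | AF,
 * i.e. a non-global, unprivileged-accessible, write-back cacheable,
 * inner-shareable entry with the Access flag preset.
 */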
514 
515 static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
516 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
517 			      int iommu_prot, gfp_t gfp, size_t *mapped)
518 {
519 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
520 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
521 	arm_lpae_iopte *ptep = data->pgd;
522 	int ret, lvl = data->start_level;
523 	arm_lpae_iopte prot;
524 	long iaext = (s64)iova >> cfg->ias;
525 
526 	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
527 		return -EINVAL;
528 
529 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
530 		iaext = ~iaext;
531 	if (WARN_ON(iaext || paddr >> cfg->oas))
532 		return -ERANGE;
533 
534 	if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
535 		return -EINVAL;
536 
537 	prot = arm_lpae_prot_to_pte(data, iommu_prot);
538 	ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
539 			     ptep, gfp, mapped);
540 	/*
541 	 * Synchronise all PTE updates for the new mapping before there's
542 	 * a chance for anything to kick off a table walk for the new iova.
543 	 */
544 	wmb();
545 
546 	return ret;
547 }
548 
549 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
550 				    arm_lpae_iopte *ptep)
551 {
552 	arm_lpae_iopte *start, *end;
553 	unsigned long table_size;
554 
555 	if (lvl == data->start_level)
556 		table_size = ARM_LPAE_PGD_SIZE(data);
557 	else
558 		table_size = ARM_LPAE_GRANULE(data);
559 
560 	start = ptep;
561 
562 	/* Only leaf entries at the last level */
563 	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
564 		end = ptep;
565 	else
566 		end = (void *)ptep + table_size;
567 
568 	while (ptep != end) {
569 		arm_lpae_iopte pte = *ptep++;
570 
571 		if (!pte || iopte_leaf(pte, lvl, data->iop.fmt))
572 			continue;
573 
574 		__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
575 	}
576 
577 	__arm_lpae_free_pages(start, table_size, &data->iop.cfg, data->iop.cookie);
578 }
579 
580 static void arm_lpae_free_pgtable(struct io_pgtable *iop)
581 {
582 	struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);
583 
584 	__arm_lpae_free_pgtable(data, data->start_level, data->pgd);
585 	kfree(data);
586 }
587 
588 static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
589 				       struct iommu_iotlb_gather *gather,
590 				       unsigned long iova, size_t size,
591 				       arm_lpae_iopte blk_pte, int lvl,
592 				       arm_lpae_iopte *ptep, size_t pgcount)
593 {
594 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
595 	arm_lpae_iopte pte, *tablep;
596 	phys_addr_t blk_paddr;
597 	size_t tablesz = ARM_LPAE_GRANULE(data);
598 	size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
599 	int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
600 	int i, unmap_idx_start = -1, num_entries = 0, max_entries;
601 
602 	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
603 		return 0;
604 
605 	tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg, data->iop.cookie);
606 	if (!tablep)
607 		return 0; /* Bytes unmapped */
608 
609 	if (size == split_sz) {
610 		unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
611 		max_entries = ptes_per_table - unmap_idx_start;
612 		num_entries = min_t(int, pgcount, max_entries);
613 	}
614 
615 	blk_paddr = iopte_to_paddr(blk_pte, data);
616 	pte = iopte_prot(blk_pte);
617 
618 	for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
619 		/* Unmap! */
620 		if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
621 			continue;
622 
623 		__arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
624 	}
625 
626 	pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
627 	if (pte != blk_pte) {
628 		__arm_lpae_free_pages(tablep, tablesz, cfg, data->iop.cookie);
629 		/*
630 		 * We may race against someone unmapping another part of this
631 		 * block, but anything else is invalid. We can't misinterpret
632 		 * a page entry here since we're never at the last level.
633 		 */
634 		if (iopte_type(pte) != ARM_LPAE_PTE_TYPE_TABLE)
635 			return 0;
636 
637 		tablep = iopte_deref(pte, data);
638 	} else if (unmap_idx_start >= 0) {
639 		for (i = 0; i < num_entries; i++)
640 			io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);
641 
642 		return num_entries * size;
643 	}
644 
645 	return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
646 }
647 
648 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
649 			       struct iommu_iotlb_gather *gather,
650 			       unsigned long iova, size_t size, size_t pgcount,
651 			       int lvl, arm_lpae_iopte *ptep)
652 {
653 	arm_lpae_iopte pte;
654 	struct io_pgtable *iop = &data->iop;
655 	int i = 0, num_entries, max_entries, unmap_idx_start;
656 
657 	/* Something went horribly wrong and we ran out of page table */
658 	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
659 		return 0;
660 
661 	unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
662 	ptep += unmap_idx_start;
663 	pte = READ_ONCE(*ptep);
664 	if (WARN_ON(!pte))
665 		return 0;
666 
667 	/* If the size matches this level, we're in the right place */
668 	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
669 		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
670 		num_entries = min_t(int, pgcount, max_entries);
671 
672 		/* Find and handle non-leaf entries */
673 		for (i = 0; i < num_entries; i++) {
674 			pte = READ_ONCE(ptep[i]);
675 			if (WARN_ON(!pte))
676 				break;
677 
678 			if (!iopte_leaf(pte, lvl, iop->fmt)) {
679 				__arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1);
680 
681 				/* Also flush any partial walks */
682 				io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
683 							  ARM_LPAE_GRANULE(data));
684 				__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
685 			}
686 		}
687 
688 		/* Clear the remaining entries */
689 		__arm_lpae_clear_pte(ptep, &iop->cfg, i);
690 
691 		if (gather && !iommu_iotlb_gather_queued(gather))
692 			for (int j = 0; j < i; j++)
693 				io_pgtable_tlb_add_page(iop, gather, iova + j * size, size);
694 
695 		return i * size;
696 	} else if (iopte_leaf(pte, lvl, iop->fmt)) {
697 		/*
698 		 * Insert a table at the next level to map the old region,
699 		 * minus the part we want to unmap
700 		 */
701 		return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
702 						lvl + 1, ptep, pgcount);
703 	}
704 
705 	/* Keep on walkin' */
706 	ptep = iopte_deref(pte, data);
707 	return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep);
708 }
709 
710 static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
711 				   size_t pgsize, size_t pgcount,
712 				   struct iommu_iotlb_gather *gather)
713 {
714 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
715 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
716 	arm_lpae_iopte *ptep = data->pgd;
717 	long iaext = (s64)iova >> cfg->ias;
718 
719 	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
720 		return 0;
721 
722 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
723 		iaext = ~iaext;
724 	if (WARN_ON(iaext))
725 		return 0;
726 
727 	return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount,
728 				data->start_level, ptep);
729 }
730 
731 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
732 					 unsigned long iova)
733 {
734 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
735 	arm_lpae_iopte pte, *ptep = data->pgd;
736 	int lvl = data->start_level;
737 
738 	do {
739 		/* Valid IOPTE pointer? */
740 		if (!ptep)
741 			return 0;
742 
743 		/* Grab the IOPTE we're interested in */
744 		ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
745 		pte = READ_ONCE(*ptep);
746 
747 		/* Valid entry? */
748 		if (!pte)
749 			return 0;
750 
751 		/* Leaf entry? */
752 		if (iopte_leaf(pte, lvl, data->iop.fmt))
753 			goto found_translation;
754 
755 		/* Take it to the next level */
756 		ptep = iopte_deref(pte, data);
757 	} while (++lvl < ARM_LPAE_MAX_LEVELS);
758 
759 	/* Ran out of page tables to walk */
760 	return 0;
761 
762 found_translation:
763 	iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
764 	return iopte_to_paddr(pte, data) | iova;
765 }
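/*
 * e.g. an IOVA covered by a 2M block at level 2 (4K granule) resolves to
 * the block's output address OR'd with IOVA bits [20:0].
 */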
766 
767 struct io_pgtable_walk_data {
768 	struct iommu_dirty_bitmap	*dirty;
769 	unsigned long			flags;
770 	u64				addr;
771 	const u64			end;
772 };
773 
774 static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
775 				       struct io_pgtable_walk_data *walk_data,
776 				       arm_lpae_iopte *ptep,
777 				       int lvl);
778 
779 static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data,
780 				  struct io_pgtable_walk_data *walk_data,
781 				  arm_lpae_iopte *ptep, int lvl)
782 {
783 	struct io_pgtable *iop = &data->iop;
784 	arm_lpae_iopte pte = READ_ONCE(*ptep);
785 
786 	if (iopte_leaf(pte, lvl, iop->fmt)) {
787 		size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data);
788 
789 		if (iopte_writeable_dirty(pte)) {
790 			iommu_dirty_bitmap_record(walk_data->dirty,
791 						  walk_data->addr, size);
792 			if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR))
793 				iopte_set_writeable_clean(ptep);
794 		}
795 		walk_data->addr += size;
796 		return 0;
797 	}
798 
799 	if (WARN_ON(!iopte_table(pte, lvl)))
800 		return -EINVAL;
801 
802 	ptep = iopte_deref(pte, data);
803 	return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1);
804 }
805 
806 static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
807 				       struct io_pgtable_walk_data *walk_data,
808 				       arm_lpae_iopte *ptep,
809 				       int lvl)
810 {
811 	u32 idx;
812 	int max_entries, ret;
813 
814 	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
815 		return -EINVAL;
816 
817 	if (lvl == data->start_level)
818 		max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
819 	else
820 		max_entries = ARM_LPAE_PTES_PER_TABLE(data);
821 
822 	for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data);
823 	     (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) {
824 		ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl);
825 		if (ret)
826 			return ret;
827 	}
828 
829 	return 0;
830 }
831 
832 static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
833 					 unsigned long iova, size_t size,
834 					 unsigned long flags,
835 					 struct iommu_dirty_bitmap *dirty)
836 {
837 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
838 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
839 	struct io_pgtable_walk_data walk_data = {
840 		.dirty = dirty,
841 		.flags = flags,
842 		.addr = iova,
843 		.end = iova + size,
844 	};
845 	arm_lpae_iopte *ptep = data->pgd;
846 	int lvl = data->start_level;
847 
848 	if (WARN_ON(!size))
849 		return -EINVAL;
850 	if (WARN_ON((iova + size - 1) & ~(BIT(cfg->ias) - 1)))
851 		return -EINVAL;
852 	if (data->iop.fmt != ARM_64_LPAE_S1)
853 		return -EINVAL;
854 
855 	return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl);
856 }
857 
858 static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
859 {
860 	unsigned long granule, page_sizes;
861 	unsigned int max_addr_bits = 48;
862 
863 	/*
864 	 * We need to restrict the supported page sizes to match the
865 	 * translation regime for a particular granule. Aim to match
866 	 * the CPU page size if possible, otherwise prefer smaller sizes.
867 	 * While we're at it, restrict the block sizes to match the
868 	 * chosen granule.
869 	 */
870 	if (cfg->pgsize_bitmap & PAGE_SIZE)
871 		granule = PAGE_SIZE;
872 	else if (cfg->pgsize_bitmap & ~PAGE_MASK)
873 		granule = 1UL << __fls(cfg->pgsize_bitmap & ~PAGE_MASK);
874 	else if (cfg->pgsize_bitmap & PAGE_MASK)
875 		granule = 1UL << __ffs(cfg->pgsize_bitmap & PAGE_MASK);
876 	else
877 		granule = 0;
878 
879 	switch (granule) {
880 	case SZ_4K:
881 		page_sizes = (SZ_4K | SZ_2M | SZ_1G);
882 		break;
883 	case SZ_16K:
884 		page_sizes = (SZ_16K | SZ_32M);
885 		break;
886 	case SZ_64K:
887 		max_addr_bits = 52;
888 		page_sizes = (SZ_64K | SZ_512M);
889 		if (cfg->oas > 48)
890 			page_sizes |= 1ULL << 42; /* 4TB */
891 		break;
892 	default:
893 		page_sizes = 0;
894 	}
895 
896 	cfg->pgsize_bitmap &= page_sizes;
897 	cfg->ias = min(cfg->ias, max_addr_bits);
898 	cfg->oas = min(cfg->oas, max_addr_bits);
899 }
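/*
 * For instance, a caller advertising 4K | 64K | 2M | 1G alongside a 4K
 * CPU PAGE_SIZE is trimmed to 4K | 2M | 1G here, and ias/oas stay capped
 * at 48 bits (52 is only allowed once the 64K granule is chosen).
 */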
900 
901 static struct arm_lpae_io_pgtable *
902 arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
903 {
904 	struct arm_lpae_io_pgtable *data;
905 	int levels, va_bits, pg_shift;
906 
907 	arm_lpae_restrict_pgsizes(cfg);
908 
909 	if (!(cfg->pgsize_bitmap & (SZ_4K | SZ_16K | SZ_64K)))
910 		return NULL;
911 
912 	if (cfg->ias > ARM_LPAE_MAX_ADDR_BITS)
913 		return NULL;
914 
915 	if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS)
916 		return NULL;
917 
918 	data = kmalloc(sizeof(*data), GFP_KERNEL);
919 	if (!data)
920 		return NULL;
921 
922 	pg_shift = __ffs(cfg->pgsize_bitmap);
923 	data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte));
924 
925 	va_bits = cfg->ias - pg_shift;
926 	levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
927 	data->start_level = ARM_LPAE_MAX_LEVELS - levels;
928 
929 	/* Calculate the actual size of our pgd (without concatenation) */
930 	data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1));
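	/*
	 * e.g. ias = 48 with 4K pages: pg_shift = 12, bits_per_level = 9,
	 * va_bits = 36, hence levels = 4, start_level = 0 and pgd_bits = 9
	 * (a single 512-entry, 4K top-level table).
	 */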
931 
932 	data->iop.ops = (struct io_pgtable_ops) {
933 		.map_pages	= arm_lpae_map_pages,
934 		.unmap_pages	= arm_lpae_unmap_pages,
935 		.iova_to_phys	= arm_lpae_iova_to_phys,
936 		.read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
937 	};
938 
939 	return data;
940 }
941 
942 static struct io_pgtable *
943 arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
944 {
945 	u64 reg;
946 	struct arm_lpae_io_pgtable *data;
947 	typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;
948 	bool tg1;
949 
950 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
951 			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
952 			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
953 			    IO_PGTABLE_QUIRK_ARM_HD))
954 		return NULL;
955 
956 	data = arm_lpae_alloc_pgtable(cfg);
957 	if (!data)
958 		return NULL;
959 
960 	/* TCR */
961 	if (cfg->coherent_walk) {
962 		tcr->sh = ARM_LPAE_TCR_SH_IS;
963 		tcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
964 		tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
965 		if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)
966 			goto out_free_data;
967 	} else {
968 		tcr->sh = ARM_LPAE_TCR_SH_OS;
969 		tcr->irgn = ARM_LPAE_TCR_RGN_NC;
970 		if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
971 			tcr->orgn = ARM_LPAE_TCR_RGN_NC;
972 		else
973 			tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
974 	}
975 
976 	tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1;
977 	switch (ARM_LPAE_GRANULE(data)) {
978 	case SZ_4K:
979 		tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K;
980 		break;
981 	case SZ_16K:
982 		tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K;
983 		break;
984 	case SZ_64K:
985 		tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K;
986 		break;
987 	}
988 
989 	switch (cfg->oas) {
990 	case 32:
991 		tcr->ips = ARM_LPAE_TCR_PS_32_BIT;
992 		break;
993 	case 36:
994 		tcr->ips = ARM_LPAE_TCR_PS_36_BIT;
995 		break;
996 	case 40:
997 		tcr->ips = ARM_LPAE_TCR_PS_40_BIT;
998 		break;
999 	case 42:
1000 		tcr->ips = ARM_LPAE_TCR_PS_42_BIT;
1001 		break;
1002 	case 44:
1003 		tcr->ips = ARM_LPAE_TCR_PS_44_BIT;
1004 		break;
1005 	case 48:
1006 		tcr->ips = ARM_LPAE_TCR_PS_48_BIT;
1007 		break;
1008 	case 52:
1009 		tcr->ips = ARM_LPAE_TCR_PS_52_BIT;
1010 		break;
1011 	default:
1012 		goto out_free_data;
1013 	}
1014 
1015 	tcr->tsz = 64ULL - cfg->ias;
1016 
1017 	/* MAIRs */
1018 	reg = (ARM_LPAE_MAIR_ATTR_NC
1019 	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
1020 	      (ARM_LPAE_MAIR_ATTR_WBRWA
1021 	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
1022 	      (ARM_LPAE_MAIR_ATTR_DEVICE
1023 	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
1024 	      (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
1025 	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));
1026 
1027 	cfg->arm_lpae_s1_cfg.mair = reg;
1028 
1029 	/* Looking good; allocate a pgd */
1030 	data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
1031 					   GFP_KERNEL, cfg, cookie);
1032 	if (!data->pgd)
1033 		goto out_free_data;
1034 
1035 	/* Ensure the empty pgd is visible before any actual TTBR write */
1036 	wmb();
1037 
1038 	/* TTBR */
1039 	cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd);
1040 	return &data->iop;
1041 
1042 out_free_data:
1043 	kfree(data);
1044 	return NULL;
1045 }
1046 
1047 static struct io_pgtable *
1048 arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
1049 {
1050 	u64 sl;
1051 	struct arm_lpae_io_pgtable *data;
1052 	typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;
1053 
1054 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_S2FWB))
1055 		return NULL;
1056 
1057 	data = arm_lpae_alloc_pgtable(cfg);
1058 	if (!data)
1059 		return NULL;
1060 
1061 	/*
1062 	 * Concatenate PGDs at level 1 if possible in order to reduce
1063 	 * the depth of the stage-2 walk.
1064 	 */
1065 	if (data->start_level == 0) {
1066 		unsigned long pgd_pages;
1067 
1068 		pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
1069 		if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) {
1070 			data->pgd_bits += data->bits_per_level;
1071 			data->start_level++;
1072 		}
1073 	}
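	/*
	 * e.g. ias = 40 with 4K pages initially gives start_level = 0 and
	 * pgd_bits = 1; those two level-0 entries fit well within the 16
	 * concatenation slots, so the walk instead starts at level 1 with a
	 * 2-page (8KB, 1024-entry) concatenated PGD.
	 */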
1074 
1075 	/* VTCR */
1076 	if (cfg->coherent_walk) {
1077 		vtcr->sh = ARM_LPAE_TCR_SH_IS;
1078 		vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
1079 		vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
1080 	} else {
1081 		vtcr->sh = ARM_LPAE_TCR_SH_OS;
1082 		vtcr->irgn = ARM_LPAE_TCR_RGN_NC;
1083 		vtcr->orgn = ARM_LPAE_TCR_RGN_NC;
1084 	}
1085 
1086 	sl = data->start_level;
1087 
1088 	switch (ARM_LPAE_GRANULE(data)) {
1089 	case SZ_4K:
1090 		vtcr->tg = ARM_LPAE_TCR_TG0_4K;
1091 		sl++; /* SL0 format is different for 4K granule size */
1092 		break;
1093 	case SZ_16K:
1094 		vtcr->tg = ARM_LPAE_TCR_TG0_16K;
1095 		break;
1096 	case SZ_64K:
1097 		vtcr->tg = ARM_LPAE_TCR_TG0_64K;
1098 		break;
1099 	}
1100 
1101 	switch (cfg->oas) {
1102 	case 32:
1103 		vtcr->ps = ARM_LPAE_TCR_PS_32_BIT;
1104 		break;
1105 	case 36:
1106 		vtcr->ps = ARM_LPAE_TCR_PS_36_BIT;
1107 		break;
1108 	case 40:
1109 		vtcr->ps = ARM_LPAE_TCR_PS_40_BIT;
1110 		break;
1111 	case 42:
1112 		vtcr->ps = ARM_LPAE_TCR_PS_42_BIT;
1113 		break;
1114 	case 44:
1115 		vtcr->ps = ARM_LPAE_TCR_PS_44_BIT;
1116 		break;
1117 	case 48:
1118 		vtcr->ps = ARM_LPAE_TCR_PS_48_BIT;
1119 		break;
1120 	case 52:
1121 		vtcr->ps = ARM_LPAE_TCR_PS_52_BIT;
1122 		break;
1123 	default:
1124 		goto out_free_data;
1125 	}
1126 
1127 	vtcr->tsz = 64ULL - cfg->ias;
1128 	vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK;
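	/*
	 * SL0 counts from the other end of the walk, hence the inversion:
	 * e.g. a 4K granule starting at level 1 gives sl = 2 and
	 * SL0 = ~2 & 3 = 1, the architectural "start at level 1" encoding.
	 */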
1129 
1130 	/* Allocate pgd pages */
1131 	data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
1132 					   GFP_KERNEL, cfg, cookie);
1133 	if (!data->pgd)
1134 		goto out_free_data;
1135 
1136 	/* Ensure the empty pgd is visible before any actual TTBR write */
1137 	wmb();
1138 
1139 	/* VTTBR */
1140 	cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
1141 	return &data->iop;
1142 
1143 out_free_data:
1144 	kfree(data);
1145 	return NULL;
1146 }
1147 
1148 static struct io_pgtable *
1149 arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
1150 {
1151 	if (cfg->ias > 32 || cfg->oas > 40)
1152 		return NULL;
1153 
1154 	cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
1155 	return arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
1156 }
1157 
1158 static struct io_pgtable *
1159 arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
1160 {
1161 	if (cfg->ias > 40 || cfg->oas > 40)
1162 		return NULL;
1163 
1164 	cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
1165 	return arm_64_lpae_alloc_pgtable_s2(cfg, cookie);
1166 }
1167 
1168 static struct io_pgtable *
1169 arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
1170 {
1171 	struct arm_lpae_io_pgtable *data;
1172 
1173 	/* No quirks for Mali (hopefully) */
1174 	if (cfg->quirks)
1175 		return NULL;
1176 
1177 	if (cfg->ias > 48 || cfg->oas > 40)
1178 		return NULL;
1179 
1180 	cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
1181 
1182 	data = arm_lpae_alloc_pgtable(cfg);
1183 	if (!data)
1184 		return NULL;
1185 
1186 	/* Mali seems to need a full 4-level table regardless of IAS */
1187 	if (data->start_level > 0) {
1188 		data->start_level = 0;
1189 		data->pgd_bits = 0;
1190 	}
1191 	/*
1192 	 * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
1193 	 * best we can do is mimic the out-of-tree driver and hope that the
1194 	 * "implementation-defined caching policy" is good enough. Similarly,
1195 	 * we'll use it for the sake of a valid attribute for our 'device'
1196 	 * index, although callers should never request that in practice.
1197 	 */
1198 	cfg->arm_mali_lpae_cfg.memattr =
1199 		(ARM_MALI_LPAE_MEMATTR_IMP_DEF
1200 		 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
1201 		(ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
1202 		 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
1203 		(ARM_MALI_LPAE_MEMATTR_IMP_DEF
1204 		 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
1205 
1206 	data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
1207 					   cfg, cookie);
1208 	if (!data->pgd)
1209 		goto out_free_data;
1210 
1211 	/* Ensure the empty pgd is visible before TRANSTAB can be written */
1212 	wmb();
1213 
1214 	cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
1215 					  ARM_MALI_LPAE_TTBR_READ_INNER |
1216 					  ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
1217 	if (cfg->coherent_walk)
1218 		cfg->arm_mali_lpae_cfg.transtab |= ARM_MALI_LPAE_TTBR_SHARE_OUTER;
1219 
1220 	return &data->iop;
1221 
1222 out_free_data:
1223 	kfree(data);
1224 	return NULL;
1225 }
1226 
1227 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
1228 	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
1229 	.alloc	= arm_64_lpae_alloc_pgtable_s1,
1230 	.free	= arm_lpae_free_pgtable,
1231 };
1232 
1233 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
1234 	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
1235 	.alloc	= arm_64_lpae_alloc_pgtable_s2,
1236 	.free	= arm_lpae_free_pgtable,
1237 };
1238 
1239 struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = {
1240 	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
1241 	.alloc	= arm_32_lpae_alloc_pgtable_s1,
1242 	.free	= arm_lpae_free_pgtable,
1243 };
1244 
1245 struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = {
1246 	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
1247 	.alloc	= arm_32_lpae_alloc_pgtable_s2,
1248 	.free	= arm_lpae_free_pgtable,
1249 };
1250 
1251 struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
1252 	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
1253 	.alloc	= arm_mali_lpae_alloc_pgtable,
1254 	.free	= arm_lpae_free_pgtable,
1255 };
1256 
1257 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
1258 
1259 static struct io_pgtable_cfg *cfg_cookie __initdata;
1260 
1261 static void __init dummy_tlb_flush_all(void *cookie)
1262 {
1263 	WARN_ON(cookie != cfg_cookie);
1264 }
1265 
1266 static void __init dummy_tlb_flush(unsigned long iova, size_t size,
1267 				   size_t granule, void *cookie)
1268 {
1269 	WARN_ON(cookie != cfg_cookie);
1270 	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
1271 }
1272 
1273 static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
1274 				      unsigned long iova, size_t granule,
1275 				      void *cookie)
1276 {
1277 	dummy_tlb_flush(iova, granule, granule, cookie);
1278 }
1279 
1280 static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
1281 	.tlb_flush_all	= dummy_tlb_flush_all,
1282 	.tlb_flush_walk	= dummy_tlb_flush,
1283 	.tlb_add_page	= dummy_tlb_add_page,
1284 };
1285 
1286 static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
1287 {
1288 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
1289 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
1290 
1291 	pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n",
1292 		cfg->pgsize_bitmap, cfg->ias);
1293 	pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n",
1294 		ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data),
1295 		ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd);
1296 }
1297 
1298 #define __FAIL(ops, i)	({						\
1299 		WARN(1, "selftest: test failed for fmt idx %d\n", (i));	\
1300 		arm_lpae_dump_ops(ops);					\
1301 		selftest_running = false;				\
1302 		-EFAULT;						\
1303 })
1304 
1305 static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
1306 {
1307 	static const enum io_pgtable_fmt fmts[] __initconst = {
1308 		ARM_64_LPAE_S1,
1309 		ARM_64_LPAE_S2,
1310 	};
1311 
1312 	int i, j;
1313 	unsigned long iova;
1314 	size_t size, mapped;
1315 	struct io_pgtable_ops *ops;
1316 
1317 	selftest_running = true;
1318 
1319 	for (i = 0; i < ARRAY_SIZE(fmts); ++i) {
1320 		cfg_cookie = cfg;
1321 		ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg);
1322 		if (!ops) {
1323 			pr_err("selftest: failed to allocate io pgtable ops\n");
1324 			return -ENOMEM;
1325 		}
1326 
1327 		/*
1328 		 * Initial sanity checks.
1329 		 * Empty page tables shouldn't provide any translations.
1330 		 */
1331 		if (ops->iova_to_phys(ops, 42))
1332 			return __FAIL(ops, i);
1333 
1334 		if (ops->iova_to_phys(ops, SZ_1G + 42))
1335 			return __FAIL(ops, i);
1336 
1337 		if (ops->iova_to_phys(ops, SZ_2G + 42))
1338 			return __FAIL(ops, i);
1339 
1340 		/*
1341 		 * Distinct mappings of different granule sizes.
1342 		 */
1343 		iova = 0;
1344 		for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
1345 			size = 1UL << j;
1346 
1347 			if (ops->map_pages(ops, iova, iova, size, 1,
1348 					   IOMMU_READ | IOMMU_WRITE |
1349 					   IOMMU_NOEXEC | IOMMU_CACHE,
1350 					   GFP_KERNEL, &mapped))
1351 				return __FAIL(ops, i);
1352 
1353 			/* Overlapping mappings */
1354 			if (!ops->map_pages(ops, iova, iova + size, size, 1,
1355 					    IOMMU_READ | IOMMU_NOEXEC,
1356 					    GFP_KERNEL, &mapped))
1357 				return __FAIL(ops, i);
1358 
1359 			if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
1360 				return __FAIL(ops, i);
1361 
1362 			iova += SZ_1G;
1363 		}
1364 
1365 		/* Partial unmap */
1366 		size = 1UL << __ffs(cfg->pgsize_bitmap);
1367 		if (ops->unmap_pages(ops, SZ_1G + size, size, 1, NULL) != size)
1368 			return __FAIL(ops, i);
1369 
1370 		/* Remap of partial unmap */
1371 		if (ops->map_pages(ops, SZ_1G + size, size, size, 1,
1372 				   IOMMU_READ, GFP_KERNEL, &mapped))
1373 			return __FAIL(ops, i);
1374 
1375 		if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42))
1376 			return __FAIL(ops, i);
1377 
1378 		/* Full unmap */
1379 		iova = 0;
1380 		for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
1381 			size = 1UL << j;
1382 
1383 			if (ops->unmap_pages(ops, iova, size, 1, NULL) != size)
1384 				return __FAIL(ops, i);
1385 
1386 			if (ops->iova_to_phys(ops, iova + 42))
1387 				return __FAIL(ops, i);
1388 
1389 			/* Remap full block */
1390 			if (ops->map_pages(ops, iova, iova, size, 1,
1391 					   IOMMU_WRITE, GFP_KERNEL, &mapped))
1392 				return __FAIL(ops, i);
1393 
1394 			if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
1395 				return __FAIL(ops, i);
1396 
1397 			iova += SZ_1G;
1398 		}
1399 
1400 		free_io_pgtable_ops(ops);
1401 	}
1402 
1403 	selftest_running = false;
1404 	return 0;
1405 }
1406 
1407 static int __init arm_lpae_do_selftests(void)
1408 {
1409 	static const unsigned long pgsize[] __initconst = {
1410 		SZ_4K | SZ_2M | SZ_1G,
1411 		SZ_16K | SZ_32M,
1412 		SZ_64K | SZ_512M,
1413 	};
1414 
1415 	static const unsigned int ias[] __initconst = {
1416 		32, 36, 40, 42, 44, 48,
1417 	};
1418 
1419 	int i, j, pass = 0, fail = 0;
1420 	struct device dev;
1421 	struct io_pgtable_cfg cfg = {
1422 		.tlb = &dummy_tlb_ops,
1423 		.oas = 48,
1424 		.coherent_walk = true,
1425 		.iommu_dev = &dev,
1426 	};
1427 
1428 	/* __arm_lpae_alloc_pages() merely needs dev_to_node() to work */
1429 	set_dev_node(&dev, NUMA_NO_NODE);
1430 
1431 	for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
1432 		for (j = 0; j < ARRAY_SIZE(ias); ++j) {
1433 			cfg.pgsize_bitmap = pgsize[i];
1434 			cfg.ias = ias[j];
1435 			pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n",
1436 				pgsize[i], ias[j]);
1437 			if (arm_lpae_run_tests(&cfg))
1438 				fail++;
1439 			else
1440 				pass++;
1441 		}
1442 	}
1443 
1444 	pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
1445 	return fail ? -EFAULT : 0;
1446 }
1447 subsys_initcall(arm_lpae_do_selftests);
1448 #endif
1449