1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * CPU-agnostic ARM page table allocator.
4 *
5 * Copyright (C) 2014 ARM Limited
6 *
7 * Author: Will Deacon <will.deacon@arm.com>
8 */
9
10 #define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt
11
12 #include <linux/atomic.h>
13 #include <linux/bitops.h>
14 #include <linux/io-pgtable.h>
15 #include <linux/kernel.h>
16 #include <linux/sizes.h>
17 #include <linux/slab.h>
18 #include <linux/types.h>
19 #include <linux/dma-mapping.h>
20
21 #include <asm/barrier.h>
22
23 #include "io-pgtable-arm.h"
24 #include "iommu-pages.h"
25
26 #define ARM_LPAE_MAX_ADDR_BITS 52
27 #define ARM_LPAE_S2_MAX_CONCAT_PAGES 16
28 #define ARM_LPAE_MAX_LEVELS 4
29
30 /* Struct accessors */
31 #define io_pgtable_to_data(x) \
32 container_of((x), struct arm_lpae_io_pgtable, iop)
33
34 #define io_pgtable_ops_to_data(x) \
35 io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
36
37 /*
38 * Calculate the right shift amount to get to the portion describing level l
39 * in a virtual address mapped by the pagetable in d.
40 */
41 #define ARM_LPAE_LVL_SHIFT(l,d) \
42 (((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) + \
43 ilog2(sizeof(arm_lpae_iopte)))
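/*
 * Worked example (added for illustration, not in the original source): with
 * 8-byte PTEs and a 4K granule, bits_per_level = 9, so
 * ARM_LPAE_LVL_SHIFT(3,d) = 12, (2,d) = 21, (1,d) = 30 and (0,d) = 39,
 * i.e. the usual 4K/2M/1G/512G AArch64 translation boundaries.
 */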
44
45 #define ARM_LPAE_GRANULE(d) \
46 (sizeof(arm_lpae_iopte) << (d)->bits_per_level)
47 #define ARM_LPAE_PGD_SIZE(d) \
48 (sizeof(arm_lpae_iopte) << (d)->pgd_bits)
49
50 #define ARM_LPAE_PTES_PER_TABLE(d) \
51 (ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte)))
52
53 /*
54 * Calculate the index at level l used to map virtual address a using the
55 * pagetable in d.
56 */
57 #define ARM_LPAE_PGD_IDX(l,d) \
58 ((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0)
59
60 #define ARM_LPAE_LVL_IDX(a,l,d) \
61 (((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) & \
62 ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1))
63
64 /* Calculate the block/page mapping size at level l for pagetable in d. */
65 #define ARM_LPAE_BLOCK_SIZE(l,d) (1ULL << ARM_LPAE_LVL_SHIFT(l,d))
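/*
 * Illustrative values (added commentary), assuming a 4K granule
 * (bits_per_level = 9) at a non-start level: for iova = 0x12345678,
 *	ARM_LPAE_LVL_IDX(iova, 3, d) = (iova >> 12) & 0x1ff = 0x145
 *	ARM_LPAE_LVL_IDX(iova, 2, d) = (iova >> 21) & 0x1ff = 0x091
 * and ARM_LPAE_BLOCK_SIZE() is 4K at level 3, 2M at level 2, 1G at level 1.
 */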
66
67 /* Page table bits */
68 #define ARM_LPAE_PTE_TYPE_SHIFT 0
69 #define ARM_LPAE_PTE_TYPE_MASK 0x3
70
71 #define ARM_LPAE_PTE_TYPE_BLOCK 1
72 #define ARM_LPAE_PTE_TYPE_TABLE 3
73 #define ARM_LPAE_PTE_TYPE_PAGE 3
74
75 #define ARM_LPAE_PTE_ADDR_MASK GENMASK_ULL(47,12)
76
77 #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63)
78 #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53)
79 #define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51)
80 #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10)
81 #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8)
82 #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8)
83 #define ARM_LPAE_PTE_SH_IS (((arm_lpae_iopte)3) << 8)
84 #define ARM_LPAE_PTE_NS (((arm_lpae_iopte)1) << 5)
85 #define ARM_LPAE_PTE_VALID (((arm_lpae_iopte)1) << 0)
86
87 #define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2)
88 /* Ignore the contiguous bit for block splitting */
89 #define ARM_LPAE_PTE_ATTR_HI_MASK (ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM)
90 #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \
91 ARM_LPAE_PTE_ATTR_HI_MASK)
92 /* Software bit for solving coherency races */
93 #define ARM_LPAE_PTE_SW_SYNC (((arm_lpae_iopte)1) << 55)
94
95 /* Stage-1 PTE */
96 #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6)
97 #define ARM_LPAE_PTE_AP_RDONLY_BIT 7
98 #define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)1) << \
99 ARM_LPAE_PTE_AP_RDONLY_BIT)
100 #define ARM_LPAE_PTE_AP_WR_CLEAN_MASK (ARM_LPAE_PTE_AP_RDONLY | \
101 ARM_LPAE_PTE_DBM)
102 #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2
103 #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11)
104
105 /* Stage-2 PTE */
106 #define ARM_LPAE_PTE_HAP_FAULT (((arm_lpae_iopte)0) << 6)
107 #define ARM_LPAE_PTE_HAP_READ (((arm_lpae_iopte)1) << 6)
108 #define ARM_LPAE_PTE_HAP_WRITE (((arm_lpae_iopte)2) << 6)
109 #define ARM_LPAE_PTE_MEMATTR_OIWB (((arm_lpae_iopte)0xf) << 2)
110 #define ARM_LPAE_PTE_MEMATTR_NC (((arm_lpae_iopte)0x5) << 2)
111 #define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2)
112
113 /* Register bits */
114 #define ARM_LPAE_VTCR_SL0_MASK 0x3
115
116 #define ARM_LPAE_TCR_T0SZ_SHIFT 0
117
118 #define ARM_LPAE_VTCR_PS_SHIFT 16
119 #define ARM_LPAE_VTCR_PS_MASK 0x7
120
121 #define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3)
122 #define ARM_LPAE_MAIR_ATTR_MASK 0xff
123 #define ARM_LPAE_MAIR_ATTR_DEVICE 0x04
124 #define ARM_LPAE_MAIR_ATTR_NC 0x44
125 #define ARM_LPAE_MAIR_ATTR_INC_OWBRWA 0xf4
126 #define ARM_LPAE_MAIR_ATTR_WBRWA 0xff
127 #define ARM_LPAE_MAIR_ATTR_IDX_NC 0
128 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE 1
129 #define ARM_LPAE_MAIR_ATTR_IDX_DEV 2
130 #define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE 3
131
132 #define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
133 #define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2)
134 #define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4)
135
136 #define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL
137 #define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
138
139 /* IOPTE accessors */
140 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
141
142 #define iopte_type(pte) \
143 (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)
144
145 #define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK)
146
147 #define iopte_writeable_dirty(pte) \
148 (((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM)
149
150 #define iopte_set_writeable_clean(ptep) \
151 set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)(ptep))
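/*
 * Note on the two macros above (added commentary, not in the original file):
 * with hardware dirty-bit management, a stage-1 PTE with DBM set uses AP[2]
 * (the RDONLY bit) as its dirty state - hardware clears it on the first
 * write, so "DBM set and RDONLY clear" reads as writeable-dirty, and setting
 * RDONLY again marks the entry writeable-clean for the next tracking pass.
 */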
152
153 struct arm_lpae_io_pgtable {
154 struct io_pgtable iop;
155
156 int pgd_bits;
157 int start_level;
158 int bits_per_level;
159
160 void *pgd;
161 };
162
163 typedef u64 arm_lpae_iopte;
164
165 static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
166 enum io_pgtable_fmt fmt)
167 {
168 if (lvl == (ARM_LPAE_MAX_LEVELS - 1) && fmt != ARM_MALI_LPAE)
169 return iopte_type(pte) == ARM_LPAE_PTE_TYPE_PAGE;
170
171 return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK;
172 }
173
174 static inline bool iopte_table(arm_lpae_iopte pte, int lvl)
175 {
176 if (lvl == (ARM_LPAE_MAX_LEVELS - 1))
177 return false;
178 return iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE;
179 }
180
181 static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
182 struct arm_lpae_io_pgtable *data)
183 {
184 arm_lpae_iopte pte = paddr;
185
186 /* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
187 return (pte | (pte >> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK;
188 }
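/*
 * Example of the packing above (illustrative, assuming a 64K granule with
 * 52-bit output addresses): a 64K-aligned paddr has bits 15:12 clear, so
 * paddr bits 51:48 can be parked in pte bits 15:12, e.g. paddr
 * 0x000f_0000_0001_0000 yields an iopte address field of 0x0000_0000_0001_f000.
 */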
189
190 static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
191 struct arm_lpae_io_pgtable *data)
192 {
193 u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;
194
195 if (ARM_LPAE_GRANULE(data) < SZ_64K)
196 return paddr;
197
198 /* Rotate the packed high-order bits back to the top */
199 return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
200 }
201
202 static bool selftest_running = false;
203
204 static dma_addr_t __arm_lpae_dma_addr(void *pages)
205 {
206 return (dma_addr_t)virt_to_phys(pages);
207 }
208
209 static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
210 struct io_pgtable_cfg *cfg,
211 void *cookie)
212 {
213 struct device *dev = cfg->iommu_dev;
214 int order = get_order(size);
215 dma_addr_t dma;
216 void *pages;
217
218 VM_BUG_ON((gfp & __GFP_HIGHMEM));
219
220 if (cfg->alloc)
221 pages = cfg->alloc(cookie, size, gfp);
222 else
223 pages = iommu_alloc_pages_node(dev_to_node(dev), gfp, order);
224
225 if (!pages)
226 return NULL;
227
228 if (!cfg->coherent_walk) {
229 dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
230 if (dma_mapping_error(dev, dma))
231 goto out_free;
232 /*
233 * We depend on the IOMMU being able to work with any physical
234 * address directly, so if the DMA layer suggests otherwise by
235 * translating or truncating them, that bodes very badly...
236 */
237 if (dma != virt_to_phys(pages))
238 goto out_unmap;
239 }
240
241 return pages;
242
243 out_unmap:
244 dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
245 dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
246
247 out_free:
248 if (cfg->free)
249 cfg->free(cookie, pages, size);
250 else
251 iommu_free_pages(pages, order);
252
253 return NULL;
254 }
255
256 static void __arm_lpae_free_pages(void *pages, size_t size,
257 struct io_pgtable_cfg *cfg,
258 void *cookie)
259 {
260 if (!cfg->coherent_walk)
261 dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
262 size, DMA_TO_DEVICE);
263
264 if (cfg->free)
265 cfg->free(cookie, pages, size);
266 else
267 iommu_free_pages(pages, get_order(size));
268 }
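/*
 * The cfg->alloc/cfg->free hooks used above let callers advertising
 * IO_PGTABLE_CAP_CUSTOM_ALLOCATOR supply their own table memory. A minimal
 * sketch of such a pair, matching the call sites in this file (hypothetical,
 * not part of this driver):
 *
 *	static void *my_pgtable_alloc(void *cookie, size_t size, gfp_t gfp)
 *	{
 *		return (void *)__get_free_pages(gfp | __GFP_ZERO,
 *						get_order(size));
 *	}
 *
 *	static void my_pgtable_free(void *cookie, void *pages, size_t size)
 *	{
 *		free_pages((unsigned long)pages, get_order(size));
 *	}
 */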
269
270 static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
271 struct io_pgtable_cfg *cfg)
272 {
273 dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
274 sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
275 }
276
277 static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries)
278 {
279 for (int i = 0; i < num_entries; i++)
280 ptep[i] = 0;
281
282 if (!cfg->coherent_walk && num_entries)
283 __arm_lpae_sync_pte(ptep, num_entries, cfg);
284 }
285
286 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
287 struct iommu_iotlb_gather *gather,
288 unsigned long iova, size_t size, size_t pgcount,
289 int lvl, arm_lpae_iopte *ptep);
290
291 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
292 phys_addr_t paddr, arm_lpae_iopte prot,
293 int lvl, int num_entries, arm_lpae_iopte *ptep)
294 {
295 arm_lpae_iopte pte = prot;
296 struct io_pgtable_cfg *cfg = &data->iop.cfg;
297 size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
298 int i;
299
300 if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
301 pte |= ARM_LPAE_PTE_TYPE_PAGE;
302 else
303 pte |= ARM_LPAE_PTE_TYPE_BLOCK;
304
305 for (i = 0; i < num_entries; i++)
306 ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);
307
308 if (!cfg->coherent_walk)
309 __arm_lpae_sync_pte(ptep, num_entries, cfg);
310 }
311
312 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
313 unsigned long iova, phys_addr_t paddr,
314 arm_lpae_iopte prot, int lvl, int num_entries,
315 arm_lpae_iopte *ptep)
316 {
317 int i;
318
319 for (i = 0; i < num_entries; i++)
320 if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
321 /* We require an unmap first */
322 WARN_ON(!selftest_running);
323 return -EEXIST;
324 } else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
325 /*
326 * We need to unmap and free the old table before
327 * overwriting it with a block entry.
328 */
329 arm_lpae_iopte *tblp;
330 size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
331
332 tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
333 if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
334 lvl, tblp) != sz) {
335 WARN_ON(1);
336 return -EINVAL;
337 }
338 }
339
340 __arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
341 return 0;
342 }
343
344 static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
345 arm_lpae_iopte *ptep,
346 arm_lpae_iopte curr,
347 struct arm_lpae_io_pgtable *data)
348 {
349 arm_lpae_iopte old, new;
350 struct io_pgtable_cfg *cfg = &data->iop.cfg;
351
352 new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE;
353 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
354 new |= ARM_LPAE_PTE_NSTABLE;
355
356 /*
357 * Ensure the table itself is visible before its PTE can be.
358 * Whilst we could get away with cmpxchg64_release below, this
359 * doesn't have any ordering semantics when !CONFIG_SMP.
360 */
361 dma_wmb();
362
363 old = cmpxchg64_relaxed(ptep, curr, new);
364
365 if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC))
366 return old;
367
368 /* Even if it's not ours, there's no point waiting; just kick it */
369 __arm_lpae_sync_pte(ptep, 1, cfg);
370 if (old == curr)
371 WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);
372
373 return old;
374 }
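/*
 * Added note: on a non-coherent walk, the winning PTE is only guaranteed to
 * be visible to the device once ARM_LPAE_PTE_SW_SYNC is set, which happens
 * above after the explicit sync; __arm_lpae_map() re-syncs any table entry
 * it finds with that bit still clear.
 */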
375
376 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
377 phys_addr_t paddr, size_t size, size_t pgcount,
378 arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
379 gfp_t gfp, size_t *mapped)
380 {
381 arm_lpae_iopte *cptep, pte;
382 size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
383 size_t tblsz = ARM_LPAE_GRANULE(data);
384 struct io_pgtable_cfg *cfg = &data->iop.cfg;
385 int ret = 0, num_entries, max_entries, map_idx_start;
386
387 /* Find our entry at the current level */
388 map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
389 ptep += map_idx_start;
390
391 /* If we can install a leaf entry at this level, then do so */
392 if (size == block_size) {
393 max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
394 num_entries = min_t(int, pgcount, max_entries);
395 ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
396 if (!ret)
397 *mapped += num_entries * size;
398
399 return ret;
400 }
401
402 /* We can't allocate tables at the final level */
403 if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
404 return -EINVAL;
405
406 /* Grab a pointer to the next level */
407 pte = READ_ONCE(*ptep);
408 if (!pte) {
409 cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg, data->iop.cookie);
410 if (!cptep)
411 return -ENOMEM;
412
413 pte = arm_lpae_install_table(cptep, ptep, 0, data);
414 if (pte)
415 __arm_lpae_free_pages(cptep, tblsz, cfg, data->iop.cookie);
416 } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
417 __arm_lpae_sync_pte(ptep, 1, cfg);
418 }
419
420 if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
421 cptep = iopte_deref(pte, data);
422 } else if (pte) {
423 /* We require an unmap first */
424 WARN_ON(!selftest_running);
425 return -EEXIST;
426 }
427
428 /* Rinse, repeat */
429 return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
430 cptep, gfp, mapped);
431 }
432
433 static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
434 int prot)
435 {
436 arm_lpae_iopte pte;
437
438 if (data->iop.fmt == ARM_64_LPAE_S1 ||
439 data->iop.fmt == ARM_32_LPAE_S1) {
440 pte = ARM_LPAE_PTE_nG;
441 if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
442 pte |= ARM_LPAE_PTE_AP_RDONLY;
443 else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
444 pte |= ARM_LPAE_PTE_DBM;
445 if (!(prot & IOMMU_PRIV))
446 pte |= ARM_LPAE_PTE_AP_UNPRIV;
447 } else {
448 pte = ARM_LPAE_PTE_HAP_FAULT;
449 if (prot & IOMMU_READ)
450 pte |= ARM_LPAE_PTE_HAP_READ;
451 if (prot & IOMMU_WRITE)
452 pte |= ARM_LPAE_PTE_HAP_WRITE;
453 }
454
455 /*
456 * Note that this logic is structured to accommodate Mali LPAE
457 * having stage-1-like attributes but stage-2-like permissions.
458 */
459 if (data->iop.fmt == ARM_64_LPAE_S2 ||
460 data->iop.fmt == ARM_32_LPAE_S2) {
461 if (prot & IOMMU_MMIO)
462 pte |= ARM_LPAE_PTE_MEMATTR_DEV;
463 else if (prot & IOMMU_CACHE)
464 pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
465 else
466 pte |= ARM_LPAE_PTE_MEMATTR_NC;
467 } else {
468 if (prot & IOMMU_MMIO)
469 pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
470 << ARM_LPAE_PTE_ATTRINDX_SHIFT);
471 else if (prot & IOMMU_CACHE)
472 pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
473 << ARM_LPAE_PTE_ATTRINDX_SHIFT);
474 }
475
476 /*
477 * Also Mali has its own notions of shareability wherein its Inner
478 * domain covers the cores within the GPU, and its Outer domain is
479 * "outside the GPU" (i.e. either the Inner or System domain in CPU
480 * terms, depending on coherency).
481 */
482 if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE)
483 pte |= ARM_LPAE_PTE_SH_IS;
484 else
485 pte |= ARM_LPAE_PTE_SH_OS;
486
487 if (prot & IOMMU_NOEXEC)
488 pte |= ARM_LPAE_PTE_XN;
489
490 if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
491 pte |= ARM_LPAE_PTE_NS;
492
493 if (data->iop.fmt != ARM_MALI_LPAE)
494 pte |= ARM_LPAE_PTE_AF;
495
496 return pte;
497 }
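/*
 * Illustration (added, not in the original source): for a stage-1 format
 * with no quirks, IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE yields
 * ARM_LPAE_PTE_nG | ARM_LPAE_PTE_AP_UNPRIV | ARM_LPAE_PTE_AF |
 * ARM_LPAE_PTE_SH_IS with MemAttr index ARM_LPAE_MAIR_ATTR_IDX_CACHE,
 * i.e. a non-global, unprivileged, inner-shareable, write-back entry.
 */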
498
499 static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
500 phys_addr_t paddr, size_t pgsize, size_t pgcount,
501 int iommu_prot, gfp_t gfp, size_t *mapped)
502 {
503 struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
504 struct io_pgtable_cfg *cfg = &data->iop.cfg;
505 arm_lpae_iopte *ptep = data->pgd;
506 int ret, lvl = data->start_level;
507 arm_lpae_iopte prot;
508 long iaext = (s64)iova >> cfg->ias;
509
510 if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
511 return -EINVAL;
512
513 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
514 iaext = ~iaext;
515 if (WARN_ON(iaext || paddr >> cfg->oas))
516 return -ERANGE;
517
518 if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
519 return -EINVAL;
520
521 prot = arm_lpae_prot_to_pte(data, iommu_prot);
522 ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
523 ptep, gfp, mapped);
524 /*
525 * Synchronise all PTE updates for the new mapping before there's
526 * a chance for anything to kick off a table walk for the new iova.
527 */
528 wmb();
529
530 return ret;
531 }
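/*
 * Typical usage from an IOMMU driver, mirroring the selftest further down
 * (added illustration, values hypothetical):
 *
 *	size_t mapped = 0;
 *
 *	ret = ops->map_pages(ops, iova, paddr, SZ_4K, 16,
 *			     IOMMU_READ | IOMMU_WRITE, GFP_KERNEL, &mapped);
 *
 * This maps sixteen 4K pages and reports the bytes actually mapped via
 * @mapped.
 */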
532
533 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
534 arm_lpae_iopte *ptep)
535 {
536 arm_lpae_iopte *start, *end;
537 unsigned long table_size;
538
539 if (lvl == data->start_level)
540 table_size = ARM_LPAE_PGD_SIZE(data);
541 else
542 table_size = ARM_LPAE_GRANULE(data);
543
544 start = ptep;
545
546 /* Only leaf entries at the last level */
547 if (lvl == ARM_LPAE_MAX_LEVELS - 1)
548 end = ptep;
549 else
550 end = (void *)ptep + table_size;
551
552 while (ptep != end) {
553 arm_lpae_iopte pte = *ptep++;
554
555 if (!pte || iopte_leaf(pte, lvl, data->iop.fmt))
556 continue;
557
558 __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
559 }
560
561 __arm_lpae_free_pages(start, table_size, &data->iop.cfg, data->iop.cookie);
562 }
563
564 static void arm_lpae_free_pgtable(struct io_pgtable *iop)
565 {
566 struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);
567
568 __arm_lpae_free_pgtable(data, data->start_level, data->pgd);
569 kfree(data);
570 }
571
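/*
 * Unmapping part of an existing block mapping: replace the block PTE with a
 * next-level table that re-creates the remainder of the block, leaving only
 * the requested range unmapped. (Descriptive comment added for clarity.)
 */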
572 static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
573 struct iommu_iotlb_gather *gather,
574 unsigned long iova, size_t size,
575 arm_lpae_iopte blk_pte, int lvl,
576 arm_lpae_iopte *ptep, size_t pgcount)
577 {
578 struct io_pgtable_cfg *cfg = &data->iop.cfg;
579 arm_lpae_iopte pte, *tablep;
580 phys_addr_t blk_paddr;
581 size_t tablesz = ARM_LPAE_GRANULE(data);
582 size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
583 int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
584 int i, unmap_idx_start = -1, num_entries = 0, max_entries;
585
586 if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
587 return 0;
588
589 tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg, data->iop.cookie);
590 if (!tablep)
591 return 0; /* Bytes unmapped */
592
593 if (size == split_sz) {
594 unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
595 max_entries = ptes_per_table - unmap_idx_start;
596 num_entries = min_t(int, pgcount, max_entries);
597 }
598
599 blk_paddr = iopte_to_paddr(blk_pte, data);
600 pte = iopte_prot(blk_pte);
601
602 for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
603 /* Unmap! */
604 if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
605 continue;
606
607 __arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
608 }
609
610 pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
611 if (pte != blk_pte) {
612 __arm_lpae_free_pages(tablep, tablesz, cfg, data->iop.cookie);
613 /*
614 * We may race against someone unmapping another part of this
615 * block, but anything else is invalid. We can't misinterpret
616 * a page entry here since we're never at the last level.
617 */
618 if (iopte_type(pte) != ARM_LPAE_PTE_TYPE_TABLE)
619 return 0;
620
621 tablep = iopte_deref(pte, data);
622 } else if (unmap_idx_start >= 0) {
623 for (i = 0; i < num_entries; i++)
624 io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);
625
626 return num_entries * size;
627 }
628
629 return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
630 }
631
632 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
633 struct iommu_iotlb_gather *gather,
634 unsigned long iova, size_t size, size_t pgcount,
635 int lvl, arm_lpae_iopte *ptep)
636 {
637 arm_lpae_iopte pte;
638 struct io_pgtable *iop = &data->iop;
639 int i = 0, num_entries, max_entries, unmap_idx_start;
640
641 /* Something went horribly wrong and we ran out of page table */
642 if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
643 return 0;
644
645 unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
646 ptep += unmap_idx_start;
647 pte = READ_ONCE(*ptep);
648 if (WARN_ON(!pte))
649 return 0;
650
651 /* If the size matches this level, we're in the right place */
652 if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
653 max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
654 num_entries = min_t(int, pgcount, max_entries);
655
656 /* Find and handle non-leaf entries */
657 for (i = 0; i < num_entries; i++) {
658 pte = READ_ONCE(ptep[i]);
659 if (WARN_ON(!pte))
660 break;
661
662 if (!iopte_leaf(pte, lvl, iop->fmt)) {
663 __arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1);
664
665 /* Also flush any partial walks */
666 io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
667 ARM_LPAE_GRANULE(data));
668 __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
669 }
670 }
671
672 /* Clear the remaining entries */
673 __arm_lpae_clear_pte(ptep, &iop->cfg, i);
674
675 if (gather && !iommu_iotlb_gather_queued(gather))
676 for (int j = 0; j < i; j++)
677 io_pgtable_tlb_add_page(iop, gather, iova + j * size, size);
678
679 return i * size;
680 } else if (iopte_leaf(pte, lvl, iop->fmt)) {
681 /*
682 * Insert a table at the next level to map the old region,
683 * minus the part we want to unmap
684 */
685 return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
686 lvl + 1, ptep, pgcount);
687 }
688
689 /* Keep on walkin' */
690 ptep = iopte_deref(pte, data);
691 return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep);
692 }
693
694 static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
695 size_t pgsize, size_t pgcount,
696 struct iommu_iotlb_gather *gather)
697 {
698 struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
699 struct io_pgtable_cfg *cfg = &data->iop.cfg;
700 arm_lpae_iopte *ptep = data->pgd;
701 long iaext = (s64)iova >> cfg->ias;
702
703 if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
704 return 0;
705
706 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
707 iaext = ~iaext;
708 if (WARN_ON(iaext))
709 return 0;
710
711 return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount,
712 data->start_level, ptep);
713 }
714
715 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
716 unsigned long iova)
717 {
718 struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
719 arm_lpae_iopte pte, *ptep = data->pgd;
720 int lvl = data->start_level;
721
722 do {
723 /* Valid IOPTE pointer? */
724 if (!ptep)
725 return 0;
726
727 /* Grab the IOPTE we're interested in */
728 ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
729 pte = READ_ONCE(*ptep);
730
731 /* Valid entry? */
732 if (!pte)
733 return 0;
734
735 /* Leaf entry? */
736 if (iopte_leaf(pte, lvl, data->iop.fmt))
737 goto found_translation;
738
739 /* Take it to the next level */
740 ptep = iopte_deref(pte, data);
741 } while (++lvl < ARM_LPAE_MAX_LEVELS);
742
743 /* Ran out of page tables to walk */
744 return 0;
745
746 found_translation:
747 iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
748 return iopte_to_paddr(pte, data) | iova;
749 }
750
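/*
 * State carried through a dirty-bit walk (descriptive comment added):
 * @dirty collects dirty IOVAs, @flags holds IOMMU_DIRTY_* behaviour flags,
 * @addr is the current walk position and @end bounds the walk.
 */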
751 struct io_pgtable_walk_data {
752 struct iommu_dirty_bitmap *dirty;
753 unsigned long flags;
754 u64 addr;
755 const u64 end;
756 };
757
758 static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
759 struct io_pgtable_walk_data *walk_data,
760 arm_lpae_iopte *ptep,
761 int lvl);
762
763 static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data,
764 struct io_pgtable_walk_data *walk_data,
765 arm_lpae_iopte *ptep, int lvl)
766 {
767 struct io_pgtable *iop = &data->iop;
768 arm_lpae_iopte pte = READ_ONCE(*ptep);
769
770 if (iopte_leaf(pte, lvl, iop->fmt)) {
771 size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data);
772
773 if (iopte_writeable_dirty(pte)) {
774 iommu_dirty_bitmap_record(walk_data->dirty,
775 walk_data->addr, size);
776 if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR))
777 iopte_set_writeable_clean(ptep);
778 }
779 walk_data->addr += size;
780 return 0;
781 }
782
783 if (WARN_ON(!iopte_table(pte, lvl)))
784 return -EINVAL;
785
786 ptep = iopte_deref(pte, data);
787 return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1);
788 }
789
790 static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
791 struct io_pgtable_walk_data *walk_data,
792 arm_lpae_iopte *ptep,
793 int lvl)
794 {
795 u32 idx;
796 int max_entries, ret;
797
798 if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
799 return -EINVAL;
800
801 if (lvl == data->start_level)
802 max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
803 else
804 max_entries = ARM_LPAE_PTES_PER_TABLE(data);
805
806 for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data);
807 (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) {
808 ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl);
809 if (ret)
810 return ret;
811 }
812
813 return 0;
814 }
815
816 static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
817 unsigned long iova, size_t size,
818 unsigned long flags,
819 struct iommu_dirty_bitmap *dirty)
820 {
821 struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
822 struct io_pgtable_cfg *cfg = &data->iop.cfg;
823 struct io_pgtable_walk_data walk_data = {
824 .dirty = dirty,
825 .flags = flags,
826 .addr = iova,
827 .end = iova + size,
828 };
829 arm_lpae_iopte *ptep = data->pgd;
830 int lvl = data->start_level;
831
832 if (WARN_ON(!size))
833 return -EINVAL;
834 if (WARN_ON((iova + size - 1) & ~(BIT(cfg->ias) - 1)))
835 return -EINVAL;
836 if (data->iop.fmt != ARM_64_LPAE_S1)
837 return -EINVAL;
838
839 return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl);
840 }
841
842 static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
843 {
844 unsigned long granule, page_sizes;
845 unsigned int max_addr_bits = 48;
846
847 /*
848 * We need to restrict the supported page sizes to match the
849 * translation regime for a particular granule. Aim to match
850 * the CPU page size if possible, otherwise prefer smaller sizes.
851 * While we're at it, restrict the block sizes to match the
852 * chosen granule.
853 */
854 if (cfg->pgsize_bitmap & PAGE_SIZE)
855 granule = PAGE_SIZE;
856 else if (cfg->pgsize_bitmap & ~PAGE_MASK)
857 granule = 1UL << __fls(cfg->pgsize_bitmap & ~PAGE_MASK);
858 else if (cfg->pgsize_bitmap & PAGE_MASK)
859 granule = 1UL << __ffs(cfg->pgsize_bitmap & PAGE_MASK);
860 else
861 granule = 0;
862
863 switch (granule) {
864 case SZ_4K:
865 page_sizes = (SZ_4K | SZ_2M | SZ_1G);
866 break;
867 case SZ_16K:
868 page_sizes = (SZ_16K | SZ_32M);
869 break;
870 case SZ_64K:
871 max_addr_bits = 52;
872 page_sizes = (SZ_64K | SZ_512M);
873 if (cfg->oas > 48)
874 page_sizes |= 1ULL << 42; /* 4TB */
875 break;
876 default:
877 page_sizes = 0;
878 }
879
880 cfg->pgsize_bitmap &= page_sizes;
881 cfg->ias = min(cfg->ias, max_addr_bits);
882 cfg->oas = min(cfg->oas, max_addr_bits);
883 }
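/*
 * Example outcome (added, illustrative): on a 4K-PAGE_SIZE host advertising
 * SZ_4K | SZ_2M | SZ_1G, the granule is 4K and the bitmap is left as-is;
 * a 64K-only bitmap with oas > 48 additionally gains the 4TB (bit 42) size
 * and allows up to 52 address bits.
 */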
884
885 static struct arm_lpae_io_pgtable *
886 arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
887 {
888 struct arm_lpae_io_pgtable *data;
889 int levels, va_bits, pg_shift;
890
891 arm_lpae_restrict_pgsizes(cfg);
892
893 if (!(cfg->pgsize_bitmap & (SZ_4K | SZ_16K | SZ_64K)))
894 return NULL;
895
896 if (cfg->ias > ARM_LPAE_MAX_ADDR_BITS)
897 return NULL;
898
899 if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS)
900 return NULL;
901
902 data = kmalloc(sizeof(*data), GFP_KERNEL);
903 if (!data)
904 return NULL;
905
906 pg_shift = __ffs(cfg->pgsize_bitmap);
907 data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte));
908
909 va_bits = cfg->ias - pg_shift;
910 levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
911 data->start_level = ARM_LPAE_MAX_LEVELS - levels;
912
913 /* Calculate the actual size of our pgd (without concatenation) */
914 data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1));
915
916 data->iop.ops = (struct io_pgtable_ops) {
917 .map_pages = arm_lpae_map_pages,
918 .unmap_pages = arm_lpae_unmap_pages,
919 .iova_to_phys = arm_lpae_iova_to_phys,
920 .read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
921 };
922
923 return data;
924 }
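/*
 * Worked example of the geometry above (added, illustrative): ias = 48 with
 * a 4K granule gives pg_shift = 12, bits_per_level = 9, va_bits = 36,
 * levels = 4, start_level = 0 and pgd_bits = 9, i.e. a single 4K PGD with
 * 512 entries.
 */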
925
926 static struct io_pgtable *
927 arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
928 {
929 u64 reg;
930 struct arm_lpae_io_pgtable *data;
931 typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;
932 bool tg1;
933
934 if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
935 IO_PGTABLE_QUIRK_ARM_TTBR1 |
936 IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
937 IO_PGTABLE_QUIRK_ARM_HD))
938 return NULL;
939
940 data = arm_lpae_alloc_pgtable(cfg);
941 if (!data)
942 return NULL;
943
944 /* TCR */
945 if (cfg->coherent_walk) {
946 tcr->sh = ARM_LPAE_TCR_SH_IS;
947 tcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
948 tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
949 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)
950 goto out_free_data;
951 } else {
952 tcr->sh = ARM_LPAE_TCR_SH_OS;
953 tcr->irgn = ARM_LPAE_TCR_RGN_NC;
954 if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
955 tcr->orgn = ARM_LPAE_TCR_RGN_NC;
956 else
957 tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
958 }
959
960 tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1;
961 switch (ARM_LPAE_GRANULE(data)) {
962 case SZ_4K:
963 tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K;
964 break;
965 case SZ_16K:
966 tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K;
967 break;
968 case SZ_64K:
969 tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K;
970 break;
971 }
972
973 switch (cfg->oas) {
974 case 32:
975 tcr->ips = ARM_LPAE_TCR_PS_32_BIT;
976 break;
977 case 36:
978 tcr->ips = ARM_LPAE_TCR_PS_36_BIT;
979 break;
980 case 40:
981 tcr->ips = ARM_LPAE_TCR_PS_40_BIT;
982 break;
983 case 42:
984 tcr->ips = ARM_LPAE_TCR_PS_42_BIT;
985 break;
986 case 44:
987 tcr->ips = ARM_LPAE_TCR_PS_44_BIT;
988 break;
989 case 48:
990 tcr->ips = ARM_LPAE_TCR_PS_48_BIT;
991 break;
992 case 52:
993 tcr->ips = ARM_LPAE_TCR_PS_52_BIT;
994 break;
995 default:
996 goto out_free_data;
997 }
998
999 tcr->tsz = 64ULL - cfg->ias;
1000
1001 /* MAIRs */
1002 reg = (ARM_LPAE_MAIR_ATTR_NC
1003 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
1004 (ARM_LPAE_MAIR_ATTR_WBRWA
1005 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
1006 (ARM_LPAE_MAIR_ATTR_DEVICE
1007 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
1008 (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
1009 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));
1010
1011 cfg->arm_lpae_s1_cfg.mair = reg;
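/*
 * Added note: with the attribute indices defined above this packs to
 * MAIR = 0xf404ff44 (NC in attr 0, WBRWA in attr 1, Device in attr 2 and
 * the inner-NC/outer-WBRWA attribute in attr 3).
 */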
1012
1013 /* Looking good; allocate a pgd */
1014 data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
1015 GFP_KERNEL, cfg, cookie);
1016 if (!data->pgd)
1017 goto out_free_data;
1018
1019 /* Ensure the empty pgd is visible before any actual TTBR write */
1020 wmb();
1021
1022 /* TTBR */
1023 cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd);
1024 return &data->iop;
1025
1026 out_free_data:
1027 kfree(data);
1028 return NULL;
1029 }
1030
1031 static struct io_pgtable *
1032 arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
1033 {
1034 u64 sl;
1035 struct arm_lpae_io_pgtable *data;
1036 typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;
1037
1038 /* The NS quirk doesn't apply at stage 2 */
1039 if (cfg->quirks)
1040 return NULL;
1041
1042 data = arm_lpae_alloc_pgtable(cfg);
1043 if (!data)
1044 return NULL;
1045
1046 /*
1047 * Concatenate PGDs at level 1 if possible in order to reduce
1048 * the depth of the stage-2 walk.
1049 */
1050 if (data->start_level == 0) {
1051 unsigned long pgd_pages;
1052
1053 pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
1054 if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) {
1055 data->pgd_bits += data->bits_per_level;
1056 data->start_level++;
1057 }
1058 }
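/*
 * Illustration (added, not in the original source): with a 4K granule and
 * ias = 40, the initial geometry is start_level = 0 with pgd_bits = 1;
 * pgd_pages = 2 <= 16, so the walk is shortened to start_level = 1 using a
 * pgd of two concatenated 4K tables (pgd_bits = 10, 1024 entries).
 */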
1059
1060 /* VTCR */
1061 if (cfg->coherent_walk) {
1062 vtcr->sh = ARM_LPAE_TCR_SH_IS;
1063 vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
1064 vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
1065 } else {
1066 vtcr->sh = ARM_LPAE_TCR_SH_OS;
1067 vtcr->irgn = ARM_LPAE_TCR_RGN_NC;
1068 vtcr->orgn = ARM_LPAE_TCR_RGN_NC;
1069 }
1070
1071 sl = data->start_level;
1072
1073 switch (ARM_LPAE_GRANULE(data)) {
1074 case SZ_4K:
1075 vtcr->tg = ARM_LPAE_TCR_TG0_4K;
1076 sl++; /* SL0 format is different for 4K granule size */
1077 break;
1078 case SZ_16K:
1079 vtcr->tg = ARM_LPAE_TCR_TG0_16K;
1080 break;
1081 case SZ_64K:
1082 vtcr->tg = ARM_LPAE_TCR_TG0_64K;
1083 break;
1084 }
1085
1086 switch (cfg->oas) {
1087 case 32:
1088 vtcr->ps = ARM_LPAE_TCR_PS_32_BIT;
1089 break;
1090 case 36:
1091 vtcr->ps = ARM_LPAE_TCR_PS_36_BIT;
1092 break;
1093 case 40:
1094 vtcr->ps = ARM_LPAE_TCR_PS_40_BIT;
1095 break;
1096 case 42:
1097 vtcr->ps = ARM_LPAE_TCR_PS_42_BIT;
1098 break;
1099 case 44:
1100 vtcr->ps = ARM_LPAE_TCR_PS_44_BIT;
1101 break;
1102 case 48:
1103 vtcr->ps = ARM_LPAE_TCR_PS_48_BIT;
1104 break;
1105 case 52:
1106 vtcr->ps = ARM_LPAE_TCR_PS_52_BIT;
1107 break;
1108 default:
1109 goto out_free_data;
1110 }
1111
1112 vtcr->tsz = 64ULL - cfg->ias;
1113 vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK;
1114
1115 /* Allocate pgd pages */
1116 data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
1117 GFP_KERNEL, cfg, cookie);
1118 if (!data->pgd)
1119 goto out_free_data;
1120
1121 /* Ensure the empty pgd is visible before any actual TTBR write */
1122 wmb();
1123
1124 /* VTTBR */
1125 cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
1126 return &data->iop;
1127
1128 out_free_data:
1129 kfree(data);
1130 return NULL;
1131 }
1132
1133 static struct io_pgtable *
1134 arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
1135 {
1136 if (cfg->ias > 32 || cfg->oas > 40)
1137 return NULL;
1138
1139 cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
1140 return arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
1141 }
1142
1143 static struct io_pgtable *
1144 arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
1145 {
1146 if (cfg->ias > 40 || cfg->oas > 40)
1147 return NULL;
1148
1149 cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
1150 return arm_64_lpae_alloc_pgtable_s2(cfg, cookie);
1151 }
1152
1153 static struct io_pgtable *
1154 arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
1155 {
1156 struct arm_lpae_io_pgtable *data;
1157
1158 /* No quirks for Mali (hopefully) */
1159 if (cfg->quirks)
1160 return NULL;
1161
1162 if (cfg->ias > 48 || cfg->oas > 40)
1163 return NULL;
1164
1165 cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
1166
1167 data = arm_lpae_alloc_pgtable(cfg);
1168 if (!data)
1169 return NULL;
1170
1171 /* Mali seems to need a full 4-level table regardless of IAS */
1172 if (data->start_level > 0) {
1173 data->start_level = 0;
1174 data->pgd_bits = 0;
1175 }
1176 /*
1177 * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
1178 * best we can do is mimic the out-of-tree driver and hope that the
1179 * "implementation-defined caching policy" is good enough. Similarly,
1180 * we'll use it for the sake of a valid attribute for our 'device'
1181 * index, although callers should never request that in practice.
1182 */
1183 cfg->arm_mali_lpae_cfg.memattr =
1184 (ARM_MALI_LPAE_MEMATTR_IMP_DEF
1185 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
1186 (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
1187 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
1188 (ARM_MALI_LPAE_MEMATTR_IMP_DEF
1189 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
1190
1191 data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
1192 cfg, cookie);
1193 if (!data->pgd)
1194 goto out_free_data;
1195
1196 /* Ensure the empty pgd is visible before TRANSTAB can be written */
1197 wmb();
1198
1199 cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
1200 ARM_MALI_LPAE_TTBR_READ_INNER |
1201 ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
1202 if (cfg->coherent_walk)
1203 cfg->arm_mali_lpae_cfg.transtab |= ARM_MALI_LPAE_TTBR_SHARE_OUTER;
1204
1205 return &data->iop;
1206
1207 out_free_data:
1208 kfree(data);
1209 return NULL;
1210 }
1211
1212 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
1213 .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
1214 .alloc = arm_64_lpae_alloc_pgtable_s1,
1215 .free = arm_lpae_free_pgtable,
1216 };
1217
1218 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
1219 .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
1220 .alloc = arm_64_lpae_alloc_pgtable_s2,
1221 .free = arm_lpae_free_pgtable,
1222 };
1223
1224 struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = {
1225 .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
1226 .alloc = arm_32_lpae_alloc_pgtable_s1,
1227 .free = arm_lpae_free_pgtable,
1228 };
1229
1230 struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = {
1231 .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
1232 .alloc = arm_32_lpae_alloc_pgtable_s2,
1233 .free = arm_lpae_free_pgtable,
1234 };
1235
1236 struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
1237 .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
1238 .alloc = arm_mali_lpae_alloc_pgtable,
1239 .free = arm_lpae_free_pgtable,
1240 };
1241
1242 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
1243
1244 static struct io_pgtable_cfg *cfg_cookie __initdata;
1245
1246 static void __init dummy_tlb_flush_all(void *cookie)
1247 {
1248 WARN_ON(cookie != cfg_cookie);
1249 }
1250
1251 static void __init dummy_tlb_flush(unsigned long iova, size_t size,
1252 size_t granule, void *cookie)
1253 {
1254 WARN_ON(cookie != cfg_cookie);
1255 WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
1256 }
1257
1258 static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
1259 unsigned long iova, size_t granule,
1260 void *cookie)
1261 {
1262 dummy_tlb_flush(iova, granule, granule, cookie);
1263 }
1264
1265 static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
1266 .tlb_flush_all = dummy_tlb_flush_all,
1267 .tlb_flush_walk = dummy_tlb_flush,
1268 .tlb_add_page = dummy_tlb_add_page,
1269 };
1270
1271 static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
1272 {
1273 struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
1274 struct io_pgtable_cfg *cfg = &data->iop.cfg;
1275
1276 pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n",
1277 cfg->pgsize_bitmap, cfg->ias);
1278 pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n",
1279 ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data),
1280 ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd);
1281 }
1282
1283 #define __FAIL(ops, i) ({ \
1284 WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \
1285 arm_lpae_dump_ops(ops); \
1286 selftest_running = false; \
1287 -EFAULT; \
1288 })
1289
1290 static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
1291 {
1292 static const enum io_pgtable_fmt fmts[] __initconst = {
1293 ARM_64_LPAE_S1,
1294 ARM_64_LPAE_S2,
1295 };
1296
1297 int i, j;
1298 unsigned long iova;
1299 size_t size, mapped;
1300 struct io_pgtable_ops *ops;
1301
1302 selftest_running = true;
1303
1304 for (i = 0; i < ARRAY_SIZE(fmts); ++i) {
1305 cfg_cookie = cfg;
1306 ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg);
1307 if (!ops) {
1308 pr_err("selftest: failed to allocate io pgtable ops\n");
1309 return -ENOMEM;
1310 }
1311
1312 /*
1313 * Initial sanity checks.
1314 * Empty page tables shouldn't provide any translations.
1315 */
1316 if (ops->iova_to_phys(ops, 42))
1317 return __FAIL(ops, i);
1318
1319 if (ops->iova_to_phys(ops, SZ_1G + 42))
1320 return __FAIL(ops, i);
1321
1322 if (ops->iova_to_phys(ops, SZ_2G + 42))
1323 return __FAIL(ops, i);
1324
1325 /*
1326 * Distinct mappings of different granule sizes.
1327 */
1328 iova = 0;
1329 for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
1330 size = 1UL << j;
1331
1332 if (ops->map_pages(ops, iova, iova, size, 1,
1333 IOMMU_READ | IOMMU_WRITE |
1334 IOMMU_NOEXEC | IOMMU_CACHE,
1335 GFP_KERNEL, &mapped))
1336 return __FAIL(ops, i);
1337
1338 /* Overlapping mappings */
1339 if (!ops->map_pages(ops, iova, iova + size, size, 1,
1340 IOMMU_READ | IOMMU_NOEXEC,
1341 GFP_KERNEL, &mapped))
1342 return __FAIL(ops, i);
1343
1344 if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
1345 return __FAIL(ops, i);
1346
1347 iova += SZ_1G;
1348 }
1349
1350 /* Partial unmap */
1351 size = 1UL << __ffs(cfg->pgsize_bitmap);
1352 if (ops->unmap_pages(ops, SZ_1G + size, size, 1, NULL) != size)
1353 return __FAIL(ops, i);
1354
1355 /* Remap of partial unmap */
1356 if (ops->map_pages(ops, SZ_1G + size, size, size, 1,
1357 IOMMU_READ, GFP_KERNEL, &mapped))
1358 return __FAIL(ops, i);
1359
1360 if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42))
1361 return __FAIL(ops, i);
1362
1363 /* Full unmap */
1364 iova = 0;
1365 for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
1366 size = 1UL << j;
1367
1368 if (ops->unmap_pages(ops, iova, size, 1, NULL) != size)
1369 return __FAIL(ops, i);
1370
1371 if (ops->iova_to_phys(ops, iova + 42))
1372 return __FAIL(ops, i);
1373
1374 /* Remap full block */
1375 if (ops->map_pages(ops, iova, iova, size, 1,
1376 IOMMU_WRITE, GFP_KERNEL, &mapped))
1377 return __FAIL(ops, i);
1378
1379 if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
1380 return __FAIL(ops, i);
1381
1382 iova += SZ_1G;
1383 }
1384
1385 free_io_pgtable_ops(ops);
1386 }
1387
1388 selftest_running = false;
1389 return 0;
1390 }
1391
1392 static int __init arm_lpae_do_selftests(void)
1393 {
1394 static const unsigned long pgsize[] __initconst = {
1395 SZ_4K | SZ_2M | SZ_1G,
1396 SZ_16K | SZ_32M,
1397 SZ_64K | SZ_512M,
1398 };
1399
1400 static const unsigned int ias[] __initconst = {
1401 32, 36, 40, 42, 44, 48,
1402 };
1403
1404 int i, j, pass = 0, fail = 0;
1405 struct device dev;
1406 struct io_pgtable_cfg cfg = {
1407 .tlb = &dummy_tlb_ops,
1408 .oas = 48,
1409 .coherent_walk = true,
1410 .iommu_dev = &dev,
1411 };
1412
1413 /* __arm_lpae_alloc_pages() merely needs dev_to_node() to work */
1414 set_dev_node(&dev, NUMA_NO_NODE);
1415
1416 for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
1417 for (j = 0; j < ARRAY_SIZE(ias); ++j) {
1418 cfg.pgsize_bitmap = pgsize[i];
1419 cfg.ias = ias[j];
1420 pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n",
1421 pgsize[i], ias[j]);
1422 if (arm_lpae_run_tests(&cfg))
1423 fail++;
1424 else
1425 pass++;
1426 }
1427 }
1428
1429 pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
1430 return fail ? -EFAULT : 0;
1431 }
1432 subsys_initcall(arm_lpae_do_selftests);
1433 #endif
1434