1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * CPU-agnostic ARM page table allocator. 4 * 5 * Copyright (C) 2014 ARM Limited 6 * 7 * Author: Will Deacon <will.deacon@arm.com> 8 */ 9 10 #define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt 11 12 #include <linux/atomic.h> 13 #include <linux/bitops.h> 14 #include <linux/io-pgtable.h> 15 #include <linux/kernel.h> 16 #include <linux/sizes.h> 17 #include <linux/slab.h> 18 #include <linux/types.h> 19 #include <linux/dma-mapping.h> 20 21 #include <asm/barrier.h> 22 23 #include "io-pgtable-arm.h" 24 #include "iommu-pages.h" 25 26 #define ARM_LPAE_MAX_ADDR_BITS 52 27 #define ARM_LPAE_S2_MAX_CONCAT_PAGES 16 28 #define ARM_LPAE_MAX_LEVELS 4 29 30 /* Struct accessors */ 31 #define io_pgtable_to_data(x) \ 32 container_of((x), struct arm_lpae_io_pgtable, iop) 33 34 #define io_pgtable_ops_to_data(x) \ 35 io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) 36 37 /* 38 * Calculate the right shift amount to get to the portion describing level l 39 * in a virtual address mapped by the pagetable in d. 40 */ 41 #define ARM_LPAE_LVL_SHIFT(l,d) \ 42 (((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) + \ 43 ilog2(sizeof(arm_lpae_iopte))) 44 45 #define ARM_LPAE_GRANULE(d) \ 46 (sizeof(arm_lpae_iopte) << (d)->bits_per_level) 47 #define ARM_LPAE_PGD_SIZE(d) \ 48 (sizeof(arm_lpae_iopte) << (d)->pgd_bits) 49 50 #define ARM_LPAE_PTES_PER_TABLE(d) \ 51 (ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte))) 52 53 /* 54 * Calculate the index at level l used to map virtual address a using the 55 * pagetable in d. 56 */ 57 #define ARM_LPAE_PGD_IDX(l,d) \ 58 ((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0) 59 60 #define ARM_LPAE_LVL_IDX(a,l,d) \ 61 (((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) & \ 62 ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1)) 63 64 /* Calculate the block/page mapping size at level l for pagetable in d. */ 65 #define ARM_LPAE_BLOCK_SIZE(l,d) (1ULL << ARM_LPAE_LVL_SHIFT(l,d)) 66 67 /* Page table bits */ 68 #define ARM_LPAE_PTE_TYPE_SHIFT 0 69 #define ARM_LPAE_PTE_TYPE_MASK 0x3 70 71 #define ARM_LPAE_PTE_TYPE_BLOCK 1 72 #define ARM_LPAE_PTE_TYPE_TABLE 3 73 #define ARM_LPAE_PTE_TYPE_PAGE 3 74 75 #define ARM_LPAE_PTE_ADDR_MASK GENMASK_ULL(47,12) 76 77 #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) 78 #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) 79 #define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51) 80 #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) 81 #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) 82 #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) 83 #define ARM_LPAE_PTE_SH_IS (((arm_lpae_iopte)3) << 8) 84 #define ARM_LPAE_PTE_NS (((arm_lpae_iopte)1) << 5) 85 #define ARM_LPAE_PTE_VALID (((arm_lpae_iopte)1) << 0) 86 87 #define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) 88 /* Ignore the contiguous bit for block splitting */ 89 #define ARM_LPAE_PTE_ATTR_HI_MASK (ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM) 90 #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ 91 ARM_LPAE_PTE_ATTR_HI_MASK) 92 /* Software bit for solving coherency races */ 93 #define ARM_LPAE_PTE_SW_SYNC (((arm_lpae_iopte)1) << 55) 94 95 /* Stage-1 PTE */ 96 #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) 97 #define ARM_LPAE_PTE_AP_RDONLY_BIT 7 98 #define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)1) << \ 99 ARM_LPAE_PTE_AP_RDONLY_BIT) 100 #define ARM_LPAE_PTE_AP_WR_CLEAN_MASK (ARM_LPAE_PTE_AP_RDONLY | \ 101 ARM_LPAE_PTE_DBM) 102 #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 103 #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) 104 105 /* Stage-2 PTE */ 106 #define ARM_LPAE_PTE_HAP_FAULT (((arm_lpae_iopte)0) << 6) 107 #define ARM_LPAE_PTE_HAP_READ (((arm_lpae_iopte)1) << 6) 108 #define ARM_LPAE_PTE_HAP_WRITE (((arm_lpae_iopte)2) << 6) 109 #define ARM_LPAE_PTE_MEMATTR_OIWB (((arm_lpae_iopte)0xf) << 2) 110 #define ARM_LPAE_PTE_MEMATTR_NC (((arm_lpae_iopte)0x5) << 2) 111 #define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) 112 113 /* Register bits */ 114 #define ARM_LPAE_VTCR_SL0_MASK 0x3 115 116 #define ARM_LPAE_TCR_T0SZ_SHIFT 0 117 118 #define ARM_LPAE_VTCR_PS_SHIFT 16 119 #define ARM_LPAE_VTCR_PS_MASK 0x7 120 121 #define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3) 122 #define ARM_LPAE_MAIR_ATTR_MASK 0xff 123 #define ARM_LPAE_MAIR_ATTR_DEVICE 0x04 124 #define ARM_LPAE_MAIR_ATTR_NC 0x44 125 #define ARM_LPAE_MAIR_ATTR_INC_OWBRWA 0xf4 126 #define ARM_LPAE_MAIR_ATTR_WBRWA 0xff 127 #define ARM_LPAE_MAIR_ATTR_IDX_NC 0 128 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE 1 129 #define ARM_LPAE_MAIR_ATTR_IDX_DEV 2 130 #define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE 3 131 132 #define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0) 133 #define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2) 134 #define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4) 135 136 #define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL 137 #define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL 138 139 /* IOPTE accessors */ 140 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d)) 141 142 #define iopte_type(pte) \ 143 (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK) 144 145 #define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK) 146 147 #define iopte_writeable_dirty(pte) \ 148 (((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM) 149 150 #define iopte_set_writeable_clean(ptep) \ 151 set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)(ptep)) 152 153 struct arm_lpae_io_pgtable { 154 struct io_pgtable iop; 155 156 int pgd_bits; 157 int start_level; 158 int bits_per_level; 159 160 void *pgd; 161 }; 162 163 typedef u64 arm_lpae_iopte; 164 165 static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl, 166 enum io_pgtable_fmt fmt) 167 { 168 if (lvl == (ARM_LPAE_MAX_LEVELS - 1) && fmt != ARM_MALI_LPAE) 169 return iopte_type(pte) == ARM_LPAE_PTE_TYPE_PAGE; 170 171 return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK; 172 } 173 174 static inline bool iopte_table(arm_lpae_iopte pte, int lvl) 175 { 176 if (lvl == (ARM_LPAE_MAX_LEVELS - 1)) 177 return false; 178 return iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE; 179 } 180 181 static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr, 182 struct arm_lpae_io_pgtable *data) 183 { 184 arm_lpae_iopte pte = paddr; 185 186 /* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */ 187 return (pte | (pte >> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK; 188 } 189 190 static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte, 191 struct arm_lpae_io_pgtable *data) 192 { 193 u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK; 194 195 if (ARM_LPAE_GRANULE(data) < SZ_64K) 196 return paddr; 197 198 /* Rotate the packed high-order bits back to the top */ 199 return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4); 200 } 201 202 static bool selftest_running = false; 203 204 static dma_addr_t __arm_lpae_dma_addr(void *pages) 205 { 206 return (dma_addr_t)virt_to_phys(pages); 207 } 208 209 static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, 210 struct io_pgtable_cfg *cfg, 211 void *cookie) 212 { 213 struct device *dev = cfg->iommu_dev; 214 int order = get_order(size); 215 dma_addr_t dma; 216 void *pages; 217 218 VM_BUG_ON((gfp & __GFP_HIGHMEM)); 219 220 if (cfg->alloc) 221 pages = cfg->alloc(cookie, size, gfp); 222 else 223 pages = iommu_alloc_pages_node(dev_to_node(dev), gfp, order); 224 225 if (!pages) 226 return NULL; 227 228 if (!cfg->coherent_walk) { 229 dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); 230 if (dma_mapping_error(dev, dma)) 231 goto out_free; 232 /* 233 * We depend on the IOMMU being able to work with any physical 234 * address directly, so if the DMA layer suggests otherwise by 235 * translating or truncating them, that bodes very badly... 236 */ 237 if (dma != virt_to_phys(pages)) 238 goto out_unmap; 239 } 240 241 return pages; 242 243 out_unmap: 244 dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n"); 245 dma_unmap_single(dev, dma, size, DMA_TO_DEVICE); 246 247 out_free: 248 if (cfg->free) 249 cfg->free(cookie, pages, size); 250 else 251 iommu_free_pages(pages, order); 252 253 return NULL; 254 } 255 256 static void __arm_lpae_free_pages(void *pages, size_t size, 257 struct io_pgtable_cfg *cfg, 258 void *cookie) 259 { 260 if (!cfg->coherent_walk) 261 dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), 262 size, DMA_TO_DEVICE); 263 264 if (cfg->free) 265 cfg->free(cookie, pages, size); 266 else 267 iommu_free_pages(pages, get_order(size)); 268 } 269 270 static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries, 271 struct io_pgtable_cfg *cfg) 272 { 273 dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep), 274 sizeof(*ptep) * num_entries, DMA_TO_DEVICE); 275 } 276 277 static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg) 278 { 279 280 *ptep = 0; 281 282 if (!cfg->coherent_walk) 283 __arm_lpae_sync_pte(ptep, 1, cfg); 284 } 285 286 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, 287 struct iommu_iotlb_gather *gather, 288 unsigned long iova, size_t size, size_t pgcount, 289 int lvl, arm_lpae_iopte *ptep); 290 291 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, 292 phys_addr_t paddr, arm_lpae_iopte prot, 293 int lvl, int num_entries, arm_lpae_iopte *ptep) 294 { 295 arm_lpae_iopte pte = prot; 296 struct io_pgtable_cfg *cfg = &data->iop.cfg; 297 size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data); 298 int i; 299 300 if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1) 301 pte |= ARM_LPAE_PTE_TYPE_PAGE; 302 else 303 pte |= ARM_LPAE_PTE_TYPE_BLOCK; 304 305 for (i = 0; i < num_entries; i++) 306 ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data); 307 308 if (!cfg->coherent_walk) 309 __arm_lpae_sync_pte(ptep, num_entries, cfg); 310 } 311 312 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, 313 unsigned long iova, phys_addr_t paddr, 314 arm_lpae_iopte prot, int lvl, int num_entries, 315 arm_lpae_iopte *ptep) 316 { 317 int i; 318 319 for (i = 0; i < num_entries; i++) 320 if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) { 321 /* We require an unmap first */ 322 WARN_ON(!selftest_running); 323 return -EEXIST; 324 } else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) { 325 /* 326 * We need to unmap and free the old table before 327 * overwriting it with a block entry. 328 */ 329 arm_lpae_iopte *tblp; 330 size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data); 331 332 tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data); 333 if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1, 334 lvl, tblp) != sz) { 335 WARN_ON(1); 336 return -EINVAL; 337 } 338 } 339 340 __arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep); 341 return 0; 342 } 343 344 static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, 345 arm_lpae_iopte *ptep, 346 arm_lpae_iopte curr, 347 struct arm_lpae_io_pgtable *data) 348 { 349 arm_lpae_iopte old, new; 350 struct io_pgtable_cfg *cfg = &data->iop.cfg; 351 352 new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE; 353 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) 354 new |= ARM_LPAE_PTE_NSTABLE; 355 356 /* 357 * Ensure the table itself is visible before its PTE can be. 358 * Whilst we could get away with cmpxchg64_release below, this 359 * doesn't have any ordering semantics when !CONFIG_SMP. 360 */ 361 dma_wmb(); 362 363 old = cmpxchg64_relaxed(ptep, curr, new); 364 365 if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC)) 366 return old; 367 368 /* Even if it's not ours, there's no point waiting; just kick it */ 369 __arm_lpae_sync_pte(ptep, 1, cfg); 370 if (old == curr) 371 WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC); 372 373 return old; 374 } 375 376 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, 377 phys_addr_t paddr, size_t size, size_t pgcount, 378 arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep, 379 gfp_t gfp, size_t *mapped) 380 { 381 arm_lpae_iopte *cptep, pte; 382 size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data); 383 size_t tblsz = ARM_LPAE_GRANULE(data); 384 struct io_pgtable_cfg *cfg = &data->iop.cfg; 385 int ret = 0, num_entries, max_entries, map_idx_start; 386 387 /* Find our entry at the current level */ 388 map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data); 389 ptep += map_idx_start; 390 391 /* If we can install a leaf entry at this level, then do so */ 392 if (size == block_size) { 393 max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start; 394 num_entries = min_t(int, pgcount, max_entries); 395 ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep); 396 if (!ret) 397 *mapped += num_entries * size; 398 399 return ret; 400 } 401 402 /* We can't allocate tables at the final level */ 403 if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1)) 404 return -EINVAL; 405 406 /* Grab a pointer to the next level */ 407 pte = READ_ONCE(*ptep); 408 if (!pte) { 409 cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg, data->iop.cookie); 410 if (!cptep) 411 return -ENOMEM; 412 413 pte = arm_lpae_install_table(cptep, ptep, 0, data); 414 if (pte) 415 __arm_lpae_free_pages(cptep, tblsz, cfg, data->iop.cookie); 416 } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) { 417 __arm_lpae_sync_pte(ptep, 1, cfg); 418 } 419 420 if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) { 421 cptep = iopte_deref(pte, data); 422 } else if (pte) { 423 /* We require an unmap first */ 424 WARN_ON(!selftest_running); 425 return -EEXIST; 426 } 427 428 /* Rinse, repeat */ 429 return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1, 430 cptep, gfp, mapped); 431 } 432 433 static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, 434 int prot) 435 { 436 arm_lpae_iopte pte; 437 438 if (data->iop.fmt == ARM_64_LPAE_S1 || 439 data->iop.fmt == ARM_32_LPAE_S1) { 440 pte = ARM_LPAE_PTE_nG; 441 if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) 442 pte |= ARM_LPAE_PTE_AP_RDONLY; 443 else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD) 444 pte |= ARM_LPAE_PTE_DBM; 445 if (!(prot & IOMMU_PRIV)) 446 pte |= ARM_LPAE_PTE_AP_UNPRIV; 447 } else { 448 pte = ARM_LPAE_PTE_HAP_FAULT; 449 if (prot & IOMMU_READ) 450 pte |= ARM_LPAE_PTE_HAP_READ; 451 if (prot & IOMMU_WRITE) 452 pte |= ARM_LPAE_PTE_HAP_WRITE; 453 } 454 455 /* 456 * Note that this logic is structured to accommodate Mali LPAE 457 * having stage-1-like attributes but stage-2-like permissions. 458 */ 459 if (data->iop.fmt == ARM_64_LPAE_S2 || 460 data->iop.fmt == ARM_32_LPAE_S2) { 461 if (prot & IOMMU_MMIO) 462 pte |= ARM_LPAE_PTE_MEMATTR_DEV; 463 else if (prot & IOMMU_CACHE) 464 pte |= ARM_LPAE_PTE_MEMATTR_OIWB; 465 else 466 pte |= ARM_LPAE_PTE_MEMATTR_NC; 467 } else { 468 if (prot & IOMMU_MMIO) 469 pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV 470 << ARM_LPAE_PTE_ATTRINDX_SHIFT); 471 else if (prot & IOMMU_CACHE) 472 pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE 473 << ARM_LPAE_PTE_ATTRINDX_SHIFT); 474 } 475 476 /* 477 * Also Mali has its own notions of shareability wherein its Inner 478 * domain covers the cores within the GPU, and its Outer domain is 479 * "outside the GPU" (i.e. either the Inner or System domain in CPU 480 * terms, depending on coherency). 481 */ 482 if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE) 483 pte |= ARM_LPAE_PTE_SH_IS; 484 else 485 pte |= ARM_LPAE_PTE_SH_OS; 486 487 if (prot & IOMMU_NOEXEC) 488 pte |= ARM_LPAE_PTE_XN; 489 490 if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) 491 pte |= ARM_LPAE_PTE_NS; 492 493 if (data->iop.fmt != ARM_MALI_LPAE) 494 pte |= ARM_LPAE_PTE_AF; 495 496 return pte; 497 } 498 499 static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova, 500 phys_addr_t paddr, size_t pgsize, size_t pgcount, 501 int iommu_prot, gfp_t gfp, size_t *mapped) 502 { 503 struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); 504 struct io_pgtable_cfg *cfg = &data->iop.cfg; 505 arm_lpae_iopte *ptep = data->pgd; 506 int ret, lvl = data->start_level; 507 arm_lpae_iopte prot; 508 long iaext = (s64)iova >> cfg->ias; 509 510 if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize)) 511 return -EINVAL; 512 513 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) 514 iaext = ~iaext; 515 if (WARN_ON(iaext || paddr >> cfg->oas)) 516 return -ERANGE; 517 518 if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) 519 return -EINVAL; 520 521 prot = arm_lpae_prot_to_pte(data, iommu_prot); 522 ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl, 523 ptep, gfp, mapped); 524 /* 525 * Synchronise all PTE updates for the new mapping before there's 526 * a chance for anything to kick off a table walk for the new iova. 527 */ 528 wmb(); 529 530 return ret; 531 } 532 533 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, 534 arm_lpae_iopte *ptep) 535 { 536 arm_lpae_iopte *start, *end; 537 unsigned long table_size; 538 539 if (lvl == data->start_level) 540 table_size = ARM_LPAE_PGD_SIZE(data); 541 else 542 table_size = ARM_LPAE_GRANULE(data); 543 544 start = ptep; 545 546 /* Only leaf entries at the last level */ 547 if (lvl == ARM_LPAE_MAX_LEVELS - 1) 548 end = ptep; 549 else 550 end = (void *)ptep + table_size; 551 552 while (ptep != end) { 553 arm_lpae_iopte pte = *ptep++; 554 555 if (!pte || iopte_leaf(pte, lvl, data->iop.fmt)) 556 continue; 557 558 __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); 559 } 560 561 __arm_lpae_free_pages(start, table_size, &data->iop.cfg, data->iop.cookie); 562 } 563 564 static void arm_lpae_free_pgtable(struct io_pgtable *iop) 565 { 566 struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop); 567 568 __arm_lpae_free_pgtable(data, data->start_level, data->pgd); 569 kfree(data); 570 } 571 572 static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, 573 struct iommu_iotlb_gather *gather, 574 unsigned long iova, size_t size, 575 arm_lpae_iopte blk_pte, int lvl, 576 arm_lpae_iopte *ptep, size_t pgcount) 577 { 578 struct io_pgtable_cfg *cfg = &data->iop.cfg; 579 arm_lpae_iopte pte, *tablep; 580 phys_addr_t blk_paddr; 581 size_t tablesz = ARM_LPAE_GRANULE(data); 582 size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data); 583 int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data); 584 int i, unmap_idx_start = -1, num_entries = 0, max_entries; 585 586 if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) 587 return 0; 588 589 tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg, data->iop.cookie); 590 if (!tablep) 591 return 0; /* Bytes unmapped */ 592 593 if (size == split_sz) { 594 unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data); 595 max_entries = ptes_per_table - unmap_idx_start; 596 num_entries = min_t(int, pgcount, max_entries); 597 } 598 599 blk_paddr = iopte_to_paddr(blk_pte, data); 600 pte = iopte_prot(blk_pte); 601 602 for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) { 603 /* Unmap! */ 604 if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries)) 605 continue; 606 607 __arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]); 608 } 609 610 pte = arm_lpae_install_table(tablep, ptep, blk_pte, data); 611 if (pte != blk_pte) { 612 __arm_lpae_free_pages(tablep, tablesz, cfg, data->iop.cookie); 613 /* 614 * We may race against someone unmapping another part of this 615 * block, but anything else is invalid. We can't misinterpret 616 * a page entry here since we're never at the last level. 617 */ 618 if (iopte_type(pte) != ARM_LPAE_PTE_TYPE_TABLE) 619 return 0; 620 621 tablep = iopte_deref(pte, data); 622 } else if (unmap_idx_start >= 0) { 623 for (i = 0; i < num_entries; i++) 624 io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size); 625 626 return num_entries * size; 627 } 628 629 return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep); 630 } 631 632 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, 633 struct iommu_iotlb_gather *gather, 634 unsigned long iova, size_t size, size_t pgcount, 635 int lvl, arm_lpae_iopte *ptep) 636 { 637 arm_lpae_iopte pte; 638 struct io_pgtable *iop = &data->iop; 639 int i = 0, num_entries, max_entries, unmap_idx_start; 640 641 /* Something went horribly wrong and we ran out of page table */ 642 if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) 643 return 0; 644 645 unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data); 646 ptep += unmap_idx_start; 647 pte = READ_ONCE(*ptep); 648 if (WARN_ON(!pte)) 649 return 0; 650 651 /* If the size matches this level, we're in the right place */ 652 if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { 653 max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start; 654 num_entries = min_t(int, pgcount, max_entries); 655 656 while (i < num_entries) { 657 pte = READ_ONCE(*ptep); 658 if (WARN_ON(!pte)) 659 break; 660 661 __arm_lpae_clear_pte(ptep, &iop->cfg); 662 663 if (!iopte_leaf(pte, lvl, iop->fmt)) { 664 /* Also flush any partial walks */ 665 io_pgtable_tlb_flush_walk(iop, iova + i * size, size, 666 ARM_LPAE_GRANULE(data)); 667 __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); 668 } else if (!iommu_iotlb_gather_queued(gather)) { 669 io_pgtable_tlb_add_page(iop, gather, iova + i * size, size); 670 } 671 672 ptep++; 673 i++; 674 } 675 676 return i * size; 677 } else if (iopte_leaf(pte, lvl, iop->fmt)) { 678 /* 679 * Insert a table at the next level to map the old region, 680 * minus the part we want to unmap 681 */ 682 return arm_lpae_split_blk_unmap(data, gather, iova, size, pte, 683 lvl + 1, ptep, pgcount); 684 } 685 686 /* Keep on walkin' */ 687 ptep = iopte_deref(pte, data); 688 return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep); 689 } 690 691 static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova, 692 size_t pgsize, size_t pgcount, 693 struct iommu_iotlb_gather *gather) 694 { 695 struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); 696 struct io_pgtable_cfg *cfg = &data->iop.cfg; 697 arm_lpae_iopte *ptep = data->pgd; 698 long iaext = (s64)iova >> cfg->ias; 699 700 if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount)) 701 return 0; 702 703 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) 704 iaext = ~iaext; 705 if (WARN_ON(iaext)) 706 return 0; 707 708 return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount, 709 data->start_level, ptep); 710 } 711 712 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, 713 unsigned long iova) 714 { 715 struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); 716 arm_lpae_iopte pte, *ptep = data->pgd; 717 int lvl = data->start_level; 718 719 do { 720 /* Valid IOPTE pointer? */ 721 if (!ptep) 722 return 0; 723 724 /* Grab the IOPTE we're interested in */ 725 ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); 726 pte = READ_ONCE(*ptep); 727 728 /* Valid entry? */ 729 if (!pte) 730 return 0; 731 732 /* Leaf entry? */ 733 if (iopte_leaf(pte, lvl, data->iop.fmt)) 734 goto found_translation; 735 736 /* Take it to the next level */ 737 ptep = iopte_deref(pte, data); 738 } while (++lvl < ARM_LPAE_MAX_LEVELS); 739 740 /* Ran out of page tables to walk */ 741 return 0; 742 743 found_translation: 744 iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1); 745 return iopte_to_paddr(pte, data) | iova; 746 } 747 748 struct io_pgtable_walk_data { 749 struct iommu_dirty_bitmap *dirty; 750 unsigned long flags; 751 u64 addr; 752 const u64 end; 753 }; 754 755 static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, 756 struct io_pgtable_walk_data *walk_data, 757 arm_lpae_iopte *ptep, 758 int lvl); 759 760 static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data, 761 struct io_pgtable_walk_data *walk_data, 762 arm_lpae_iopte *ptep, int lvl) 763 { 764 struct io_pgtable *iop = &data->iop; 765 arm_lpae_iopte pte = READ_ONCE(*ptep); 766 767 if (iopte_leaf(pte, lvl, iop->fmt)) { 768 size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data); 769 770 if (iopte_writeable_dirty(pte)) { 771 iommu_dirty_bitmap_record(walk_data->dirty, 772 walk_data->addr, size); 773 if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR)) 774 iopte_set_writeable_clean(ptep); 775 } 776 walk_data->addr += size; 777 return 0; 778 } 779 780 if (WARN_ON(!iopte_table(pte, lvl))) 781 return -EINVAL; 782 783 ptep = iopte_deref(pte, data); 784 return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1); 785 } 786 787 static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, 788 struct io_pgtable_walk_data *walk_data, 789 arm_lpae_iopte *ptep, 790 int lvl) 791 { 792 u32 idx; 793 int max_entries, ret; 794 795 if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) 796 return -EINVAL; 797 798 if (lvl == data->start_level) 799 max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte); 800 else 801 max_entries = ARM_LPAE_PTES_PER_TABLE(data); 802 803 for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data); 804 (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) { 805 ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl); 806 if (ret) 807 return ret; 808 } 809 810 return 0; 811 } 812 813 static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, 814 unsigned long iova, size_t size, 815 unsigned long flags, 816 struct iommu_dirty_bitmap *dirty) 817 { 818 struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); 819 struct io_pgtable_cfg *cfg = &data->iop.cfg; 820 struct io_pgtable_walk_data walk_data = { 821 .dirty = dirty, 822 .flags = flags, 823 .addr = iova, 824 .end = iova + size, 825 }; 826 arm_lpae_iopte *ptep = data->pgd; 827 int lvl = data->start_level; 828 829 if (WARN_ON(!size)) 830 return -EINVAL; 831 if (WARN_ON((iova + size - 1) & ~(BIT(cfg->ias) - 1))) 832 return -EINVAL; 833 if (data->iop.fmt != ARM_64_LPAE_S1) 834 return -EINVAL; 835 836 return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl); 837 } 838 839 static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) 840 { 841 unsigned long granule, page_sizes; 842 unsigned int max_addr_bits = 48; 843 844 /* 845 * We need to restrict the supported page sizes to match the 846 * translation regime for a particular granule. Aim to match 847 * the CPU page size if possible, otherwise prefer smaller sizes. 848 * While we're at it, restrict the block sizes to match the 849 * chosen granule. 850 */ 851 if (cfg->pgsize_bitmap & PAGE_SIZE) 852 granule = PAGE_SIZE; 853 else if (cfg->pgsize_bitmap & ~PAGE_MASK) 854 granule = 1UL << __fls(cfg->pgsize_bitmap & ~PAGE_MASK); 855 else if (cfg->pgsize_bitmap & PAGE_MASK) 856 granule = 1UL << __ffs(cfg->pgsize_bitmap & PAGE_MASK); 857 else 858 granule = 0; 859 860 switch (granule) { 861 case SZ_4K: 862 page_sizes = (SZ_4K | SZ_2M | SZ_1G); 863 break; 864 case SZ_16K: 865 page_sizes = (SZ_16K | SZ_32M); 866 break; 867 case SZ_64K: 868 max_addr_bits = 52; 869 page_sizes = (SZ_64K | SZ_512M); 870 if (cfg->oas > 48) 871 page_sizes |= 1ULL << 42; /* 4TB */ 872 break; 873 default: 874 page_sizes = 0; 875 } 876 877 cfg->pgsize_bitmap &= page_sizes; 878 cfg->ias = min(cfg->ias, max_addr_bits); 879 cfg->oas = min(cfg->oas, max_addr_bits); 880 } 881 882 static struct arm_lpae_io_pgtable * 883 arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) 884 { 885 struct arm_lpae_io_pgtable *data; 886 int levels, va_bits, pg_shift; 887 888 arm_lpae_restrict_pgsizes(cfg); 889 890 if (!(cfg->pgsize_bitmap & (SZ_4K | SZ_16K | SZ_64K))) 891 return NULL; 892 893 if (cfg->ias > ARM_LPAE_MAX_ADDR_BITS) 894 return NULL; 895 896 if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS) 897 return NULL; 898 899 data = kmalloc(sizeof(*data), GFP_KERNEL); 900 if (!data) 901 return NULL; 902 903 pg_shift = __ffs(cfg->pgsize_bitmap); 904 data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte)); 905 906 va_bits = cfg->ias - pg_shift; 907 levels = DIV_ROUND_UP(va_bits, data->bits_per_level); 908 data->start_level = ARM_LPAE_MAX_LEVELS - levels; 909 910 /* Calculate the actual size of our pgd (without concatenation) */ 911 data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1)); 912 913 data->iop.ops = (struct io_pgtable_ops) { 914 .map_pages = arm_lpae_map_pages, 915 .unmap_pages = arm_lpae_unmap_pages, 916 .iova_to_phys = arm_lpae_iova_to_phys, 917 .read_and_clear_dirty = arm_lpae_read_and_clear_dirty, 918 }; 919 920 return data; 921 } 922 923 static struct io_pgtable * 924 arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) 925 { 926 u64 reg; 927 struct arm_lpae_io_pgtable *data; 928 typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr; 929 bool tg1; 930 931 if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | 932 IO_PGTABLE_QUIRK_ARM_TTBR1 | 933 IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | 934 IO_PGTABLE_QUIRK_ARM_HD)) 935 return NULL; 936 937 data = arm_lpae_alloc_pgtable(cfg); 938 if (!data) 939 return NULL; 940 941 /* TCR */ 942 if (cfg->coherent_walk) { 943 tcr->sh = ARM_LPAE_TCR_SH_IS; 944 tcr->irgn = ARM_LPAE_TCR_RGN_WBWA; 945 tcr->orgn = ARM_LPAE_TCR_RGN_WBWA; 946 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA) 947 goto out_free_data; 948 } else { 949 tcr->sh = ARM_LPAE_TCR_SH_OS; 950 tcr->irgn = ARM_LPAE_TCR_RGN_NC; 951 if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) 952 tcr->orgn = ARM_LPAE_TCR_RGN_NC; 953 else 954 tcr->orgn = ARM_LPAE_TCR_RGN_WBWA; 955 } 956 957 tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1; 958 switch (ARM_LPAE_GRANULE(data)) { 959 case SZ_4K: 960 tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K; 961 break; 962 case SZ_16K: 963 tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K; 964 break; 965 case SZ_64K: 966 tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K; 967 break; 968 } 969 970 switch (cfg->oas) { 971 case 32: 972 tcr->ips = ARM_LPAE_TCR_PS_32_BIT; 973 break; 974 case 36: 975 tcr->ips = ARM_LPAE_TCR_PS_36_BIT; 976 break; 977 case 40: 978 tcr->ips = ARM_LPAE_TCR_PS_40_BIT; 979 break; 980 case 42: 981 tcr->ips = ARM_LPAE_TCR_PS_42_BIT; 982 break; 983 case 44: 984 tcr->ips = ARM_LPAE_TCR_PS_44_BIT; 985 break; 986 case 48: 987 tcr->ips = ARM_LPAE_TCR_PS_48_BIT; 988 break; 989 case 52: 990 tcr->ips = ARM_LPAE_TCR_PS_52_BIT; 991 break; 992 default: 993 goto out_free_data; 994 } 995 996 tcr->tsz = 64ULL - cfg->ias; 997 998 /* MAIRs */ 999 reg = (ARM_LPAE_MAIR_ATTR_NC 1000 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) | 1001 (ARM_LPAE_MAIR_ATTR_WBRWA 1002 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) | 1003 (ARM_LPAE_MAIR_ATTR_DEVICE 1004 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) | 1005 (ARM_LPAE_MAIR_ATTR_INC_OWBRWA 1006 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE)); 1007 1008 cfg->arm_lpae_s1_cfg.mair = reg; 1009 1010 /* Looking good; allocate a pgd */ 1011 data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), 1012 GFP_KERNEL, cfg, cookie); 1013 if (!data->pgd) 1014 goto out_free_data; 1015 1016 /* Ensure the empty pgd is visible before any actual TTBR write */ 1017 wmb(); 1018 1019 /* TTBR */ 1020 cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd); 1021 return &data->iop; 1022 1023 out_free_data: 1024 kfree(data); 1025 return NULL; 1026 } 1027 1028 static struct io_pgtable * 1029 arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) 1030 { 1031 u64 sl; 1032 struct arm_lpae_io_pgtable *data; 1033 typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr; 1034 1035 /* The NS quirk doesn't apply at stage 2 */ 1036 if (cfg->quirks) 1037 return NULL; 1038 1039 data = arm_lpae_alloc_pgtable(cfg); 1040 if (!data) 1041 return NULL; 1042 1043 /* 1044 * Concatenate PGDs at level 1 if possible in order to reduce 1045 * the depth of the stage-2 walk. 1046 */ 1047 if (data->start_level == 0) { 1048 unsigned long pgd_pages; 1049 1050 pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte); 1051 if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) { 1052 data->pgd_bits += data->bits_per_level; 1053 data->start_level++; 1054 } 1055 } 1056 1057 /* VTCR */ 1058 if (cfg->coherent_walk) { 1059 vtcr->sh = ARM_LPAE_TCR_SH_IS; 1060 vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA; 1061 vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA; 1062 } else { 1063 vtcr->sh = ARM_LPAE_TCR_SH_OS; 1064 vtcr->irgn = ARM_LPAE_TCR_RGN_NC; 1065 vtcr->orgn = ARM_LPAE_TCR_RGN_NC; 1066 } 1067 1068 sl = data->start_level; 1069 1070 switch (ARM_LPAE_GRANULE(data)) { 1071 case SZ_4K: 1072 vtcr->tg = ARM_LPAE_TCR_TG0_4K; 1073 sl++; /* SL0 format is different for 4K granule size */ 1074 break; 1075 case SZ_16K: 1076 vtcr->tg = ARM_LPAE_TCR_TG0_16K; 1077 break; 1078 case SZ_64K: 1079 vtcr->tg = ARM_LPAE_TCR_TG0_64K; 1080 break; 1081 } 1082 1083 switch (cfg->oas) { 1084 case 32: 1085 vtcr->ps = ARM_LPAE_TCR_PS_32_BIT; 1086 break; 1087 case 36: 1088 vtcr->ps = ARM_LPAE_TCR_PS_36_BIT; 1089 break; 1090 case 40: 1091 vtcr->ps = ARM_LPAE_TCR_PS_40_BIT; 1092 break; 1093 case 42: 1094 vtcr->ps = ARM_LPAE_TCR_PS_42_BIT; 1095 break; 1096 case 44: 1097 vtcr->ps = ARM_LPAE_TCR_PS_44_BIT; 1098 break; 1099 case 48: 1100 vtcr->ps = ARM_LPAE_TCR_PS_48_BIT; 1101 break; 1102 case 52: 1103 vtcr->ps = ARM_LPAE_TCR_PS_52_BIT; 1104 break; 1105 default: 1106 goto out_free_data; 1107 } 1108 1109 vtcr->tsz = 64ULL - cfg->ias; 1110 vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK; 1111 1112 /* Allocate pgd pages */ 1113 data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), 1114 GFP_KERNEL, cfg, cookie); 1115 if (!data->pgd) 1116 goto out_free_data; 1117 1118 /* Ensure the empty pgd is visible before any actual TTBR write */ 1119 wmb(); 1120 1121 /* VTTBR */ 1122 cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd); 1123 return &data->iop; 1124 1125 out_free_data: 1126 kfree(data); 1127 return NULL; 1128 } 1129 1130 static struct io_pgtable * 1131 arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) 1132 { 1133 if (cfg->ias > 32 || cfg->oas > 40) 1134 return NULL; 1135 1136 cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); 1137 return arm_64_lpae_alloc_pgtable_s1(cfg, cookie); 1138 } 1139 1140 static struct io_pgtable * 1141 arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) 1142 { 1143 if (cfg->ias > 40 || cfg->oas > 40) 1144 return NULL; 1145 1146 cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); 1147 return arm_64_lpae_alloc_pgtable_s2(cfg, cookie); 1148 } 1149 1150 static struct io_pgtable * 1151 arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) 1152 { 1153 struct arm_lpae_io_pgtable *data; 1154 1155 /* No quirks for Mali (hopefully) */ 1156 if (cfg->quirks) 1157 return NULL; 1158 1159 if (cfg->ias > 48 || cfg->oas > 40) 1160 return NULL; 1161 1162 cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); 1163 1164 data = arm_lpae_alloc_pgtable(cfg); 1165 if (!data) 1166 return NULL; 1167 1168 /* Mali seems to need a full 4-level table regardless of IAS */ 1169 if (data->start_level > 0) { 1170 data->start_level = 0; 1171 data->pgd_bits = 0; 1172 } 1173 /* 1174 * MEMATTR: Mali has no actual notion of a non-cacheable type, so the 1175 * best we can do is mimic the out-of-tree driver and hope that the 1176 * "implementation-defined caching policy" is good enough. Similarly, 1177 * we'll use it for the sake of a valid attribute for our 'device' 1178 * index, although callers should never request that in practice. 1179 */ 1180 cfg->arm_mali_lpae_cfg.memattr = 1181 (ARM_MALI_LPAE_MEMATTR_IMP_DEF 1182 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) | 1183 (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 1184 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) | 1185 (ARM_MALI_LPAE_MEMATTR_IMP_DEF 1186 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)); 1187 1188 data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL, 1189 cfg, cookie); 1190 if (!data->pgd) 1191 goto out_free_data; 1192 1193 /* Ensure the empty pgd is visible before TRANSTAB can be written */ 1194 wmb(); 1195 1196 cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) | 1197 ARM_MALI_LPAE_TTBR_READ_INNER | 1198 ARM_MALI_LPAE_TTBR_ADRMODE_TABLE; 1199 if (cfg->coherent_walk) 1200 cfg->arm_mali_lpae_cfg.transtab |= ARM_MALI_LPAE_TTBR_SHARE_OUTER; 1201 1202 return &data->iop; 1203 1204 out_free_data: 1205 kfree(data); 1206 return NULL; 1207 } 1208 1209 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { 1210 .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, 1211 .alloc = arm_64_lpae_alloc_pgtable_s1, 1212 .free = arm_lpae_free_pgtable, 1213 }; 1214 1215 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = { 1216 .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, 1217 .alloc = arm_64_lpae_alloc_pgtable_s2, 1218 .free = arm_lpae_free_pgtable, 1219 }; 1220 1221 struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = { 1222 .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, 1223 .alloc = arm_32_lpae_alloc_pgtable_s1, 1224 .free = arm_lpae_free_pgtable, 1225 }; 1226 1227 struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = { 1228 .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, 1229 .alloc = arm_32_lpae_alloc_pgtable_s2, 1230 .free = arm_lpae_free_pgtable, 1231 }; 1232 1233 struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = { 1234 .caps = IO_PGTABLE_CAP_CUSTOM_ALLOCATOR, 1235 .alloc = arm_mali_lpae_alloc_pgtable, 1236 .free = arm_lpae_free_pgtable, 1237 }; 1238 1239 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST 1240 1241 static struct io_pgtable_cfg *cfg_cookie __initdata; 1242 1243 static void __init dummy_tlb_flush_all(void *cookie) 1244 { 1245 WARN_ON(cookie != cfg_cookie); 1246 } 1247 1248 static void __init dummy_tlb_flush(unsigned long iova, size_t size, 1249 size_t granule, void *cookie) 1250 { 1251 WARN_ON(cookie != cfg_cookie); 1252 WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); 1253 } 1254 1255 static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather, 1256 unsigned long iova, size_t granule, 1257 void *cookie) 1258 { 1259 dummy_tlb_flush(iova, granule, granule, cookie); 1260 } 1261 1262 static const struct iommu_flush_ops dummy_tlb_ops __initconst = { 1263 .tlb_flush_all = dummy_tlb_flush_all, 1264 .tlb_flush_walk = dummy_tlb_flush, 1265 .tlb_add_page = dummy_tlb_add_page, 1266 }; 1267 1268 static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops) 1269 { 1270 struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); 1271 struct io_pgtable_cfg *cfg = &data->iop.cfg; 1272 1273 pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n", 1274 cfg->pgsize_bitmap, cfg->ias); 1275 pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n", 1276 ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data), 1277 ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd); 1278 } 1279 1280 #define __FAIL(ops, i) ({ \ 1281 WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \ 1282 arm_lpae_dump_ops(ops); \ 1283 selftest_running = false; \ 1284 -EFAULT; \ 1285 }) 1286 1287 static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) 1288 { 1289 static const enum io_pgtable_fmt fmts[] __initconst = { 1290 ARM_64_LPAE_S1, 1291 ARM_64_LPAE_S2, 1292 }; 1293 1294 int i, j; 1295 unsigned long iova; 1296 size_t size, mapped; 1297 struct io_pgtable_ops *ops; 1298 1299 selftest_running = true; 1300 1301 for (i = 0; i < ARRAY_SIZE(fmts); ++i) { 1302 cfg_cookie = cfg; 1303 ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg); 1304 if (!ops) { 1305 pr_err("selftest: failed to allocate io pgtable ops\n"); 1306 return -ENOMEM; 1307 } 1308 1309 /* 1310 * Initial sanity checks. 1311 * Empty page tables shouldn't provide any translations. 1312 */ 1313 if (ops->iova_to_phys(ops, 42)) 1314 return __FAIL(ops, i); 1315 1316 if (ops->iova_to_phys(ops, SZ_1G + 42)) 1317 return __FAIL(ops, i); 1318 1319 if (ops->iova_to_phys(ops, SZ_2G + 42)) 1320 return __FAIL(ops, i); 1321 1322 /* 1323 * Distinct mappings of different granule sizes. 1324 */ 1325 iova = 0; 1326 for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { 1327 size = 1UL << j; 1328 1329 if (ops->map_pages(ops, iova, iova, size, 1, 1330 IOMMU_READ | IOMMU_WRITE | 1331 IOMMU_NOEXEC | IOMMU_CACHE, 1332 GFP_KERNEL, &mapped)) 1333 return __FAIL(ops, i); 1334 1335 /* Overlapping mappings */ 1336 if (!ops->map_pages(ops, iova, iova + size, size, 1, 1337 IOMMU_READ | IOMMU_NOEXEC, 1338 GFP_KERNEL, &mapped)) 1339 return __FAIL(ops, i); 1340 1341 if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) 1342 return __FAIL(ops, i); 1343 1344 iova += SZ_1G; 1345 } 1346 1347 /* Partial unmap */ 1348 size = 1UL << __ffs(cfg->pgsize_bitmap); 1349 if (ops->unmap_pages(ops, SZ_1G + size, size, 1, NULL) != size) 1350 return __FAIL(ops, i); 1351 1352 /* Remap of partial unmap */ 1353 if (ops->map_pages(ops, SZ_1G + size, size, size, 1, 1354 IOMMU_READ, GFP_KERNEL, &mapped)) 1355 return __FAIL(ops, i); 1356 1357 if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42)) 1358 return __FAIL(ops, i); 1359 1360 /* Full unmap */ 1361 iova = 0; 1362 for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { 1363 size = 1UL << j; 1364 1365 if (ops->unmap_pages(ops, iova, size, 1, NULL) != size) 1366 return __FAIL(ops, i); 1367 1368 if (ops->iova_to_phys(ops, iova + 42)) 1369 return __FAIL(ops, i); 1370 1371 /* Remap full block */ 1372 if (ops->map_pages(ops, iova, iova, size, 1, 1373 IOMMU_WRITE, GFP_KERNEL, &mapped)) 1374 return __FAIL(ops, i); 1375 1376 if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) 1377 return __FAIL(ops, i); 1378 1379 iova += SZ_1G; 1380 } 1381 1382 free_io_pgtable_ops(ops); 1383 } 1384 1385 selftest_running = false; 1386 return 0; 1387 } 1388 1389 static int __init arm_lpae_do_selftests(void) 1390 { 1391 static const unsigned long pgsize[] __initconst = { 1392 SZ_4K | SZ_2M | SZ_1G, 1393 SZ_16K | SZ_32M, 1394 SZ_64K | SZ_512M, 1395 }; 1396 1397 static const unsigned int ias[] __initconst = { 1398 32, 36, 40, 42, 44, 48, 1399 }; 1400 1401 int i, j, pass = 0, fail = 0; 1402 struct device dev; 1403 struct io_pgtable_cfg cfg = { 1404 .tlb = &dummy_tlb_ops, 1405 .oas = 48, 1406 .coherent_walk = true, 1407 .iommu_dev = &dev, 1408 }; 1409 1410 /* __arm_lpae_alloc_pages() merely needs dev_to_node() to work */ 1411 set_dev_node(&dev, NUMA_NO_NODE); 1412 1413 for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { 1414 for (j = 0; j < ARRAY_SIZE(ias); ++j) { 1415 cfg.pgsize_bitmap = pgsize[i]; 1416 cfg.ias = ias[j]; 1417 pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n", 1418 pgsize[i], ias[j]); 1419 if (arm_lpae_run_tests(&cfg)) 1420 fail++; 1421 else 1422 pass++; 1423 } 1424 } 1425 1426 pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail); 1427 return fail ? -EFAULT : 0; 1428 } 1429 subsys_initcall(arm_lpae_do_selftests); 1430 #endif 1431