// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic ARM page table allocator.
 *
 * Copyright (C) 2014 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */

#define pr_fmt(fmt)	"arm-lpae io-pgtable: " fmt

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>

#include <asm/barrier.h>

#include "io-pgtable-arm.h"
#include "iommu-pages.h"

#define ARM_LPAE_MAX_ADDR_BITS		52
#define ARM_LPAE_S2_MAX_CONCAT_PAGES	16
#define ARM_LPAE_MAX_LEVELS		4

/* Struct accessors */
#define io_pgtable_to_data(x)						\
	container_of((x), struct arm_lpae_io_pgtable, iop)

#define io_pgtable_ops_to_data(x)					\
	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))

/*
 * Calculate the right shift amount to get to the portion describing level l
 * in a virtual address mapped by the pagetable in d.
 */
#define ARM_LPAE_LVL_SHIFT(l,d)						\
	(((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) +		\
	ilog2(sizeof(arm_lpae_iopte)))

#define ARM_LPAE_GRANULE(d)						\
	(sizeof(arm_lpae_iopte) << (d)->bits_per_level)
#define ARM_LPAE_PGD_SIZE(d)						\
	(sizeof(arm_lpae_iopte) << (d)->pgd_bits)

#define ARM_LPAE_PTES_PER_TABLE(d)					\
	(ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte)))

/*
 * Calculate the index at level l used to map virtual address a using the
 * pagetable in d.
 */
#define ARM_LPAE_PGD_IDX(l,d)						\
	((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0)

#define ARM_LPAE_LVL_IDX(a,l,d)						\
	(((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) &			\
	 ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1))

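/*
 * Worked example (illustrative): with a 4K granule, sizeof(arm_lpae_iopte)
 * is 8, so bits_per_level = 12 - 3 = 9 and ARM_LPAE_LVL_SHIFT() yields
 * 39, 30, 21 and 12 for levels 0-3, i.e. the familiar 512-entry tables of
 * the AArch64 4K translation regime.
 */
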
/* Calculate the block/page mapping size at level l for pagetable in d. */
#define ARM_LPAE_BLOCK_SIZE(l,d)	(1ULL << ARM_LPAE_LVL_SHIFT(l,d))

/* Page table bits */
#define ARM_LPAE_PTE_TYPE_SHIFT		0
#define ARM_LPAE_PTE_TYPE_MASK		0x3

#define ARM_LPAE_PTE_TYPE_BLOCK		1
#define ARM_LPAE_PTE_TYPE_TABLE		3
#define ARM_LPAE_PTE_TYPE_PAGE		3

#define ARM_LPAE_PTE_ADDR_MASK		GENMASK_ULL(47,12)

#define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
#define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
#define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
#define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
#define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
#define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
#define ARM_LPAE_PTE_SH_IS		(((arm_lpae_iopte)3) << 8)
#define ARM_LPAE_PTE_NS			(((arm_lpae_iopte)1) << 5)
#define ARM_LPAE_PTE_VALID		(((arm_lpae_iopte)1) << 0)

#define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
/* Ignore the contiguous bit for block splitting */
#define ARM_LPAE_PTE_ATTR_HI_MASK	(ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM)
#define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK |	\
					 ARM_LPAE_PTE_ATTR_HI_MASK)
/* Software bit for solving coherency races */
#define ARM_LPAE_PTE_SW_SYNC		(((arm_lpae_iopte)1) << 55)

/* Stage-1 PTE */
#define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
#define ARM_LPAE_PTE_AP_RDONLY_BIT	7
#define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)1) <<		\
					 ARM_LPAE_PTE_AP_RDONLY_BIT)
#define ARM_LPAE_PTE_AP_WR_CLEAN_MASK	(ARM_LPAE_PTE_AP_RDONLY |	\
					 ARM_LPAE_PTE_DBM)
#define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
#define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)

/* Stage-2 PTE */
#define ARM_LPAE_PTE_HAP_FAULT		(((arm_lpae_iopte)0) << 6)
#define ARM_LPAE_PTE_HAP_READ		(((arm_lpae_iopte)1) << 6)
#define ARM_LPAE_PTE_HAP_WRITE		(((arm_lpae_iopte)2) << 6)
#define ARM_LPAE_PTE_MEMATTR_OIWB	(((arm_lpae_iopte)0xf) << 2)
#define ARM_LPAE_PTE_MEMATTR_NC		(((arm_lpae_iopte)0x5) << 2)
#define ARM_LPAE_PTE_MEMATTR_DEV	(((arm_lpae_iopte)0x1) << 2)

/* Register bits */
#define ARM_LPAE_VTCR_SL0_MASK		0x3

#define ARM_LPAE_TCR_T0SZ_SHIFT		0

#define ARM_LPAE_VTCR_PS_SHIFT		16
#define ARM_LPAE_VTCR_PS_MASK		0x7

#define ARM_LPAE_MAIR_ATTR_SHIFT(n)	((n) << 3)
#define ARM_LPAE_MAIR_ATTR_MASK		0xff
#define ARM_LPAE_MAIR_ATTR_DEVICE	0x04
#define ARM_LPAE_MAIR_ATTR_NC		0x44
#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA	0xf4
#define ARM_LPAE_MAIR_ATTR_WBRWA	0xff
#define ARM_LPAE_MAIR_ATTR_IDX_NC	0
#define ARM_LPAE_MAIR_ATTR_IDX_CACHE	1
#define ARM_LPAE_MAIR_ATTR_IDX_DEV	2
#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE	3

#define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
#define ARM_MALI_LPAE_TTBR_READ_INNER	BIT(2)
#define ARM_MALI_LPAE_TTBR_SHARE_OUTER	BIT(4)

#define ARM_MALI_LPAE_MEMATTR_IMP_DEF	0x88ULL
#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL

/* IOPTE accessors */
#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))

#define iopte_type(pte)					\
	(((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)

#define iopte_prot(pte)	((pte) & ARM_LPAE_PTE_ATTR_MASK)

#define iopte_writeable_dirty(pte)				\
	(((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM)

#define iopte_set_writeable_clean(ptep)				\
	set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)(ptep))

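/*
 * Illustrative note on the DBM-based dirty tracking above: with
 * IO_PGTABLE_QUIRK_ARM_HD a writeable-clean PTE has both DBM (bit 51) and
 * AP[2]/RDONLY (bit 7) set; HTTU hardware marks the page dirty by clearing
 * AP[2] on a write, so "writeable and dirty" is DBM set with RDONLY clear,
 * which is exactly what iopte_writeable_dirty() tests and what
 * iopte_set_writeable_clean() undoes.
 */
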
struct arm_lpae_io_pgtable {
	struct io_pgtable	iop;

	int			pgd_bits;
	int			start_level;
	int			bits_per_level;

	void			*pgd;
};

typedef u64 arm_lpae_iopte;

static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
			      enum io_pgtable_fmt fmt)
{
	if (lvl == (ARM_LPAE_MAX_LEVELS - 1) && fmt != ARM_MALI_LPAE)
		return iopte_type(pte) == ARM_LPAE_PTE_TYPE_PAGE;

	return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK;
}

static inline bool iopte_table(arm_lpae_iopte pte, int lvl)
{
	if (lvl == (ARM_LPAE_MAX_LEVELS - 1))
		return false;
	return iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE;
}

static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
				     struct arm_lpae_io_pgtable *data)
{
	arm_lpae_iopte pte = paddr;

	/* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
	return (pte | (pte >> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK;
}

static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
				  struct arm_lpae_io_pgtable *data)
{
	u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;

	if (ARM_LPAE_GRANULE(data) < SZ_64K)
		return paddr;

	/* Rotate the packed high-order bits back to the top */
	return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
}

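/*
 * Illustrative note on the packing above: with a 64K granule and a 52-bit
 * OAS, PA bits 51:48 are carried in PTE bits 15:12 (the low address bits
 * of a 64K-aligned page are zero anyway), so paddr_to_iopte() folds them
 * down with a shift of 48 - 12 = 36 and iopte_to_paddr() rotates them back
 * up to the top.
 */
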
static bool selftest_running = false;

static dma_addr_t __arm_lpae_dma_addr(void *pages)
{
	return (dma_addr_t)virt_to_phys(pages);
}

static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
				    struct io_pgtable_cfg *cfg,
				    void *cookie)
{
	struct device *dev = cfg->iommu_dev;
	int order = get_order(size);
	dma_addr_t dma;
	void *pages;

	VM_BUG_ON((gfp & __GFP_HIGHMEM));

	if (cfg->alloc)
		pages = cfg->alloc(cookie, size, gfp);
	else
		pages = iommu_alloc_pages_node(dev_to_node(dev), gfp, order);

	if (!pages)
		return NULL;

	if (!cfg->coherent_walk) {
		dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, dma))
			goto out_free;
		/*
		 * We depend on the IOMMU being able to work with any physical
		 * address directly, so if the DMA layer suggests otherwise by
		 * translating or truncating them, that bodes very badly...
		 */
		if (dma != virt_to_phys(pages))
			goto out_unmap;
	}

	return pages;

out_unmap:
	dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
	dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);

out_free:
	if (cfg->free)
		cfg->free(cookie, pages, size);
	else
		iommu_free_pages(pages, order);

	return NULL;
}

static void __arm_lpae_free_pages(void *pages, size_t size,
				  struct io_pgtable_cfg *cfg,
				  void *cookie)
{
	if (!cfg->coherent_walk)
		dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
				 size, DMA_TO_DEVICE);

	if (cfg->free)
		cfg->free(cookie, pages, size);
	else
		iommu_free_pages(pages, get_order(size));
}

static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
				struct io_pgtable_cfg *cfg)
{
	dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
				   sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
}

static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg)
{

	*ptep = 0;

	if (!cfg->coherent_walk)
		__arm_lpae_sync_pte(ptep, 1, cfg);
}

static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
			       struct iommu_iotlb_gather *gather,
			       unsigned long iova, size_t size, size_t pgcount,
			       int lvl, arm_lpae_iopte *ptep);

static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
				phys_addr_t paddr, arm_lpae_iopte prot,
				int lvl, int num_entries, arm_lpae_iopte *ptep)
{
	arm_lpae_iopte pte = prot;
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
	int i;

	if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
		pte |= ARM_LPAE_PTE_TYPE_PAGE;
	else
		pte |= ARM_LPAE_PTE_TYPE_BLOCK;

	for (i = 0; i < num_entries; i++)
		ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);

	if (!cfg->coherent_walk)
		__arm_lpae_sync_pte(ptep, num_entries, cfg);
}

static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
			     unsigned long iova, phys_addr_t paddr,
			     arm_lpae_iopte prot, int lvl, int num_entries,
			     arm_lpae_iopte *ptep)
{
	int i;

	for (i = 0; i < num_entries; i++)
		if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
			/* We require an unmap first */
			WARN_ON(!selftest_running);
			return -EEXIST;
		} else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
			/*
			 * We need to unmap and free the old table before
			 * overwriting it with a block entry.
			 */
			arm_lpae_iopte *tblp;
			size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);

			tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
			if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
					     lvl, tblp) != sz) {
				WARN_ON(1);
				return -EINVAL;
			}
		}

	__arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
	return 0;
}

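/*
 * Illustrative note: arm_lpae_install_table() below publishes a new table
 * with cmpxchg64 so that two map_pages() calls racing to populate the same
 * gap install exactly one table; the loser frees its freshly allocated page
 * and walks the winner's table instead (see __arm_lpae_map()).
 */
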
static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
					     arm_lpae_iopte *ptep,
					     arm_lpae_iopte curr,
					     struct arm_lpae_io_pgtable *data)
{
	arm_lpae_iopte old, new;
	struct io_pgtable_cfg *cfg = &data->iop.cfg;

	new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE;
	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
		new |= ARM_LPAE_PTE_NSTABLE;

	/*
	 * Ensure the table itself is visible before its PTE can be.
	 * Whilst we could get away with cmpxchg64_release below, this
	 * doesn't have any ordering semantics when !CONFIG_SMP.
	 */
	dma_wmb();

	old = cmpxchg64_relaxed(ptep, curr, new);

	if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC))
		return old;

	/* Even if it's not ours, there's no point waiting; just kick it */
	__arm_lpae_sync_pte(ptep, 1, cfg);
	if (old == curr)
		WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);

	return old;
}

static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
			  phys_addr_t paddr, size_t size, size_t pgcount,
			  arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
			  gfp_t gfp, size_t *mapped)
{
	arm_lpae_iopte *cptep, pte;
	size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
	size_t tblsz = ARM_LPAE_GRANULE(data);
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	int ret = 0, num_entries, max_entries, map_idx_start;

	/* Find our entry at the current level */
	map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
	ptep += map_idx_start;

	/* If we can install a leaf entry at this level, then do so */
	if (size == block_size) {
		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
		num_entries = min_t(int, pgcount, max_entries);
		ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
		if (!ret)
			*mapped += num_entries * size;

		return ret;
	}

	/* We can't allocate tables at the final level */
	if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
		return -EINVAL;

	/* Grab a pointer to the next level */
	pte = READ_ONCE(*ptep);
	if (!pte) {
		cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg, data->iop.cookie);
		if (!cptep)
			return -ENOMEM;

		pte = arm_lpae_install_table(cptep, ptep, 0, data);
		if (pte)
			__arm_lpae_free_pages(cptep, tblsz, cfg, data->iop.cookie);
	} else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
		__arm_lpae_sync_pte(ptep, 1, cfg);
	}

	if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
		cptep = iopte_deref(pte, data);
	} else if (pte) {
		/* We require an unmap first */
		WARN_ON(!selftest_running);
		return -EEXIST;
	}

	/* Rinse, repeat */
	return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
			      cptep, gfp, mapped);
}

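/*
 * Illustrative example of the recursion above: with a 4K granule, a
 * map_pages() call with pgsize SZ_2M terminates at level 2, where
 * ARM_LPAE_BLOCK_SIZE() is 2M, and installs block entries, whereas pgsize
 * SZ_4K descends one level further and installs individual page entries.
 */
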
static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
					   int prot)
{
	arm_lpae_iopte pte;

	if (data->iop.fmt == ARM_64_LPAE_S1 ||
	    data->iop.fmt == ARM_32_LPAE_S1) {
		pte = ARM_LPAE_PTE_nG;
		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
			pte |= ARM_LPAE_PTE_AP_RDONLY;
		else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
			pte |= ARM_LPAE_PTE_DBM;
		if (!(prot & IOMMU_PRIV))
			pte |= ARM_LPAE_PTE_AP_UNPRIV;
	} else {
		pte = ARM_LPAE_PTE_HAP_FAULT;
		if (prot & IOMMU_READ)
			pte |= ARM_LPAE_PTE_HAP_READ;
		if (prot & IOMMU_WRITE)
			pte |= ARM_LPAE_PTE_HAP_WRITE;
	}

	/*
	 * Note that this logic is structured to accommodate Mali LPAE
	 * having stage-1-like attributes but stage-2-like permissions.
	 */
	if (data->iop.fmt == ARM_64_LPAE_S2 ||
	    data->iop.fmt == ARM_32_LPAE_S2) {
		if (prot & IOMMU_MMIO)
			pte |= ARM_LPAE_PTE_MEMATTR_DEV;
		else if (prot & IOMMU_CACHE)
			pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
		else
			pte |= ARM_LPAE_PTE_MEMATTR_NC;
	} else {
		if (prot & IOMMU_MMIO)
			pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
		else if (prot & IOMMU_CACHE)
			pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
	}

	/*
	 * Also Mali has its own notions of shareability wherein its Inner
	 * domain covers the cores within the GPU, and its Outer domain is
	 * "outside the GPU" (i.e. either the Inner or System domain in CPU
	 * terms, depending on coherency).
	 */
	if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE)
		pte |= ARM_LPAE_PTE_SH_IS;
	else
		pte |= ARM_LPAE_PTE_SH_OS;

	if (prot & IOMMU_NOEXEC)
		pte |= ARM_LPAE_PTE_XN;

	if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
		pte |= ARM_LPAE_PTE_NS;

	if (data->iop.fmt != ARM_MALI_LPAE)
		pte |= ARM_LPAE_PTE_AF;

	return pte;
}

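/*
 * Illustrative note on the address check in the map/unmap entry points
 * below: iaext = (s64)iova >> cfg->ias is zero only when every bit above
 * the IAS is clear (a valid TTBR0 address); with the TTBR1 quirk a valid
 * address has those bits all set instead, so the arithmetic shift yields
 * -1 and the ~iaext inversion makes the check pass.
 */
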
static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
			      int iommu_prot, gfp_t gfp, size_t *mapped)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_lpae_iopte *ptep = data->pgd;
	int ret, lvl = data->start_level;
	arm_lpae_iopte prot;
	long iaext = (s64)iova >> cfg->ias;

	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
		return -EINVAL;

	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
		iaext = ~iaext;
	if (WARN_ON(iaext || paddr >> cfg->oas))
		return -ERANGE;

	/* If no access, then nothing to do */
	if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
		return 0;

	prot = arm_lpae_prot_to_pte(data, iommu_prot);
	ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
			     ptep, gfp, mapped);
	/*
	 * Synchronise all PTE updates for the new mapping before there's
	 * a chance for anything to kick off a table walk for the new iova.
	 */
	wmb();

	return ret;
}

static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
				    arm_lpae_iopte *ptep)
{
	arm_lpae_iopte *start, *end;
	unsigned long table_size;

	if (lvl == data->start_level)
		table_size = ARM_LPAE_PGD_SIZE(data);
	else
		table_size = ARM_LPAE_GRANULE(data);

	start = ptep;

	/* Only leaf entries at the last level */
	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
		end = ptep;
	else
		end = (void *)ptep + table_size;

	while (ptep != end) {
		arm_lpae_iopte pte = *ptep++;

		if (!pte || iopte_leaf(pte, lvl, data->iop.fmt))
			continue;

		__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
	}

	__arm_lpae_free_pages(start, table_size, &data->iop.cfg, data->iop.cookie);
}

static void arm_lpae_free_pgtable(struct io_pgtable *iop)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);

	__arm_lpae_free_pgtable(data, data->start_level, data->pgd);
	kfree(data);
}

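/*
 * Illustrative note: arm_lpae_split_blk_unmap() below handles a partial
 * unmap of a block mapping (e.g. unmapping 4K out of a 2M block with a 4K
 * granule) by building a next-level table that remaps the remainder of the
 * block and then atomically replacing the block entry with a table entry.
 */
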
static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
				       struct iommu_iotlb_gather *gather,
				       unsigned long iova, size_t size,
				       arm_lpae_iopte blk_pte, int lvl,
				       arm_lpae_iopte *ptep, size_t pgcount)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_lpae_iopte pte, *tablep;
	phys_addr_t blk_paddr;
	size_t tablesz = ARM_LPAE_GRANULE(data);
	size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
	int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
	int i, unmap_idx_start = -1, num_entries = 0, max_entries;

	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
		return 0;

	tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg, data->iop.cookie);
	if (!tablep)
		return 0; /* Bytes unmapped */

	if (size == split_sz) {
		unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
		max_entries = ptes_per_table - unmap_idx_start;
		num_entries = min_t(int, pgcount, max_entries);
	}

	blk_paddr = iopte_to_paddr(blk_pte, data);
	pte = iopte_prot(blk_pte);

	for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
		/* Unmap! */
		if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
			continue;

		__arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
	}

	pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
	if (pte != blk_pte) {
		__arm_lpae_free_pages(tablep, tablesz, cfg, data->iop.cookie);
		/*
		 * We may race against someone unmapping another part of this
		 * block, but anything else is invalid. We can't misinterpret
		 * a page entry here since we're never at the last level.
		 */
		if (iopte_type(pte) != ARM_LPAE_PTE_TYPE_TABLE)
			return 0;

		tablep = iopte_deref(pte, data);
	} else if (unmap_idx_start >= 0) {
		for (i = 0; i < num_entries; i++)
			io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);

		return num_entries * size;
	}

	return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
}

static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
			       struct iommu_iotlb_gather *gather,
			       unsigned long iova, size_t size, size_t pgcount,
			       int lvl, arm_lpae_iopte *ptep)
{
	arm_lpae_iopte pte;
	struct io_pgtable *iop = &data->iop;
	int i = 0, num_entries, max_entries, unmap_idx_start;

	/* Something went horribly wrong and we ran out of page table */
	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
		return 0;

	unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
	ptep += unmap_idx_start;
	pte = READ_ONCE(*ptep);
	if (WARN_ON(!pte))
		return 0;

	/* If the size matches this level, we're in the right place */
	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
		num_entries = min_t(int, pgcount, max_entries);

		while (i < num_entries) {
			pte = READ_ONCE(*ptep);
			if (WARN_ON(!pte))
				break;

			__arm_lpae_clear_pte(ptep, &iop->cfg);

			if (!iopte_leaf(pte, lvl, iop->fmt)) {
				/* Also flush any partial walks */
				io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
							  ARM_LPAE_GRANULE(data));
				__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
			} else if (!iommu_iotlb_gather_queued(gather)) {
				io_pgtable_tlb_add_page(iop, gather, iova + i * size, size);
			}

			ptep++;
			i++;
		}

		return i * size;
	} else if (iopte_leaf(pte, lvl, iop->fmt)) {
		/*
		 * Insert a table at the next level to map the old region,
		 * minus the part we want to unmap
		 */
		return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
						lvl + 1, ptep, pgcount);
	}

	/* Keep on walkin' */
	ptep = iopte_deref(pte, data);
	return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep);
}

static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
				   size_t pgsize, size_t pgcount,
				   struct iommu_iotlb_gather *gather)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_lpae_iopte *ptep = data->pgd;
	long iaext = (s64)iova >> cfg->ias;

	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
		return 0;

	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
		iaext = ~iaext;
	if (WARN_ON(iaext))
		return 0;

	return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount,
				data->start_level, ptep);
}

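/*
 * Illustrative note on the unmap path above: leaf entries are reported via
 * io_pgtable_tlb_add_page() so the caller can batch invalidations in the
 * gather, whereas removing a whole table triggers an immediate
 * io_pgtable_tlb_flush_walk() before the sub-tree's memory is freed.
 */
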
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
					 unsigned long iova)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
	arm_lpae_iopte pte, *ptep = data->pgd;
	int lvl = data->start_level;

	do {
		/* Valid IOPTE pointer? */
		if (!ptep)
			return 0;

		/* Grab the IOPTE we're interested in */
		ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
		pte = READ_ONCE(*ptep);

		/* Valid entry? */
		if (!pte)
			return 0;

		/* Leaf entry? */
		if (iopte_leaf(pte, lvl, data->iop.fmt))
			goto found_translation;

		/* Take it to the next level */
		ptep = iopte_deref(pte, data);
	} while (++lvl < ARM_LPAE_MAX_LEVELS);

	/* Ran out of page tables to walk */
	return 0;

found_translation:
	iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
	return iopte_to_paddr(pte, data) | iova;
}

struct io_pgtable_walk_data {
	struct iommu_dirty_bitmap	*dirty;
	unsigned long			flags;
	u64				addr;
	const u64			end;
};

static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
				       struct io_pgtable_walk_data *walk_data,
				       arm_lpae_iopte *ptep,
				       int lvl);

static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data,
				  struct io_pgtable_walk_data *walk_data,
				  arm_lpae_iopte *ptep, int lvl)
{
	struct io_pgtable *iop = &data->iop;
	arm_lpae_iopte pte = READ_ONCE(*ptep);

	if (iopte_leaf(pte, lvl, iop->fmt)) {
		size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data);

		if (iopte_writeable_dirty(pte)) {
			iommu_dirty_bitmap_record(walk_data->dirty,
						  walk_data->addr, size);
			if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR))
				iopte_set_writeable_clean(ptep);
		}
		walk_data->addr += size;
		return 0;
	}

	if (WARN_ON(!iopte_table(pte, lvl)))
		return -EINVAL;

	ptep = iopte_deref(pte, data);
	return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1);
}

static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
				       struct io_pgtable_walk_data *walk_data,
				       arm_lpae_iopte *ptep,
				       int lvl)
{
	u32 idx;
	int max_entries, ret;

	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
		return -EINVAL;

	if (lvl == data->start_level)
		max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
	else
		max_entries = ARM_LPAE_PTES_PER_TABLE(data);

	for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data);
	     (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) {
		ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
					 unsigned long iova, size_t size,
					 unsigned long flags,
					 struct iommu_dirty_bitmap *dirty)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	struct io_pgtable_walk_data walk_data = {
		.dirty = dirty,
		.flags = flags,
		.addr = iova,
		.end = iova + size,
	};
	arm_lpae_iopte *ptep = data->pgd;
	int lvl = data->start_level;

	if (WARN_ON(!size))
		return -EINVAL;
	if (WARN_ON((iova + size - 1) & ~(BIT(cfg->ias) - 1)))
		return -EINVAL;
	if (data->iop.fmt != ARM_64_LPAE_S1)
		return -EINVAL;

	return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl);
}

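/*
 * Illustrative usage (sketch, not part of this file): an IOMMU driver
 * reporting dirty pages for live migration would call something like
 *
 *	iommu_dirty_bitmap_init(&dirty, bitmap, &gather);
 *	ops->read_and_clear_dirty(ops, iova, size, 0, &dirty);
 *
 * which records dirty ranges into the bitmap and, unless the caller passes
 * IOMMU_DIRTY_NO_CLEAR, flips the PTEs back to writeable-clean.
 */
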
static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
{
	unsigned long granule, page_sizes;
	unsigned int max_addr_bits = 48;

	/*
	 * We need to restrict the supported page sizes to match the
	 * translation regime for a particular granule. Aim to match
	 * the CPU page size if possible, otherwise prefer smaller sizes.
	 * While we're at it, restrict the block sizes to match the
	 * chosen granule.
	 */
	if (cfg->pgsize_bitmap & PAGE_SIZE)
		granule = PAGE_SIZE;
	else if (cfg->pgsize_bitmap & ~PAGE_MASK)
		granule = 1UL << __fls(cfg->pgsize_bitmap & ~PAGE_MASK);
	else if (cfg->pgsize_bitmap & PAGE_MASK)
		granule = 1UL << __ffs(cfg->pgsize_bitmap & PAGE_MASK);
	else
		granule = 0;

	switch (granule) {
	case SZ_4K:
		page_sizes = (SZ_4K | SZ_2M | SZ_1G);
		break;
	case SZ_16K:
		page_sizes = (SZ_16K | SZ_32M);
		break;
	case SZ_64K:
		max_addr_bits = 52;
		page_sizes = (SZ_64K | SZ_512M);
		if (cfg->oas > 48)
			page_sizes |= 1ULL << 42; /* 4TB */
		break;
	default:
		page_sizes = 0;
	}

	cfg->pgsize_bitmap &= page_sizes;
	cfg->ias = min(cfg->ias, max_addr_bits);
	cfg->oas = min(cfg->oas, max_addr_bits);
}

static struct arm_lpae_io_pgtable *
arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
{
	struct arm_lpae_io_pgtable *data;
	int levels, va_bits, pg_shift;

	arm_lpae_restrict_pgsizes(cfg);

	if (!(cfg->pgsize_bitmap & (SZ_4K | SZ_16K | SZ_64K)))
		return NULL;

	if (cfg->ias > ARM_LPAE_MAX_ADDR_BITS)
		return NULL;

	if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS)
		return NULL;

	data = kmalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return NULL;

	pg_shift = __ffs(cfg->pgsize_bitmap);
	data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte));

	va_bits = cfg->ias - pg_shift;
	levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
	data->start_level = ARM_LPAE_MAX_LEVELS - levels;

	/* Calculate the actual size of our pgd (without concatenation) */
	data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1));

	data->iop.ops = (struct io_pgtable_ops) {
		.map_pages	= arm_lpae_map_pages,
		.unmap_pages	= arm_lpae_unmap_pages,
		.iova_to_phys	= arm_lpae_iova_to_phys,
		.read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
	};

	return data;
}

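/*
 * Worked example (illustrative): with ias = 40 and a 4K granule, pg_shift
 * = 12, bits_per_level = 9, va_bits = 28 and levels = DIV_ROUND_UP(28, 9)
 * = 4, so start_level = 0 and pgd_bits = 28 - 27 = 1, i.e. a 16-byte,
 * two-entry pgd at level 0.
 */
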
static struct io_pgtable *
arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
{
	u64 reg;
	struct arm_lpae_io_pgtable *data;
	typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;
	bool tg1;

	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
			    IO_PGTABLE_QUIRK_ARM_HD))
		return NULL;

	data = arm_lpae_alloc_pgtable(cfg);
	if (!data)
		return NULL;

	/* TCR */
	if (cfg->coherent_walk) {
		tcr->sh = ARM_LPAE_TCR_SH_IS;
		tcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
		tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
		if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)
			goto out_free_data;
	} else {
		tcr->sh = ARM_LPAE_TCR_SH_OS;
		tcr->irgn = ARM_LPAE_TCR_RGN_NC;
		if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
			tcr->orgn = ARM_LPAE_TCR_RGN_NC;
		else
			tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
	}

	tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1;
	switch (ARM_LPAE_GRANULE(data)) {
	case SZ_4K:
		tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K;
		break;
	case SZ_16K:
		tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K;
		break;
	case SZ_64K:
		tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K;
		break;
	}

	switch (cfg->oas) {
	case 32:
		tcr->ips = ARM_LPAE_TCR_PS_32_BIT;
		break;
	case 36:
		tcr->ips = ARM_LPAE_TCR_PS_36_BIT;
		break;
	case 40:
		tcr->ips = ARM_LPAE_TCR_PS_40_BIT;
		break;
	case 42:
		tcr->ips = ARM_LPAE_TCR_PS_42_BIT;
		break;
	case 44:
		tcr->ips = ARM_LPAE_TCR_PS_44_BIT;
		break;
	case 48:
		tcr->ips = ARM_LPAE_TCR_PS_48_BIT;
		break;
	case 52:
		tcr->ips = ARM_LPAE_TCR_PS_52_BIT;
		break;
	default:
		goto out_free_data;
	}

	tcr->tsz = 64ULL - cfg->ias;

	/* MAIRs */
	reg = (ARM_LPAE_MAIR_ATTR_NC
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
	      (ARM_LPAE_MAIR_ATTR_WBRWA
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
	      (ARM_LPAE_MAIR_ATTR_DEVICE
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
	      (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));

	cfg->arm_lpae_s1_cfg.mair = reg;

	/* Looking good; allocate a pgd */
	data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
					   GFP_KERNEL, cfg, cookie);
	if (!data->pgd)
		goto out_free_data;

	/* Ensure the empty pgd is visible before any actual TTBR write */
	wmb();

	/* TTBR */
	cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd);
	return &data->iop;

out_free_data:
	kfree(data);
	return NULL;
}

static struct io_pgtable *
arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
{
	u64 sl;
	struct arm_lpae_io_pgtable *data;
	typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;

	/* The NS quirk doesn't apply at stage 2 */
	if (cfg->quirks)
		return NULL;

	data = arm_lpae_alloc_pgtable(cfg);
	if (!data)
		return NULL;

	/*
	 * Concatenate PGDs at level 1 if possible in order to reduce
	 * the depth of the stage-2 walk.
	 */
	if (data->start_level == 0) {
		unsigned long pgd_pages;

		pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
		if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) {
			data->pgd_bits += data->bits_per_level;
			data->start_level++;
		}
	}

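	/*
	 * Worked example (illustrative): continuing the ias = 40, 4K-granule
	 * case, the two-entry level-0 pgd computed earlier is folded into the
	 * walk by bumping pgd_bits to 10 and starting at level 1, i.e. two
	 * concatenated 4K tables (1024 entries) replace the level-0 step.
	 */
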
	/* VTCR */
	if (cfg->coherent_walk) {
		vtcr->sh = ARM_LPAE_TCR_SH_IS;
		vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
		vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
	} else {
		vtcr->sh = ARM_LPAE_TCR_SH_OS;
		vtcr->irgn = ARM_LPAE_TCR_RGN_NC;
		vtcr->orgn = ARM_LPAE_TCR_RGN_NC;
	}

	sl = data->start_level;

	switch (ARM_LPAE_GRANULE(data)) {
	case SZ_4K:
		vtcr->tg = ARM_LPAE_TCR_TG0_4K;
		sl++; /* SL0 format is different for 4K granule size */
		break;
	case SZ_16K:
		vtcr->tg = ARM_LPAE_TCR_TG0_16K;
		break;
	case SZ_64K:
		vtcr->tg = ARM_LPAE_TCR_TG0_64K;
		break;
	}

	switch (cfg->oas) {
	case 32:
		vtcr->ps = ARM_LPAE_TCR_PS_32_BIT;
		break;
	case 36:
		vtcr->ps = ARM_LPAE_TCR_PS_36_BIT;
		break;
	case 40:
		vtcr->ps = ARM_LPAE_TCR_PS_40_BIT;
		break;
	case 42:
		vtcr->ps = ARM_LPAE_TCR_PS_42_BIT;
		break;
	case 44:
		vtcr->ps = ARM_LPAE_TCR_PS_44_BIT;
		break;
	case 48:
		vtcr->ps = ARM_LPAE_TCR_PS_48_BIT;
		break;
	case 52:
		vtcr->ps = ARM_LPAE_TCR_PS_52_BIT;
		break;
	default:
		goto out_free_data;
	}

	vtcr->tsz = 64ULL - cfg->ias;
	vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK;

	/* Allocate pgd pages */
	data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
					   GFP_KERNEL, cfg, cookie);
	if (!data->pgd)
		goto out_free_data;

	/* Ensure the empty pgd is visible before any actual TTBR write */
	wmb();

	/* VTTBR */
	cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
	return &data->iop;

out_free_data:
	kfree(data);
	return NULL;
}

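/*
 * Illustrative note on vtcr->sl above: per the VTCR SL0 encoding, the
 * starting level is expressed as 3 - level for the 16K/64K granules and
 * 2 - level for 4K (hence the extra sl++), and ~sl & ARM_LPAE_VTCR_SL0_MASK
 * computes exactly 3 - sl for the adjusted value.
 */
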
static struct io_pgtable *
arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
{
	if (cfg->ias > 32 || cfg->oas > 40)
		return NULL;

	cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
	return arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
}

static struct io_pgtable *
arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
{
	if (cfg->ias > 40 || cfg->oas > 40)
		return NULL;

	cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
	return arm_64_lpae_alloc_pgtable_s2(cfg, cookie);
}

static struct io_pgtable *
arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
	struct arm_lpae_io_pgtable *data;

	/* No quirks for Mali (hopefully) */
	if (cfg->quirks)
		return NULL;

	if (cfg->ias > 48 || cfg->oas > 40)
		return NULL;

	cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);

	data = arm_lpae_alloc_pgtable(cfg);
	if (!data)
		return NULL;

	/* Mali seems to need a full 4-level table regardless of IAS */
	if (data->start_level > 0) {
		data->start_level = 0;
		data->pgd_bits = 0;
	}
	/*
	 * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
	 * best we can do is mimic the out-of-tree driver and hope that the
	 * "implementation-defined caching policy" is good enough. Similarly,
	 * we'll use it for the sake of a valid attribute for our 'device'
	 * index, although callers should never request that in practice.
	 */
	cfg->arm_mali_lpae_cfg.memattr =
		(ARM_MALI_LPAE_MEMATTR_IMP_DEF
		 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
		(ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
		 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
		(ARM_MALI_LPAE_MEMATTR_IMP_DEF
		 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));

	data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
					   cfg, cookie);
	if (!data->pgd)
		goto out_free_data;

	/* Ensure the empty pgd is visible before TRANSTAB can be written */
	wmb();

	cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
					  ARM_MALI_LPAE_TTBR_READ_INNER |
					  ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
	if (cfg->coherent_walk)
		cfg->arm_mali_lpae_cfg.transtab |= ARM_MALI_LPAE_TTBR_SHARE_OUTER;

	return &data->iop;

out_free_data:
	kfree(data);
	return NULL;
}

struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
	.alloc	= arm_64_lpae_alloc_pgtable_s1,
	.free	= arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
	.alloc	= arm_64_lpae_alloc_pgtable_s2,
	.free	= arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = {
	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
	.alloc	= arm_32_lpae_alloc_pgtable_s1,
	.free	= arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = {
	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
	.alloc	= arm_32_lpae_alloc_pgtable_s2,
	.free	= arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
	.alloc	= arm_mali_lpae_alloc_pgtable,
	.free	= arm_lpae_free_pgtable,
};

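/*
 * Illustrative usage (sketch, not part of this file): an IOMMU driver
 * selects one of the formats above through the generic allocator, e.g.
 *
 *	struct io_pgtable_ops *ops;
 *
 *	ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &cfg, cookie);
 *	...
 *	free_io_pgtable_ops(ops);
 *
 * with cfg.pgsize_bitmap, ias/oas and the TLB callbacks filled in
 * beforehand, much as the selftest below does.
 */
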
#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST

static struct io_pgtable_cfg *cfg_cookie __initdata;

static void __init dummy_tlb_flush_all(void *cookie)
{
	WARN_ON(cookie != cfg_cookie);
}

static void __init dummy_tlb_flush(unsigned long iova, size_t size,
				   size_t granule, void *cookie)
{
	WARN_ON(cookie != cfg_cookie);
	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}

static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
				      unsigned long iova, size_t granule,
				      void *cookie)
{
	dummy_tlb_flush(iova, granule, granule, cookie);
}

static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
	.tlb_flush_all	= dummy_tlb_flush_all,
	.tlb_flush_walk	= dummy_tlb_flush,
	.tlb_add_page	= dummy_tlb_add_page,
};

static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
	struct io_pgtable_cfg *cfg = &data->iop.cfg;

	pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n",
		cfg->pgsize_bitmap, cfg->ias);
	pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n",
		ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data),
		ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd);
}

#define __FAIL(ops, i)	({						\
		WARN(1, "selftest: test failed for fmt idx %d\n", (i));	\
		arm_lpae_dump_ops(ops);					\
		selftest_running = false;				\
		-EFAULT;						\
})

static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
{
	static const enum io_pgtable_fmt fmts[] __initconst = {
		ARM_64_LPAE_S1,
		ARM_64_LPAE_S2,
	};

	int i, j;
	unsigned long iova;
	size_t size, mapped;
	struct io_pgtable_ops *ops;

	selftest_running = true;

	for (i = 0; i < ARRAY_SIZE(fmts); ++i) {
		cfg_cookie = cfg;
		ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg);
		if (!ops) {
			pr_err("selftest: failed to allocate io pgtable ops\n");
			return -ENOMEM;
		}

		/*
		 * Initial sanity checks.
		 * Empty page tables shouldn't provide any translations.
		 */
		if (ops->iova_to_phys(ops, 42))
			return __FAIL(ops, i);

		if (ops->iova_to_phys(ops, SZ_1G + 42))
			return __FAIL(ops, i);

		if (ops->iova_to_phys(ops, SZ_2G + 42))
			return __FAIL(ops, i);

		/*
		 * Distinct mappings of different granule sizes.
		 */
		iova = 0;
		for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
			size = 1UL << j;

			if (ops->map_pages(ops, iova, iova, size, 1,
					   IOMMU_READ | IOMMU_WRITE |
					   IOMMU_NOEXEC | IOMMU_CACHE,
					   GFP_KERNEL, &mapped))
				return __FAIL(ops, i);

			/* Overlapping mappings */
			if (!ops->map_pages(ops, iova, iova + size, size, 1,
					    IOMMU_READ | IOMMU_NOEXEC,
					    GFP_KERNEL, &mapped))
				return __FAIL(ops, i);

			if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
				return __FAIL(ops, i);

			iova += SZ_1G;
		}

		/* Partial unmap */
		size = 1UL << __ffs(cfg->pgsize_bitmap);
		if (ops->unmap_pages(ops, SZ_1G + size, size, 1, NULL) != size)
			return __FAIL(ops, i);

		/* Remap of partial unmap */
		if (ops->map_pages(ops, SZ_1G + size, size, size, 1,
				   IOMMU_READ, GFP_KERNEL, &mapped))
			return __FAIL(ops, i);

		if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42))
			return __FAIL(ops, i);

		/* Full unmap */
		iova = 0;
		for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
			size = 1UL << j;

			if (ops->unmap_pages(ops, iova, size, 1, NULL) != size)
				return __FAIL(ops, i);

			if (ops->iova_to_phys(ops, iova + 42))
				return __FAIL(ops, i);

			/* Remap full block */
			if (ops->map_pages(ops, iova, iova, size, 1,
					   IOMMU_WRITE, GFP_KERNEL, &mapped))
				return __FAIL(ops, i);

			if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
				return __FAIL(ops, i);

			iova += SZ_1G;
		}

		free_io_pgtable_ops(ops);
	}

	selftest_running = false;
	return 0;
}

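/*
 * Illustrative note: the overlapping-map and remap cases above rely on
 * selftest_running being set, so the "We require an unmap first"
 * WARN_ON()s in the map path stay quiet while -EEXIST is still returned
 * and checked.
 */
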
static int __init arm_lpae_do_selftests(void)
{
	static const unsigned long pgsize[] __initconst = {
		SZ_4K | SZ_2M | SZ_1G,
		SZ_16K | SZ_32M,
		SZ_64K | SZ_512M,
	};

	static const unsigned int ias[] __initconst = {
		32, 36, 40, 42, 44, 48,
	};

	int i, j, pass = 0, fail = 0;
	struct device dev;
	struct io_pgtable_cfg cfg = {
		.tlb = &dummy_tlb_ops,
		.oas = 48,
		.coherent_walk = true,
		.iommu_dev = &dev,
	};

	/* __arm_lpae_alloc_pages() merely needs dev_to_node() to work */
	set_dev_node(&dev, NUMA_NO_NODE);

	for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
		for (j = 0; j < ARRAY_SIZE(ias); ++j) {
			cfg.pgsize_bitmap = pgsize[i];
			cfg.ias = ias[j];
			pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n",
				pgsize[i], ias[j]);
			if (arm_lpae_run_tests(&cfg))
				fail++;
			else
				pass++;
		}
	}

	pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
	return fail ? -EFAULT : 0;
}
subsys_initcall(arm_lpae_do_selftests);
#endif