// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic ARM page table allocator.
 *
 * Copyright (C) 2014 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */

#define pr_fmt(fmt)	"arm-lpae io-pgtable: " fmt

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>

#include <asm/barrier.h>

#include "io-pgtable-arm.h"
#include "iommu-pages.h"

#define ARM_LPAE_MAX_ADDR_BITS		52
#define ARM_LPAE_S2_MAX_CONCAT_PAGES	16
#define ARM_LPAE_MAX_LEVELS		4

/* Struct accessors */
#define io_pgtable_to_data(x)						\
	container_of((x), struct arm_lpae_io_pgtable, iop)

#define io_pgtable_ops_to_data(x)					\
	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))

/*
 * Calculate the right shift amount to get to the portion describing level l
 * in a virtual address mapped by the pagetable in d.
 */
#define ARM_LPAE_LVL_SHIFT(l,d)						\
	(((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) +		\
	ilog2(sizeof(arm_lpae_iopte)))

#define ARM_LPAE_GRANULE(d)						\
	(sizeof(arm_lpae_iopte) << (d)->bits_per_level)
#define ARM_LPAE_PGD_SIZE(d)						\
	(sizeof(arm_lpae_iopte) << (d)->pgd_bits)

#define ARM_LPAE_PTES_PER_TABLE(d)					\
	(ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte)))

/*
 * Calculate the index at level l used to map virtual address a using the
 * pagetable in d.
 */
#define ARM_LPAE_PGD_IDX(l,d)						\
	((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0)

#define ARM_LPAE_LVL_IDX(a,l,d)						\
	(((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) &			\
	 ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1))

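/*
 * Illustrative example (not used by the code): with a 4K granule,
 * sizeof(arm_lpae_iopte) == 8 so bits_per_level == 9, and
 * ARM_LPAE_LVL_SHIFT() yields 39/30/21/12 for levels 0-3, i.e. each level
 * indexes a 9-bit slice of the IOVA above the 12-bit page offset.
 * ARM_LPAE_LVL_IDX() then widens the top-level index by ARM_LPAE_PGD_IDX()
 * so that it covers a (possibly larger or concatenated) PGD.
 */
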
/* Calculate the block/page mapping size at level l for pagetable in d. */
#define ARM_LPAE_BLOCK_SIZE(l,d)	(1ULL << ARM_LPAE_LVL_SHIFT(l,d))

/* Page table bits */
#define ARM_LPAE_PTE_TYPE_SHIFT		0
#define ARM_LPAE_PTE_TYPE_MASK		0x3

#define ARM_LPAE_PTE_TYPE_BLOCK		1
#define ARM_LPAE_PTE_TYPE_TABLE		3
#define ARM_LPAE_PTE_TYPE_PAGE		3

#define ARM_LPAE_PTE_ADDR_MASK		GENMASK_ULL(47,12)

#define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
#define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
#define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
#define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
#define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
#define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
#define ARM_LPAE_PTE_SH_IS		(((arm_lpae_iopte)3) << 8)
#define ARM_LPAE_PTE_NS			(((arm_lpae_iopte)1) << 5)
#define ARM_LPAE_PTE_VALID		(((arm_lpae_iopte)1) << 0)

#define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
/* Ignore the contiguous bit for block splitting */
#define ARM_LPAE_PTE_ATTR_HI_MASK	(ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM)
#define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK |	\
					 ARM_LPAE_PTE_ATTR_HI_MASK)
/* Software bit for solving coherency races */
#define ARM_LPAE_PTE_SW_SYNC		(((arm_lpae_iopte)1) << 55)

/* Stage-1 PTE */
#define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
#define ARM_LPAE_PTE_AP_RDONLY_BIT	7
#define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)1) <<		\
					 ARM_LPAE_PTE_AP_RDONLY_BIT)
#define ARM_LPAE_PTE_AP_WR_CLEAN_MASK	(ARM_LPAE_PTE_AP_RDONLY |	\
					 ARM_LPAE_PTE_DBM)
#define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
#define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)

/* Stage-2 PTE */
#define ARM_LPAE_PTE_HAP_FAULT		(((arm_lpae_iopte)0) << 6)
#define ARM_LPAE_PTE_HAP_READ		(((arm_lpae_iopte)1) << 6)
#define ARM_LPAE_PTE_HAP_WRITE		(((arm_lpae_iopte)2) << 6)
#define ARM_LPAE_PTE_MEMATTR_OIWB	(((arm_lpae_iopte)0xf) << 2)
#define ARM_LPAE_PTE_MEMATTR_NC		(((arm_lpae_iopte)0x5) << 2)
#define ARM_LPAE_PTE_MEMATTR_DEV	(((arm_lpae_iopte)0x1) << 2)

/* Register bits */
#define ARM_LPAE_VTCR_SL0_MASK		0x3

#define ARM_LPAE_TCR_T0SZ_SHIFT		0

#define ARM_LPAE_VTCR_PS_SHIFT		16
#define ARM_LPAE_VTCR_PS_MASK		0x7

#define ARM_LPAE_MAIR_ATTR_SHIFT(n)	((n) << 3)
#define ARM_LPAE_MAIR_ATTR_MASK		0xff
#define ARM_LPAE_MAIR_ATTR_DEVICE	0x04
#define ARM_LPAE_MAIR_ATTR_NC		0x44
#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA	0xf4
#define ARM_LPAE_MAIR_ATTR_WBRWA	0xff
#define ARM_LPAE_MAIR_ATTR_IDX_NC	0
#define ARM_LPAE_MAIR_ATTR_IDX_CACHE	1
#define ARM_LPAE_MAIR_ATTR_IDX_DEV	2
#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE	3

#define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
#define ARM_MALI_LPAE_TTBR_READ_INNER	BIT(2)
#define ARM_MALI_LPAE_TTBR_SHARE_OUTER	BIT(4)

#define ARM_MALI_LPAE_MEMATTR_IMP_DEF	0x88ULL
#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL

/* IOPTE accessors */
#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))

#define iopte_type(pte)					\
	(((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)

#define iopte_prot(pte)	((pte) & ARM_LPAE_PTE_ATTR_MASK)

#define iopte_writeable_dirty(pte)				\
	(((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM)

#define iopte_set_writeable_clean(ptep)				\
	set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)(ptep))

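/*
 * Illustrative example (not used by the code): a 48-bit IAS with a 4K
 * granule gives bits_per_level == 9, start_level == 0 and pgd_bits == 9,
 * i.e. a classic 4-level walk with a 4K PGD; smaller IAS values raise
 * start_level and/or shrink the PGD accordingly.
 */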
struct arm_lpae_io_pgtable {
	struct io_pgtable	iop;

	int			pgd_bits;
	int			start_level;
	int			bits_per_level;

	void			*pgd;
};

typedef u64 arm_lpae_iopte;

static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
			      enum io_pgtable_fmt fmt)
{
	if (lvl == (ARM_LPAE_MAX_LEVELS - 1) && fmt != ARM_MALI_LPAE)
		return iopte_type(pte) == ARM_LPAE_PTE_TYPE_PAGE;

	return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK;
}

static inline bool iopte_table(arm_lpae_iopte pte, int lvl)
{
	if (lvl == (ARM_LPAE_MAX_LEVELS - 1))
		return false;
	return iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE;
}

static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
				     struct arm_lpae_io_pgtable *data)
{
	arm_lpae_iopte pte = paddr;

	/* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
	return (pte | (pte >> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK;
}

static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
				  struct arm_lpae_io_pgtable *data)
{
	u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;

	if (ARM_LPAE_GRANULE(data) < SZ_64K)
		return paddr;

	/* Rotate the packed high-order bits back to the top */
	return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
}

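/*
 * Illustrative example of the packing above (not used by the code): with a
 * 64K granule, output address bits 51:48 are carried in PTE bits 15:12,
 * which are otherwise RES0 for 64K-aligned addresses. So for
 * paddr == 0x0001_0000_0000_0000, paddr_to_iopte() stores 0x1 in bits 15:12
 * and iopte_to_paddr() rotates it back up to bit 48.
 */
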
static bool selftest_running = false;

static dma_addr_t __arm_lpae_dma_addr(void *pages)
{
	return (dma_addr_t)virt_to_phys(pages);
}

static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
				    struct io_pgtable_cfg *cfg,
				    void *cookie)
{
	struct device *dev = cfg->iommu_dev;
	int order = get_order(size);
	dma_addr_t dma;
	void *pages;

	VM_BUG_ON((gfp & __GFP_HIGHMEM));

	if (cfg->alloc)
		pages = cfg->alloc(cookie, size, gfp);
	else
		pages = iommu_alloc_pages_node(dev_to_node(dev), gfp, order);

	if (!pages)
		return NULL;

	if (!cfg->coherent_walk) {
		dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, dma))
			goto out_free;
		/*
		 * We depend on the IOMMU being able to work with any physical
		 * address directly, so if the DMA layer suggests otherwise by
		 * translating or truncating them, that bodes very badly...
		 */
		if (dma != virt_to_phys(pages))
			goto out_unmap;
	}

	return pages;

out_unmap:
	dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
	dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);

out_free:
	if (cfg->free)
		cfg->free(cookie, pages, size);
	else
		iommu_free_pages(pages, order);

	return NULL;
}

static void __arm_lpae_free_pages(void *pages, size_t size,
				  struct io_pgtable_cfg *cfg,
				  void *cookie)
{
	if (!cfg->coherent_walk)
		dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
				 size, DMA_TO_DEVICE);

	if (cfg->free)
		cfg->free(cookie, pages, size);
	else
		iommu_free_pages(pages, get_order(size));
}

static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
				struct io_pgtable_cfg *cfg)
{
	dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
				   sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
}

static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries)
{
	for (int i = 0; i < num_entries; i++)
		ptep[i] = 0;

	if (!cfg->coherent_walk && num_entries)
		__arm_lpae_sync_pte(ptep, num_entries, cfg);
}

static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
			       struct iommu_iotlb_gather *gather,
			       unsigned long iova, size_t size, size_t pgcount,
			       int lvl, arm_lpae_iopte *ptep);

static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
				phys_addr_t paddr, arm_lpae_iopte prot,
				int lvl, int num_entries, arm_lpae_iopte *ptep)
{
	arm_lpae_iopte pte = prot;
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
	int i;

	if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
		pte |= ARM_LPAE_PTE_TYPE_PAGE;
	else
		pte |= ARM_LPAE_PTE_TYPE_BLOCK;

	for (i = 0; i < num_entries; i++)
		ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);

	if (!cfg->coherent_walk)
		__arm_lpae_sync_pte(ptep, num_entries, cfg);
}

static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
			     unsigned long iova, phys_addr_t paddr,
			     arm_lpae_iopte prot, int lvl, int num_entries,
			     arm_lpae_iopte *ptep)
{
	int i;

	for (i = 0; i < num_entries; i++)
		if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
			/* We require an unmap first */
			WARN_ON(!selftest_running);
			return -EEXIST;
		} else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
			/*
			 * We need to unmap and free the old table before
			 * overwriting it with a block entry.
			 */
			arm_lpae_iopte *tblp;
			size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);

			tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
			if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
					     lvl, tblp) != sz) {
				WARN_ON(1);
				return -EINVAL;
			}
		}

	__arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
	return 0;
}

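/*
 * Install a freshly allocated next-level table at *ptep. Concurrent mappers
 * may race to install a table for the same IOVA range, so the update is done
 * with cmpxchg: the previous PTE value is returned, and if it differs from
 * @curr the caller lost the race and must free its own table. The
 * ARM_LPAE_PTE_SW_SYNC software bit records that the entry has already been
 * made visible to a non-coherent walker.
 */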
static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
					     arm_lpae_iopte *ptep,
					     arm_lpae_iopte curr,
					     struct arm_lpae_io_pgtable *data)
{
	arm_lpae_iopte old, new;
	struct io_pgtable_cfg *cfg = &data->iop.cfg;

	new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE;
	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
		new |= ARM_LPAE_PTE_NSTABLE;

	/*
	 * Ensure the table itself is visible before its PTE can be.
	 * Whilst we could get away with cmpxchg64_release below, this
	 * doesn't have any ordering semantics when !CONFIG_SMP.
	 */
	dma_wmb();

	old = cmpxchg64_relaxed(ptep, curr, new);

	if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC))
		return old;

	/* Even if it's not ours, there's no point waiting; just kick it */
	__arm_lpae_sync_pte(ptep, 1, cfg);
	if (old == curr)
		WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);

	return old;
}

static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
			  phys_addr_t paddr, size_t size, size_t pgcount,
			  arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
			  gfp_t gfp, size_t *mapped)
{
	arm_lpae_iopte *cptep, pte;
	size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
	size_t tblsz = ARM_LPAE_GRANULE(data);
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	int ret = 0, num_entries, max_entries, map_idx_start;

	/* Find our entry at the current level */
	map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
	ptep += map_idx_start;

	/* If we can install a leaf entry at this level, then do so */
	if (size == block_size) {
		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
		num_entries = min_t(int, pgcount, max_entries);
		ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
		if (!ret)
			*mapped += num_entries * size;

		return ret;
	}

	/* We can't allocate tables at the final level */
	if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
		return -EINVAL;

	/* Grab a pointer to the next level */
	pte = READ_ONCE(*ptep);
	if (!pte) {
		cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg, data->iop.cookie);
		if (!cptep)
			return -ENOMEM;

		pte = arm_lpae_install_table(cptep, ptep, 0, data);
		if (pte)
			__arm_lpae_free_pages(cptep, tblsz, cfg, data->iop.cookie);
	} else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
		__arm_lpae_sync_pte(ptep, 1, cfg);
	}

	if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
		cptep = iopte_deref(pte, data);
	} else if (pte) {
		/* We require an unmap first */
		WARN_ON(!selftest_running);
		return -EEXIST;
	}

	/* Rinse, repeat */
	return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
			      cptep, gfp, mapped);
}

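/*
 * Translate IOMMU_* prot flags into LPAE PTE permission/attribute bits.
 * With IO_PGTABLE_QUIRK_ARM_HD, writeable stage-1 mappings get DBM set so
 * that dirtiness is tracked in AP[2]: a PTE with DBM set and AP[2] clear is
 * "writeable dirty" (see iopte_writeable_dirty()), iopte_set_writeable_clean()
 * sets AP[2] to mark it clean again, and a DBM-capable walker clears AP[2]
 * on the next write instead of faulting.
 */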
static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
					   int prot)
{
	arm_lpae_iopte pte;

	if (data->iop.fmt == ARM_64_LPAE_S1 ||
	    data->iop.fmt == ARM_32_LPAE_S1) {
		pte = ARM_LPAE_PTE_nG;
		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
			pte |= ARM_LPAE_PTE_AP_RDONLY;
		else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
			pte |= ARM_LPAE_PTE_DBM;
		if (!(prot & IOMMU_PRIV))
			pte |= ARM_LPAE_PTE_AP_UNPRIV;
	} else {
		pte = ARM_LPAE_PTE_HAP_FAULT;
		if (prot & IOMMU_READ)
			pte |= ARM_LPAE_PTE_HAP_READ;
		if (prot & IOMMU_WRITE)
			pte |= ARM_LPAE_PTE_HAP_WRITE;
	}

	/*
	 * Note that this logic is structured to accommodate Mali LPAE
	 * having stage-1-like attributes but stage-2-like permissions.
	 */
	if (data->iop.fmt == ARM_64_LPAE_S2 ||
	    data->iop.fmt == ARM_32_LPAE_S2) {
		if (prot & IOMMU_MMIO)
			pte |= ARM_LPAE_PTE_MEMATTR_DEV;
		else if (prot & IOMMU_CACHE)
			pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
		else
			pte |= ARM_LPAE_PTE_MEMATTR_NC;
	} else {
		if (prot & IOMMU_MMIO)
			pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
		else if (prot & IOMMU_CACHE)
			pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
	}

	/*
	 * Also Mali has its own notions of shareability wherein its Inner
	 * domain covers the cores within the GPU, and its Outer domain is
	 * "outside the GPU" (i.e. either the Inner or System domain in CPU
	 * terms, depending on coherency).
	 */
	if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE)
		pte |= ARM_LPAE_PTE_SH_IS;
	else
		pte |= ARM_LPAE_PTE_SH_OS;

	if (prot & IOMMU_NOEXEC)
		pte |= ARM_LPAE_PTE_XN;

	if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
		pte |= ARM_LPAE_PTE_NS;

	if (data->iop.fmt != ARM_MALI_LPAE)
		pte |= ARM_LPAE_PTE_AF;

	return pte;
}

static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
			      int iommu_prot, gfp_t gfp, size_t *mapped)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_lpae_iopte *ptep = data->pgd;
	int ret, lvl = data->start_level;
	arm_lpae_iopte prot;
	long iaext = (s64)iova >> cfg->ias;

	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
		return -EINVAL;

	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
		iaext = ~iaext;
	if (WARN_ON(iaext || paddr >> cfg->oas))
		return -ERANGE;

	if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
		return -EINVAL;

	prot = arm_lpae_prot_to_pte(data, iommu_prot);
	ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
			     ptep, gfp, mapped);
	/*
	 * Synchronise all PTE updates for the new mapping before there's
	 * a chance for anything to kick off a table walk for the new iova.
	 */
	wmb();

	return ret;
}

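/*
 * Recursively free every table reachable from @ptep at level @lvl, and then
 * the table at @ptep itself. Leaf (block/page) entries are simply skipped,
 * and no TLB maintenance is performed here; that is left to the callers.
 */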
static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
				    arm_lpae_iopte *ptep)
{
	arm_lpae_iopte *start, *end;
	unsigned long table_size;

	if (lvl == data->start_level)
		table_size = ARM_LPAE_PGD_SIZE(data);
	else
		table_size = ARM_LPAE_GRANULE(data);

	start = ptep;

	/* Only leaf entries at the last level */
	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
		end = ptep;
	else
		end = (void *)ptep + table_size;

	while (ptep != end) {
		arm_lpae_iopte pte = *ptep++;

		if (!pte || iopte_leaf(pte, lvl, data->iop.fmt))
			continue;

		__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
	}

	__arm_lpae_free_pages(start, table_size, &data->iop.cfg, data->iop.cookie);
}

static void arm_lpae_free_pgtable(struct io_pgtable *iop)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);

	__arm_lpae_free_pgtable(data, data->start_level, data->pgd);
	kfree(data);
}

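/*
 * Replace the block mapping described by @blk_pte with a next-level table so
 * that only part of the block needs to be unmapped: the new table re-creates
 * the portions of the block that stay mapped and leaves the target range
 * empty. Returns the number of bytes unmapped, or 0 on failure.
 */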
static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
				       struct iommu_iotlb_gather *gather,
				       unsigned long iova, size_t size,
				       arm_lpae_iopte blk_pte, int lvl,
				       arm_lpae_iopte *ptep, size_t pgcount)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_lpae_iopte pte, *tablep;
	phys_addr_t blk_paddr;
	size_t tablesz = ARM_LPAE_GRANULE(data);
	size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
	int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
	int i, unmap_idx_start = -1, num_entries = 0, max_entries;

	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
		return 0;

	tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg, data->iop.cookie);
	if (!tablep)
		return 0; /* Bytes unmapped */

	if (size == split_sz) {
		unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
		max_entries = ptes_per_table - unmap_idx_start;
		num_entries = min_t(int, pgcount, max_entries);
	}

	blk_paddr = iopte_to_paddr(blk_pte, data);
	pte = iopte_prot(blk_pte);

	for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
		/* Unmap! */
		if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
			continue;

		__arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
	}

	pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
	if (pte != blk_pte) {
		__arm_lpae_free_pages(tablep, tablesz, cfg, data->iop.cookie);
		/*
		 * We may race against someone unmapping another part of this
		 * block, but anything else is invalid. We can't misinterpret
		 * a page entry here since we're never at the last level.
		 */
		if (iopte_type(pte) != ARM_LPAE_PTE_TYPE_TABLE)
			return 0;

		tablep = iopte_deref(pte, data);
	} else if (unmap_idx_start >= 0) {
		for (i = 0; i < num_entries; i++)
			io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);

		return num_entries * size;
	}

	return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
}

static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
			       struct iommu_iotlb_gather *gather,
			       unsigned long iova, size_t size, size_t pgcount,
			       int lvl, arm_lpae_iopte *ptep)
{
	arm_lpae_iopte pte;
	struct io_pgtable *iop = &data->iop;
	int i = 0, num_entries, max_entries, unmap_idx_start;

	/* Something went horribly wrong and we ran out of page table */
	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
		return 0;

	unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
	ptep += unmap_idx_start;
	pte = READ_ONCE(*ptep);
	if (WARN_ON(!pte))
		return 0;

	/* If the size matches this level, we're in the right place */
	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
		num_entries = min_t(int, pgcount, max_entries);

		/* Find and handle non-leaf entries */
		for (i = 0; i < num_entries; i++) {
			pte = READ_ONCE(ptep[i]);
			if (WARN_ON(!pte))
				break;

			if (!iopte_leaf(pte, lvl, iop->fmt)) {
				__arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1);

				/* Also flush any partial walks */
				io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
							  ARM_LPAE_GRANULE(data));
				__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
			}
		}

		/* Clear the remaining entries */
		__arm_lpae_clear_pte(ptep, &iop->cfg, i);

		if (gather && !iommu_iotlb_gather_queued(gather))
			for (int j = 0; j < i; j++)
				io_pgtable_tlb_add_page(iop, gather, iova + j * size, size);

		return i * size;
	} else if (iopte_leaf(pte, lvl, iop->fmt)) {
		/*
		 * Insert a table at the next level to map the old region,
		 * minus the part we want to unmap
		 */
		return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
						lvl + 1, ptep, pgcount);
	}

	/* Keep on walkin' */
	ptep = iopte_deref(pte, data);
	return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep);
}

static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
				   size_t pgsize, size_t pgcount,
				   struct iommu_iotlb_gather *gather)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_lpae_iopte *ptep = data->pgd;
	long iaext = (s64)iova >> cfg->ias;

	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
		return 0;

	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
		iaext = ~iaext;
	if (WARN_ON(iaext))
		return 0;

	return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount,
				data->start_level, ptep);
}

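/*
 * Walk the page table for @iova and return the physical address it maps to,
 * or 0 if no valid leaf entry is found.
 */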
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
					 unsigned long iova)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
	arm_lpae_iopte pte, *ptep = data->pgd;
	int lvl = data->start_level;

	do {
		/* Valid IOPTE pointer? */
		if (!ptep)
			return 0;

		/* Grab the IOPTE we're interested in */
		ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
		pte = READ_ONCE(*ptep);

		/* Valid entry? */
		if (!pte)
			return 0;

		/* Leaf entry? */
		if (iopte_leaf(pte, lvl, data->iop.fmt))
			goto found_translation;

		/* Take it to the next level */
		ptep = iopte_deref(pte, data);
	} while (++lvl < ARM_LPAE_MAX_LEVELS);

	/* Ran out of page tables to walk */
	return 0;

found_translation:
	iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
	return iopte_to_paddr(pte, data) | iova;
}

struct io_pgtable_walk_data {
	struct iommu_dirty_bitmap	*dirty;
	unsigned long			flags;
	u64				addr;
	const u64			end;
};

static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
				       struct io_pgtable_walk_data *walk_data,
				       arm_lpae_iopte *ptep,
				       int lvl);

static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data,
				  struct io_pgtable_walk_data *walk_data,
				  arm_lpae_iopte *ptep, int lvl)
{
	struct io_pgtable *iop = &data->iop;
	arm_lpae_iopte pte = READ_ONCE(*ptep);

	if (iopte_leaf(pte, lvl, iop->fmt)) {
		size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data);

		if (iopte_writeable_dirty(pte)) {
			iommu_dirty_bitmap_record(walk_data->dirty,
						  walk_data->addr, size);
			if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR))
				iopte_set_writeable_clean(ptep);
		}
		walk_data->addr += size;
		return 0;
	}

	if (WARN_ON(!iopte_table(pte, lvl)))
		return -EINVAL;

	ptep = iopte_deref(pte, data);
	return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1);
}

static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
				       struct io_pgtable_walk_data *walk_data,
				       arm_lpae_iopte *ptep,
				       int lvl)
{
	u32 idx;
	int max_entries, ret;

	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
		return -EINVAL;

	if (lvl == data->start_level)
		max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
	else
		max_entries = ARM_LPAE_PTES_PER_TABLE(data);

	for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data);
	     (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) {
		ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
					 unsigned long iova, size_t size,
					 unsigned long flags,
					 struct iommu_dirty_bitmap *dirty)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	struct io_pgtable_walk_data walk_data = {
		.dirty = dirty,
		.flags = flags,
		.addr = iova,
		.end = iova + size,
	};
	arm_lpae_iopte *ptep = data->pgd;
	int lvl = data->start_level;

	if (WARN_ON(!size))
		return -EINVAL;
	if (WARN_ON((iova + size - 1) & ~(BIT(cfg->ias) - 1)))
		return -EINVAL;
	if (data->iop.fmt != ARM_64_LPAE_S1)
		return -EINVAL;

	return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl);
}

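/*
 * Illustrative example (not used by the code): with a 4K CPU PAGE_SIZE and a
 * pgsize_bitmap of 4K|64K|2M|1G, the function below picks the 4K granule and
 * restricts the bitmap to 4K|2M|1G; a 64K-only bitmap would instead select
 * the 64K granule and 64K|512M (plus the 4TB block size when oas > 48).
 */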
static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
{
	unsigned long granule, page_sizes;
	unsigned int max_addr_bits = 48;

	/*
	 * We need to restrict the supported page sizes to match the
	 * translation regime for a particular granule. Aim to match
	 * the CPU page size if possible, otherwise prefer smaller sizes.
	 * While we're at it, restrict the block sizes to match the
	 * chosen granule.
	 */
	if (cfg->pgsize_bitmap & PAGE_SIZE)
		granule = PAGE_SIZE;
	else if (cfg->pgsize_bitmap & ~PAGE_MASK)
		granule = 1UL << __fls(cfg->pgsize_bitmap & ~PAGE_MASK);
	else if (cfg->pgsize_bitmap & PAGE_MASK)
		granule = 1UL << __ffs(cfg->pgsize_bitmap & PAGE_MASK);
	else
		granule = 0;

	switch (granule) {
	case SZ_4K:
		page_sizes = (SZ_4K | SZ_2M | SZ_1G);
		break;
	case SZ_16K:
		page_sizes = (SZ_16K | SZ_32M);
		break;
	case SZ_64K:
		max_addr_bits = 52;
		page_sizes = (SZ_64K | SZ_512M);
		if (cfg->oas > 48)
			page_sizes |= 1ULL << 42; /* 4TB */
		break;
	default:
		page_sizes = 0;
	}

	cfg->pgsize_bitmap &= page_sizes;
	cfg->ias = min(cfg->ias, max_addr_bits);
	cfg->oas = min(cfg->oas, max_addr_bits);
}

static struct arm_lpae_io_pgtable *
arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
{
	struct arm_lpae_io_pgtable *data;
	int levels, va_bits, pg_shift;

	arm_lpae_restrict_pgsizes(cfg);

	if (!(cfg->pgsize_bitmap & (SZ_4K | SZ_16K | SZ_64K)))
		return NULL;

	if (cfg->ias > ARM_LPAE_MAX_ADDR_BITS)
		return NULL;

	if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS)
		return NULL;

	data = kmalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return NULL;

	pg_shift = __ffs(cfg->pgsize_bitmap);
	data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte));

	va_bits = cfg->ias - pg_shift;
	levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
	data->start_level = ARM_LPAE_MAX_LEVELS - levels;

	/* Calculate the actual size of our pgd (without concatenation) */
	data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1));

	data->iop.ops = (struct io_pgtable_ops) {
		.map_pages	= arm_lpae_map_pages,
		.unmap_pages	= arm_lpae_unmap_pages,
		.iova_to_phys	= arm_lpae_iova_to_phys,
		.read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
	};

	return data;
}

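/*
 * The format-specific constructors below are not called directly by IOMMU
 * drivers; they are reached through the io-pgtable core, e.g. (illustrative):
 *
 *	ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &cfg, cookie);
 *
 * which fills in the TCR/MAIR/TTBR (or VTCR/VTTBR, or Mali TRANSTAB/MEMATTR)
 * values that the driver then writes to its hardware.
 */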
static struct io_pgtable *
arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
{
	u64 reg;
	struct arm_lpae_io_pgtable *data;
	typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;
	bool tg1;

	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
			    IO_PGTABLE_QUIRK_ARM_HD))
		return NULL;

	data = arm_lpae_alloc_pgtable(cfg);
	if (!data)
		return NULL;

	/* TCR */
	if (cfg->coherent_walk) {
		tcr->sh = ARM_LPAE_TCR_SH_IS;
		tcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
		tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
		if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)
			goto out_free_data;
	} else {
		tcr->sh = ARM_LPAE_TCR_SH_OS;
		tcr->irgn = ARM_LPAE_TCR_RGN_NC;
		if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
			tcr->orgn = ARM_LPAE_TCR_RGN_NC;
		else
			tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
	}

	tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1;
	switch (ARM_LPAE_GRANULE(data)) {
	case SZ_4K:
		tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K;
		break;
	case SZ_16K:
		tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K;
		break;
	case SZ_64K:
		tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K;
		break;
	}

	switch (cfg->oas) {
	case 32:
		tcr->ips = ARM_LPAE_TCR_PS_32_BIT;
		break;
	case 36:
		tcr->ips = ARM_LPAE_TCR_PS_36_BIT;
		break;
	case 40:
		tcr->ips = ARM_LPAE_TCR_PS_40_BIT;
		break;
	case 42:
		tcr->ips = ARM_LPAE_TCR_PS_42_BIT;
		break;
	case 44:
		tcr->ips = ARM_LPAE_TCR_PS_44_BIT;
		break;
	case 48:
		tcr->ips = ARM_LPAE_TCR_PS_48_BIT;
		break;
	case 52:
		tcr->ips = ARM_LPAE_TCR_PS_52_BIT;
		break;
	default:
		goto out_free_data;
	}

	tcr->tsz = 64ULL - cfg->ias;

	/* MAIRs */
	reg = (ARM_LPAE_MAIR_ATTR_NC
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
	      (ARM_LPAE_MAIR_ATTR_WBRWA
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
	      (ARM_LPAE_MAIR_ATTR_DEVICE
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
	      (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));

	cfg->arm_lpae_s1_cfg.mair = reg;

	/* Looking good; allocate a pgd */
	data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
					   GFP_KERNEL, cfg, cookie);
	if (!data->pgd)
		goto out_free_data;

	/* Ensure the empty pgd is visible before any actual TTBR write */
	wmb();

	/* TTBR */
	cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd);
	return &data->iop;

out_free_data:
	kfree(data);
	return NULL;
}

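/*
 * Illustrative example (not used by the code): with a 4K granule and a
 * 40-bit IPA space, a level-0 table would hold only two entries, so the
 * stage-2 code below concatenates the two level-1 tables into a single 8K
 * PGD and starts the walk at level 1, encoding that choice in the SL0 field.
 */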
static struct io_pgtable *
arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
{
	u64 sl;
	struct arm_lpae_io_pgtable *data;
	typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;

	/* The NS quirk doesn't apply at stage 2 */
	if (cfg->quirks)
		return NULL;

	data = arm_lpae_alloc_pgtable(cfg);
	if (!data)
		return NULL;

	/*
	 * Concatenate PGDs at level 1 if possible in order to reduce
	 * the depth of the stage-2 walk.
	 */
	if (data->start_level == 0) {
		unsigned long pgd_pages;

		pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
		if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) {
			data->pgd_bits += data->bits_per_level;
			data->start_level++;
		}
	}

	/* VTCR */
	if (cfg->coherent_walk) {
		vtcr->sh = ARM_LPAE_TCR_SH_IS;
		vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
		vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
	} else {
		vtcr->sh = ARM_LPAE_TCR_SH_OS;
		vtcr->irgn = ARM_LPAE_TCR_RGN_NC;
		vtcr->orgn = ARM_LPAE_TCR_RGN_NC;
	}

	sl = data->start_level;

	switch (ARM_LPAE_GRANULE(data)) {
	case SZ_4K:
		vtcr->tg = ARM_LPAE_TCR_TG0_4K;
		sl++; /* SL0 format is different for 4K granule size */
		break;
	case SZ_16K:
		vtcr->tg = ARM_LPAE_TCR_TG0_16K;
		break;
	case SZ_64K:
		vtcr->tg = ARM_LPAE_TCR_TG0_64K;
		break;
	}

	switch (cfg->oas) {
	case 32:
		vtcr->ps = ARM_LPAE_TCR_PS_32_BIT;
		break;
	case 36:
		vtcr->ps = ARM_LPAE_TCR_PS_36_BIT;
		break;
	case 40:
		vtcr->ps = ARM_LPAE_TCR_PS_40_BIT;
		break;
	case 42:
		vtcr->ps = ARM_LPAE_TCR_PS_42_BIT;
		break;
	case 44:
		vtcr->ps = ARM_LPAE_TCR_PS_44_BIT;
		break;
	case 48:
		vtcr->ps = ARM_LPAE_TCR_PS_48_BIT;
		break;
	case 52:
		vtcr->ps = ARM_LPAE_TCR_PS_52_BIT;
		break;
	default:
		goto out_free_data;
	}

	vtcr->tsz = 64ULL - cfg->ias;
	vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK;

	/* Allocate pgd pages */
	data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
					   GFP_KERNEL, cfg, cookie);
	if (!data->pgd)
		goto out_free_data;

	/* Ensure the empty pgd is visible before any actual TTBR write */
	wmb();

	/* VTTBR */
	cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
	return &data->iop;

out_free_data:
	kfree(data);
	return NULL;
}

static struct io_pgtable *
arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
{
	if (cfg->ias > 32 || cfg->oas > 40)
		return NULL;

	cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
	return arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
}

static struct io_pgtable *
arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
{
	if (cfg->ias > 40 || cfg->oas > 40)
		return NULL;

	cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
	return arm_64_lpae_alloc_pgtable_s2(cfg, cookie);
}

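/*
 * Mali GPUs use an LPAE-like format with stage-1-style memory attributes but
 * stage-2-style permission bits (see arm_lpae_prot_to_pte()), and a
 * TRANSTAB/MEMATTR register pair rather than TTBR/MAIR.
 */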
static struct io_pgtable *
arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
	struct arm_lpae_io_pgtable *data;

	/* No quirks for Mali (hopefully) */
	if (cfg->quirks)
		return NULL;

	if (cfg->ias > 48 || cfg->oas > 40)
		return NULL;

	cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);

	data = arm_lpae_alloc_pgtable(cfg);
	if (!data)
		return NULL;

	/* Mali seems to need a full 4-level table regardless of IAS */
	if (data->start_level > 0) {
		data->start_level = 0;
		data->pgd_bits = 0;
	}
	/*
	 * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
	 * best we can do is mimic the out-of-tree driver and hope that the
	 * "implementation-defined caching policy" is good enough. Similarly,
	 * we'll use it for the sake of a valid attribute for our 'device'
	 * index, although callers should never request that in practice.
	 */
	cfg->arm_mali_lpae_cfg.memattr =
		(ARM_MALI_LPAE_MEMATTR_IMP_DEF
		 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
		(ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
		 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
		(ARM_MALI_LPAE_MEMATTR_IMP_DEF
		 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));

	data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
					   cfg, cookie);
	if (!data->pgd)
		goto out_free_data;

	/* Ensure the empty pgd is visible before TRANSTAB can be written */
	wmb();

	cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
					  ARM_MALI_LPAE_TTBR_READ_INNER |
					  ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
	if (cfg->coherent_walk)
		cfg->arm_mali_lpae_cfg.transtab |= ARM_MALI_LPAE_TTBR_SHARE_OUTER;

	return &data->iop;

out_free_data:
	kfree(data);
	return NULL;
}

struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
	.alloc	= arm_64_lpae_alloc_pgtable_s1,
	.free	= arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
	.alloc	= arm_64_lpae_alloc_pgtable_s2,
	.free	= arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = {
	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
	.alloc	= arm_32_lpae_alloc_pgtable_s1,
	.free	= arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = {
	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
	.alloc	= arm_32_lpae_alloc_pgtable_s2,
	.free	= arm_lpae_free_pgtable,
};

struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
	.caps	= IO_PGTABLE_CAP_CUSTOM_ALLOCATOR,
	.alloc	= arm_mali_lpae_alloc_pgtable,
	.free	= arm_lpae_free_pgtable,
};

#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST

static struct io_pgtable_cfg *cfg_cookie __initdata;

static void __init dummy_tlb_flush_all(void *cookie)
{
	WARN_ON(cookie != cfg_cookie);
}

static void __init dummy_tlb_flush(unsigned long iova, size_t size,
				   size_t granule, void *cookie)
{
	WARN_ON(cookie != cfg_cookie);
	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}

static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
				      unsigned long iova, size_t granule,
				      void *cookie)
{
	dummy_tlb_flush(iova, granule, granule, cookie);
}

static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
	.tlb_flush_all	= dummy_tlb_flush_all,
	.tlb_flush_walk	= dummy_tlb_flush,
	.tlb_add_page	= dummy_tlb_add_page,
};

static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
{
	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
	struct io_pgtable_cfg *cfg = &data->iop.cfg;

	pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n",
		cfg->pgsize_bitmap, cfg->ias);
	pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n",
		ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data),
		ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd);
}

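/* Report a selftest failure: warn, dump the pgtable geometry and evaluate to -EFAULT. */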
#define __FAIL(ops, i)	({						\
		WARN(1, "selftest: test failed for fmt idx %d\n", (i));	\
		arm_lpae_dump_ops(ops);					\
		selftest_running = false;				\
		-EFAULT;						\
})

static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
{
	static const enum io_pgtable_fmt fmts[] __initconst = {
		ARM_64_LPAE_S1,
		ARM_64_LPAE_S2,
	};

	int i, j;
	unsigned long iova;
	size_t size, mapped;
	struct io_pgtable_ops *ops;

	selftest_running = true;

	for (i = 0; i < ARRAY_SIZE(fmts); ++i) {
		cfg_cookie = cfg;
		ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg);
		if (!ops) {
			pr_err("selftest: failed to allocate io pgtable ops\n");
			return -ENOMEM;
		}

		/*
		 * Initial sanity checks.
		 * Empty page tables shouldn't provide any translations.
		 */
		if (ops->iova_to_phys(ops, 42))
			return __FAIL(ops, i);

		if (ops->iova_to_phys(ops, SZ_1G + 42))
			return __FAIL(ops, i);

		if (ops->iova_to_phys(ops, SZ_2G + 42))
			return __FAIL(ops, i);

		/*
		 * Distinct mappings of different granule sizes.
		 */
		iova = 0;
		for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
			size = 1UL << j;

			if (ops->map_pages(ops, iova, iova, size, 1,
					   IOMMU_READ | IOMMU_WRITE |
					   IOMMU_NOEXEC | IOMMU_CACHE,
					   GFP_KERNEL, &mapped))
				return __FAIL(ops, i);

			/* Overlapping mappings */
			if (!ops->map_pages(ops, iova, iova + size, size, 1,
					    IOMMU_READ | IOMMU_NOEXEC,
					    GFP_KERNEL, &mapped))
				return __FAIL(ops, i);

			if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
				return __FAIL(ops, i);

			iova += SZ_1G;
		}

		/* Partial unmap */
		size = 1UL << __ffs(cfg->pgsize_bitmap);
		if (ops->unmap_pages(ops, SZ_1G + size, size, 1, NULL) != size)
			return __FAIL(ops, i);

		/* Remap of partial unmap */
		if (ops->map_pages(ops, SZ_1G + size, size, size, 1,
				   IOMMU_READ, GFP_KERNEL, &mapped))
			return __FAIL(ops, i);

		if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42))
			return __FAIL(ops, i);

		/* Full unmap */
		iova = 0;
		for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
			size = 1UL << j;

			if (ops->unmap_pages(ops, iova, size, 1, NULL) != size)
				return __FAIL(ops, i);

			if (ops->iova_to_phys(ops, iova + 42))
				return __FAIL(ops, i);

			/* Remap full block */
			if (ops->map_pages(ops, iova, iova, size, 1,
					   IOMMU_WRITE, GFP_KERNEL, &mapped))
				return __FAIL(ops, i);

			if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
				return __FAIL(ops, i);

			iova += SZ_1G;
		}

		free_io_pgtable_ops(ops);
	}

	selftest_running = false;
	return 0;
}

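/*
 * Run the tests above for every supported granule/IAS combination against a
 * dummy TLB implementation; coherent_walk is set so no DMA syncing is needed.
 */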
static int __init arm_lpae_do_selftests(void)
{
	static const unsigned long pgsize[] __initconst = {
		SZ_4K | SZ_2M | SZ_1G,
		SZ_16K | SZ_32M,
		SZ_64K | SZ_512M,
	};

	static const unsigned int ias[] __initconst = {
		32, 36, 40, 42, 44, 48,
	};

	int i, j, pass = 0, fail = 0;
	struct device dev;
	struct io_pgtable_cfg cfg = {
		.tlb = &dummy_tlb_ops,
		.oas = 48,
		.coherent_walk = true,
		.iommu_dev = &dev,
	};

	/* __arm_lpae_alloc_pages() merely needs dev_to_node() to work */
	set_dev_node(&dev, NUMA_NO_NODE);

	for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
		for (j = 0; j < ARRAY_SIZE(ias); ++j) {
			cfg.pgsize_bitmap = pgsize[i];
			cfg.ias = ias[j];
			pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n",
				pgsize[i], ias[j]);
			if (arm_lpae_run_tests(&cfg))
				fail++;
			else
				pass++;
		}
	}

	pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
	return fail ? -EFAULT : 0;
}
subsys_initcall(arm_lpae_do_selftests);
#endif