// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>

#include "arm-smmu-v3.h"
#include "../../dma-iommu.h"
#include "../../iommu-sva.h"

static bool disable_bypass = true;
module_param(disable_bypass, bool, 0444);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");

enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};

static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);

/*
 * Special value used by SVA when a process dies, to quiesce a CD without
 * disabling it.
 */
struct arm_smmu_ctx_desc quiet_cd = { 0 };

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
	{ 0, NULL},
};

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
					  arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				   arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

/* Low-level queue manipulation functions */
static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
{
	u32 space, prod, cons;

	prod = Q_IDX(q, q->prod);
	cons = Q_IDX(q, q->cons);

	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
		space = (1 << q->max_n_shift) - (prod - cons);
	else
		space = cons - prod;

	return space >= n;
}

static bool queue_full(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}

static bool queue_empty(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}

static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}

static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}

static void queue_inc_cons(struct arm_smmu_ll_queue *q)
{
	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}

static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
{
	struct arm_smmu_ll_queue *llq = &q->llq;

	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
		return;

	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
		    Q_IDX(llq, llq->cons);
	queue_sync_cons_out(q);
}

static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
	u32 prod;
	int ret = 0;

	/*
	 * We can't use the _relaxed() variant here, as we must prevent
	 * speculative reads of the queue before we have determined that
	 * prod has indeed moved.
	 */
	prod = readl(q->prod_reg);

	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
		ret = -EOVERFLOW;

	q->llq.prod = prod;
	return ret;
}
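/*
 * Illustrative sketch of the prod/cons encoding used by the helpers above
 * (commentary only, with made-up numbers). Each of prod and cons packs an
 * index in the low max_n_shift bits, a wrap bit just above it, and an
 * overflow bit above that. For a queue with max_n_shift = 2:
 *
 *	prod = (wrap 0, idx 2), cons = (wrap 0, idx 2)	-> same index, same
 *							   wrap: queue empty
 *	prod = (wrap 1, idx 2), cons = (wrap 0, idx 2)	-> same index, wrap
 *							   differs: queue full
 *
 * queue_has_space() relies on the same trick: with equal wrap bits the used
 * count is simply prod - cons, while differing wrap bits mean the producer
 * has lapped the consumer once and the free space is cons - prod.
 */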
static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
{
	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
}

static void queue_poll_init(struct arm_smmu_device *smmu,
			    struct arm_smmu_queue_poll *qp)
{
	qp->delay = 1;
	qp->spin_cnt = 0;
	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
}

static int queue_poll(struct arm_smmu_queue_poll *qp)
{
	if (ktime_compare(ktime_get(), qp->timeout) > 0)
		return -ETIMEDOUT;

	if (qp->wfe) {
		wfe();
	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
		cpu_relax();
	} else {
		udelay(qp->delay);
		qp->delay *= 2;
		qp->spin_cnt = 0;
	}

	return 0;
}

static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = cpu_to_le64(*src++);
}

static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = le64_to_cpu(*src++);
}

static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
	if (queue_empty(&q->llq))
		return -EAGAIN;

	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
	queue_inc_cons(&q->llq);
	queue_sync_cons_out(q);
	return 0;
}

/* High-level queue accessors */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		break;
	case CMDQ_OP_CFGI_CD:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		fallthrough;
	case CMDQ_OP_TLBI_EL2_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_TLBI_EL2_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_RESUME:
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
		break;
	case CMDQ_OP_CMD_SYNC:
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
{
	return &smmu->cmdq;
}

static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
					 struct arm_smmu_queue *q, u32 prod)
{
	struct arm_smmu_cmdq_ent ent = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	/*
	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
	 * payload, so the write will zero the entire command on that platform.
	 */
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
				   q->ent_dwords * 8;
	}

	arm_smmu_cmdq_build_cmd(cmd, &ent);
}

static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
				     struct arm_smmu_queue *q)
{
	static const char * const cerror_str[] = {
		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
	};

	int i;
	u64 cmd[CMDQ_ENT_DWORDS];
	u32 cons = readl_relaxed(q->cons_reg);
	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
	struct arm_smmu_cmdq_ent cmd_sync = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
		idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");

	switch (idx) {
	case CMDQ_ERR_CERROR_ABT_IDX:
		dev_err(smmu->dev, "retrying command fetch\n");
		return;
	case CMDQ_ERR_CERROR_NONE_IDX:
		return;
	case CMDQ_ERR_CERROR_ATC_INV_IDX:
		/*
		 * ATC Invalidation Completion timeout. CONS is still pointing
		 * at the CMD_SYNC. Attempt to complete other pending commands
		 * by repeating the CMD_SYNC, though we might well end up back
		 * here since the ATC invalidation may still be pending.
		 */
		return;
	case CMDQ_ERR_CERROR_ILL_IDX:
	default:
		break;
	}

	/*
	 * We may have concurrent producers, so we need to be careful
	 * not to touch any of the shadow cmdq state.
	 */
	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
	dev_err(smmu->dev, "skipping command in error state:\n");
	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);

	/* Convert the erroneous command into a CMD_SYNC */
	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);

	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}

static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
}

/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}

static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}

static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
	if (atomic_read(&cmdq->lock) == 1)
		return false;

	arm_smmu_cmdq_shared_unlock(cmdq);
	return true;
}

#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})

#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	local_irq_restore(flags);					\
})


/*
 * Command queue insertion.
 * This is made fiddly by our attempts to achieve some sort of scalability
 * since there is one queue shared amongst all of the CPUs in the system. If
 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 * then you'll *love* this monstrosity.
 *
 * The basic idea is to split the queue up into ranges of commands that are
 * owned by a given CPU; the owner may not have written all of the commands
 * itself, but is responsible for advancing the hardware prod pointer when
 * the time comes. The algorithm is roughly:
 *
 *	1. Allocate some space in the queue. At this point we also discover
 *	   whether the head of the queue is currently owned by another CPU,
 *	   or whether we are the owner.
 *
 *	2. Write our commands into our allocated slots in the queue.
 *
 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 *
 *	4. If we are an owner:
 *		a. Wait for the previous owner to finish.
 *		b. Mark the queue head as unowned, which tells us the range
 *		   that we are responsible for publishing.
 *		c. Wait for all commands in our owned range to become valid.
 *		d. Advance the hardware prod pointer.
 *		e. Tell the next owner we've finished.
 *
 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
 *	   owner), then we need to stick around until it has completed:
 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 *		   to clear the first 4 bytes.
 *		b. Otherwise, we spin waiting for the hardware cons pointer to
 *		   advance past our command.
 *
 * The devil is in the details, particularly the use of locking for handling
 * SYNC completion and freeing up space in the queue before we think that it is
 * full.
 */
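/*
 * A short illustration of the wrap-parity trick used by the helper below
 * (commentary only, not part of the algorithm): producers mark their slots
 * by XORing the bitmap, so on the first pass (prod wrap bit clear) a slot
 * becomes valid by flipping 0 -> 1, and on the next pass (wrap bit set) by
 * flipping 1 -> 0. A poller therefore derives the expected pattern from the
 * wrap bit of the prod it is waiting on:
 *
 *	valid = (ULONG_MAX + !!Q_WRP(&llq, prod)) & mask
 *
 * which evaluates to all-ones under the mask when the wrap bit is clear and
 * to zero when it is set, matching "the valid bit is the inverse of the
 * wrap bit" noted in the poll path.
 */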
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}

/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}

/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}

/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(cmdq->q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}

/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}

/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(cmdq->q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 *	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 *			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}

static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
					 struct arm_smmu_ll_queue *llq)
{
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);

	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
}

static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
					u32 prod, int n)
{
	int i;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= prod,
	};

	for (i = 0; i < n; ++i) {
		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];

		prod = queue_inc_prod_n(&llq, i);
		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
	}
}

/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 */
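/*
 * A hedged illustration of the first guarantee (a sketch only; the real
 * callers are e.g. arm_smmu_write_ctx_desc() and arm_smmu_write_strtab_ent()):
 *
 *	WRITE_ONCE(cdptr[0], cpu_to_le64(val));		// update the CD
 *	arm_smmu_cmdq_issue_cmd(smmu, &cfgi_cd);	// then invalidate it
 *
 * Because a dma_wmb() is issued before the command slots are marked valid,
 * the SMMU cannot consume the CFGI_CD command while the CD update is still
 * invisible to it.
 */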
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				       u64 *cmds, int n, bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
	unsigned long flags;
	bool owner;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	struct arm_smmu_ll_queue llq, head;
	int ret = 0;

	llq.max_n_shift = cmdq->q.llq.max_n_shift;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
			local_irq_save(flags);
		}

		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync) |
					     CMDQ_PROD_OWNED_FLAG;

		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		llq.val = old;
	} while (1);
	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;

	/*
	 * 2. Write our commands into the queue
	 * Dependency ordering from the cmpxchg() loop above.
	 */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);

		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
		arm_smmu_cmdq_shared_lock(cmdq);
	}

	/* 3. Mark our slots as valid, ensuring commands are visible first */
	dma_wmb();
	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);

	/* 4. If we are the owner, take control of the SMMU hardware */
	if (owner) {
		/* a. Wait for previous owner to finish */
		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);

		/* b. Stop gathering work by clearing the owned flag */
		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
						   &cmdq->q.llq.atomic.prod);
		prod &= ~CMDQ_PROD_OWNED_FLAG;

		/*
		 * c. Wait for any gathered work to be written to the queue.
		 * Note that we read our own entries so that we have the control
		 * dependency required by (d).
		 */
		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);

		/*
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
		writel_relaxed(prod, cmdq->q.prod_reg);

		/*
		 * e. Tell the next owner we're done
		 * Make sure we've updated the hardware first, so that we don't
		 * race to update prod and potentially move it backwards.
		 */
		atomic_set_release(&cmdq->owner_prod, prod);
	}

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
			arm_smmu_cmdq_shared_unlock(cmdq);
		}
	}

	local_irq_restore(flags);
	return ret;
}

static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				     struct arm_smmu_cmdq_ent *ent,
				     bool sync)
{
	u64 cmd[CMDQ_ENT_DWORDS];

	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 ent->opcode);
		return -EINVAL;
	}

	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
}

static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				   struct arm_smmu_cmdq_ent *ent)
{
	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
}

static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
					     struct arm_smmu_cmdq_ent *ent)
{
	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}

static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
				    struct arm_smmu_cmdq_batch *cmds,
				    struct arm_smmu_cmdq_ent *cmd)
{
	int index;

	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
		cmds->num = 0;
	}

	if (cmds->num == CMDQ_BATCH_ENTRIES) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
		cmds->num = 0;
	}

	index = cmds->num * CMDQ_ENT_DWORDS;
	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 cmd->opcode);
		return;
	}

	cmds->num++;
}

static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}

static int arm_smmu_page_response(struct device *dev,
				  struct iommu_fault_event *unused,
				  struct iommu_page_response *resp)
{
	struct arm_smmu_cmdq_ent cmd = {0};
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	int sid = master->streams[0].id;

	if (master->stall_enabled) {
		cmd.opcode = CMDQ_OP_RESUME;
		cmd.resume.sid = sid;
		cmd.resume.stag = resp->grpid;
		switch (resp->code) {
		case IOMMU_PAGE_RESP_INVALID:
		case IOMMU_PAGE_RESP_FAILURE:
			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
			break;
		case IOMMU_PAGE_RESP_SUCCESS:
			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
			break;
		default:
			return -EINVAL;
		}
	} else {
		return -ENODEV;
	}

	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
	/*
	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
	 * RESUME consumption guarantees that the stalled transaction will be
	 * terminated... at some point in the future. PRI_RESP is fire and
	 * forget.
	 */

	return 0;
}

/* Context descriptor manipulation functions */
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
		.tlbi.asid = asid,
	};

	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}

static void arm_smmu_sync_cd(struct arm_smmu_master *master,
			     int ssid, bool leaf)
{
	size_t i;
	struct arm_smmu_cmdq_batch cmds;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
		.cfgi	= {
			.ssid	= ssid,
			.leaf	= leaf,
		},
	};

	cmds.num = 0;
	for (i = 0; i < master->num_streams; i++) {
		cmd.cfgi.sid = master->streams[i].id;
		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
	}

	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}

static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
					struct arm_smmu_l1_ctx_desc *l1_desc)
{
	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
					     &l1_desc->l2ptr_dma, GFP_KERNEL);
	if (!l1_desc->l2ptr) {
		dev_warn(smmu->dev,
			 "failed to allocate context descriptor table\n");
		return -ENOMEM;
	}
	return 0;
}

static void arm_smmu_write_cd_l1_desc(__le64 *dst,
				      struct arm_smmu_l1_ctx_desc *l1_desc)
{
	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
		  CTXDESC_L1_DESC_V;

	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
{
	__le64 *l1ptr;
	unsigned int idx;
	struct arm_smmu_l1_ctx_desc *l1_desc;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
		return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;

	idx = ssid >> CTXDESC_SPLIT;
	l1_desc = &cd_table->l1_desc[idx];
	if (!l1_desc->l2ptr) {
		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
			return NULL;

		l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
		/* An invalid L1CD can be cached */
		arm_smmu_sync_cd(master, ssid, false);
	}
	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}

int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
			    struct arm_smmu_ctx_desc *cd)
{
	/*
	 * This function handles the following cases:
	 *
	 * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
	 * (2) Install a secondary CD, for SID+SSID traffic.
	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
	 *     CD, then invalidate the old entry and mappings.
	 * (4) Quiesce the context without clearing the valid bit. Disable
	 *     translation, and ignore any translation fault.
	 * (5) Remove a secondary CD.
	 */
	u64 val;
	bool cd_live;
	__le64 *cdptr;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
		return -E2BIG;

	cdptr = arm_smmu_get_cd_ptr(master, ssid);
	if (!cdptr)
		return -ENOMEM;

	val = le64_to_cpu(cdptr[0]);
	cd_live = !!(val & CTXDESC_CD_0_V);

	if (!cd) { /* (5) */
		val = 0;
	} else if (cd == &quiet_cd) { /* (4) */
		val |= CTXDESC_CD_0_TCR_EPD0;
	} else if (cd_live) { /* (3) */
		val &= ~CTXDESC_CD_0_ASID;
		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
		/*
		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
		 * this substream's traffic
		 */
	} else { /* (1) and (2) */
		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
		cdptr[2] = 0;
		cdptr[3] = cpu_to_le64(cd->mair);

		/*
		 * STE may be live, and the SMMU might read dwords of this CD in any
		 * order. Ensure that it observes valid values before reading
		 * V=1.
		 */
		arm_smmu_sync_cd(master, ssid, true);

		val = cd->tcr |
#ifdef __BIG_ENDIAN
			CTXDESC_CD_0_ENDI |
#endif
			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
			CTXDESC_CD_0_AA64 |
			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
			CTXDESC_CD_0_V;

		if (cd_table->stall_enabled)
			val |= CTXDESC_CD_0_S;
	}

	/*
	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
	 * "Configuration structures and configuration invalidation completion"
	 *
	 *   The size of single-copy atomic reads made by the SMMU is
	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
	 *   field within an aligned 64-bit span of a structure can be altered
	 *   without first making the structure invalid.
	 */
	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
	arm_smmu_sync_cd(master, ssid, true);
	return 0;
}

static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
{
	int ret;
	size_t l1size;
	size_t max_contexts;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	cd_table->stall_enabled = master->stall_enabled;
	cd_table->s1cdmax = master->ssid_bits;
	max_contexts = 1 << cd_table->s1cdmax;

	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
	    max_contexts <= CTXDESC_L2_ENTRIES) {
		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
		cd_table->num_l1_ents = max_contexts;

		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
	} else {
		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
		cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
						     CTXDESC_L2_ENTRIES);

		cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
						 sizeof(*cd_table->l1_desc),
						 GFP_KERNEL);
		if (!cd_table->l1_desc)
			return -ENOMEM;

		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	}

	cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
					      GFP_KERNEL);
	if (!cd_table->cdtab) {
		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
		ret = -ENOMEM;
		goto err_free_l1;
	}

	return 0;

err_free_l1:
	if (cd_table->l1_desc) {
		devm_kfree(smmu->dev, cd_table->l1_desc);
		cd_table->l1_desc = NULL;
	}
	return ret;
}

static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
{
	int i;
	size_t size, l1size;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	if (cd_table->l1_desc) {
		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

		for (i = 0; i < cd_table->num_l1_ents; i++) {
			if (!cd_table->l1_desc[i].l2ptr)
				continue;

			dmam_free_coherent(smmu->dev, size,
					   cd_table->l1_desc[i].l2ptr,
					   cd_table->l1_desc[i].l2ptr_dma);
		}
		devm_kfree(smmu->dev, cd_table->l1_desc);
		cd_table->l1_desc = NULL;

		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	} else {
		l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
	}

	dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
	cd_table->cdtab_dma = 0;
	cd_table->cdtab = NULL;
}

bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
{
	bool free;
	struct arm_smmu_ctx_desc *old_cd;

	if (!cd->asid)
		return false;

	free = refcount_dec_and_test(&cd->refs);
	if (free) {
		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
		WARN_ON(old_cd != cd);
	}
	return free;
}

/* Stream table manipulation functions */
static void
arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
{
	u64 val = 0;

	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;

	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_STE,
		.cfgi	= {
			.sid	= sid,
			.leaf	= true,
		},
	};

	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}

static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
				      __le64 *dst)
{
	/*
	 * This is hideously complicated, but we only really care about
	 * three cases at the moment:
	 *
	 * 1. Invalid (all zero) -> bypass/fault (init)
	 * 2. Bypass/fault -> translation/bypass (attach)
	 * 3. Translation/bypass -> bypass/fault (detach)
	 *
	 * Given that we can't update the STE atomically and the SMMU
	 * doesn't read the thing in a defined order, that leaves us
	 * with the following maintenance requirements:
	 *
	 * 1. Update Config, return (init time STEs aren't live)
	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
	 * 3. Update Config, sync
	 */
	u64 val = le64_to_cpu(dst[0]);
	bool ste_live = false;
	struct arm_smmu_device *smmu = NULL;
	struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
	struct arm_smmu_s2_cfg *s2_cfg = NULL;
	struct arm_smmu_domain *smmu_domain = NULL;
	struct arm_smmu_cmdq_ent prefetch_cmd = {
		.opcode		= CMDQ_OP_PREFETCH_CFG,
		.prefetch	= {
			.sid	= sid,
		},
	};

	if (master) {
		smmu_domain = master->domain;
		smmu = master->smmu;
	}

	if (smmu_domain) {
		switch (smmu_domain->stage) {
		case ARM_SMMU_DOMAIN_S1:
			cd_table = &master->cd_table;
			break;
		case ARM_SMMU_DOMAIN_S2:
		case ARM_SMMU_DOMAIN_NESTED:
			s2_cfg = &smmu_domain->s2_cfg;
			break;
		default:
			break;
		}
	}

	if (val & STRTAB_STE_0_V) {
		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
		case STRTAB_STE_0_CFG_BYPASS:
			break;
		case STRTAB_STE_0_CFG_S1_TRANS:
		case STRTAB_STE_0_CFG_S2_TRANS:
			ste_live = true;
			break;
		case STRTAB_STE_0_CFG_ABORT:
			BUG_ON(!disable_bypass);
			break;
		default:
			BUG(); /* STE corruption */
		}
	}

	/* Nuke the existing STE_0 value, as we're going to rewrite it */
	val = STRTAB_STE_0_V;

	/* Bypass/fault */
	if (!smmu_domain || !(cd_table || s2_cfg)) {
		if (!smmu_domain && disable_bypass)
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
		else
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);

		dst[0] = cpu_to_le64(val);
		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
						STRTAB_STE_1_SHCFG_INCOMING));
		dst[2] = 0; /* Nuke the VMID */
		/*
		 * The SMMU can perform negative caching, so we must sync
		 * the STE regardless of whether the old value was live.
		 */
		if (smmu)
			arm_smmu_sync_ste_for_sid(smmu, sid);
		return;
	}

	if (cd_table) {
		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;

		BUG_ON(ste_live);
		dst[1] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
			 FIELD_PREP(STRTAB_STE_1_STRW, strw));

		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
		    !master->stall_enabled)
			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);

		val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
			FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
			FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
	}

	if (s2_cfg) {
		BUG_ON(ste_live);
		dst[2] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
			 STRTAB_STE_2_S2ENDI |
#endif
			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
			 STRTAB_STE_2_S2R);

		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);

		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
	}

	if (master->ats_enabled)
		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
						 STRTAB_STE_1_EATS_TRANS));

	arm_smmu_sync_ste_for_sid(smmu, sid);
	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(dst[0], cpu_to_le64(val));
	arm_smmu_sync_ste_for_sid(smmu, sid);

	/* It's likely that we'll want to use the new STE soon */
	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}

static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
{
	unsigned int i;
	u64 val = STRTAB_STE_0_V;

	if (disable_bypass && !force)
		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
	else
		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);

	for (i = 0; i < nent; ++i) {
		strtab[0] = cpu_to_le64(val);
		strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
						   STRTAB_STE_1_SHCFG_INCOMING));
		strtab[2] = 0;
		strtab += STRTAB_STE_DWORDS;
	}
}

static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
	size_t size;
	void *strtab;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];

	if (desc->l2ptr)
		return 0;

	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];

	desc->span = STRTAB_SPLIT + 1;
	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
					  GFP_KERNEL);
	if (!desc->l2ptr) {
		dev_err(smmu->dev,
			"failed to allocate l2 stream table for SID %u\n",
			sid);
		return -ENOMEM;
	}

	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
	arm_smmu_write_strtab_l1_desc(strtab, desc);
	return 0;
}

static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
{
	struct rb_node *node;
	struct arm_smmu_stream *stream;

	lockdep_assert_held(&smmu->streams_mutex);

	node = smmu->streams.rb_node;
	while (node) {
		stream = rb_entry(node, struct arm_smmu_stream, node);
		if (stream->id < sid)
			node = node->rb_right;
		else if (stream->id > sid)
			node = node->rb_left;
		else
			return stream->master;
	}

	return NULL;
}

/* IRQ and event handlers */
static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
{
	int ret;
	u32 reason;
	u32 perm = 0;
	struct arm_smmu_master *master;
	bool ssid_valid = evt[0] & EVTQ_0_SSV;
	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
	struct iommu_fault_event fault_evt = { };
	struct iommu_fault *flt = &fault_evt.fault;

	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
	case EVT_ID_TRANSLATION_FAULT:
		reason = IOMMU_FAULT_REASON_PTE_FETCH;
		break;
	case EVT_ID_ADDR_SIZE_FAULT:
		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
		break;
	case EVT_ID_ACCESS_FAULT:
		reason = IOMMU_FAULT_REASON_ACCESS;
		break;
	case EVT_ID_PERMISSION_FAULT:
		reason = IOMMU_FAULT_REASON_PERMISSION;
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* Stage-2 is always pinned at the moment */
	if (evt[1] & EVTQ_1_S2)
		return -EFAULT;

	if (evt[1] & EVTQ_1_RnW)
		perm |= IOMMU_FAULT_PERM_READ;
	else
		perm |= IOMMU_FAULT_PERM_WRITE;

	if (evt[1] & EVTQ_1_InD)
		perm |= IOMMU_FAULT_PERM_EXEC;

	if (evt[1] & EVTQ_1_PnU)
		perm |= IOMMU_FAULT_PERM_PRIV;

	if (evt[1] & EVTQ_1_STALL) {
		flt->type = IOMMU_FAULT_PAGE_REQ;
		flt->prm = (struct iommu_fault_page_request) {
			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
			.perm = perm,
			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
		};

		if (ssid_valid) {
			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
		}
	} else {
		flt->type = IOMMU_FAULT_DMA_UNRECOV;
		flt->event = (struct iommu_fault_unrecoverable) {
			.reason = reason,
			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
			.perm = perm,
			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
		};

		if (ssid_valid) {
			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
		}
	}

	mutex_lock(&smmu->streams_mutex);
	master = arm_smmu_find_master(smmu, sid);
	if (!master) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = iommu_report_device_fault(master->dev, &fault_evt);
	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
		/* Nobody cared, abort the access */
		struct iommu_page_response resp = {
			.pasid		= flt->prm.pasid,
			.grpid		= flt->prm.grpid,
			.code		= IOMMU_PAGE_RESP_FAILURE,
		};
		arm_smmu_page_response(master->dev, &fault_evt, &resp);
	}

out_unlock:
	mutex_unlock(&smmu->streams_mutex);
	return ret;
}

static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
{
	int i, ret;
	struct arm_smmu_device *smmu = dev;
	struct arm_smmu_queue *q = &smmu->evtq.q;
	struct arm_smmu_ll_queue *llq = &q->llq;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	u64 evt[EVTQ_ENT_DWORDS];

	do {
		while (!queue_remove_raw(q, evt)) {
			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);

			ret = arm_smmu_handle_evt(smmu, evt);
			if (!ret || !__ratelimit(&rs))
				continue;

			dev_info(smmu->dev, "event 0x%02x received:\n", id);
			for (i = 0; i < ARRAY_SIZE(evt); ++i)
				dev_info(smmu->dev, "\t0x%016llx\n",
					 (unsigned long long)evt[i]);

			cond_resched();
		}

		/*
		 * Not much we can do on overflow, so scream and pretend we're
		 * trying harder.
		 */
		if (queue_sync_prod_in(q) == -EOVERFLOW)
			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
	} while (!queue_empty(llq));

	/* Sync our overflow flag, as we believe we're up to speed */
	queue_sync_cons_ovf(q);
	return IRQ_HANDLED;
}

static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
{
	u32 sid, ssid;
	u16 grpid;
	bool ssv, last;

	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);

	dev_info(smmu->dev, "unexpected PRI request received:\n");
	dev_info(smmu->dev,
		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
		 sid, ssid, grpid, last ? "L" : "",
		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
		 evt[1] & PRIQ_1_ADDR_MASK);

	if (last) {
		struct arm_smmu_cmdq_ent cmd = {
			.opcode			= CMDQ_OP_PRI_RESP,
			.substream_valid	= ssv,
			.pri			= {
				.sid	= sid,
				.ssid	= ssid,
				.grpid	= grpid,
				.resp	= PRI_RESP_DENY,
			},
		};

		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	}
}

static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
{
	struct arm_smmu_device *smmu = dev;
	struct arm_smmu_queue *q = &smmu->priq.q;
	struct arm_smmu_ll_queue *llq = &q->llq;
	u64 evt[PRIQ_ENT_DWORDS];

	do {
		while (!queue_remove_raw(q, evt))
			arm_smmu_handle_ppr(smmu, evt);

		if (queue_sync_prod_in(q) == -EOVERFLOW)
			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
	} while (!queue_empty(llq));

	/* Sync our overflow flag, as we believe we're up to speed */
	queue_sync_cons_ovf(q);
	return IRQ_HANDLED;
}

static int arm_smmu_device_disable(struct arm_smmu_device *smmu);

static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
{
	u32 gerror, gerrorn, active;
	struct arm_smmu_device *smmu = dev;

	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);

	active = gerror ^ gerrorn;
	if (!(active & GERROR_ERR_MASK))
		return IRQ_NONE; /* No errors pending */

	dev_warn(smmu->dev,
		 "unexpected global error reported (0x%08x), this could be serious\n",
		 active);

	if (active & GERROR_SFM_ERR) {
		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
		arm_smmu_device_disable(smmu);
	}

	if (active & GERROR_MSI_GERROR_ABT_ERR)
		dev_warn(smmu->dev, "GERROR MSI write aborted\n");

	if (active & GERROR_MSI_PRIQ_ABT_ERR)
		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");

	if (active & GERROR_MSI_EVTQ_ABT_ERR)
		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");

	if (active & GERROR_MSI_CMDQ_ABT_ERR)
		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");

	if (active & GERROR_PRIQ_ABT_ERR)
		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");

	if (active & GERROR_EVTQ_ABT_ERR)
		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");

	if (active & GERROR_CMDQ_ERR)
		arm_smmu_cmdq_skip_err(smmu);

	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
{
	struct arm_smmu_device *smmu = dev;

	arm_smmu_evtq_thread(irq, dev);
	if (smmu->features & ARM_SMMU_FEAT_PRI)
		arm_smmu_priq_thread(irq, dev);

	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
{
	arm_smmu_gerror_handler(irq, dev);
	return IRQ_WAKE_THREAD;
}

static void
arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
			struct arm_smmu_cmdq_ent *cmd)
{
	size_t log2_span;
	size_t span_mask;
	/* ATC invalidates are always on 4096-bytes pages */
	size_t inval_grain_shift = 12;
	unsigned long page_start, page_end;

	/*
	 * ATS and PASID:
	 *
	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
	 * prefix. In that case all ATC entries within the address range are
	 * invalidated, including those that were requested with a PASID! There
	 * is no way to invalidate only entries without PASID.
	 *
	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
	 * traffic), translation requests without PASID create ATC entries
	 * without PASID, which must be invalidated with substream_valid clear.
	 * This has the unpleasant side-effect of invalidating all PASID-tagged
	 * ATC entries within the address range.
	 */
	*cmd = (struct arm_smmu_cmdq_ent) {
		.opcode			= CMDQ_OP_ATC_INV,
		.substream_valid	= (ssid != IOMMU_NO_PASID),
		.atc.ssid		= ssid,
	};

	if (!size) {
		cmd->atc.size = ATC_INV_SIZE_ALL;
		return;
	}

	page_start	= iova >> inval_grain_shift;
	page_end	= (iova + size - 1) >> inval_grain_shift;

	/*
	 * In an ATS Invalidate Request, the address must be aligned on the
	 * range size, which must be a power of two number of page sizes. We
	 * thus have to choose between grossly over-invalidating the region, or
	 * splitting the invalidation into multiple commands. For simplicity
	 * we'll go with the first solution, but should refine it in the future
	 * if multiple commands are shown to be more efficient.
	 *
	 * Find the smallest power of two that covers the range. The most
	 * significant differing bit between the start and end addresses,
	 * fls(start ^ end), indicates the required span. For example:
	 *
	 * We want to invalidate pages [8; 11]. This is already the ideal range:
	 *		x = 0b1000 ^ 0b1011 = 0b11
	 *		span = 1 << fls(x) = 4
	 *
	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
	 *		x = 0b0111 ^ 0b1010 = 0b1101
	 *		span = 1 << fls(x) = 16
	 */
	log2_span	= fls_long(page_start ^ page_end);
	span_mask	= (1ULL << log2_span) - 1;

	page_start	&= ~span_mask;

	cmd->atc.addr	= page_start << inval_grain_shift;
	cmd->atc.size	= log2_span;
}
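/*
 * Continuing the second example above with the resulting encoding
 * (illustrative numbers only): for pages [7; 10], log2_span = 4, so
 * page_start is rounded down to 0 and the command carries
 *
 *	cmd->atc.addr = 0 << 12 = 0x0
 *	cmd->atc.size = 4		(a 16-page span, 64KB with 4KB pages)
 *
 * i.e. the whole of [0; 15] is over-invalidated rather than splitting the
 * request into multiple commands.
 */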
static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
{
	int i;
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_cmdq_batch cmds;

	arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);

	cmds.num = 0;
	for (i = 0; i < master->num_streams; i++) {
		cmd.atc.sid = master->streams[i].id;
		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
	}

	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
}

int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
			    unsigned long iova, size_t size)
{
	int i;
	unsigned long flags;
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds;

	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
		return 0;

	/*
	 * Ensure that we've completed prior invalidation of the main TLBs
	 * before we read 'nr_ats_masters' in case of a concurrent call to
	 * arm_smmu_enable_ats():
	 *
	 *	// unmap()			// arm_smmu_enable_ats()
	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
	 *	smp_mb();			[...]
	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
	 *
	 * Ensures that we always see the incremented 'nr_ats_masters' count if
	 * ATS was enabled at the PCI device before completion of the TLBI.
	 */
	smp_mb();
	if (!atomic_read(&smmu_domain->nr_ats_masters))
		return 0;

	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);

	cmds.num = 0;

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		if (!master->ats_enabled)
			continue;

		for (i = 0; i < master->num_streams; i++) {
			cmd.atc.sid = master->streams[i].id;
			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
}

/* IO_PGTABLE API */
static void arm_smmu_tlb_inv_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent cmd;

	/*
	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
	 * PTEs previously cleared by unmaps on the current CPU not yet visible
	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
	 * insertion to guarantee those are observed before the TLBI. Do be
	 * careful, 007.
	 */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
	} else {
		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
	}
	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
}
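/*
 * A worked example of the range-based encoding used below (illustrative
 * numbers only, and only relevant when ARM_SMMU_FEAT_RANGE_INV is set):
 * with 4KB pages (tg = 12) and num_pages = 35 (0x23), the loop emits two
 * TLBI range commands:
 *
 *	1st: scale = __ffs(0x23) = 0, num = 0x23 & 0x1f = 3
 *	     -> SCALE = 0, NUM = 2, covering  3 pages
 *	2nd: 0x20 pages remain:  scale = 5, num = 1
 *	     -> SCALE = 5, NUM = 0, covering 32 pages
 *
 * which together invalidate the full 35-page range.
 */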
1864 */ 1865 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 1866 arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid); 1867 } else { 1868 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL; 1869 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; 1870 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); 1871 } 1872 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); 1873 } 1874 1875 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, 1876 unsigned long iova, size_t size, 1877 size_t granule, 1878 struct arm_smmu_domain *smmu_domain) 1879 { 1880 struct arm_smmu_device *smmu = smmu_domain->smmu; 1881 unsigned long end = iova + size, num_pages = 0, tg = 0; 1882 size_t inv_range = granule; 1883 struct arm_smmu_cmdq_batch cmds; 1884 1885 if (!size) 1886 return; 1887 1888 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { 1889 /* Get the leaf page size */ 1890 tg = __ffs(smmu_domain->domain.pgsize_bitmap); 1891 1892 num_pages = size >> tg; 1893 1894 /* Convert page size of 12,14,16 (log2) to 1,2,3 */ 1895 cmd->tlbi.tg = (tg - 10) / 2; 1896 1897 /* 1898 * Determine what level the granule is at. For non-leaf, both 1899 * io-pgtable and SVA pass a nominal last-level granule because 1900 * they don't know what level(s) actually apply, so ignore that 1901 * and leave TTL=0. However for various errata reasons we still 1902 * want to use a range command, so avoid the SVA corner case 1903 * where both scale and num could be 0 as well. 1904 */ 1905 if (cmd->tlbi.leaf) 1906 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); 1907 else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1) 1908 num_pages++; 1909 } 1910 1911 cmds.num = 0; 1912 1913 while (iova < end) { 1914 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { 1915 /* 1916 * On each iteration of the loop, the range is 5 bits 1917 * worth of the aligned size remaining. 1918 * The range in pages is: 1919 * 1920 * range = (num_pages & (0x1f << __ffs(num_pages))) 1921 */ 1922 unsigned long scale, num; 1923 1924 /* Determine the power of 2 multiple number of pages */ 1925 scale = __ffs(num_pages); 1926 cmd->tlbi.scale = scale; 1927 1928 /* Determine how many chunks of 2^scale size we have */ 1929 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX; 1930 cmd->tlbi.num = num - 1; 1931 1932 /* range is num * 2^scale * pgsize */ 1933 inv_range = num << (scale + tg); 1934 1935 /* Clear out the lower order bits for the next iteration */ 1936 num_pages -= num << scale; 1937 } 1938 1939 cmd->tlbi.addr = iova; 1940 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd); 1941 iova += inv_range; 1942 } 1943 arm_smmu_cmdq_batch_submit(smmu, &cmds); 1944 } 1945 1946 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, 1947 size_t granule, bool leaf, 1948 struct arm_smmu_domain *smmu_domain) 1949 { 1950 struct arm_smmu_cmdq_ent cmd = { 1951 .tlbi = { 1952 .leaf = leaf, 1953 }, 1954 }; 1955 1956 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 1957 cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? 1958 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA; 1959 cmd.tlbi.asid = smmu_domain->cd.asid; 1960 } else { 1961 cmd.opcode = CMDQ_OP_TLBI_S2_IPA; 1962 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; 1963 } 1964 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); 1965 1966 /* 1967 * Unfortunately, this can't be leaf-only since we may have 1968 * zapped an entire table. 
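 *
 * The TLBI commands issued above only reach the SMMU's own TLBs; the ATC
 * invalidation below covers the same IOVA range at the endpoints. It is
 * issued without a PASID, which (as noted in arm_smmu_atc_inv_to_cmd())
 * also drops any PASID-tagged ATC entries in the range.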
1969 */ 1970 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size); 1971 } 1972 1973 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, 1974 size_t granule, bool leaf, 1975 struct arm_smmu_domain *smmu_domain) 1976 { 1977 struct arm_smmu_cmdq_ent cmd = { 1978 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? 1979 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA, 1980 .tlbi = { 1981 .asid = asid, 1982 .leaf = leaf, 1983 }, 1984 }; 1985 1986 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); 1987 } 1988 1989 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, 1990 unsigned long iova, size_t granule, 1991 void *cookie) 1992 { 1993 struct arm_smmu_domain *smmu_domain = cookie; 1994 struct iommu_domain *domain = &smmu_domain->domain; 1995 1996 iommu_iotlb_gather_add_page(domain, gather, iova, granule); 1997 } 1998 1999 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, 2000 size_t granule, void *cookie) 2001 { 2002 arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie); 2003 } 2004 2005 static const struct iommu_flush_ops arm_smmu_flush_ops = { 2006 .tlb_flush_all = arm_smmu_tlb_inv_context, 2007 .tlb_flush_walk = arm_smmu_tlb_inv_walk, 2008 .tlb_add_page = arm_smmu_tlb_inv_page_nosync, 2009 }; 2010 2011 /* IOMMU API */ 2012 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) 2013 { 2014 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 2015 2016 switch (cap) { 2017 case IOMMU_CAP_CACHE_COHERENCY: 2018 /* Assume that a coherent TCU implies coherent TBUs */ 2019 return master->smmu->features & ARM_SMMU_FEAT_COHERENCY; 2020 case IOMMU_CAP_NOEXEC: 2021 case IOMMU_CAP_DEFERRED_FLUSH: 2022 return true; 2023 default: 2024 return false; 2025 } 2026 } 2027 2028 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) 2029 { 2030 struct arm_smmu_domain *smmu_domain; 2031 2032 if (type == IOMMU_DOMAIN_SVA) 2033 return arm_smmu_sva_domain_alloc(); 2034 2035 if (type != IOMMU_DOMAIN_UNMANAGED && 2036 type != IOMMU_DOMAIN_DMA && 2037 type != IOMMU_DOMAIN_IDENTITY) 2038 return NULL; 2039 2040 /* 2041 * Allocate the domain and initialise some of its data structures. 2042 * We can't really do anything meaningful until we've added a 2043 * master. 
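 *
 * In particular the translation stage, page-table format and ASID/VMID
 * are only chosen in arm_smmu_domain_finalise(), at first attach, since
 * they depend on the features of the SMMU instance the master sits
 * behind.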
2044 */ 2045 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); 2046 if (!smmu_domain) 2047 return NULL; 2048 2049 mutex_init(&smmu_domain->init_mutex); 2050 INIT_LIST_HEAD(&smmu_domain->devices); 2051 spin_lock_init(&smmu_domain->devices_lock); 2052 INIT_LIST_HEAD(&smmu_domain->mmu_notifiers); 2053 2054 return &smmu_domain->domain; 2055 } 2056 2057 static void arm_smmu_domain_free(struct iommu_domain *domain) 2058 { 2059 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2060 struct arm_smmu_device *smmu = smmu_domain->smmu; 2061 2062 free_io_pgtable_ops(smmu_domain->pgtbl_ops); 2063 2064 /* Free the ASID or VMID */ 2065 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 2066 /* Prevent SVA from touching the CD while we're freeing it */ 2067 mutex_lock(&arm_smmu_asid_lock); 2068 arm_smmu_free_asid(&smmu_domain->cd); 2069 mutex_unlock(&arm_smmu_asid_lock); 2070 } else { 2071 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; 2072 if (cfg->vmid) 2073 ida_free(&smmu->vmid_map, cfg->vmid); 2074 } 2075 2076 kfree(smmu_domain); 2077 } 2078 2079 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, 2080 struct io_pgtable_cfg *pgtbl_cfg) 2081 { 2082 int ret; 2083 u32 asid; 2084 struct arm_smmu_device *smmu = smmu_domain->smmu; 2085 struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; 2086 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; 2087 2088 refcount_set(&cd->refs, 1); 2089 2090 /* Prevent SVA from modifying the ASID until it is written to the CD */ 2091 mutex_lock(&arm_smmu_asid_lock); 2092 ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd, 2093 XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); 2094 if (ret) 2095 goto out_unlock; 2096 2097 cd->asid = (u16)asid; 2098 cd->ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; 2099 cd->tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | 2100 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) | 2101 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) | 2102 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | 2103 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | 2104 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | 2105 CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; 2106 cd->mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; 2107 2108 mutex_unlock(&arm_smmu_asid_lock); 2109 return 0; 2110 2111 out_unlock: 2112 mutex_unlock(&arm_smmu_asid_lock); 2113 return ret; 2114 } 2115 2116 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, 2117 struct io_pgtable_cfg *pgtbl_cfg) 2118 { 2119 int vmid; 2120 struct arm_smmu_device *smmu = smmu_domain->smmu; 2121 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; 2122 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr; 2123 2124 /* Reserve VMID 0 for stage-2 bypass STEs */ 2125 vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1, 2126 GFP_KERNEL); 2127 if (vmid < 0) 2128 return vmid; 2129 2130 vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr; 2131 cfg->vmid = (u16)vmid; 2132 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; 2133 cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) | 2134 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) | 2135 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) | 2136 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) | 2137 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) | 2138 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) | 2139 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps); 2140 return 0; 2141 } 2142 2143 static int arm_smmu_domain_finalise(struct iommu_domain *domain) 2144 { 2145 int ret; 2146 unsigned long ias, oas; 2147 enum io_pgtable_fmt fmt; 2148 
struct io_pgtable_cfg pgtbl_cfg; 2149 struct io_pgtable_ops *pgtbl_ops; 2150 int (*finalise_stage_fn)(struct arm_smmu_domain *, 2151 struct io_pgtable_cfg *); 2152 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2153 struct arm_smmu_device *smmu = smmu_domain->smmu; 2154 2155 if (domain->type == IOMMU_DOMAIN_IDENTITY) { 2156 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; 2157 return 0; 2158 } 2159 2160 /* Restrict the stage to what we can actually support */ 2161 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) 2162 smmu_domain->stage = ARM_SMMU_DOMAIN_S2; 2163 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) 2164 smmu_domain->stage = ARM_SMMU_DOMAIN_S1; 2165 2166 switch (smmu_domain->stage) { 2167 case ARM_SMMU_DOMAIN_S1: 2168 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48; 2169 ias = min_t(unsigned long, ias, VA_BITS); 2170 oas = smmu->ias; 2171 fmt = ARM_64_LPAE_S1; 2172 finalise_stage_fn = arm_smmu_domain_finalise_s1; 2173 break; 2174 case ARM_SMMU_DOMAIN_NESTED: 2175 case ARM_SMMU_DOMAIN_S2: 2176 ias = smmu->ias; 2177 oas = smmu->oas; 2178 fmt = ARM_64_LPAE_S2; 2179 finalise_stage_fn = arm_smmu_domain_finalise_s2; 2180 break; 2181 default: 2182 return -EINVAL; 2183 } 2184 2185 pgtbl_cfg = (struct io_pgtable_cfg) { 2186 .pgsize_bitmap = smmu->pgsize_bitmap, 2187 .ias = ias, 2188 .oas = oas, 2189 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, 2190 .tlb = &arm_smmu_flush_ops, 2191 .iommu_dev = smmu->dev, 2192 }; 2193 2194 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); 2195 if (!pgtbl_ops) 2196 return -ENOMEM; 2197 2198 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; 2199 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; 2200 domain->geometry.force_aperture = true; 2201 2202 ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); 2203 if (ret < 0) { 2204 free_io_pgtable_ops(pgtbl_ops); 2205 return ret; 2206 } 2207 2208 smmu_domain->pgtbl_ops = pgtbl_ops; 2209 return 0; 2210 } 2211 2212 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) 2213 { 2214 __le64 *step; 2215 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 2216 2217 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { 2218 struct arm_smmu_strtab_l1_desc *l1_desc; 2219 int idx; 2220 2221 /* Two-level walk */ 2222 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS; 2223 l1_desc = &cfg->l1_desc[idx]; 2224 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS; 2225 step = &l1_desc->l2ptr[idx]; 2226 } else { 2227 /* Simple linear lookup */ 2228 step = &cfg->strtab[sid * STRTAB_STE_DWORDS]; 2229 } 2230 2231 return step; 2232 } 2233 2234 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) 2235 { 2236 int i, j; 2237 struct arm_smmu_device *smmu = master->smmu; 2238 2239 for (i = 0; i < master->num_streams; ++i) { 2240 u32 sid = master->streams[i].id; 2241 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); 2242 2243 /* Bridged PCI devices may end up with duplicated IDs */ 2244 for (j = 0; j < i; j++) 2245 if (master->streams[j].id == sid) 2246 break; 2247 if (j < i) 2248 continue; 2249 2250 arm_smmu_write_strtab_ent(master, sid, step); 2251 } 2252 } 2253 2254 static bool arm_smmu_ats_supported(struct arm_smmu_master *master) 2255 { 2256 struct device *dev = master->dev; 2257 struct arm_smmu_device *smmu = master->smmu; 2258 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2259 2260 if (!(smmu->features & ARM_SMMU_FEAT_ATS)) 2261 return false; 2262 2263 if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS)) 2264 return false; 2265 
2266 return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)); 2267 } 2268 2269 static void arm_smmu_enable_ats(struct arm_smmu_master *master) 2270 { 2271 size_t stu; 2272 struct pci_dev *pdev; 2273 struct arm_smmu_device *smmu = master->smmu; 2274 struct arm_smmu_domain *smmu_domain = master->domain; 2275 2276 /* Don't enable ATS at the endpoint if it's not enabled in the STE */ 2277 if (!master->ats_enabled) 2278 return; 2279 2280 /* Smallest Translation Unit: log2 of the smallest supported granule */ 2281 stu = __ffs(smmu->pgsize_bitmap); 2282 pdev = to_pci_dev(master->dev); 2283 2284 atomic_inc(&smmu_domain->nr_ats_masters); 2285 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); 2286 if (pci_enable_ats(pdev, stu)) 2287 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); 2288 } 2289 2290 static void arm_smmu_disable_ats(struct arm_smmu_master *master) 2291 { 2292 struct arm_smmu_domain *smmu_domain = master->domain; 2293 2294 if (!master->ats_enabled) 2295 return; 2296 2297 pci_disable_ats(to_pci_dev(master->dev)); 2298 /* 2299 * Ensure ATS is disabled at the endpoint before we issue the 2300 * ATC invalidation via the SMMU. 2301 */ 2302 wmb(); 2303 arm_smmu_atc_inv_master(master); 2304 atomic_dec(&smmu_domain->nr_ats_masters); 2305 } 2306 2307 static int arm_smmu_enable_pasid(struct arm_smmu_master *master) 2308 { 2309 int ret; 2310 int features; 2311 int num_pasids; 2312 struct pci_dev *pdev; 2313 2314 if (!dev_is_pci(master->dev)) 2315 return -ENODEV; 2316 2317 pdev = to_pci_dev(master->dev); 2318 2319 features = pci_pasid_features(pdev); 2320 if (features < 0) 2321 return features; 2322 2323 num_pasids = pci_max_pasids(pdev); 2324 if (num_pasids <= 0) 2325 return num_pasids; 2326 2327 ret = pci_enable_pasid(pdev, features); 2328 if (ret) { 2329 dev_err(&pdev->dev, "Failed to enable PASID\n"); 2330 return ret; 2331 } 2332 2333 master->ssid_bits = min_t(u8, ilog2(num_pasids), 2334 master->smmu->ssid_bits); 2335 return 0; 2336 } 2337 2338 static void arm_smmu_disable_pasid(struct arm_smmu_master *master) 2339 { 2340 struct pci_dev *pdev; 2341 2342 if (!dev_is_pci(master->dev)) 2343 return; 2344 2345 pdev = to_pci_dev(master->dev); 2346 2347 if (!pdev->pasid_enabled) 2348 return; 2349 2350 master->ssid_bits = 0; 2351 pci_disable_pasid(pdev); 2352 } 2353 2354 static void arm_smmu_detach_dev(struct arm_smmu_master *master) 2355 { 2356 unsigned long flags; 2357 struct arm_smmu_domain *smmu_domain = master->domain; 2358 2359 if (!smmu_domain) 2360 return; 2361 2362 arm_smmu_disable_ats(master); 2363 2364 spin_lock_irqsave(&smmu_domain->devices_lock, flags); 2365 list_del(&master->domain_head); 2366 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 2367 2368 master->domain = NULL; 2369 master->ats_enabled = false; 2370 arm_smmu_install_ste_for_dev(master); 2371 /* 2372 * Clearing the CD entry isn't strictly required to detach the domain 2373 * since the table is uninstalled anyway, but it helps avoid confusion 2374 * in the call to arm_smmu_write_ctx_desc on the next attach (which 2375 * expects the entry to be empty). 
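 *
 * Note that the STE has already been rewritten above with master->domain
 * cleared, so at this point the stream is in bypass or abort (depending on
 * disable_bypass) rather than still referencing the stale context
 * descriptor.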
2376 */ 2377 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab) 2378 arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL); 2379 } 2380 2381 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) 2382 { 2383 int ret = 0; 2384 unsigned long flags; 2385 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2386 struct arm_smmu_device *smmu; 2387 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2388 struct arm_smmu_master *master; 2389 2390 if (!fwspec) 2391 return -ENOENT; 2392 2393 master = dev_iommu_priv_get(dev); 2394 smmu = master->smmu; 2395 2396 /* 2397 * Checking that SVA is disabled ensures that this device isn't bound to 2398 * any mm, and can be safely detached from its old domain. Bonds cannot 2399 * be removed concurrently since we're holding the group mutex. 2400 */ 2401 if (arm_smmu_master_sva_enabled(master)) { 2402 dev_err(dev, "cannot attach - SVA enabled\n"); 2403 return -EBUSY; 2404 } 2405 2406 arm_smmu_detach_dev(master); 2407 2408 mutex_lock(&smmu_domain->init_mutex); 2409 2410 if (!smmu_domain->smmu) { 2411 smmu_domain->smmu = smmu; 2412 ret = arm_smmu_domain_finalise(domain); 2413 if (ret) 2414 smmu_domain->smmu = NULL; 2415 } else if (smmu_domain->smmu != smmu) 2416 ret = -EINVAL; 2417 2418 mutex_unlock(&smmu_domain->init_mutex); 2419 if (ret) 2420 return ret; 2421 2422 master->domain = smmu_domain; 2423 2424 /* 2425 * The SMMU does not support enabling ATS with bypass. When the STE is 2426 * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and 2427 * Translated transactions are denied as though ATS is disabled for the 2428 * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and 2429 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry). 2430 */ 2431 if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) 2432 master->ats_enabled = arm_smmu_ats_supported(master); 2433 2434 spin_lock_irqsave(&smmu_domain->devices_lock, flags); 2435 list_add(&master->domain_head, &smmu_domain->devices); 2436 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 2437 2438 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 2439 if (!master->cd_table.cdtab) { 2440 ret = arm_smmu_alloc_cd_tables(master); 2441 if (ret) { 2442 master->domain = NULL; 2443 goto out_list_del; 2444 } 2445 } 2446 2447 /* 2448 * Prevent SVA from concurrently modifying the CD or writing to 2449 * the CD entry 2450 */ 2451 mutex_lock(&arm_smmu_asid_lock); 2452 ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd); 2453 mutex_unlock(&arm_smmu_asid_lock); 2454 if (ret) { 2455 master->domain = NULL; 2456 goto out_list_del; 2457 } 2458 } 2459 2460 arm_smmu_install_ste_for_dev(master); 2461 2462 arm_smmu_enable_ats(master); 2463 return 0; 2464 2465 out_list_del: 2466 spin_lock_irqsave(&smmu_domain->devices_lock, flags); 2467 list_del(&master->domain_head); 2468 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 2469 2470 return ret; 2471 } 2472 2473 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova, 2474 phys_addr_t paddr, size_t pgsize, size_t pgcount, 2475 int prot, gfp_t gfp, size_t *mapped) 2476 { 2477 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 2478 2479 if (!ops) 2480 return -ENODEV; 2481 2482 return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped); 2483 } 2484 2485 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova, 2486 size_t pgsize, size_t pgcount, 2487 struct iommu_iotlb_gather *gather) 2488 { 2489 struct 
arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2490 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; 2491 2492 if (!ops) 2493 return 0; 2494 2495 return ops->unmap_pages(ops, iova, pgsize, pgcount, gather); 2496 } 2497 2498 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) 2499 { 2500 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2501 2502 if (smmu_domain->smmu) 2503 arm_smmu_tlb_inv_context(smmu_domain); 2504 } 2505 2506 static void arm_smmu_iotlb_sync(struct iommu_domain *domain, 2507 struct iommu_iotlb_gather *gather) 2508 { 2509 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2510 2511 if (!gather->pgsize) 2512 return; 2513 2514 arm_smmu_tlb_inv_range_domain(gather->start, 2515 gather->end - gather->start + 1, 2516 gather->pgsize, true, smmu_domain); 2517 } 2518 2519 static phys_addr_t 2520 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2521 { 2522 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 2523 2524 if (!ops) 2525 return 0; 2526 2527 return ops->iova_to_phys(ops, iova); 2528 } 2529 2530 static struct platform_driver arm_smmu_driver; 2531 2532 static 2533 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) 2534 { 2535 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver, 2536 fwnode); 2537 put_device(dev); 2538 return dev ? dev_get_drvdata(dev) : NULL; 2539 } 2540 2541 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid) 2542 { 2543 unsigned long limit = smmu->strtab_cfg.num_l1_ents; 2544 2545 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) 2546 limit *= 1UL << STRTAB_SPLIT; 2547 2548 return sid < limit; 2549 } 2550 2551 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid) 2552 { 2553 /* Check the SIDs are in range of the SMMU and our stream table */ 2554 if (!arm_smmu_sid_in_range(smmu, sid)) 2555 return -ERANGE; 2556 2557 /* Ensure l2 strtab is initialised */ 2558 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) 2559 return arm_smmu_init_l2_strtab(smmu, sid); 2560 2561 return 0; 2562 } 2563 2564 static int arm_smmu_insert_master(struct arm_smmu_device *smmu, 2565 struct arm_smmu_master *master) 2566 { 2567 int i; 2568 int ret = 0; 2569 struct arm_smmu_stream *new_stream, *cur_stream; 2570 struct rb_node **new_node, *parent_node = NULL; 2571 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); 2572 2573 master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams), 2574 GFP_KERNEL); 2575 if (!master->streams) 2576 return -ENOMEM; 2577 master->num_streams = fwspec->num_ids; 2578 2579 mutex_lock(&smmu->streams_mutex); 2580 for (i = 0; i < fwspec->num_ids; i++) { 2581 u32 sid = fwspec->ids[i]; 2582 2583 new_stream = &master->streams[i]; 2584 new_stream->id = sid; 2585 new_stream->master = master; 2586 2587 ret = arm_smmu_init_sid_strtab(smmu, sid); 2588 if (ret) 2589 break; 2590 2591 /* Insert into SID tree */ 2592 new_node = &(smmu->streams.rb_node); 2593 while (*new_node) { 2594 cur_stream = rb_entry(*new_node, struct arm_smmu_stream, 2595 node); 2596 parent_node = *new_node; 2597 if (cur_stream->id > new_stream->id) { 2598 new_node = &((*new_node)->rb_left); 2599 } else if (cur_stream->id < new_stream->id) { 2600 new_node = &((*new_node)->rb_right); 2601 } else { 2602 dev_warn(master->dev, 2603 "stream %u already in tree\n", 2604 cur_stream->id); 2605 ret = -EINVAL; 2606 break; 2607 } 2608 } 2609 if (ret) 2610 break; 2611 2612 rb_link_node(&new_stream->node, parent_node, new_node); 
2613 rb_insert_color(&new_stream->node, &smmu->streams); 2614 } 2615 2616 if (ret) { 2617 for (i--; i >= 0; i--) 2618 rb_erase(&master->streams[i].node, &smmu->streams); 2619 kfree(master->streams); 2620 } 2621 mutex_unlock(&smmu->streams_mutex); 2622 2623 return ret; 2624 } 2625 2626 static void arm_smmu_remove_master(struct arm_smmu_master *master) 2627 { 2628 int i; 2629 struct arm_smmu_device *smmu = master->smmu; 2630 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); 2631 2632 if (!smmu || !master->streams) 2633 return; 2634 2635 mutex_lock(&smmu->streams_mutex); 2636 for (i = 0; i < fwspec->num_ids; i++) 2637 rb_erase(&master->streams[i].node, &smmu->streams); 2638 mutex_unlock(&smmu->streams_mutex); 2639 2640 kfree(master->streams); 2641 } 2642 2643 static struct iommu_ops arm_smmu_ops; 2644 2645 static struct iommu_device *arm_smmu_probe_device(struct device *dev) 2646 { 2647 int ret; 2648 struct arm_smmu_device *smmu; 2649 struct arm_smmu_master *master; 2650 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2651 2652 if (!fwspec || fwspec->ops != &arm_smmu_ops) 2653 return ERR_PTR(-ENODEV); 2654 2655 if (WARN_ON_ONCE(dev_iommu_priv_get(dev))) 2656 return ERR_PTR(-EBUSY); 2657 2658 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); 2659 if (!smmu) 2660 return ERR_PTR(-ENODEV); 2661 2662 master = kzalloc(sizeof(*master), GFP_KERNEL); 2663 if (!master) 2664 return ERR_PTR(-ENOMEM); 2665 2666 master->dev = dev; 2667 master->smmu = smmu; 2668 INIT_LIST_HEAD(&master->bonds); 2669 dev_iommu_priv_set(dev, master); 2670 2671 ret = arm_smmu_insert_master(smmu, master); 2672 if (ret) 2673 goto err_free_master; 2674 2675 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits); 2676 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits); 2677 2678 /* 2679 * Note that PASID must be enabled before, and disabled after ATS: 2680 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register 2681 * 2682 * Behavior is undefined if this bit is Set and the value of the PASID 2683 * Enable, Execute Requested Enable, or Privileged Mode Requested bits 2684 * are changed. 2685 */ 2686 arm_smmu_enable_pasid(master); 2687 2688 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB)) 2689 master->ssid_bits = min_t(u8, master->ssid_bits, 2690 CTXDESC_LINEAR_CDMAX); 2691 2692 if ((smmu->features & ARM_SMMU_FEAT_STALLS && 2693 device_property_read_bool(dev, "dma-can-stall")) || 2694 smmu->features & ARM_SMMU_FEAT_STALL_FORCE) 2695 master->stall_enabled = true; 2696 2697 return &smmu->iommu; 2698 2699 err_free_master: 2700 kfree(master); 2701 dev_iommu_priv_set(dev, NULL); 2702 return ERR_PTR(ret); 2703 } 2704 2705 static void arm_smmu_release_device(struct device *dev) 2706 { 2707 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 2708 2709 if (WARN_ON(arm_smmu_master_sva_enabled(master))) 2710 iopf_queue_remove_device(master->smmu->evtq.iopf, dev); 2711 arm_smmu_detach_dev(master); 2712 arm_smmu_disable_pasid(master); 2713 arm_smmu_remove_master(master); 2714 if (master->cd_table.cdtab) 2715 arm_smmu_free_cd_tables(master); 2716 kfree(master); 2717 } 2718 2719 static struct iommu_group *arm_smmu_device_group(struct device *dev) 2720 { 2721 struct iommu_group *group; 2722 2723 /* 2724 * We don't support devices sharing stream IDs other than PCI RID 2725 * aliases, since the necessary ID-to-device lookup becomes rather 2726 * impractical given a potential sparse 32-bit stream ID space. 
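 *
 * PCI RID aliasing is handled by pci_device_group() below; any other
 * device simply gets a group of its own.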
2727 */ 2728 if (dev_is_pci(dev)) 2729 group = pci_device_group(dev); 2730 else 2731 group = generic_device_group(dev); 2732 2733 return group; 2734 } 2735 2736 static int arm_smmu_enable_nesting(struct iommu_domain *domain) 2737 { 2738 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2739 int ret = 0; 2740 2741 mutex_lock(&smmu_domain->init_mutex); 2742 if (smmu_domain->smmu) 2743 ret = -EPERM; 2744 else 2745 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; 2746 mutex_unlock(&smmu_domain->init_mutex); 2747 2748 return ret; 2749 } 2750 2751 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) 2752 { 2753 return iommu_fwspec_add_ids(dev, args->args, 1); 2754 } 2755 2756 static void arm_smmu_get_resv_regions(struct device *dev, 2757 struct list_head *head) 2758 { 2759 struct iommu_resv_region *region; 2760 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; 2761 2762 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, 2763 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL); 2764 if (!region) 2765 return; 2766 2767 list_add_tail(®ion->list, head); 2768 2769 iommu_dma_get_resv_regions(dev, head); 2770 } 2771 2772 static int arm_smmu_dev_enable_feature(struct device *dev, 2773 enum iommu_dev_features feat) 2774 { 2775 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 2776 2777 if (!master) 2778 return -ENODEV; 2779 2780 switch (feat) { 2781 case IOMMU_DEV_FEAT_IOPF: 2782 if (!arm_smmu_master_iopf_supported(master)) 2783 return -EINVAL; 2784 if (master->iopf_enabled) 2785 return -EBUSY; 2786 master->iopf_enabled = true; 2787 return 0; 2788 case IOMMU_DEV_FEAT_SVA: 2789 if (!arm_smmu_master_sva_supported(master)) 2790 return -EINVAL; 2791 if (arm_smmu_master_sva_enabled(master)) 2792 return -EBUSY; 2793 return arm_smmu_master_enable_sva(master); 2794 default: 2795 return -EINVAL; 2796 } 2797 } 2798 2799 static int arm_smmu_dev_disable_feature(struct device *dev, 2800 enum iommu_dev_features feat) 2801 { 2802 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 2803 2804 if (!master) 2805 return -EINVAL; 2806 2807 switch (feat) { 2808 case IOMMU_DEV_FEAT_IOPF: 2809 if (!master->iopf_enabled) 2810 return -EINVAL; 2811 if (master->sva_enabled) 2812 return -EBUSY; 2813 master->iopf_enabled = false; 2814 return 0; 2815 case IOMMU_DEV_FEAT_SVA: 2816 if (!arm_smmu_master_sva_enabled(master)) 2817 return -EINVAL; 2818 return arm_smmu_master_disable_sva(master); 2819 default: 2820 return -EINVAL; 2821 } 2822 } 2823 2824 /* 2825 * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the 2826 * PCIe link and save the data to memory by DMA. The hardware is restricted to 2827 * use identity mapping only. 
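 *
 * It is matched by PCI vendor/device ID below so that
 * arm_smmu_def_domain_type() can request an IOMMU_DOMAIN_IDENTITY default
 * domain for it.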
2828 */ 2829 #define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \ 2830 (pdev)->device == 0xa12e) 2831 2832 static int arm_smmu_def_domain_type(struct device *dev) 2833 { 2834 if (dev_is_pci(dev)) { 2835 struct pci_dev *pdev = to_pci_dev(dev); 2836 2837 if (IS_HISI_PTT_DEVICE(pdev)) 2838 return IOMMU_DOMAIN_IDENTITY; 2839 } 2840 2841 return 0; 2842 } 2843 2844 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid) 2845 { 2846 struct iommu_domain *domain; 2847 2848 domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA); 2849 if (WARN_ON(IS_ERR(domain)) || !domain) 2850 return; 2851 2852 arm_smmu_sva_remove_dev_pasid(domain, dev, pasid); 2853 } 2854 2855 static struct iommu_ops arm_smmu_ops = { 2856 .capable = arm_smmu_capable, 2857 .domain_alloc = arm_smmu_domain_alloc, 2858 .probe_device = arm_smmu_probe_device, 2859 .release_device = arm_smmu_release_device, 2860 .device_group = arm_smmu_device_group, 2861 .of_xlate = arm_smmu_of_xlate, 2862 .get_resv_regions = arm_smmu_get_resv_regions, 2863 .remove_dev_pasid = arm_smmu_remove_dev_pasid, 2864 .dev_enable_feat = arm_smmu_dev_enable_feature, 2865 .dev_disable_feat = arm_smmu_dev_disable_feature, 2866 .page_response = arm_smmu_page_response, 2867 .def_domain_type = arm_smmu_def_domain_type, 2868 .pgsize_bitmap = -1UL, /* Restricted during device attach */ 2869 .owner = THIS_MODULE, 2870 .default_domain_ops = &(const struct iommu_domain_ops) { 2871 .attach_dev = arm_smmu_attach_dev, 2872 .map_pages = arm_smmu_map_pages, 2873 .unmap_pages = arm_smmu_unmap_pages, 2874 .flush_iotlb_all = arm_smmu_flush_iotlb_all, 2875 .iotlb_sync = arm_smmu_iotlb_sync, 2876 .iova_to_phys = arm_smmu_iova_to_phys, 2877 .enable_nesting = arm_smmu_enable_nesting, 2878 .free = arm_smmu_domain_free, 2879 } 2880 }; 2881 2882 /* Probing and initialisation functions */ 2883 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, 2884 struct arm_smmu_queue *q, 2885 void __iomem *page, 2886 unsigned long prod_off, 2887 unsigned long cons_off, 2888 size_t dwords, const char *name) 2889 { 2890 size_t qsz; 2891 2892 do { 2893 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3; 2894 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, 2895 GFP_KERNEL); 2896 if (q->base || qsz < PAGE_SIZE) 2897 break; 2898 2899 q->llq.max_n_shift--; 2900 } while (1); 2901 2902 if (!q->base) { 2903 dev_err(smmu->dev, 2904 "failed to allocate queue (0x%zx bytes) for %s\n", 2905 qsz, name); 2906 return -ENOMEM; 2907 } 2908 2909 if (!WARN_ON(q->base_dma & (qsz - 1))) { 2910 dev_info(smmu->dev, "allocated %u entries for %s\n", 2911 1 << q->llq.max_n_shift, name); 2912 } 2913 2914 q->prod_reg = page + prod_off; 2915 q->cons_reg = page + cons_off; 2916 q->ent_dwords = dwords; 2917 2918 q->q_base = Q_BASE_RWA; 2919 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK; 2920 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift); 2921 2922 q->llq.prod = q->llq.cons = 0; 2923 return 0; 2924 } 2925 2926 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu) 2927 { 2928 struct arm_smmu_cmdq *cmdq = &smmu->cmdq; 2929 unsigned int nents = 1 << cmdq->q.llq.max_n_shift; 2930 2931 atomic_set(&cmdq->owner_prod, 0); 2932 atomic_set(&cmdq->lock, 0); 2933 2934 cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents, 2935 GFP_KERNEL); 2936 if (!cmdq->valid_map) 2937 return -ENOMEM; 2938 2939 return 0; 2940 } 2941 2942 static int arm_smmu_init_queues(struct arm_smmu_device *smmu) 2943 { 2944 int ret; 2945 2946 /* cmdq */ 2947 ret 
= arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base, 2948 ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS, 2949 CMDQ_ENT_DWORDS, "cmdq"); 2950 if (ret) 2951 return ret; 2952 2953 ret = arm_smmu_cmdq_init(smmu); 2954 if (ret) 2955 return ret; 2956 2957 /* evtq */ 2958 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1, 2959 ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS, 2960 EVTQ_ENT_DWORDS, "evtq"); 2961 if (ret) 2962 return ret; 2963 2964 if ((smmu->features & ARM_SMMU_FEAT_SVA) && 2965 (smmu->features & ARM_SMMU_FEAT_STALLS)) { 2966 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev)); 2967 if (!smmu->evtq.iopf) 2968 return -ENOMEM; 2969 } 2970 2971 /* priq */ 2972 if (!(smmu->features & ARM_SMMU_FEAT_PRI)) 2973 return 0; 2974 2975 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1, 2976 ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS, 2977 PRIQ_ENT_DWORDS, "priq"); 2978 } 2979 2980 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) 2981 { 2982 unsigned int i; 2983 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 2984 void *strtab = smmu->strtab_cfg.strtab; 2985 2986 cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, 2987 sizeof(*cfg->l1_desc), GFP_KERNEL); 2988 if (!cfg->l1_desc) 2989 return -ENOMEM; 2990 2991 for (i = 0; i < cfg->num_l1_ents; ++i) { 2992 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]); 2993 strtab += STRTAB_L1_DESC_DWORDS << 3; 2994 } 2995 2996 return 0; 2997 } 2998 2999 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) 3000 { 3001 void *strtab; 3002 u64 reg; 3003 u32 size, l1size; 3004 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 3005 3006 /* Calculate the L1 size, capped to the SIDSIZE. */ 3007 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3); 3008 size = min(size, smmu->sid_bits - STRTAB_SPLIT); 3009 cfg->num_l1_ents = 1 << size; 3010 3011 size += STRTAB_SPLIT; 3012 if (size < smmu->sid_bits) 3013 dev_warn(smmu->dev, 3014 "2-level strtab only covers %u/%u bits of SID\n", 3015 size, smmu->sid_bits); 3016 3017 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); 3018 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, 3019 GFP_KERNEL); 3020 if (!strtab) { 3021 dev_err(smmu->dev, 3022 "failed to allocate l1 stream table (%u bytes)\n", 3023 l1size); 3024 return -ENOMEM; 3025 } 3026 cfg->strtab = strtab; 3027 3028 /* Configure strtab_base_cfg for 2 levels */ 3029 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL); 3030 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size); 3031 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); 3032 cfg->strtab_base_cfg = reg; 3033 3034 return arm_smmu_init_l1_strtab(smmu); 3035 } 3036 3037 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) 3038 { 3039 void *strtab; 3040 u64 reg; 3041 u32 size; 3042 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 3043 3044 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); 3045 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma, 3046 GFP_KERNEL); 3047 if (!strtab) { 3048 dev_err(smmu->dev, 3049 "failed to allocate linear stream table (%u bytes)\n", 3050 size); 3051 return -ENOMEM; 3052 } 3053 cfg->strtab = strtab; 3054 cfg->num_l1_ents = 1 << smmu->sid_bits; 3055 3056 /* Configure strtab_base_cfg for a linear table covering all SIDs */ 3057 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR); 3058 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); 3059 cfg->strtab_base_cfg = reg; 3060 3061 arm_smmu_init_bypass_stes(strtab, 
cfg->num_l1_ents, false); 3062 return 0; 3063 } 3064 3065 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) 3066 { 3067 u64 reg; 3068 int ret; 3069 3070 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) 3071 ret = arm_smmu_init_strtab_2lvl(smmu); 3072 else 3073 ret = arm_smmu_init_strtab_linear(smmu); 3074 3075 if (ret) 3076 return ret; 3077 3078 /* Set the strtab base address */ 3079 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK; 3080 reg |= STRTAB_BASE_RA; 3081 smmu->strtab_cfg.strtab_base = reg; 3082 3083 ida_init(&smmu->vmid_map); 3084 3085 return 0; 3086 } 3087 3088 static int arm_smmu_init_structures(struct arm_smmu_device *smmu) 3089 { 3090 int ret; 3091 3092 mutex_init(&smmu->streams_mutex); 3093 smmu->streams = RB_ROOT; 3094 3095 ret = arm_smmu_init_queues(smmu); 3096 if (ret) 3097 return ret; 3098 3099 return arm_smmu_init_strtab(smmu); 3100 } 3101 3102 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, 3103 unsigned int reg_off, unsigned int ack_off) 3104 { 3105 u32 reg; 3106 3107 writel_relaxed(val, smmu->base + reg_off); 3108 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val, 3109 1, ARM_SMMU_POLL_TIMEOUT_US); 3110 } 3111 3112 /* GBPA is "special" */ 3113 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr) 3114 { 3115 int ret; 3116 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA; 3117 3118 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE), 3119 1, ARM_SMMU_POLL_TIMEOUT_US); 3120 if (ret) 3121 return ret; 3122 3123 reg &= ~clr; 3124 reg |= set; 3125 writel_relaxed(reg | GBPA_UPDATE, gbpa); 3126 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE), 3127 1, ARM_SMMU_POLL_TIMEOUT_US); 3128 3129 if (ret) 3130 dev_err(smmu->dev, "GBPA not responding to update\n"); 3131 return ret; 3132 } 3133 3134 static void arm_smmu_free_msis(void *data) 3135 { 3136 struct device *dev = data; 3137 platform_msi_domain_free_irqs(dev); 3138 } 3139 3140 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) 3141 { 3142 phys_addr_t doorbell; 3143 struct device *dev = msi_desc_to_dev(desc); 3144 struct arm_smmu_device *smmu = dev_get_drvdata(dev); 3145 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index]; 3146 3147 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo; 3148 doorbell &= MSI_CFG0_ADDR_MASK; 3149 3150 writeq_relaxed(doorbell, smmu->base + cfg[0]); 3151 writel_relaxed(msg->data, smmu->base + cfg[1]); 3152 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]); 3153 } 3154 3155 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) 3156 { 3157 int ret, nvec = ARM_SMMU_MAX_MSIS; 3158 struct device *dev = smmu->dev; 3159 3160 /* Clear the MSI address regs */ 3161 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); 3162 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); 3163 3164 if (smmu->features & ARM_SMMU_FEAT_PRI) 3165 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); 3166 else 3167 nvec--; 3168 3169 if (!(smmu->features & ARM_SMMU_FEAT_MSI)) 3170 return; 3171 3172 if (!dev->msi.domain) { 3173 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n"); 3174 return; 3175 } 3176 3177 /* Allocate MSIs for evtq, gerror and priq. 
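(CMD_SYNC completion is detected by polling, optionally via an MSI written by the SYNC command itself, so the command queue needs no vector from this allocation.)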
Ignore cmdq */ 3178 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); 3179 if (ret) { 3180 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n"); 3181 return; 3182 } 3183 3184 smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX); 3185 smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX); 3186 smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX); 3187 3188 /* Add callback to free MSIs on teardown */ 3189 devm_add_action(dev, arm_smmu_free_msis, dev); 3190 } 3191 3192 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) 3193 { 3194 int irq, ret; 3195 3196 arm_smmu_setup_msis(smmu); 3197 3198 /* Request interrupt lines */ 3199 irq = smmu->evtq.q.irq; 3200 if (irq) { 3201 ret = devm_request_threaded_irq(smmu->dev, irq, NULL, 3202 arm_smmu_evtq_thread, 3203 IRQF_ONESHOT, 3204 "arm-smmu-v3-evtq", smmu); 3205 if (ret < 0) 3206 dev_warn(smmu->dev, "failed to enable evtq irq\n"); 3207 } else { 3208 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n"); 3209 } 3210 3211 irq = smmu->gerr_irq; 3212 if (irq) { 3213 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler, 3214 0, "arm-smmu-v3-gerror", smmu); 3215 if (ret < 0) 3216 dev_warn(smmu->dev, "failed to enable gerror irq\n"); 3217 } else { 3218 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n"); 3219 } 3220 3221 if (smmu->features & ARM_SMMU_FEAT_PRI) { 3222 irq = smmu->priq.q.irq; 3223 if (irq) { 3224 ret = devm_request_threaded_irq(smmu->dev, irq, NULL, 3225 arm_smmu_priq_thread, 3226 IRQF_ONESHOT, 3227 "arm-smmu-v3-priq", 3228 smmu); 3229 if (ret < 0) 3230 dev_warn(smmu->dev, 3231 "failed to enable priq irq\n"); 3232 } else { 3233 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n"); 3234 } 3235 } 3236 } 3237 3238 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) 3239 { 3240 int ret, irq; 3241 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; 3242 3243 /* Disable IRQs first */ 3244 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, 3245 ARM_SMMU_IRQ_CTRLACK); 3246 if (ret) { 3247 dev_err(smmu->dev, "failed to disable irqs\n"); 3248 return ret; 3249 } 3250 3251 irq = smmu->combined_irq; 3252 if (irq) { 3253 /* 3254 * Cavium ThunderX2 implementation doesn't support unique irq 3255 * lines. Use a single irq line for all the SMMUv3 interrupts. 
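 *
 * That line is requested below as a threaded interrupt: the hard handler
 * services GERROR and then wakes a thread which runs the event queue
 * (and, where supported, PRI queue) handlers.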
3256 */ 3257 ret = devm_request_threaded_irq(smmu->dev, irq, 3258 arm_smmu_combined_irq_handler, 3259 arm_smmu_combined_irq_thread, 3260 IRQF_ONESHOT, 3261 "arm-smmu-v3-combined-irq", smmu); 3262 if (ret < 0) 3263 dev_warn(smmu->dev, "failed to enable combined irq\n"); 3264 } else 3265 arm_smmu_setup_unique_irqs(smmu); 3266 3267 if (smmu->features & ARM_SMMU_FEAT_PRI) 3268 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; 3269 3270 /* Enable interrupt generation on the SMMU */ 3271 ret = arm_smmu_write_reg_sync(smmu, irqen_flags, 3272 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK); 3273 if (ret) 3274 dev_warn(smmu->dev, "failed to enable irqs\n"); 3275 3276 return 0; 3277 } 3278 3279 static int arm_smmu_device_disable(struct arm_smmu_device *smmu) 3280 { 3281 int ret; 3282 3283 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK); 3284 if (ret) 3285 dev_err(smmu->dev, "failed to clear cr0\n"); 3286 3287 return ret; 3288 } 3289 3290 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) 3291 { 3292 int ret; 3293 u32 reg, enables; 3294 struct arm_smmu_cmdq_ent cmd; 3295 3296 /* Clear CR0 and sync (disables SMMU and queue processing) */ 3297 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0); 3298 if (reg & CR0_SMMUEN) { 3299 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n"); 3300 WARN_ON(is_kdump_kernel() && !disable_bypass); 3301 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); 3302 } 3303 3304 ret = arm_smmu_device_disable(smmu); 3305 if (ret) 3306 return ret; 3307 3308 /* CR1 (table and queue memory attributes) */ 3309 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) | 3310 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) | 3311 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) | 3312 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) | 3313 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) | 3314 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB); 3315 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1); 3316 3317 /* CR2 (random crap) */ 3318 reg = CR2_PTM | CR2_RECINVSID; 3319 3320 if (smmu->features & ARM_SMMU_FEAT_E2H) 3321 reg |= CR2_E2H; 3322 3323 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2); 3324 3325 /* Stream table */ 3326 writeq_relaxed(smmu->strtab_cfg.strtab_base, 3327 smmu->base + ARM_SMMU_STRTAB_BASE); 3328 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg, 3329 smmu->base + ARM_SMMU_STRTAB_BASE_CFG); 3330 3331 /* Command queue */ 3332 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE); 3333 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD); 3334 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS); 3335 3336 enables = CR0_CMDQEN; 3337 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 3338 ARM_SMMU_CR0ACK); 3339 if (ret) { 3340 dev_err(smmu->dev, "failed to enable command queue\n"); 3341 return ret; 3342 } 3343 3344 /* Invalidate any cached configuration */ 3345 cmd.opcode = CMDQ_OP_CFGI_ALL; 3346 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); 3347 3348 /* Invalidate any stale TLB entries */ 3349 if (smmu->features & ARM_SMMU_FEAT_HYP) { 3350 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL; 3351 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); 3352 } 3353 3354 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; 3355 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); 3356 3357 /* Event queue */ 3358 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); 3359 writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD); 3360 writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS); 3361 3362 enables |= CR0_EVTQEN; 3363 ret = 
arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 3364 ARM_SMMU_CR0ACK); 3365 if (ret) { 3366 dev_err(smmu->dev, "failed to enable event queue\n"); 3367 return ret; 3368 } 3369 3370 /* PRI queue */ 3371 if (smmu->features & ARM_SMMU_FEAT_PRI) { 3372 writeq_relaxed(smmu->priq.q.q_base, 3373 smmu->base + ARM_SMMU_PRIQ_BASE); 3374 writel_relaxed(smmu->priq.q.llq.prod, 3375 smmu->page1 + ARM_SMMU_PRIQ_PROD); 3376 writel_relaxed(smmu->priq.q.llq.cons, 3377 smmu->page1 + ARM_SMMU_PRIQ_CONS); 3378 3379 enables |= CR0_PRIQEN; 3380 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 3381 ARM_SMMU_CR0ACK); 3382 if (ret) { 3383 dev_err(smmu->dev, "failed to enable PRI queue\n"); 3384 return ret; 3385 } 3386 } 3387 3388 if (smmu->features & ARM_SMMU_FEAT_ATS) { 3389 enables |= CR0_ATSCHK; 3390 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 3391 ARM_SMMU_CR0ACK); 3392 if (ret) { 3393 dev_err(smmu->dev, "failed to enable ATS check\n"); 3394 return ret; 3395 } 3396 } 3397 3398 ret = arm_smmu_setup_irqs(smmu); 3399 if (ret) { 3400 dev_err(smmu->dev, "failed to setup irqs\n"); 3401 return ret; 3402 } 3403 3404 if (is_kdump_kernel()) 3405 enables &= ~(CR0_EVTQEN | CR0_PRIQEN); 3406 3407 /* Enable the SMMU interface, or ensure bypass */ 3408 if (!bypass || disable_bypass) { 3409 enables |= CR0_SMMUEN; 3410 } else { 3411 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT); 3412 if (ret) 3413 return ret; 3414 } 3415 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 3416 ARM_SMMU_CR0ACK); 3417 if (ret) { 3418 dev_err(smmu->dev, "failed to enable SMMU interface\n"); 3419 return ret; 3420 } 3421 3422 return 0; 3423 } 3424 3425 #define IIDR_IMPLEMENTER_ARM 0x43b 3426 #define IIDR_PRODUCTID_ARM_MMU_600 0x483 3427 #define IIDR_PRODUCTID_ARM_MMU_700 0x487 3428 3429 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu) 3430 { 3431 u32 reg; 3432 unsigned int implementer, productid, variant, revision; 3433 3434 reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR); 3435 implementer = FIELD_GET(IIDR_IMPLEMENTER, reg); 3436 productid = FIELD_GET(IIDR_PRODUCTID, reg); 3437 variant = FIELD_GET(IIDR_VARIANT, reg); 3438 revision = FIELD_GET(IIDR_REVISION, reg); 3439 3440 switch (implementer) { 3441 case IIDR_IMPLEMENTER_ARM: 3442 switch (productid) { 3443 case IIDR_PRODUCTID_ARM_MMU_600: 3444 /* Arm erratum 1076982 */ 3445 if (variant == 0 && revision <= 2) 3446 smmu->features &= ~ARM_SMMU_FEAT_SEV; 3447 /* Arm erratum 1209401 */ 3448 if (variant < 2) 3449 smmu->features &= ~ARM_SMMU_FEAT_NESTING; 3450 break; 3451 case IIDR_PRODUCTID_ARM_MMU_700: 3452 /* Arm erratum 2812531 */ 3453 smmu->features &= ~ARM_SMMU_FEAT_BTM; 3454 smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC; 3455 /* Arm errata 2268618, 2812531 */ 3456 smmu->features &= ~ARM_SMMU_FEAT_NESTING; 3457 break; 3458 } 3459 break; 3460 } 3461 } 3462 3463 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) 3464 { 3465 u32 reg; 3466 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY; 3467 3468 /* IDR0 */ 3469 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0); 3470 3471 /* 2-level structures */ 3472 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL) 3473 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB; 3474 3475 if (reg & IDR0_CD2L) 3476 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB; 3477 3478 /* 3479 * Translation table endianness. 3480 * We currently require the same endianness as the CPU, but this 3481 * could be changed later by adding a new IO_PGTABLE_QUIRK. 
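 *
 * Mixed-endian implementations advertise both TT_LE and TT_BE below;
 * otherwise only the endianness matching the kernel build is accepted,
 * and anything else fails the probe with -ENXIO.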
3482 */ 3483 switch (FIELD_GET(IDR0_TTENDIAN, reg)) { 3484 case IDR0_TTENDIAN_MIXED: 3485 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE; 3486 break; 3487 #ifdef __BIG_ENDIAN 3488 case IDR0_TTENDIAN_BE: 3489 smmu->features |= ARM_SMMU_FEAT_TT_BE; 3490 break; 3491 #else 3492 case IDR0_TTENDIAN_LE: 3493 smmu->features |= ARM_SMMU_FEAT_TT_LE; 3494 break; 3495 #endif 3496 default: 3497 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n"); 3498 return -ENXIO; 3499 } 3500 3501 /* Boolean feature flags */ 3502 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI) 3503 smmu->features |= ARM_SMMU_FEAT_PRI; 3504 3505 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS) 3506 smmu->features |= ARM_SMMU_FEAT_ATS; 3507 3508 if (reg & IDR0_SEV) 3509 smmu->features |= ARM_SMMU_FEAT_SEV; 3510 3511 if (reg & IDR0_MSI) { 3512 smmu->features |= ARM_SMMU_FEAT_MSI; 3513 if (coherent && !disable_msipolling) 3514 smmu->options |= ARM_SMMU_OPT_MSIPOLL; 3515 } 3516 3517 if (reg & IDR0_HYP) { 3518 smmu->features |= ARM_SMMU_FEAT_HYP; 3519 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) 3520 smmu->features |= ARM_SMMU_FEAT_E2H; 3521 } 3522 3523 /* 3524 * The coherency feature as set by FW is used in preference to the ID 3525 * register, but warn on mismatch. 3526 */ 3527 if (!!(reg & IDR0_COHACC) != coherent) 3528 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n", 3529 coherent ? "true" : "false"); 3530 3531 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) { 3532 case IDR0_STALL_MODEL_FORCE: 3533 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE; 3534 fallthrough; 3535 case IDR0_STALL_MODEL_STALL: 3536 smmu->features |= ARM_SMMU_FEAT_STALLS; 3537 } 3538 3539 if (reg & IDR0_S1P) 3540 smmu->features |= ARM_SMMU_FEAT_TRANS_S1; 3541 3542 if (reg & IDR0_S2P) 3543 smmu->features |= ARM_SMMU_FEAT_TRANS_S2; 3544 3545 if (!(reg & (IDR0_S1P | IDR0_S2P))) { 3546 dev_err(smmu->dev, "no translation support!\n"); 3547 return -ENXIO; 3548 } 3549 3550 /* We only support the AArch64 table format at present */ 3551 switch (FIELD_GET(IDR0_TTF, reg)) { 3552 case IDR0_TTF_AARCH32_64: 3553 smmu->ias = 40; 3554 fallthrough; 3555 case IDR0_TTF_AARCH64: 3556 break; 3557 default: 3558 dev_err(smmu->dev, "AArch64 table format not supported!\n"); 3559 return -ENXIO; 3560 } 3561 3562 /* ASID/VMID sizes */ 3563 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8; 3564 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8; 3565 3566 /* IDR1 */ 3567 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1); 3568 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) { 3569 dev_err(smmu->dev, "embedded implementation not supported\n"); 3570 return -ENXIO; 3571 } 3572 3573 /* Queue sizes, capped to ensure natural alignment */ 3574 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, 3575 FIELD_GET(IDR1_CMDQS, reg)); 3576 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) { 3577 /* 3578 * We don't support splitting up batches, so one batch of 3579 * commands plus an extra sync needs to fit inside the command 3580 * queue. There's also no way we can handle the weird alignment 3581 * restrictions on the base pointer for a unit-length queue. 
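 *
 * Concretely, the check below rejects any max_n_shift that is not
 * strictly greater than ilog2(CMDQ_BATCH_ENTRIES), guaranteeing the queue
 * is at least twice the batch size.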
3582 */ 3583 dev_err(smmu->dev, "command queue size <= %d entries not supported\n", 3584 CMDQ_BATCH_ENTRIES); 3585 return -ENXIO; 3586 } 3587 3588 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT, 3589 FIELD_GET(IDR1_EVTQS, reg)); 3590 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT, 3591 FIELD_GET(IDR1_PRIQS, reg)); 3592 3593 /* SID/SSID sizes */ 3594 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg); 3595 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg); 3596 smmu->iommu.max_pasids = 1UL << smmu->ssid_bits; 3597 3598 /* 3599 * If the SMMU supports fewer bits than would fill a single L2 stream 3600 * table, use a linear table instead. 3601 */ 3602 if (smmu->sid_bits <= STRTAB_SPLIT) 3603 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB; 3604 3605 /* IDR3 */ 3606 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); 3607 if (FIELD_GET(IDR3_RIL, reg)) 3608 smmu->features |= ARM_SMMU_FEAT_RANGE_INV; 3609 3610 /* IDR5 */ 3611 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); 3612 3613 /* Maximum number of outstanding stalls */ 3614 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg); 3615 3616 /* Page sizes */ 3617 if (reg & IDR5_GRAN64K) 3618 smmu->pgsize_bitmap |= SZ_64K | SZ_512M; 3619 if (reg & IDR5_GRAN16K) 3620 smmu->pgsize_bitmap |= SZ_16K | SZ_32M; 3621 if (reg & IDR5_GRAN4K) 3622 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G; 3623 3624 /* Input address size */ 3625 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT) 3626 smmu->features |= ARM_SMMU_FEAT_VAX; 3627 3628 /* Output address size */ 3629 switch (FIELD_GET(IDR5_OAS, reg)) { 3630 case IDR5_OAS_32_BIT: 3631 smmu->oas = 32; 3632 break; 3633 case IDR5_OAS_36_BIT: 3634 smmu->oas = 36; 3635 break; 3636 case IDR5_OAS_40_BIT: 3637 smmu->oas = 40; 3638 break; 3639 case IDR5_OAS_42_BIT: 3640 smmu->oas = 42; 3641 break; 3642 case IDR5_OAS_44_BIT: 3643 smmu->oas = 44; 3644 break; 3645 case IDR5_OAS_52_BIT: 3646 smmu->oas = 52; 3647 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */ 3648 break; 3649 default: 3650 dev_info(smmu->dev, 3651 "unknown output address size. 
Truncating to 48-bit\n"); 3652 fallthrough; 3653 case IDR5_OAS_48_BIT: 3654 smmu->oas = 48; 3655 } 3656 3657 if (arm_smmu_ops.pgsize_bitmap == -1UL) 3658 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap; 3659 else 3660 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap; 3661 3662 /* Set the DMA mask for our table walker */ 3663 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas))) 3664 dev_warn(smmu->dev, 3665 "failed to set DMA mask for table walker\n"); 3666 3667 smmu->ias = max(smmu->ias, smmu->oas); 3668 3669 if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) && 3670 (smmu->features & ARM_SMMU_FEAT_TRANS_S2)) 3671 smmu->features |= ARM_SMMU_FEAT_NESTING; 3672 3673 arm_smmu_device_iidr_probe(smmu); 3674 3675 if (arm_smmu_sva_supported(smmu)) 3676 smmu->features |= ARM_SMMU_FEAT_SVA; 3677 3678 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n", 3679 smmu->ias, smmu->oas, smmu->features); 3680 return 0; 3681 } 3682 3683 #ifdef CONFIG_ACPI 3684 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) 3685 { 3686 switch (model) { 3687 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: 3688 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; 3689 break; 3690 case ACPI_IORT_SMMU_V3_HISILICON_HI161X: 3691 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; 3692 break; 3693 } 3694 3695 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); 3696 } 3697 3698 static int arm_smmu_device_acpi_probe(struct platform_device *pdev, 3699 struct arm_smmu_device *smmu) 3700 { 3701 struct acpi_iort_smmu_v3 *iort_smmu; 3702 struct device *dev = smmu->dev; 3703 struct acpi_iort_node *node; 3704 3705 node = *(struct acpi_iort_node **)dev_get_platdata(dev); 3706 3707 /* Retrieve SMMUv3 specific data */ 3708 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; 3709 3710 acpi_smmu_get_options(iort_smmu->model, smmu); 3711 3712 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) 3713 smmu->features |= ARM_SMMU_FEAT_COHERENCY; 3714 3715 return 0; 3716 } 3717 #else 3718 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev, 3719 struct arm_smmu_device *smmu) 3720 { 3721 return -ENODEV; 3722 } 3723 #endif 3724 3725 static int arm_smmu_device_dt_probe(struct platform_device *pdev, 3726 struct arm_smmu_device *smmu) 3727 { 3728 struct device *dev = &pdev->dev; 3729 u32 cells; 3730 int ret = -EINVAL; 3731 3732 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells)) 3733 dev_err(dev, "missing #iommu-cells property\n"); 3734 else if (cells != 1) 3735 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells); 3736 else 3737 ret = 0; 3738 3739 parse_driver_options(smmu); 3740 3741 if (of_dma_is_coherent(dev->of_node)) 3742 smmu->features |= ARM_SMMU_FEAT_COHERENCY; 3743 3744 return ret; 3745 } 3746 3747 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu) 3748 { 3749 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY) 3750 return SZ_64K; 3751 else 3752 return SZ_128K; 3753 } 3754 3755 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start, 3756 resource_size_t size) 3757 { 3758 struct resource res = DEFINE_RES_MEM(start, size); 3759 3760 return devm_ioremap_resource(dev, &res); 3761 } 3762 3763 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) 3764 { 3765 struct list_head rmr_list; 3766 struct iommu_resv_region *e; 3767 3768 INIT_LIST_HEAD(&rmr_list); 3769 iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); 3770 3771 list_for_each_entry(e, &rmr_list, list) { 3772 __le64 *step; 3773 struct 
static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
{
	struct list_head rmr_list;
	struct iommu_resv_region *e;

	INIT_LIST_HEAD(&rmr_list);
	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);

	list_for_each_entry(e, &rmr_list, list) {
		__le64 *step;
		struct iommu_iort_rmr_data *rmr;
		int ret, i;

		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
		for (i = 0; i < rmr->num_sids; i++) {
			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
			if (ret) {
				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
					rmr->sids[i]);
				continue;
			}

			step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
			arm_smmu_init_bypass_stes(step, 1, true);
		}
	}

	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}
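/*
 * For illustration only -- how the MMIO mapping set up in the probe routine
 * below ends up laid out. With the default 128K resource, two partial
 * mappings are created, one per 64K register page, so anything beyond
 * ARM_SMMU_REG_SZ in each page (the IMPLEMENTATION DEFINED area that may hold
 * the PMCG registers) stays unmapped:
 *
 *	smmu->base  maps [ioaddr,          ioaddr + ARM_SMMU_REG_SZ)
 *	smmu->page1 maps [ioaddr + SZ_64K, ioaddr + SZ_64K + ARM_SMMU_REG_SZ)
 *
 * With ARM_SMMU_OPT_PAGE0_REGS_ONLY (e.g. the Cavium CN99xx case handled in
 * acpi_smmu_get_options()), the resource shrinks to 64K and smmu->page1
 * simply reuses the page-0 mapping.
 */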
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	int irq, ret;
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	bool bypass;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu)
		return -ENOMEM;
	smmu->dev = dev;

	if (dev->of_node) {
		ret = arm_smmu_device_dt_probe(pdev, smmu);
	} else {
		ret = arm_smmu_device_acpi_probe(pdev, smmu);
		if (ret == -ENODEV)
			return ret;
	}

	/* Set bypass mode according to firmware probing result */
	bypass = !!ret;

	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
		dev_err(dev, "MMIO region too small (%pr)\n", res);
		return -EINVAL;
	}
	ioaddr = res->start;

	/*
	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
	 * the PMCG registers which are reserved by the PMU driver.
	 */
	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);

	if (arm_smmu_resource_size(smmu) > SZ_64K) {
		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
					       ARM_SMMU_REG_SZ);
		if (IS_ERR(smmu->page1))
			return PTR_ERR(smmu->page1);
	} else {
		smmu->page1 = smmu->base;
	}

	/* Interrupt lines */

	irq = platform_get_irq_byname_optional(pdev, "combined");
	if (irq > 0)
		smmu->combined_irq = irq;
	else {
		irq = platform_get_irq_byname_optional(pdev, "eventq");
		if (irq > 0)
			smmu->evtq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "priq");
		if (irq > 0)
			smmu->priq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "gerror");
		if (irq > 0)
			smmu->gerr_irq = irq;
	}
	/* Probe the h/w */
	ret = arm_smmu_device_hw_probe(smmu);
	if (ret)
		return ret;

	/* Initialise in-memory data structures */
	ret = arm_smmu_init_structures(smmu);
	if (ret)
		return ret;

	/* Record our private device structure */
	platform_set_drvdata(pdev, smmu);

	/* Check for RMRs and install bypass STEs if any */
	arm_smmu_rmr_install_bypass_ste(smmu);

	/* Reset the device */
	ret = arm_smmu_device_reset(smmu, bypass);
	if (ret)
		return ret;

	/* And we're up. Go go go! */
	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
				     "smmu3.%pa", &ioaddr);
	if (ret)
		return ret;

	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
	if (ret) {
		dev_err(dev, "Failed to register iommu\n");
		iommu_device_sysfs_remove(&smmu->iommu);
		return ret;
	}

	return 0;
}

static void arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);
	arm_smmu_device_disable(smmu);
	iopf_queue_free(smmu->evtq.iopf);
	ida_destroy(&smmu->vmid_map);
}

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_device_disable(smmu);
}

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

static void arm_smmu_driver_unregister(struct platform_driver *drv)
{
	arm_smmu_sva_notifier_synchronize();
	platform_driver_unregister(drv);
}

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove_new = arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_driver(arm_smmu_driver, platform_driver_register,
	      arm_smmu_driver_unregister);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");
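/*
 * For illustration only: iommu_device_sysfs_add() above names the instance
 * "smmu3.%pa", i.e. "smmu3." followed by the physical base address of the
 * register region, so a successfully probed SMMU appears in sysfs roughly as
 * (the address below is a made-up example):
 *
 *	/sys/class/iommu/smmu3.0x000000002b400000/
 */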