// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>
#include <kunit/visibility.h>
#include <uapi/linux/iommufd.h>

#include "arm-smmu-v3.h"
#include "../../dma-iommu.h"

static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");

static struct iommu_ops arm_smmu_ops;
static struct iommu_dirty_ops arm_smmu_dirty_ops;

enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};

#define NUM_ENTRY_QWORDS 8
static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));

static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
	{ 0, NULL},
};

static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
				    struct arm_smmu_device *smmu, u32 flags);
static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
					  arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				   arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

/* Low-level queue manipulation functions */
static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
{
	u32 space, prod, cons;

	prod = Q_IDX(q, q->prod);
	cons = Q_IDX(q, q->cons);

	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
		space = (1 << q->max_n_shift) - (prod - cons);
	else
		space = cons - prod;

	return space >= n;
}

static bool queue_full(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}
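
/*
 * Illustrative example of the prod/cons arithmetic (values are arbitrary,
 * not taken from the spec): with max_n_shift == 3 the queue has 8 slots,
 * Q_IDX() extracts the index bits [2:0] and Q_WRP() the wrap bit above
 * them.  If prod == 0b1010 and cons == 0b0110 the wrap bits differ, so
 * queue_has_space() computes space = cons_idx - prod_idx = 6 - 2 = 4 free
 * slots.  queue_full() is true only when the indices match and the wrap
 * bits differ (e.g. prod == 0b1110, cons == 0b0110), and queue_empty()
 * when both index and wrap bits match.
 */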
static bool queue_empty(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}

static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}

static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}

static void queue_inc_cons(struct arm_smmu_ll_queue *q)
{
	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;

	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}

static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
{
	struct arm_smmu_ll_queue *llq = &q->llq;

	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
		return;

	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
		    Q_IDX(llq, llq->cons);
	queue_sync_cons_out(q);
}

static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
	u32 prod;
	int ret = 0;

	/*
	 * We can't use the _relaxed() variant here, as we must prevent
	 * speculative reads of the queue before we have determined that
	 * prod has indeed moved.
	 */
	prod = readl(q->prod_reg);

	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
		ret = -EOVERFLOW;

	q->llq.prod = prod;
	return ret;
}

static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
{
	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;

	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
}

static void queue_poll_init(struct arm_smmu_device *smmu,
			    struct arm_smmu_queue_poll *qp)
{
	qp->delay = 1;
	qp->spin_cnt = 0;
	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
}

static int queue_poll(struct arm_smmu_queue_poll *qp)
{
	if (ktime_compare(ktime_get(), qp->timeout) > 0)
		return -ETIMEDOUT;

	if (qp->wfe) {
		wfe();
	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
		cpu_relax();
	} else {
		udelay(qp->delay);
		qp->delay *= 2;
		qp->spin_cnt = 0;
	}

	return 0;
}

static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = cpu_to_le64(*src++);
}

static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = le64_to_cpu(*src++);
}

static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
	if (queue_empty(&q->llq))
		return -EAGAIN;

	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
	queue_inc_cons(&q->llq);
	queue_sync_cons_out(q);
	return 0;
}

/* High-level queue accessors */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		break;
	case CMDQ_OP_CFGI_CD:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		fallthrough;
	case CMDQ_OP_TLBI_EL2_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_NH_ALL:
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_TLBI_EL2_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_RESUME:
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
		break;
	case CMDQ_OP_CMD_SYNC:
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
					       struct arm_smmu_cmdq_ent *ent)
{
	struct arm_smmu_cmdq *cmdq = NULL;

	if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
		cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);

	return cmdq ?: &smmu->cmdq;
}

static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
					     struct arm_smmu_cmdq *cmdq)
{
	if (cmdq == &smmu->cmdq)
		return false;

	return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
}

static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
					 struct arm_smmu_cmdq *cmdq, u32 prod)
{
	struct arm_smmu_queue *q = &cmdq->q;
	struct arm_smmu_cmdq_ent ent = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	/*
	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
	 * payload, so the write will zero the entire command on that platform.
	 */
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
				   q->ent_dwords * 8;
	}

	arm_smmu_cmdq_build_cmd(cmd, &ent);
	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
}

void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
			      struct arm_smmu_cmdq *cmdq)
{
	static const char * const cerror_str[] = {
		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
	};
	struct arm_smmu_queue *q = &cmdq->q;

	int i;
	u64 cmd[CMDQ_ENT_DWORDS];
	u32 cons = readl_relaxed(q->cons_reg);
	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
	struct arm_smmu_cmdq_ent cmd_sync = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");

	switch (idx) {
	case CMDQ_ERR_CERROR_ABT_IDX:
		dev_err(smmu->dev, "retrying command fetch\n");
		return;
	case CMDQ_ERR_CERROR_NONE_IDX:
		return;
	case CMDQ_ERR_CERROR_ATC_INV_IDX:
		/*
		 * ATC Invalidation Completion timeout. CONS is still pointing
		 * at the CMD_SYNC. Attempt to complete other pending commands
		 * by repeating the CMD_SYNC, though we might well end up back
		 * here since the ATC invalidation may still be pending.
		 */
		return;
	case CMDQ_ERR_CERROR_ILL_IDX:
	default:
		break;
	}

	/*
	 * We may have concurrent producers, so we need to be careful
	 * not to touch any of the shadow cmdq state.
	 */
	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
	dev_err(smmu->dev, "skipping command in error state:\n");
	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);

	/* Convert the erroneous command into a CMD_SYNC */
	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);

	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}

static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
}

/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}

static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}

static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
	if (atomic_read(&cmdq->lock) == 1)
		return false;

	arm_smmu_cmdq_shared_unlock(cmdq);
	return true;
}

#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})

#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	local_irq_restore(flags);					\
})
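
/*
 * Rough usage sketch (illustrative only; arm_smmu_cmdq_issue_cmdlist() and
 * arm_smmu_cmdq_poll_until_not_full() below are the real callers):
 *
 *	CMD_SYNC inserter			cons updater
 *
 *	shared_lock();				if (exclusive_trylock_irqsave())
 *	...wait for CMD_SYNC completion...		refresh cmdq->q.llq.cons
 *	if (!shared_tryunlock()) {			exclusive_unlock_irqrestore();
 *		update cmdq->q.llq.cons;
 *		shared_unlock();
 *	}
 *
 * Shared holders are CMD_SYNC waiters; the exclusive path is only used to
 * refresh the cached cons value from the hardware without racing with them.
 */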

/*
 * Command queue insertion.
 * This is made fiddly by our attempts to achieve some sort of scalability
 * since there is one queue shared amongst all of the CPUs in the system. If
 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 * then you'll *love* this monstrosity.
 *
 * The basic idea is to split the queue up into ranges of commands that are
 * owned by a given CPU; the owner may not have written all of the commands
 * itself, but is responsible for advancing the hardware prod pointer when
 * the time comes. The algorithm is roughly:
 *
 *	1. Allocate some space in the queue. At this point we also discover
 *	   whether the head of the queue is currently owned by another CPU,
 *	   or whether we are the owner.
 *
 *	2. Write our commands into our allocated slots in the queue.
 *
 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 *
 *	4. If we are an owner:
 *		a. Wait for the previous owner to finish.
 *		b. Mark the queue head as unowned, which tells us the range
 *		   that we are responsible for publishing.
 *		c. Wait for all commands in our owned range to become valid.
 *		d. Advance the hardware prod pointer.
 *		e. Tell the next owner we've finished.
 *
 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
 *	   owner), then we need to stick around until it has completed:
 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 *		   to clear the first 4 bytes.
 *		b. Otherwise, we spin waiting for the hardware cons pointer to
 *		   advance past our command.
 *
 * The devil is in the details, particularly the use of locking for handling
 * SYNC completion and freeing up space in the queue before we think that it is
 * full.
 */
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}

/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}

/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}

/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_cmdq *cmdq,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(cmdq->q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}

/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_cmdq *cmdq,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}

/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_cmdq *cmdq,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(cmdq->q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 *	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 *			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}

static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
					 struct arm_smmu_cmdq *cmdq,
					 struct arm_smmu_ll_queue *llq)
{
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
	    !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);

	return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
}

static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
					u32 prod, int n)
{
	int i;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= prod,
	};

	for (i = 0; i < n; ++i) {
		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];

		prod = queue_inc_prod_n(&llq, i);
		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
	}
}

/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 */
int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
				bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
	unsigned long flags;
	bool owner;
	struct arm_smmu_ll_queue llq, head;
	int ret = 0;

	llq.max_n_shift = cmdq->q.llq.max_n_shift;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
			local_irq_save(flags);
		}

		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync) |
					     CMDQ_PROD_OWNED_FLAG;

		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		llq.val = old;
	} while (1);
	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;

	/*
	 * 2. Write our commands into the queue
	 * Dependency ordering from the cmpxchg() loop above.
	 */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);

		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
		arm_smmu_cmdq_shared_lock(cmdq);
	}

	/* 3. Mark our slots as valid, ensuring commands are visible first */
	dma_wmb();
	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);

	/* 4. If we are the owner, take control of the SMMU hardware */
	if (owner) {
		/* a. Wait for previous owner to finish */
		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);

		/* b. Stop gathering work by clearing the owned flag */
		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
						   &cmdq->q.llq.atomic.prod);
		prod &= ~CMDQ_PROD_OWNED_FLAG;

		/*
		 * c. Wait for any gathered work to be written to the queue.
		 * Note that we read our own entries so that we have the control
		 * dependency required by (d).
		 */
		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);

		/*
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
		writel_relaxed(prod, cmdq->q.prod_reg);

		/*
		 * e. Tell the next owner we're done
		 * Make sure we've updated the hardware first, so that we don't
		 * race to update prod and potentially move it backwards.
		 */
		atomic_set_release(&cmdq->owner_prod, prod);
	}

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
			arm_smmu_cmdq_shared_unlock(cmdq);
		}
	}

	local_irq_restore(flags);
	return ret;
}

static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				     struct arm_smmu_cmdq_ent *ent,
				     bool sync)
{
	u64 cmd[CMDQ_ENT_DWORDS];

	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 ent->opcode);
		return -EINVAL;
	}

	return arm_smmu_cmdq_issue_cmdlist(
		smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
}

static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				   struct arm_smmu_cmdq_ent *ent)
{
	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
}

static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
					     struct arm_smmu_cmdq_ent *ent)
{
	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}

static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
				     struct arm_smmu_cmdq_batch *cmds,
				     struct arm_smmu_cmdq_ent *ent)
{
	cmds->num = 0;
	cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
}

static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
				    struct arm_smmu_cmdq_batch *cmds,
				    struct arm_smmu_cmdq_ent *cmd)
{
	bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
	bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
			  (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
	int index;

	if (force_sync || unsupported_cmd) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
					    cmds->num, true);
		arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
	}

	if (cmds->num == CMDQ_BATCH_ENTRIES) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
					    cmds->num, false);
		arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
	}

	index = cmds->num * CMDQ_ENT_DWORDS;
	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 cmd->opcode);
		return;
	}

	cmds->num++;
}

static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
					   cmds->num, true);
}
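
/*
 * Typical batch usage (sketch only; arm_smmu_sync_cd() further down is a
 * real example):
 *
 *	struct arm_smmu_cmdq_batch cmds;
 *	struct arm_smmu_cmdq_ent cmd = { .opcode = ... };
 *
 *	arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
 *	for each object to invalidate:
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * batch_add() flushes automatically when the batch fills up or when a
 * command isn't supported by the selected command queue, and
 * batch_submit() issues whatever remains followed by a CMD_SYNC.
 */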

static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
				   struct iommu_page_response *resp)
{
	struct arm_smmu_cmdq_ent cmd = {0};
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	int sid = master->streams[0].id;

	if (WARN_ON(!master->stall_enabled))
		return;

	cmd.opcode		= CMDQ_OP_RESUME;
	cmd.resume.sid		= sid;
	cmd.resume.stag		= resp->grpid;
	switch (resp->code) {
	case IOMMU_PAGE_RESP_INVALID:
	case IOMMU_PAGE_RESP_FAILURE:
		cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
		break;
	case IOMMU_PAGE_RESP_SUCCESS:
		cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
		break;
	default:
		break;
	}

	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
	/*
	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
	 * RESUME consumption guarantees that the stalled transaction will be
	 * terminated... at some point in the future. PRI_RESP is fire and
	 * forget.
	 */
}

/* Context descriptor manipulation functions */
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
		.tlbi.asid = asid,
	};

	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}

/*
 * Based on the value of ent report which bits of the STE the HW will access. It
 * would be nice if this was complete according to the spec, but minimally it
 * has to capture the bits this driver uses.
 */
VISIBLE_IF_KUNIT
void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
{
	unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));

	used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
	if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
		return;

	used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);

	/* S1 translates */
	if (cfg & BIT(0)) {
		used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
					    STRTAB_STE_0_S1CTXPTR_MASK |
					    STRTAB_STE_0_S1CDMAX);
		used_bits[1] |=
			cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
				    STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
				    STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
				    STRTAB_STE_1_EATS);
		used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);

		/*
		 * See 13.5 Summary of attribute/permission configuration fields
		 * for the SHCFG behavior.
		 */
		if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
		    STRTAB_STE_1_S1DSS_BYPASS)
			used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
	}

	/* S2 translates */
	if (cfg & BIT(1)) {
		used_bits[1] |=
			cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
				    STRTAB_STE_1_SHCFG);
		used_bits[2] |=
			cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
				    STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
				    STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S |
				    STRTAB_STE_2_S2R);
		used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
	}

	if (cfg == STRTAB_STE_0_CFG_BYPASS)
		used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);

/*
 * Figure out if we can do a hitless update of entry to become target. Returns a
 * bit mask where 1 indicates that qword needs to be set disruptively.
 * unused_update is an intermediate value of entry that has unused bits set to
 * their new values.
 */
static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
				    const __le64 *entry, const __le64 *target,
				    __le64 *unused_update)
{
	__le64 target_used[NUM_ENTRY_QWORDS] = {};
	__le64 cur_used[NUM_ENTRY_QWORDS] = {};
	u8 used_qword_diff = 0;
	unsigned int i;

	writer->ops->get_used(entry, cur_used);
	writer->ops->get_used(target, target_used);

	for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
		/*
		 * Check that masks are up to date, the make functions are not
		 * allowed to set a bit to 1 if the used function doesn't say it
		 * is used.
		 */
		WARN_ON_ONCE(target[i] & ~target_used[i]);

		/* Bits can change because they are not currently being used */
		unused_update[i] = (entry[i] & cur_used[i]) |
				   (target[i] & ~cur_used[i]);
		/*
		 * Each bit indicates that a used bit in a qword needs to be
		 * changed after unused_update is applied.
		 */
		if ((unused_update[i] & target_used[i]) != target[i])
			used_qword_diff |= 1 << i;
	}
	return used_qword_diff;
}

static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
		      const __le64 *target, unsigned int start,
		      unsigned int len)
{
	bool changed = false;
	unsigned int i;

	for (i = start; len != 0; len--, i++) {
		if (entry[i] != target[i]) {
			WRITE_ONCE(entry[i], target[i]);
			changed = true;
		}
	}

	if (changed)
		writer->ops->sync(writer);
	return changed;
}

/*
 * Update the STE/CD to the target configuration. The transition from the
 * current entry to the target entry takes place over multiple steps that
 * attempt to make the transition hitless if possible. This function takes care
 * not to create a situation where the HW can perceive a corrupted entry. HW is
 * only required to have a 64 bit atomicity with stores from the CPU, while
 * entries are many 64 bit values big.
 *
 * The difference between the current value and the target value is analyzed to
 * determine which of three updates are required - disruptive, hitless or no
 * change.
 *
 * In the most general disruptive case we can make any update in three steps:
 *  - Disrupting the entry (V=0)
 *  - Fill now unused qwords, except qword 0 which contains V
 *  - Make qword 0 have the final value and valid (V=1) with a single 64
 *    bit store
 *
 * However this disrupts the HW while it is happening. There are several
 * interesting cases where a STE/CD can be updated without disturbing the HW
 * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
 * because the used bits don't intersect. We can detect this by calculating how
 * many 64 bit values need update after adjusting the unused bits and skip the
 * V=0 process. This relies on the IGNORED behavior described in the
 * specification.
 */
VISIBLE_IF_KUNIT
void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
			  const __le64 *target)
{
	__le64 unused_update[NUM_ENTRY_QWORDS];
	u8 used_qword_diff;

	used_qword_diff =
		arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
	if (hweight8(used_qword_diff) == 1) {
		/*
		 * Only one qword needs its used bits to be changed. This is a
		 * hitless update, update all bits the current STE/CD is
		 * ignoring to their new values, then update a single "critical
		 * qword" to change the STE/CD and finally 0 out any bits that
		 * are now unused in the target configuration.
		 */
		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;

		/*
		 * Skip writing unused bits in the critical qword since we'll be
		 * writing it in the next step anyways. This can save a sync
		 * when the only change is in that qword.
		 */
		unused_update[critical_qword_index] =
			entry[critical_qword_index];
		entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
		entry_set(writer, entry, target, critical_qword_index, 1);
		entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
	} else if (used_qword_diff) {
		/*
		 * At least two qwords need their inuse bits to be changed. This
		 * requires a breaking update, zero the V bit, write all qwords
		 * but 0, then set qword 0
		 */
		unused_update[0] = 0;
		entry_set(writer, entry, unused_update, 0, 1);
		entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
		entry_set(writer, entry, target, 0, 1);
	} else {
		/*
		 * No inuse bit changed. Sanity check that all unused bits are 0
		 * in the entry. The target was already sanity checked by
		 * compute_qword_diff().
		 */
		WARN_ON_ONCE(
			entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
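
/*
 * Worked example (illustrative, based on the used-bits rules above):
 * toggling only S1DSS in a cdtable STE changes used bits in qword 1 alone,
 * so used_qword_diff has a single bit set and the update is hitless (one
 * critical-qword store plus syncs). Switching an STE from bypass to S1
 * translation changes used bits in at least qword 0 and qword 1, so the
 * breaking V=0 / fill / V=1 sequence is used instead.
 */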

static void arm_smmu_sync_cd(struct arm_smmu_master *master,
			     int ssid, bool leaf)
{
	size_t i;
	struct arm_smmu_cmdq_batch cmds;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
		.cfgi	= {
			.ssid	= ssid,
			.leaf	= leaf,
		},
	};

	arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
	for (i = 0; i < master->num_streams; i++) {
		cmd.cfgi.sid = master->streams[i].id;
		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
	}

	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}

static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
				      dma_addr_t l2ptr_dma)
{
	u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V;

	/* The HW has 64 bit atomicity with stores to the L2 CD table */
	WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
}

static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src)
{
	return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK;
}

struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
					u32 ssid)
{
	struct arm_smmu_cdtab_l2 *l2;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	if (!arm_smmu_cdtab_allocated(cd_table))
		return NULL;

	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
		return &cd_table->linear.table[ssid];

	l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
	if (!l2)
		return NULL;
	return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)];
}

static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
						 u32 ssid)
{
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
	struct arm_smmu_device *smmu = master->smmu;

	might_sleep();
	iommu_group_mutex_assert(master->dev);

	if (!arm_smmu_cdtab_allocated(cd_table)) {
		if (arm_smmu_alloc_cd_tables(master))
			return NULL;
	}

	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
		unsigned int idx = arm_smmu_cdtab_l1_idx(ssid);
		struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx];

		if (!*l2ptr) {
			dma_addr_t l2ptr_dma;

			*l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr),
						    &l2ptr_dma, GFP_KERNEL);
			if (!*l2ptr)
				return NULL;

			arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx],
						  l2ptr_dma);
			/* An invalid L1CD can be cached */
			arm_smmu_sync_cd(master, ssid, false);
		}
	}
	return arm_smmu_get_cd_ptr(master, ssid);
}

struct arm_smmu_cd_writer {
	struct arm_smmu_entry_writer writer;
	unsigned int ssid;
};

VISIBLE_IF_KUNIT
void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
{
	used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
	if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
		return;
	memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));

	/*
	 * If EPD0 is set by the make function it means
	 * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
	 */
	if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
		used_bits[0] &= ~cpu_to_le64(
			CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
			CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
			CTXDESC_CD_0_TCR_SH0);
		used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);

static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
	struct arm_smmu_cd_writer *cd_writer =
		container_of(writer, struct arm_smmu_cd_writer, writer);

	arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
}

static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
	.sync = arm_smmu_cd_writer_sync_entry,
	.get_used = arm_smmu_get_cd_used,
};

void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
			     struct arm_smmu_cd *cdptr,
			     const struct arm_smmu_cd *target)
{
	bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
	bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
	struct arm_smmu_cd_writer cd_writer = {
		.writer = {
			.ops = &arm_smmu_cd_writer_ops,
			.master = master,
		},
		.ssid = ssid,
	};

	if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) {
		if (cur_valid)
			master->cd_table.used_ssids--;
		else
			master->cd_table.used_ssids++;
	}

	arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
}

void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
			 struct arm_smmu_master *master,
			 struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
	const struct io_pgtable_cfg *pgtbl_cfg =
		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
		&pgtbl_cfg->arm_lpae_s1_cfg.tcr;

	memset(target, 0, sizeof(*target));

	target->data[0] = cpu_to_le64(
		FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
		FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
		FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
		FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
		FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
#ifdef __BIG_ENDIAN
		CTXDESC_CD_0_ENDI |
#endif
		CTXDESC_CD_0_TCR_EPD1 |
		CTXDESC_CD_0_V |
		FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
		CTXDESC_CD_0_AA64 |
		(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
		CTXDESC_CD_0_R |
		CTXDESC_CD_0_A |
		CTXDESC_CD_0_ASET |
		FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
		);

	/* To enable dirty flag update, set both Access flag and dirty state update */
	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
		target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
					       CTXDESC_CD_0_TCR_HD);

	target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
				      CTXDESC_CD_1_TTB0_MASK);
	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);

void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
{
	struct arm_smmu_cd target = {};
	struct arm_smmu_cd *cdptr;

	if (!arm_smmu_cdtab_allocated(&master->cd_table))
		return;
	cdptr = arm_smmu_get_cd_ptr(master, ssid);
	if (WARN_ON(!cdptr))
		return;
	arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
}

static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
{
	int ret;
	size_t l1size;
	size_t max_contexts;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	cd_table->s1cdmax = master->ssid_bits;
	max_contexts = 1 << cd_table->s1cdmax;

	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
	    max_contexts <= CTXDESC_L2_ENTRIES) {
		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
		cd_table->linear.num_ents = max_contexts;

		l1size = max_contexts * sizeof(struct arm_smmu_cd);
		cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size,
							    &cd_table->cdtab_dma,
							    GFP_KERNEL);
		if (!cd_table->linear.table)
			return -ENOMEM;
	} else {
		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
		cd_table->l2.num_l1_ents =
			DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES);

		cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents,
					      sizeof(*cd_table->l2.l2ptrs),
					      GFP_KERNEL);
		if (!cd_table->l2.l2ptrs)
			return -ENOMEM;

		l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1);
		cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
							&cd_table->cdtab_dma,
							GFP_KERNEL);
		if (!cd_table->l2.l1tab) {
			ret = -ENOMEM;
			goto err_free_l2ptrs;
		}
	}
	return 0;

err_free_l2ptrs:
	kfree(cd_table->l2.l2ptrs);
	cd_table->l2.l2ptrs = NULL;
	return ret;
}

static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
{
	int i;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
		for (i = 0; i < cd_table->l2.num_l1_ents; i++) {
			if (!cd_table->l2.l2ptrs[i])
				continue;

			dma_free_coherent(smmu->dev,
					  sizeof(*cd_table->l2.l2ptrs[i]),
					  cd_table->l2.l2ptrs[i],
					  arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i]));
		}
		kfree(cd_table->l2.l2ptrs);

		dma_free_coherent(smmu->dev,
				  cd_table->l2.num_l1_ents *
					  sizeof(struct arm_smmu_cdtab_l1),
				  cd_table->l2.l1tab, cd_table->cdtab_dma);
	} else {
		dma_free_coherent(smmu->dev,
				  cd_table->linear.num_ents *
					  sizeof(struct arm_smmu_cd),
				  cd_table->linear.table, cd_table->cdtab_dma);
	}
}

/* Stream table manipulation functions */
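/*
 * Note on the two-level stream table (illustrative summary of the code
 * below): each L1 descriptor written by arm_smmu_write_strtab_l1_desc()
 * covers 2^STRTAB_SPLIT contiguous SIDs and points at an arm_smmu_strtab_l2
 * block of STEs, which arm_smmu_init_l2_strtab() allocates on demand and
 * pre-fills with abort STEs before publishing the L1 descriptor.
 */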
static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst,
					  dma_addr_t l2ptr_dma)
{
	u64 val = 0;

	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1);
	val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;

	/* The HW has 64 bit atomicity with stores to the L2 STE table */
	WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
}

struct arm_smmu_ste_writer {
	struct arm_smmu_entry_writer writer;
	u32 sid;
};

static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
	struct arm_smmu_ste_writer *ste_writer =
		container_of(writer, struct arm_smmu_ste_writer, writer);
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_STE,
		.cfgi	= {
			.sid	= ste_writer->sid,
			.leaf	= true,
		},
	};

	arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
}

static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
	.sync = arm_smmu_ste_writer_sync_entry,
	.get_used = arm_smmu_get_ste_used,
};

static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
			       struct arm_smmu_ste *ste,
			       const struct arm_smmu_ste *target)
{
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ste_writer ste_writer = {
		.writer = {
			.ops = &arm_smmu_ste_writer_ops,
			.master = master,
		},
		.sid = sid,
	};

	arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);

	/* It's likely that we'll want to use the new STE soon */
	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
		struct arm_smmu_cmdq_ent
			prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
					 .prefetch = {
						 .sid = sid,
					 } };

		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
	}
}

void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
{
	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);

VISIBLE_IF_KUNIT
void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
			      struct arm_smmu_ste *target)
{
	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));

	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
		target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
							 STRTAB_STE_1_SHCFG_INCOMING));
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);

VISIBLE_IF_KUNIT
void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
			       struct arm_smmu_master *master, bool ats_enabled,
			       unsigned int s1dss)
{
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
	struct arm_smmu_device *smmu = master->smmu;

	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
		FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
		(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
		FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));

	target->data[1] = cpu_to_le64(
		FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
		FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
		FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
		FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
		((smmu->features & ARM_SMMU_FEAT_STALLS &&
		  !master->stall_enabled) ?
			 STRTAB_STE_1_S1STALLD :
			 0) |
		FIELD_PREP(STRTAB_STE_1_EATS,
			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));

	if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) &&
	    s1dss == STRTAB_STE_1_S1DSS_BYPASS)
		target->data[1] |= cpu_to_le64(FIELD_PREP(
			STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));

	if (smmu->features & ARM_SMMU_FEAT_E2H) {
		/*
		 * To support BTM the streamworld needs to match the
		 * configuration of the CPU so that the ASID broadcasts are
		 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
		 * or NS-EL1 (guest). Since an SVA domain can be installed in a
		 * PASID this should always use a BTM compatible configuration
		 * if the HW supports it.
		 */
		target->data[1] |= cpu_to_le64(
			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
	} else {
		target->data[1] |= cpu_to_le64(
			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));

		/*
		 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
		 * arm_smmu_domain_alloc_id()
		 */
		target->data[2] =
			cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);

void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
				 struct arm_smmu_master *master,
				 struct arm_smmu_domain *smmu_domain,
				 bool ats_enabled)
{
	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
	const struct io_pgtable_cfg *pgtbl_cfg =
		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
		&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	u64 vtcr_val;
	struct arm_smmu_device *smmu = master->smmu;

	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));

	target->data[1] = cpu_to_le64(
		FIELD_PREP(STRTAB_STE_1_EATS,
			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));

	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
		target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB);
	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
		target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
							  STRTAB_STE_1_SHCFG_INCOMING));

	vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
	target->data[2] = cpu_to_le64(
		FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
		FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
		STRTAB_STE_2_S2AA64 |
#ifdef __BIG_ENDIAN
		STRTAB_STE_2_S2ENDI |
#endif
		STRTAB_STE_2_S2PTW |
		(master->stall_enabled ? STRTAB_STE_2_S2S : 0) |
		STRTAB_STE_2_S2R);

	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
				      STRTAB_STE_3_S2TTB_MASK);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);

/*
 * This can safely directly manipulate the STE memory without a sync sequence
 * because the STE table has not been installed in the SMMU yet.
 */
static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
				       unsigned int nent)
{
	unsigned int i;

	for (i = 0; i < nent; ++i) {
		arm_smmu_make_abort_ste(strtab);
		strtab++;
	}
}

static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
	dma_addr_t l2ptr_dma;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	struct arm_smmu_strtab_l2 **l2table;

	l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)];
	if (*l2table)
		return 0;

	*l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table),
				       &l2ptr_dma, GFP_KERNEL);
	if (!*l2table) {
		dev_err(smmu->dev,
			"failed to allocate l2 stream table for SID %u\n",
			sid);
		return -ENOMEM;
	}

	arm_smmu_init_initial_stes((*l2table)->stes,
				   ARRAY_SIZE((*l2table)->stes));
	arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)],
				      l2ptr_dma);
	return 0;
}

static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
{
	struct arm_smmu_stream *stream_rhs =
		rb_entry(rhs, struct arm_smmu_stream, node);
	const u32 *sid_lhs = lhs;

	if (*sid_lhs < stream_rhs->id)
		return -1;
	if (*sid_lhs > stream_rhs->id)
		return 1;
	return 0;
}

static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
				     const struct rb_node *rhs)
{
	return arm_smmu_streams_cmp_key(
		&rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
}

static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
{
	struct rb_node *node;

	lockdep_assert_held(&smmu->streams_mutex);

	node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
	if (!node)
		return NULL;
	return rb_entry(node, struct arm_smmu_stream, node)->master;
}

/* IRQ and event handlers */
static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
{
	int ret = 0;
	u32 perm = 0;
	struct arm_smmu_master *master;
	bool ssid_valid = evt[0] & EVTQ_0_SSV;
	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
	struct iopf_fault fault_evt = { };
	struct iommu_fault *flt = &fault_evt.fault;

	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
	case EVT_ID_TRANSLATION_FAULT:
	case EVT_ID_ADDR_SIZE_FAULT:
	case EVT_ID_ACCESS_FAULT:
	case EVT_ID_PERMISSION_FAULT:
		break;
	default:
		return -EOPNOTSUPP;
	}

	if (!(evt[1] & EVTQ_1_STALL))
		return -EOPNOTSUPP;

	if (evt[1] & EVTQ_1_RnW)
		perm |= IOMMU_FAULT_PERM_READ;
	else
		perm |= IOMMU_FAULT_PERM_WRITE;

	if (evt[1] & EVTQ_1_InD)
		perm |= IOMMU_FAULT_PERM_EXEC;

	if (evt[1] & EVTQ_1_PnU)
		perm |= IOMMU_FAULT_PERM_PRIV;

	flt->type = IOMMU_FAULT_PAGE_REQ;
	flt->prm = (struct iommu_fault_page_request) {
		.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
		.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
		.perm = perm,
		.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
	};

	if (ssid_valid) {
		flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
		flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
	}

	mutex_lock(&smmu->streams_mutex);
	master = arm_smmu_find_master(smmu, sid);
	if (!master) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = iommu_report_device_fault(master->dev, &fault_evt);
out_unlock:
	mutex_unlock(&smmu->streams_mutex);
	return ret;
}
mutex_unlock(&smmu->streams_mutex); 1819 return ret; 1820 } 1821 1822 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) 1823 { 1824 int i, ret; 1825 struct arm_smmu_device *smmu = dev; 1826 struct arm_smmu_queue *q = &smmu->evtq.q; 1827 struct arm_smmu_ll_queue *llq = &q->llq; 1828 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, 1829 DEFAULT_RATELIMIT_BURST); 1830 u64 evt[EVTQ_ENT_DWORDS]; 1831 1832 do { 1833 while (!queue_remove_raw(q, evt)) { 1834 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); 1835 1836 ret = arm_smmu_handle_evt(smmu, evt); 1837 if (!ret || !__ratelimit(&rs)) 1838 continue; 1839 1840 dev_info(smmu->dev, "event 0x%02x received:\n", id); 1841 for (i = 0; i < ARRAY_SIZE(evt); ++i) 1842 dev_info(smmu->dev, "\t0x%016llx\n", 1843 (unsigned long long)evt[i]); 1844 1845 cond_resched(); 1846 } 1847 1848 /* 1849 * Not much we can do on overflow, so scream and pretend we're 1850 * trying harder. 1851 */ 1852 if (queue_sync_prod_in(q) == -EOVERFLOW) 1853 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n"); 1854 } while (!queue_empty(llq)); 1855 1856 /* Sync our overflow flag, as we believe we're up to speed */ 1857 queue_sync_cons_ovf(q); 1858 return IRQ_HANDLED; 1859 } 1860 1861 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt) 1862 { 1863 u32 sid, ssid; 1864 u16 grpid; 1865 bool ssv, last; 1866 1867 sid = FIELD_GET(PRIQ_0_SID, evt[0]); 1868 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]); 1869 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID; 1870 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]); 1871 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]); 1872 1873 dev_info(smmu->dev, "unexpected PRI request received:\n"); 1874 dev_info(smmu->dev, 1875 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n", 1876 sid, ssid, grpid, last ? "L" : "", 1877 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un", 1878 evt[0] & PRIQ_0_PERM_READ ? "R" : "", 1879 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "", 1880 evt[0] & PRIQ_0_PERM_EXEC ? 
"X" : "", 1881 evt[1] & PRIQ_1_ADDR_MASK); 1882 1883 if (last) { 1884 struct arm_smmu_cmdq_ent cmd = { 1885 .opcode = CMDQ_OP_PRI_RESP, 1886 .substream_valid = ssv, 1887 .pri = { 1888 .sid = sid, 1889 .ssid = ssid, 1890 .grpid = grpid, 1891 .resp = PRI_RESP_DENY, 1892 }, 1893 }; 1894 1895 arm_smmu_cmdq_issue_cmd(smmu, &cmd); 1896 } 1897 } 1898 1899 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) 1900 { 1901 struct arm_smmu_device *smmu = dev; 1902 struct arm_smmu_queue *q = &smmu->priq.q; 1903 struct arm_smmu_ll_queue *llq = &q->llq; 1904 u64 evt[PRIQ_ENT_DWORDS]; 1905 1906 do { 1907 while (!queue_remove_raw(q, evt)) 1908 arm_smmu_handle_ppr(smmu, evt); 1909 1910 if (queue_sync_prod_in(q) == -EOVERFLOW) 1911 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n"); 1912 } while (!queue_empty(llq)); 1913 1914 /* Sync our overflow flag, as we believe we're up to speed */ 1915 queue_sync_cons_ovf(q); 1916 return IRQ_HANDLED; 1917 } 1918 1919 static int arm_smmu_device_disable(struct arm_smmu_device *smmu); 1920 1921 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) 1922 { 1923 u32 gerror, gerrorn, active; 1924 struct arm_smmu_device *smmu = dev; 1925 1926 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR); 1927 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN); 1928 1929 active = gerror ^ gerrorn; 1930 if (!(active & GERROR_ERR_MASK)) 1931 return IRQ_NONE; /* No errors pending */ 1932 1933 dev_warn(smmu->dev, 1934 "unexpected global error reported (0x%08x), this could be serious\n", 1935 active); 1936 1937 if (active & GERROR_SFM_ERR) { 1938 dev_err(smmu->dev, "device has entered Service Failure Mode!\n"); 1939 arm_smmu_device_disable(smmu); 1940 } 1941 1942 if (active & GERROR_MSI_GERROR_ABT_ERR) 1943 dev_warn(smmu->dev, "GERROR MSI write aborted\n"); 1944 1945 if (active & GERROR_MSI_PRIQ_ABT_ERR) 1946 dev_warn(smmu->dev, "PRIQ MSI write aborted\n"); 1947 1948 if (active & GERROR_MSI_EVTQ_ABT_ERR) 1949 dev_warn(smmu->dev, "EVTQ MSI write aborted\n"); 1950 1951 if (active & GERROR_MSI_CMDQ_ABT_ERR) 1952 dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); 1953 1954 if (active & GERROR_PRIQ_ABT_ERR) 1955 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); 1956 1957 if (active & GERROR_EVTQ_ABT_ERR) 1958 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n"); 1959 1960 if (active & GERROR_CMDQ_ERR) 1961 arm_smmu_cmdq_skip_err(smmu); 1962 1963 writel(gerror, smmu->base + ARM_SMMU_GERRORN); 1964 return IRQ_HANDLED; 1965 } 1966 1967 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev) 1968 { 1969 struct arm_smmu_device *smmu = dev; 1970 1971 arm_smmu_evtq_thread(irq, dev); 1972 if (smmu->features & ARM_SMMU_FEAT_PRI) 1973 arm_smmu_priq_thread(irq, dev); 1974 1975 return IRQ_HANDLED; 1976 } 1977 1978 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) 1979 { 1980 arm_smmu_gerror_handler(irq, dev); 1981 return IRQ_WAKE_THREAD; 1982 } 1983 1984 static void 1985 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, 1986 struct arm_smmu_cmdq_ent *cmd) 1987 { 1988 size_t log2_span; 1989 size_t span_mask; 1990 /* ATC invalidates are always on 4096-bytes pages */ 1991 size_t inval_grain_shift = 12; 1992 unsigned long page_start, page_end; 1993 1994 /* 1995 * ATS and PASID: 1996 * 1997 * If substream_valid is clear, the PCIe TLP is sent without a PASID 1998 * prefix. 
In that case all ATC entries within the address range are 1999 * invalidated, including those that were requested with a PASID! There 2000 * is no way to invalidate only entries without PASID. 2001 * 2002 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID 2003 * traffic), translation requests without PASID create ATC entries 2004 * without PASID, which must be invalidated with substream_valid clear. 2005 * This has the unpleasant side-effect of invalidating all PASID-tagged 2006 * ATC entries within the address range. 2007 */ 2008 *cmd = (struct arm_smmu_cmdq_ent) { 2009 .opcode = CMDQ_OP_ATC_INV, 2010 .substream_valid = (ssid != IOMMU_NO_PASID), 2011 .atc.ssid = ssid, 2012 }; 2013 2014 if (!size) { 2015 cmd->atc.size = ATC_INV_SIZE_ALL; 2016 return; 2017 } 2018 2019 page_start = iova >> inval_grain_shift; 2020 page_end = (iova + size - 1) >> inval_grain_shift; 2021 2022 /* 2023 * In an ATS Invalidate Request, the address must be aligned on the 2024 * range size, which must be a power of two number of page sizes. We 2025 * thus have to choose between grossly over-invalidating the region, or 2026 * splitting the invalidation into multiple commands. For simplicity 2027 * we'll go with the first solution, but should refine it in the future 2028 * if multiple commands are shown to be more efficient. 2029 * 2030 * Find the smallest power of two that covers the range. The most 2031 * significant differing bit between the start and end addresses, 2032 * fls(start ^ end), indicates the required span. For example: 2033 * 2034 * We want to invalidate pages [8; 11]. This is already the ideal range: 2035 * x = 0b1000 ^ 0b1011 = 0b11 2036 * span = 1 << fls(x) = 4 2037 * 2038 * To invalidate pages [7; 10], we need to invalidate [0; 15]: 2039 * x = 0b0111 ^ 0b1010 = 0b1101 2040 * span = 1 << fls(x) = 16 2041 */ 2042 log2_span = fls_long(page_start ^ page_end); 2043 span_mask = (1ULL << log2_span) - 1; 2044 2045 page_start &= ~span_mask; 2046 2047 cmd->atc.addr = page_start << inval_grain_shift; 2048 cmd->atc.size = log2_span; 2049 } 2050 2051 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, 2052 ioasid_t ssid) 2053 { 2054 int i; 2055 struct arm_smmu_cmdq_ent cmd; 2056 struct arm_smmu_cmdq_batch cmds; 2057 2058 arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd); 2059 2060 arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd); 2061 for (i = 0; i < master->num_streams; i++) { 2062 cmd.atc.sid = master->streams[i].id; 2063 arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); 2064 } 2065 2066 return arm_smmu_cmdq_batch_submit(master->smmu, &cmds); 2067 } 2068 2069 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, 2070 unsigned long iova, size_t size) 2071 { 2072 struct arm_smmu_master_domain *master_domain; 2073 int i; 2074 unsigned long flags; 2075 struct arm_smmu_cmdq_ent cmd = { 2076 .opcode = CMDQ_OP_ATC_INV, 2077 }; 2078 struct arm_smmu_cmdq_batch cmds; 2079 2080 if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) 2081 return 0; 2082 2083 /* 2084 * Ensure that we've completed prior invalidation of the main TLBs 2085 * before we read 'nr_ats_masters' in case of a concurrent call to 2086 * arm_smmu_enable_ats(): 2087 * 2088 * // unmap() // arm_smmu_enable_ats() 2089 * TLBI+SYNC atomic_inc(&nr_ats_masters); 2090 * smp_mb(); [...] 2091 * atomic_read(&nr_ats_masters); pci_enable_ats() // writel() 2092 * 2093 * Ensures that we always see the incremented 'nr_ats_masters' count if 2094 * ATS was enabled at the PCI device before completion of the TLBI. 
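	 *
	 * Conversely, if the read below still observes a zero count, no
	 * master had ATS enabled before the TLBI completed, so there can be
	 * no stale ATC entries and it is safe to skip the ATC invalidation.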
2095 */ 2096 smp_mb(); 2097 if (!atomic_read(&smmu_domain->nr_ats_masters)) 2098 return 0; 2099 2100 arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd); 2101 2102 spin_lock_irqsave(&smmu_domain->devices_lock, flags); 2103 list_for_each_entry(master_domain, &smmu_domain->devices, 2104 devices_elm) { 2105 struct arm_smmu_master *master = master_domain->master; 2106 2107 if (!master->ats_enabled) 2108 continue; 2109 2110 if (master_domain->nested_ats_flush) { 2111 /* 2112 * If a S2 used as a nesting parent is changed we have 2113 * no option but to completely flush the ATC. 2114 */ 2115 arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); 2116 } else { 2117 arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, 2118 &cmd); 2119 } 2120 2121 for (i = 0; i < master->num_streams; i++) { 2122 cmd.atc.sid = master->streams[i].id; 2123 arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd); 2124 } 2125 } 2126 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 2127 2128 return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds); 2129 } 2130 2131 /* IO_PGTABLE API */ 2132 static void arm_smmu_tlb_inv_context(void *cookie) 2133 { 2134 struct arm_smmu_domain *smmu_domain = cookie; 2135 struct arm_smmu_device *smmu = smmu_domain->smmu; 2136 struct arm_smmu_cmdq_ent cmd; 2137 2138 /* 2139 * NOTE: when io-pgtable is in non-strict mode, we may get here with 2140 * PTEs previously cleared by unmaps on the current CPU not yet visible 2141 * to the SMMU. We are relying on the dma_wmb() implicit during cmd 2142 * insertion to guarantee those are observed before the TLBI. Do be 2143 * careful, 007. 2144 */ 2145 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 2146 arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid); 2147 } else { 2148 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL; 2149 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; 2150 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); 2151 } 2152 arm_smmu_atc_inv_domain(smmu_domain, 0, 0); 2153 } 2154 2155 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, 2156 unsigned long iova, size_t size, 2157 size_t granule, 2158 struct arm_smmu_domain *smmu_domain) 2159 { 2160 struct arm_smmu_device *smmu = smmu_domain->smmu; 2161 unsigned long end = iova + size, num_pages = 0, tg = 0; 2162 size_t inv_range = granule; 2163 struct arm_smmu_cmdq_batch cmds; 2164 2165 if (!size) 2166 return; 2167 2168 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { 2169 /* Get the leaf page size */ 2170 tg = __ffs(smmu_domain->domain.pgsize_bitmap); 2171 2172 num_pages = size >> tg; 2173 2174 /* Convert page size of 12,14,16 (log2) to 1,2,3 */ 2175 cmd->tlbi.tg = (tg - 10) / 2; 2176 2177 /* 2178 * Determine what level the granule is at. For non-leaf, both 2179 * io-pgtable and SVA pass a nominal last-level granule because 2180 * they don't know what level(s) actually apply, so ignore that 2181 * and leave TTL=0. However for various errata reasons we still 2182 * want to use a range command, so avoid the SVA corner case 2183 * where both scale and num could be 0 as well. 2184 */ 2185 if (cmd->tlbi.leaf) 2186 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); 2187 else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1) 2188 num_pages++; 2189 } 2190 2191 arm_smmu_cmdq_batch_init(smmu, &cmds, cmd); 2192 2193 while (iova < end) { 2194 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { 2195 /* 2196 * On each iteration of the loop, the range is 5 bits 2197 * worth of the aligned size remaining. 
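			 * Each command covers num * 2^scale pages, where
			 * scale is the index of the lowest set bit of the
			 * remaining page count and num is the value of the
			 * five bits starting there. For instance (an
			 * illustrative value, not from the original source),
			 * num_pages == 0x30 gives scale = 4 and num = 3, so a
			 * single command covers all 48 pages.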
2198 * The range in pages is: 2199 * 2200 * range = (num_pages & (0x1f << __ffs(num_pages))) 2201 */ 2202 unsigned long scale, num; 2203 2204 /* Determine the power of 2 multiple number of pages */ 2205 scale = __ffs(num_pages); 2206 cmd->tlbi.scale = scale; 2207 2208 /* Determine how many chunks of 2^scale size we have */ 2209 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX; 2210 cmd->tlbi.num = num - 1; 2211 2212 /* range is num * 2^scale * pgsize */ 2213 inv_range = num << (scale + tg); 2214 2215 /* Clear out the lower order bits for the next iteration */ 2216 num_pages -= num << scale; 2217 } 2218 2219 cmd->tlbi.addr = iova; 2220 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd); 2221 iova += inv_range; 2222 } 2223 arm_smmu_cmdq_batch_submit(smmu, &cmds); 2224 } 2225 2226 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, 2227 size_t granule, bool leaf, 2228 struct arm_smmu_domain *smmu_domain) 2229 { 2230 struct arm_smmu_cmdq_ent cmd = { 2231 .tlbi = { 2232 .leaf = leaf, 2233 }, 2234 }; 2235 2236 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 2237 cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? 2238 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA; 2239 cmd.tlbi.asid = smmu_domain->cd.asid; 2240 } else { 2241 cmd.opcode = CMDQ_OP_TLBI_S2_IPA; 2242 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; 2243 } 2244 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); 2245 2246 if (smmu_domain->nest_parent) { 2247 /* 2248 * When the S2 domain changes all the nested S1 ASIDs have to be 2249 * flushed too. 2250 */ 2251 cmd.opcode = CMDQ_OP_TLBI_NH_ALL; 2252 arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd); 2253 } 2254 2255 /* 2256 * Unfortunately, this can't be leaf-only since we may have 2257 * zapped an entire table. 2258 */ 2259 arm_smmu_atc_inv_domain(smmu_domain, iova, size); 2260 } 2261 2262 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, 2263 size_t granule, bool leaf, 2264 struct arm_smmu_domain *smmu_domain) 2265 { 2266 struct arm_smmu_cmdq_ent cmd = { 2267 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? 
2268 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA, 2269 .tlbi = { 2270 .asid = asid, 2271 .leaf = leaf, 2272 }, 2273 }; 2274 2275 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); 2276 } 2277 2278 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, 2279 unsigned long iova, size_t granule, 2280 void *cookie) 2281 { 2282 struct arm_smmu_domain *smmu_domain = cookie; 2283 struct iommu_domain *domain = &smmu_domain->domain; 2284 2285 iommu_iotlb_gather_add_page(domain, gather, iova, granule); 2286 } 2287 2288 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, 2289 size_t granule, void *cookie) 2290 { 2291 arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie); 2292 } 2293 2294 static const struct iommu_flush_ops arm_smmu_flush_ops = { 2295 .tlb_flush_all = arm_smmu_tlb_inv_context, 2296 .tlb_flush_walk = arm_smmu_tlb_inv_walk, 2297 .tlb_add_page = arm_smmu_tlb_inv_page_nosync, 2298 }; 2299 2300 static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu) 2301 { 2302 u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY); 2303 2304 return (smmu->features & features) == features; 2305 } 2306 2307 /* IOMMU API */ 2308 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) 2309 { 2310 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 2311 2312 switch (cap) { 2313 case IOMMU_CAP_CACHE_COHERENCY: 2314 /* Assume that a coherent TCU implies coherent TBUs */ 2315 return master->smmu->features & ARM_SMMU_FEAT_COHERENCY; 2316 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: 2317 return arm_smmu_master_canwbs(master); 2318 case IOMMU_CAP_NOEXEC: 2319 case IOMMU_CAP_DEFERRED_FLUSH: 2320 return true; 2321 case IOMMU_CAP_DIRTY_TRACKING: 2322 return arm_smmu_dbm_capable(master->smmu); 2323 default: 2324 return false; 2325 } 2326 } 2327 2328 static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain) 2329 { 2330 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2331 struct arm_smmu_master_domain *master_domain; 2332 unsigned long flags; 2333 bool ret = true; 2334 2335 spin_lock_irqsave(&smmu_domain->devices_lock, flags); 2336 list_for_each_entry(master_domain, &smmu_domain->devices, 2337 devices_elm) { 2338 if (!arm_smmu_master_canwbs(master_domain->master)) { 2339 ret = false; 2340 break; 2341 } 2342 } 2343 smmu_domain->enforce_cache_coherency = ret; 2344 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 2345 return ret; 2346 } 2347 2348 struct arm_smmu_domain *arm_smmu_domain_alloc(void) 2349 { 2350 struct arm_smmu_domain *smmu_domain; 2351 2352 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); 2353 if (!smmu_domain) 2354 return ERR_PTR(-ENOMEM); 2355 2356 mutex_init(&smmu_domain->init_mutex); 2357 INIT_LIST_HEAD(&smmu_domain->devices); 2358 spin_lock_init(&smmu_domain->devices_lock); 2359 2360 return smmu_domain; 2361 } 2362 2363 static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) 2364 { 2365 struct arm_smmu_domain *smmu_domain; 2366 2367 /* 2368 * Allocate the domain and initialise some of its data structures. 2369 * We can't really do anything meaningful until we've added a 2370 * master. 
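	 * If the core does pass a device here, though, its SMMU is already
	 * known and the page table configuration is finalised immediately
	 * below.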
2371 */ 2372 smmu_domain = arm_smmu_domain_alloc(); 2373 if (IS_ERR(smmu_domain)) 2374 return ERR_CAST(smmu_domain); 2375 2376 if (dev) { 2377 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 2378 int ret; 2379 2380 ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0); 2381 if (ret) { 2382 kfree(smmu_domain); 2383 return ERR_PTR(ret); 2384 } 2385 } 2386 return &smmu_domain->domain; 2387 } 2388 2389 static void arm_smmu_domain_free_paging(struct iommu_domain *domain) 2390 { 2391 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2392 struct arm_smmu_device *smmu = smmu_domain->smmu; 2393 2394 free_io_pgtable_ops(smmu_domain->pgtbl_ops); 2395 2396 /* Free the ASID or VMID */ 2397 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 2398 /* Prevent SVA from touching the CD while we're freeing it */ 2399 mutex_lock(&arm_smmu_asid_lock); 2400 xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); 2401 mutex_unlock(&arm_smmu_asid_lock); 2402 } else { 2403 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; 2404 if (cfg->vmid) 2405 ida_free(&smmu->vmid_map, cfg->vmid); 2406 } 2407 2408 kfree(smmu_domain); 2409 } 2410 2411 static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu, 2412 struct arm_smmu_domain *smmu_domain) 2413 { 2414 int ret; 2415 u32 asid = 0; 2416 struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; 2417 2418 /* Prevent SVA from modifying the ASID until it is written to the CD */ 2419 mutex_lock(&arm_smmu_asid_lock); 2420 ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, 2421 XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); 2422 cd->asid = (u16)asid; 2423 mutex_unlock(&arm_smmu_asid_lock); 2424 return ret; 2425 } 2426 2427 static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu, 2428 struct arm_smmu_domain *smmu_domain) 2429 { 2430 int vmid; 2431 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; 2432 2433 /* Reserve VMID 0 for stage-2 bypass STEs */ 2434 vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1, 2435 GFP_KERNEL); 2436 if (vmid < 0) 2437 return vmid; 2438 2439 cfg->vmid = (u16)vmid; 2440 return 0; 2441 } 2442 2443 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, 2444 struct arm_smmu_device *smmu, u32 flags) 2445 { 2446 int ret; 2447 enum io_pgtable_fmt fmt; 2448 struct io_pgtable_cfg pgtbl_cfg; 2449 struct io_pgtable_ops *pgtbl_ops; 2450 int (*finalise_stage_fn)(struct arm_smmu_device *smmu, 2451 struct arm_smmu_domain *smmu_domain); 2452 bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; 2453 2454 /* Restrict the stage to what we can actually support */ 2455 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) 2456 smmu_domain->stage = ARM_SMMU_DOMAIN_S2; 2457 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) 2458 smmu_domain->stage = ARM_SMMU_DOMAIN_S1; 2459 2460 pgtbl_cfg = (struct io_pgtable_cfg) { 2461 .pgsize_bitmap = smmu->pgsize_bitmap, 2462 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, 2463 .tlb = &arm_smmu_flush_ops, 2464 .iommu_dev = smmu->dev, 2465 }; 2466 2467 switch (smmu_domain->stage) { 2468 case ARM_SMMU_DOMAIN_S1: { 2469 unsigned long ias = (smmu->features & 2470 ARM_SMMU_FEAT_VAX) ? 
52 : 48; 2471 2472 pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS); 2473 pgtbl_cfg.oas = smmu->ias; 2474 if (enable_dirty) 2475 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD; 2476 fmt = ARM_64_LPAE_S1; 2477 finalise_stage_fn = arm_smmu_domain_finalise_s1; 2478 break; 2479 } 2480 case ARM_SMMU_DOMAIN_S2: 2481 if (enable_dirty) 2482 return -EOPNOTSUPP; 2483 pgtbl_cfg.ias = smmu->ias; 2484 pgtbl_cfg.oas = smmu->oas; 2485 fmt = ARM_64_LPAE_S2; 2486 finalise_stage_fn = arm_smmu_domain_finalise_s2; 2487 if ((smmu->features & ARM_SMMU_FEAT_S2FWB) && 2488 (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) 2489 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB; 2490 break; 2491 default: 2492 return -EINVAL; 2493 } 2494 2495 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); 2496 if (!pgtbl_ops) 2497 return -ENOMEM; 2498 2499 smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; 2500 smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; 2501 smmu_domain->domain.geometry.force_aperture = true; 2502 if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1) 2503 smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops; 2504 2505 ret = finalise_stage_fn(smmu, smmu_domain); 2506 if (ret < 0) { 2507 free_io_pgtable_ops(pgtbl_ops); 2508 return ret; 2509 } 2510 2511 smmu_domain->pgtbl_ops = pgtbl_ops; 2512 smmu_domain->smmu = smmu; 2513 return 0; 2514 } 2515 2516 static struct arm_smmu_ste * 2517 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) 2518 { 2519 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 2520 2521 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { 2522 /* Two-level walk */ 2523 return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)] 2524 ->stes[arm_smmu_strtab_l2_idx(sid)]; 2525 } else { 2526 /* Simple linear lookup */ 2527 return &cfg->linear.table[sid]; 2528 } 2529 } 2530 2531 void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master, 2532 const struct arm_smmu_ste *target) 2533 { 2534 int i, j; 2535 struct arm_smmu_device *smmu = master->smmu; 2536 2537 master->cd_table.in_ste = 2538 FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) == 2539 STRTAB_STE_0_CFG_S1_TRANS; 2540 master->ste_ats_enabled = 2541 FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) == 2542 STRTAB_STE_1_EATS_TRANS; 2543 2544 for (i = 0; i < master->num_streams; ++i) { 2545 u32 sid = master->streams[i].id; 2546 struct arm_smmu_ste *step = 2547 arm_smmu_get_step_for_sid(smmu, sid); 2548 2549 /* Bridged PCI devices may end up with duplicated IDs */ 2550 for (j = 0; j < i; j++) 2551 if (master->streams[j].id == sid) 2552 break; 2553 if (j < i) 2554 continue; 2555 2556 arm_smmu_write_ste(master, sid, step, target); 2557 } 2558 } 2559 2560 static bool arm_smmu_ats_supported(struct arm_smmu_master *master) 2561 { 2562 struct device *dev = master->dev; 2563 struct arm_smmu_device *smmu = master->smmu; 2564 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2565 2566 if (!(smmu->features & ARM_SMMU_FEAT_ATS)) 2567 return false; 2568 2569 if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS)) 2570 return false; 2571 2572 return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)); 2573 } 2574 2575 static void arm_smmu_enable_ats(struct arm_smmu_master *master) 2576 { 2577 size_t stu; 2578 struct pci_dev *pdev; 2579 struct arm_smmu_device *smmu = master->smmu; 2580 2581 /* Smallest Translation Unit: log2 of the smallest supported granule */ 2582 stu = __ffs(smmu->pgsize_bitmap); 2583 pdev = to_pci_dev(master->dev); 2584 2585 /* 2586 * ATC invalidation of PASID 0 
causes the entire ATC to be flushed. 2587 */ 2588 arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); 2589 if (pci_enable_ats(pdev, stu)) 2590 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); 2591 } 2592 2593 static int arm_smmu_enable_pasid(struct arm_smmu_master *master) 2594 { 2595 int ret; 2596 int features; 2597 int num_pasids; 2598 struct pci_dev *pdev; 2599 2600 if (!dev_is_pci(master->dev)) 2601 return -ENODEV; 2602 2603 pdev = to_pci_dev(master->dev); 2604 2605 features = pci_pasid_features(pdev); 2606 if (features < 0) 2607 return features; 2608 2609 num_pasids = pci_max_pasids(pdev); 2610 if (num_pasids <= 0) 2611 return num_pasids; 2612 2613 ret = pci_enable_pasid(pdev, features); 2614 if (ret) { 2615 dev_err(&pdev->dev, "Failed to enable PASID\n"); 2616 return ret; 2617 } 2618 2619 master->ssid_bits = min_t(u8, ilog2(num_pasids), 2620 master->smmu->ssid_bits); 2621 return 0; 2622 } 2623 2624 static void arm_smmu_disable_pasid(struct arm_smmu_master *master) 2625 { 2626 struct pci_dev *pdev; 2627 2628 if (!dev_is_pci(master->dev)) 2629 return; 2630 2631 pdev = to_pci_dev(master->dev); 2632 2633 if (!pdev->pasid_enabled) 2634 return; 2635 2636 master->ssid_bits = 0; 2637 pci_disable_pasid(pdev); 2638 } 2639 2640 static struct arm_smmu_master_domain * 2641 arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, 2642 struct arm_smmu_master *master, 2643 ioasid_t ssid, bool nested_ats_flush) 2644 { 2645 struct arm_smmu_master_domain *master_domain; 2646 2647 lockdep_assert_held(&smmu_domain->devices_lock); 2648 2649 list_for_each_entry(master_domain, &smmu_domain->devices, 2650 devices_elm) { 2651 if (master_domain->master == master && 2652 master_domain->ssid == ssid && 2653 master_domain->nested_ats_flush == nested_ats_flush) 2654 return master_domain; 2655 } 2656 return NULL; 2657 } 2658 2659 /* 2660 * If the domain uses the smmu_domain->devices list return the arm_smmu_domain 2661 * structure, otherwise NULL. These domains track attached devices so they can 2662 * issue invalidations. 2663 */ 2664 static struct arm_smmu_domain * 2665 to_smmu_domain_devices(struct iommu_domain *domain) 2666 { 2667 /* The domain can be NULL only when processing the first attach */ 2668 if (!domain) 2669 return NULL; 2670 if ((domain->type & __IOMMU_DOMAIN_PAGING) || 2671 domain->type == IOMMU_DOMAIN_SVA) 2672 return to_smmu_domain(domain); 2673 if (domain->type == IOMMU_DOMAIN_NESTED) 2674 return to_smmu_nested_domain(domain)->vsmmu->s2_parent; 2675 return NULL; 2676 } 2677 2678 static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, 2679 struct iommu_domain *domain, 2680 ioasid_t ssid) 2681 { 2682 struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain); 2683 struct arm_smmu_master_domain *master_domain; 2684 bool nested_ats_flush = false; 2685 unsigned long flags; 2686 2687 if (!smmu_domain) 2688 return; 2689 2690 if (domain->type == IOMMU_DOMAIN_NESTED) 2691 nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats; 2692 2693 spin_lock_irqsave(&smmu_domain->devices_lock, flags); 2694 master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid, 2695 nested_ats_flush); 2696 if (master_domain) { 2697 list_del(&master_domain->devices_elm); 2698 kfree(master_domain); 2699 if (master->ats_enabled) 2700 atomic_dec(&smmu_domain->nr_ats_masters); 2701 } 2702 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 2703 } 2704 2705 /* 2706 * Start the sequence to attach a domain to a master. 
The sequence contains three
2707 * steps:
2708 * arm_smmu_attach_prepare()
2709 * arm_smmu_install_ste_for_dev()
2710 * arm_smmu_attach_commit()
2711 *
2712 * If prepare succeeds then the sequence must be completed. The STE installed
2713 * must set the STE.EATS field according to state.ats_enabled.
2714 *
2715 * If the device supports ATS then this determines if EATS should be enabled
2716 * in the STE, and starts sequencing EATS disable if required.
2717 *
2718 * The change of the EATS in the STE and the PCI ATS config space is managed by
2719 * this sequence to be in the right order so that if PCI ATS is enabled then
2720 * STE.EATS is enabled.
2721 *
2722 * new_domain can be a non-paging domain. In this case ATS will not be enabled,
2723 * and invalidations won't be tracked.
2724 */
2725 int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
2726 struct iommu_domain *new_domain)
2727 {
2728 struct arm_smmu_master *master = state->master;
2729 struct arm_smmu_master_domain *master_domain;
2730 struct arm_smmu_domain *smmu_domain =
2731 to_smmu_domain_devices(new_domain);
2732 unsigned long flags;
2733
2734 /*
2735 * arm_smmu_share_asid() must not see two domains pointing to the same
2736 * arm_smmu_master_domain contents otherwise it could randomly write one
2737 * or the other to the CD.
2738 */
2739 lockdep_assert_held(&arm_smmu_asid_lock);
2740
2741 if (smmu_domain || state->cd_needs_ats) {
2742 /*
2743 * The SMMU does not support enabling ATS with bypass/abort.
2744 * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
2745 * Translation Requests and Translated transactions are denied
2746 * as though ATS is disabled for the stream (STE.EATS == 0b00),
2747 * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
2748 * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be
2749 * enabled if we have an arm_smmu_domain, which always has page
2750 * tables.
2751 */
2752 state->ats_enabled = !state->disable_ats &&
2753 arm_smmu_ats_supported(master);
2754 }
2755
2756 if (smmu_domain) {
2757 master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
2758 if (!master_domain)
2759 return -ENOMEM;
2760 master_domain->master = master;
2761 master_domain->ssid = state->ssid;
2762 if (new_domain->type == IOMMU_DOMAIN_NESTED)
2763 master_domain->nested_ats_flush =
2764 to_smmu_nested_domain(new_domain)->enable_ats;
2765
2766 /*
2767 * During prepare we want the current smmu_domain and new
2768 * smmu_domain to be in the devices list before we change any
2769 * HW. This ensures that both domains will send ATS
2770 * invalidations to the master until we are done.
2771 *
2772 * It is tempting to make this list only track masters that are
2773 * using ATS, but arm_smmu_share_asid() also uses this to change
2774 * the ASID of a domain, unrelated to ATS.
2775 *
2776 * Notice that if we are re-attaching the same domain then the list
2777 * will have two identical entries and commit will remove only
2778 * one of them.
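 *
 * A typical caller (compare arm_smmu_attach_dev() below) therefore looks
 * roughly like this, with the STE/CD construction elided:
 *
 *	mutex_lock(&arm_smmu_asid_lock);
 *	ret = arm_smmu_attach_prepare(&state, domain);
 *	if (!ret) {
 *		arm_smmu_install_ste_for_dev(master, &target);
 *		arm_smmu_attach_commit(&state);
 *	}
 *	mutex_unlock(&arm_smmu_asid_lock);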
2779 */ 2780 spin_lock_irqsave(&smmu_domain->devices_lock, flags); 2781 if (smmu_domain->enforce_cache_coherency && 2782 !arm_smmu_master_canwbs(master)) { 2783 spin_unlock_irqrestore(&smmu_domain->devices_lock, 2784 flags); 2785 kfree(master_domain); 2786 return -EINVAL; 2787 } 2788 2789 if (state->ats_enabled) 2790 atomic_inc(&smmu_domain->nr_ats_masters); 2791 list_add(&master_domain->devices_elm, &smmu_domain->devices); 2792 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 2793 } 2794 2795 if (!state->ats_enabled && master->ats_enabled) { 2796 pci_disable_ats(to_pci_dev(master->dev)); 2797 /* 2798 * This is probably overkill, but the config write for disabling 2799 * ATS should complete before the STE is configured to generate 2800 * UR to avoid AER noise. 2801 */ 2802 wmb(); 2803 } 2804 return 0; 2805 } 2806 2807 /* 2808 * Commit is done after the STE/CD are configured with the EATS setting. It 2809 * completes synchronizing the PCI device's ATC and finishes manipulating the 2810 * smmu_domain->devices list. 2811 */ 2812 void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) 2813 { 2814 struct arm_smmu_master *master = state->master; 2815 2816 lockdep_assert_held(&arm_smmu_asid_lock); 2817 2818 if (state->ats_enabled && !master->ats_enabled) { 2819 arm_smmu_enable_ats(master); 2820 } else if (state->ats_enabled && master->ats_enabled) { 2821 /* 2822 * The translation has changed, flush the ATC. At this point the 2823 * SMMU is translating for the new domain and both the old&new 2824 * domain will issue invalidations. 2825 */ 2826 arm_smmu_atc_inv_master(master, state->ssid); 2827 } else if (!state->ats_enabled && master->ats_enabled) { 2828 /* ATS is being switched off, invalidate the entire ATC */ 2829 arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); 2830 } 2831 master->ats_enabled = state->ats_enabled; 2832 2833 arm_smmu_remove_master_domain(master, state->old_domain, state->ssid); 2834 } 2835 2836 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) 2837 { 2838 int ret = 0; 2839 struct arm_smmu_ste target; 2840 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2841 struct arm_smmu_device *smmu; 2842 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2843 struct arm_smmu_attach_state state = { 2844 .old_domain = iommu_get_domain_for_dev(dev), 2845 .ssid = IOMMU_NO_PASID, 2846 }; 2847 struct arm_smmu_master *master; 2848 struct arm_smmu_cd *cdptr; 2849 2850 if (!fwspec) 2851 return -ENOENT; 2852 2853 state.master = master = dev_iommu_priv_get(dev); 2854 smmu = master->smmu; 2855 2856 mutex_lock(&smmu_domain->init_mutex); 2857 2858 if (!smmu_domain->smmu) { 2859 ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); 2860 } else if (smmu_domain->smmu != smmu) 2861 ret = -EINVAL; 2862 2863 mutex_unlock(&smmu_domain->init_mutex); 2864 if (ret) 2865 return ret; 2866 2867 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 2868 cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); 2869 if (!cdptr) 2870 return -ENOMEM; 2871 } else if (arm_smmu_ssids_in_use(&master->cd_table)) 2872 return -EBUSY; 2873 2874 /* 2875 * Prevent arm_smmu_share_asid() from trying to change the ASID 2876 * of either the old or new domain while we are working on it. 2877 * This allows the STE and the smmu_domain->devices list to 2878 * be inconsistent during this routine. 
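	 * The lock is dropped again once arm_smmu_attach_commit() has brought
	 * everything back into sync.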
2879 */ 2880 mutex_lock(&arm_smmu_asid_lock); 2881 2882 ret = arm_smmu_attach_prepare(&state, domain); 2883 if (ret) { 2884 mutex_unlock(&arm_smmu_asid_lock); 2885 return ret; 2886 } 2887 2888 switch (smmu_domain->stage) { 2889 case ARM_SMMU_DOMAIN_S1: { 2890 struct arm_smmu_cd target_cd; 2891 2892 arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); 2893 arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, 2894 &target_cd); 2895 arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled, 2896 STRTAB_STE_1_S1DSS_SSID0); 2897 arm_smmu_install_ste_for_dev(master, &target); 2898 break; 2899 } 2900 case ARM_SMMU_DOMAIN_S2: 2901 arm_smmu_make_s2_domain_ste(&target, master, smmu_domain, 2902 state.ats_enabled); 2903 arm_smmu_install_ste_for_dev(master, &target); 2904 arm_smmu_clear_cd(master, IOMMU_NO_PASID); 2905 break; 2906 } 2907 2908 arm_smmu_attach_commit(&state); 2909 mutex_unlock(&arm_smmu_asid_lock); 2910 return 0; 2911 } 2912 2913 static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain, 2914 struct device *dev, ioasid_t id, 2915 struct iommu_domain *old) 2916 { 2917 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2918 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 2919 struct arm_smmu_device *smmu = master->smmu; 2920 struct arm_smmu_cd target_cd; 2921 int ret = 0; 2922 2923 mutex_lock(&smmu_domain->init_mutex); 2924 if (!smmu_domain->smmu) 2925 ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); 2926 else if (smmu_domain->smmu != smmu) 2927 ret = -EINVAL; 2928 mutex_unlock(&smmu_domain->init_mutex); 2929 if (ret) 2930 return ret; 2931 2932 if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) 2933 return -EINVAL; 2934 2935 /* 2936 * We can read cd.asid outside the lock because arm_smmu_set_pasid() 2937 * will fix it 2938 */ 2939 arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); 2940 return arm_smmu_set_pasid(master, to_smmu_domain(domain), id, 2941 &target_cd, old); 2942 } 2943 2944 static void arm_smmu_update_ste(struct arm_smmu_master *master, 2945 struct iommu_domain *sid_domain, 2946 bool ats_enabled) 2947 { 2948 unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE; 2949 struct arm_smmu_ste ste; 2950 2951 if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled) 2952 return; 2953 2954 if (sid_domain->type == IOMMU_DOMAIN_IDENTITY) 2955 s1dss = STRTAB_STE_1_S1DSS_BYPASS; 2956 else 2957 WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED); 2958 2959 /* 2960 * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior 2961 * using s1dss if necessary. If the cd_table is already installed then 2962 * the S1DSS is correct and this will just update the EATS. Otherwise it 2963 * installs the entire thing. This will be hitless. 
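	 * (S1DSS selects how traffic without a SubstreamID is handled by a
	 * stage-1 STE: BYPASS lets it skip stage-1 translation, TERMINATE
	 * aborts it, and SSID0 routes it through CD 0.)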
2964 */
2965 arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
2966 arm_smmu_install_ste_for_dev(master, &ste);
2967 }
2968
2969 int arm_smmu_set_pasid(struct arm_smmu_master *master,
2970 struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
2971 struct arm_smmu_cd *cd, struct iommu_domain *old)
2972 {
2973 struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
2974 struct arm_smmu_attach_state state = {
2975 .master = master,
2976 .ssid = pasid,
2977 .old_domain = old,
2978 };
2979 struct arm_smmu_cd *cdptr;
2980 int ret;
2981
2982 /* The core code validates pasid */
2983
2984 if (smmu_domain->smmu != master->smmu)
2985 return -EINVAL;
2986
2987 if (!master->cd_table.in_ste &&
2988 sid_domain->type != IOMMU_DOMAIN_IDENTITY &&
2989 sid_domain->type != IOMMU_DOMAIN_BLOCKED)
2990 return -EINVAL;
2991
2992 cdptr = arm_smmu_alloc_cd_ptr(master, pasid);
2993 if (!cdptr)
2994 return -ENOMEM;
2995
2996 mutex_lock(&arm_smmu_asid_lock);
2997 ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain);
2998 if (ret)
2999 goto out_unlock;
3000
3001 /*
3002 * We don't want to take the asid_lock too early, so fix up the
3003 * caller-set ASID under the lock in case it changed.
3004 */
3005 cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
3006 cd->data[0] |= cpu_to_le64(
3007 FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));
3008
3009 arm_smmu_write_cd_entry(master, pasid, cdptr, cd);
3010 arm_smmu_update_ste(master, sid_domain, state.ats_enabled);
3011
3012 arm_smmu_attach_commit(&state);
3013
3014 out_unlock:
3015 mutex_unlock(&arm_smmu_asid_lock);
3016 return ret;
3017 }
3018
3019 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
3020 struct iommu_domain *domain)
3021 {
3022 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3023 struct arm_smmu_domain *smmu_domain;
3024
3025 smmu_domain = to_smmu_domain(domain);
3026
3027 mutex_lock(&arm_smmu_asid_lock);
3028 arm_smmu_clear_cd(master, pasid);
3029 if (master->ats_enabled)
3030 arm_smmu_atc_inv_master(master, pasid);
3031 arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);
3032 mutex_unlock(&arm_smmu_asid_lock);
3033
3034 /*
3035 * When the last user of the CD table goes away, downgrade the STE back
3036 * to a non-cd_table one.
3037 */
3038 if (!arm_smmu_ssids_in_use(&master->cd_table)) {
3039 struct iommu_domain *sid_domain =
3040 iommu_get_domain_for_dev(master->dev);
3041
3042 if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
3043 sid_domain->type == IOMMU_DOMAIN_BLOCKED)
3044 sid_domain->ops->attach_dev(sid_domain, dev);
3045 }
3046 }
3047
3048 static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
3049 struct device *dev,
3050 struct arm_smmu_ste *ste,
3051 unsigned int s1dss)
3052 {
3053 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3054 struct arm_smmu_attach_state state = {
3055 .master = master,
3056 .old_domain = iommu_get_domain_for_dev(dev),
3057 .ssid = IOMMU_NO_PASID,
3058 };
3059
3060 /*
3061 * Do not allow any ASID to be changed while we are working on the STE,
3062 * otherwise we could miss invalidations.
3063 */
3064 mutex_lock(&arm_smmu_asid_lock);
3065
3066 /*
3067 * If the CD table is not in use we can use the provided STE, otherwise
3068 * we use a cdtable STE with the provided S1DSS.
3069 */
3070 if (arm_smmu_ssids_in_use(&master->cd_table)) {
3071 /*
3072 * If a CD table has to be present then we need to run with ATS
3073 * on even though the RID will fail ATS queries with UR.
This is 3074 * because we have no idea what the PASID's need. 3075 */ 3076 state.cd_needs_ats = true; 3077 arm_smmu_attach_prepare(&state, domain); 3078 arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss); 3079 } else { 3080 arm_smmu_attach_prepare(&state, domain); 3081 } 3082 arm_smmu_install_ste_for_dev(master, ste); 3083 arm_smmu_attach_commit(&state); 3084 mutex_unlock(&arm_smmu_asid_lock); 3085 3086 /* 3087 * This has to be done after removing the master from the 3088 * arm_smmu_domain->devices to avoid races updating the same context 3089 * descriptor from arm_smmu_share_asid(). 3090 */ 3091 arm_smmu_clear_cd(master, IOMMU_NO_PASID); 3092 } 3093 3094 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, 3095 struct device *dev) 3096 { 3097 struct arm_smmu_ste ste; 3098 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 3099 3100 arm_smmu_make_bypass_ste(master->smmu, &ste); 3101 arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS); 3102 return 0; 3103 } 3104 3105 static const struct iommu_domain_ops arm_smmu_identity_ops = { 3106 .attach_dev = arm_smmu_attach_dev_identity, 3107 }; 3108 3109 static struct iommu_domain arm_smmu_identity_domain = { 3110 .type = IOMMU_DOMAIN_IDENTITY, 3111 .ops = &arm_smmu_identity_ops, 3112 }; 3113 3114 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, 3115 struct device *dev) 3116 { 3117 struct arm_smmu_ste ste; 3118 3119 arm_smmu_make_abort_ste(&ste); 3120 arm_smmu_attach_dev_ste(domain, dev, &ste, 3121 STRTAB_STE_1_S1DSS_TERMINATE); 3122 return 0; 3123 } 3124 3125 static const struct iommu_domain_ops arm_smmu_blocked_ops = { 3126 .attach_dev = arm_smmu_attach_dev_blocked, 3127 }; 3128 3129 static struct iommu_domain arm_smmu_blocked_domain = { 3130 .type = IOMMU_DOMAIN_BLOCKED, 3131 .ops = &arm_smmu_blocked_ops, 3132 }; 3133 3134 static struct iommu_domain * 3135 arm_smmu_domain_alloc_user(struct device *dev, u32 flags, 3136 struct iommu_domain *parent, 3137 const struct iommu_user_data *user_data) 3138 { 3139 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 3140 const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | 3141 IOMMU_HWPT_ALLOC_PASID | 3142 IOMMU_HWPT_ALLOC_NEST_PARENT; 3143 struct arm_smmu_domain *smmu_domain; 3144 int ret; 3145 3146 if (flags & ~PAGING_FLAGS) 3147 return ERR_PTR(-EOPNOTSUPP); 3148 if (parent || user_data) 3149 return ERR_PTR(-EOPNOTSUPP); 3150 3151 if (flags & IOMMU_HWPT_ALLOC_PASID) 3152 return arm_smmu_domain_alloc_paging(dev); 3153 3154 smmu_domain = arm_smmu_domain_alloc(); 3155 if (IS_ERR(smmu_domain)) 3156 return ERR_CAST(smmu_domain); 3157 3158 if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) { 3159 if (!(master->smmu->features & ARM_SMMU_FEAT_NESTING)) { 3160 ret = -EOPNOTSUPP; 3161 goto err_free; 3162 } 3163 smmu_domain->stage = ARM_SMMU_DOMAIN_S2; 3164 smmu_domain->nest_parent = true; 3165 } 3166 3167 smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; 3168 smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; 3169 ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags); 3170 if (ret) 3171 goto err_free; 3172 return &smmu_domain->domain; 3173 3174 err_free: 3175 kfree(smmu_domain); 3176 return ERR_PTR(ret); 3177 } 3178 3179 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova, 3180 phys_addr_t paddr, size_t pgsize, size_t pgcount, 3181 int prot, gfp_t gfp, size_t *mapped) 3182 { 3183 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 3184 3185 if (!ops) 3186 return -ENODEV; 3187 3188 
return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped); 3189 } 3190 3191 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova, 3192 size_t pgsize, size_t pgcount, 3193 struct iommu_iotlb_gather *gather) 3194 { 3195 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 3196 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; 3197 3198 if (!ops) 3199 return 0; 3200 3201 return ops->unmap_pages(ops, iova, pgsize, pgcount, gather); 3202 } 3203 3204 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) 3205 { 3206 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 3207 3208 if (smmu_domain->smmu) 3209 arm_smmu_tlb_inv_context(smmu_domain); 3210 } 3211 3212 static void arm_smmu_iotlb_sync(struct iommu_domain *domain, 3213 struct iommu_iotlb_gather *gather) 3214 { 3215 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 3216 3217 if (!gather->pgsize) 3218 return; 3219 3220 arm_smmu_tlb_inv_range_domain(gather->start, 3221 gather->end - gather->start + 1, 3222 gather->pgsize, true, smmu_domain); 3223 } 3224 3225 static phys_addr_t 3226 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 3227 { 3228 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 3229 3230 if (!ops) 3231 return 0; 3232 3233 return ops->iova_to_phys(ops, iova); 3234 } 3235 3236 static struct platform_driver arm_smmu_driver; 3237 3238 static 3239 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) 3240 { 3241 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver, 3242 fwnode); 3243 put_device(dev); 3244 return dev ? dev_get_drvdata(dev) : NULL; 3245 } 3246 3247 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid) 3248 { 3249 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) 3250 return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents; 3251 return sid < smmu->strtab_cfg.linear.num_ents; 3252 } 3253 3254 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid) 3255 { 3256 /* Check the SIDs are in range of the SMMU and our stream table */ 3257 if (!arm_smmu_sid_in_range(smmu, sid)) 3258 return -ERANGE; 3259 3260 /* Ensure l2 strtab is initialised */ 3261 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) 3262 return arm_smmu_init_l2_strtab(smmu, sid); 3263 3264 return 0; 3265 } 3266 3267 static int arm_smmu_insert_master(struct arm_smmu_device *smmu, 3268 struct arm_smmu_master *master) 3269 { 3270 int i; 3271 int ret = 0; 3272 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); 3273 3274 master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams), 3275 GFP_KERNEL); 3276 if (!master->streams) 3277 return -ENOMEM; 3278 master->num_streams = fwspec->num_ids; 3279 3280 mutex_lock(&smmu->streams_mutex); 3281 for (i = 0; i < fwspec->num_ids; i++) { 3282 struct arm_smmu_stream *new_stream = &master->streams[i]; 3283 u32 sid = fwspec->ids[i]; 3284 3285 new_stream->id = sid; 3286 new_stream->master = master; 3287 3288 ret = arm_smmu_init_sid_strtab(smmu, sid); 3289 if (ret) 3290 break; 3291 3292 /* Insert into SID tree */ 3293 if (rb_find_add(&new_stream->node, &smmu->streams, 3294 arm_smmu_streams_cmp_node)) { 3295 dev_warn(master->dev, "stream %u already in tree\n", 3296 sid); 3297 ret = -EINVAL; 3298 break; 3299 } 3300 } 3301 3302 if (ret) { 3303 for (i--; i >= 0; i--) 3304 rb_erase(&master->streams[i].node, &smmu->streams); 3305 kfree(master->streams); 3306 } 3307 mutex_unlock(&smmu->streams_mutex); 3308 
3309 return ret; 3310 } 3311 3312 static void arm_smmu_remove_master(struct arm_smmu_master *master) 3313 { 3314 int i; 3315 struct arm_smmu_device *smmu = master->smmu; 3316 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); 3317 3318 if (!smmu || !master->streams) 3319 return; 3320 3321 mutex_lock(&smmu->streams_mutex); 3322 for (i = 0; i < fwspec->num_ids; i++) 3323 rb_erase(&master->streams[i].node, &smmu->streams); 3324 mutex_unlock(&smmu->streams_mutex); 3325 3326 kfree(master->streams); 3327 } 3328 3329 static struct iommu_device *arm_smmu_probe_device(struct device *dev) 3330 { 3331 int ret; 3332 struct arm_smmu_device *smmu; 3333 struct arm_smmu_master *master; 3334 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3335 3336 if (WARN_ON_ONCE(dev_iommu_priv_get(dev))) 3337 return ERR_PTR(-EBUSY); 3338 3339 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); 3340 if (!smmu) 3341 return ERR_PTR(-ENODEV); 3342 3343 master = kzalloc(sizeof(*master), GFP_KERNEL); 3344 if (!master) 3345 return ERR_PTR(-ENOMEM); 3346 3347 master->dev = dev; 3348 master->smmu = smmu; 3349 dev_iommu_priv_set(dev, master); 3350 3351 ret = arm_smmu_insert_master(smmu, master); 3352 if (ret) 3353 goto err_free_master; 3354 3355 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits); 3356 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits); 3357 3358 /* 3359 * Note that PASID must be enabled before, and disabled after ATS: 3360 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register 3361 * 3362 * Behavior is undefined if this bit is Set and the value of the PASID 3363 * Enable, Execute Requested Enable, or Privileged Mode Requested bits 3364 * are changed. 3365 */ 3366 arm_smmu_enable_pasid(master); 3367 3368 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB)) 3369 master->ssid_bits = min_t(u8, master->ssid_bits, 3370 CTXDESC_LINEAR_CDMAX); 3371 3372 if ((smmu->features & ARM_SMMU_FEAT_STALLS && 3373 device_property_read_bool(dev, "dma-can-stall")) || 3374 smmu->features & ARM_SMMU_FEAT_STALL_FORCE) 3375 master->stall_enabled = true; 3376 3377 if (dev_is_pci(dev)) { 3378 unsigned int stu = __ffs(smmu->pgsize_bitmap); 3379 3380 pci_prepare_ats(to_pci_dev(dev), stu); 3381 } 3382 3383 return &smmu->iommu; 3384 3385 err_free_master: 3386 kfree(master); 3387 return ERR_PTR(ret); 3388 } 3389 3390 static void arm_smmu_release_device(struct device *dev) 3391 { 3392 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 3393 3394 if (WARN_ON(arm_smmu_master_sva_enabled(master))) 3395 iopf_queue_remove_device(master->smmu->evtq.iopf, dev); 3396 3397 /* Put the STE back to what arm_smmu_init_strtab() sets */ 3398 if (dev->iommu->require_direct) 3399 arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev); 3400 else 3401 arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev); 3402 3403 arm_smmu_disable_pasid(master); 3404 arm_smmu_remove_master(master); 3405 if (arm_smmu_cdtab_allocated(&master->cd_table)) 3406 arm_smmu_free_cd_tables(master); 3407 kfree(master); 3408 } 3409 3410 static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain, 3411 unsigned long iova, size_t size, 3412 unsigned long flags, 3413 struct iommu_dirty_bitmap *dirty) 3414 { 3415 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 3416 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; 3417 3418 return ops->read_and_clear_dirty(ops, iova, size, flags, dirty); 3419 } 3420 3421 static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain, 3422 bool enabled) 3423 { 3424 
/* 3425 * Always enabled and the dirty bitmap is cleared prior to 3426 * set_dirty_tracking(). 3427 */ 3428 return 0; 3429 } 3430 3431 static struct iommu_group *arm_smmu_device_group(struct device *dev) 3432 { 3433 struct iommu_group *group; 3434 3435 /* 3436 * We don't support devices sharing stream IDs other than PCI RID 3437 * aliases, since the necessary ID-to-device lookup becomes rather 3438 * impractical given a potential sparse 32-bit stream ID space. 3439 */ 3440 if (dev_is_pci(dev)) 3441 group = pci_device_group(dev); 3442 else 3443 group = generic_device_group(dev); 3444 3445 return group; 3446 } 3447 3448 static int arm_smmu_of_xlate(struct device *dev, 3449 const struct of_phandle_args *args) 3450 { 3451 return iommu_fwspec_add_ids(dev, args->args, 1); 3452 } 3453 3454 static void arm_smmu_get_resv_regions(struct device *dev, 3455 struct list_head *head) 3456 { 3457 struct iommu_resv_region *region; 3458 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; 3459 3460 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, 3461 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL); 3462 if (!region) 3463 return; 3464 3465 list_add_tail(®ion->list, head); 3466 3467 iommu_dma_get_resv_regions(dev, head); 3468 } 3469 3470 static int arm_smmu_dev_enable_feature(struct device *dev, 3471 enum iommu_dev_features feat) 3472 { 3473 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 3474 3475 if (!master) 3476 return -ENODEV; 3477 3478 switch (feat) { 3479 case IOMMU_DEV_FEAT_IOPF: 3480 if (!arm_smmu_master_iopf_supported(master)) 3481 return -EINVAL; 3482 if (master->iopf_enabled) 3483 return -EBUSY; 3484 master->iopf_enabled = true; 3485 return 0; 3486 case IOMMU_DEV_FEAT_SVA: 3487 if (!arm_smmu_master_sva_supported(master)) 3488 return -EINVAL; 3489 if (arm_smmu_master_sva_enabled(master)) 3490 return -EBUSY; 3491 return arm_smmu_master_enable_sva(master); 3492 default: 3493 return -EINVAL; 3494 } 3495 } 3496 3497 static int arm_smmu_dev_disable_feature(struct device *dev, 3498 enum iommu_dev_features feat) 3499 { 3500 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 3501 3502 if (!master) 3503 return -EINVAL; 3504 3505 switch (feat) { 3506 case IOMMU_DEV_FEAT_IOPF: 3507 if (!master->iopf_enabled) 3508 return -EINVAL; 3509 if (master->sva_enabled) 3510 return -EBUSY; 3511 master->iopf_enabled = false; 3512 return 0; 3513 case IOMMU_DEV_FEAT_SVA: 3514 if (!arm_smmu_master_sva_enabled(master)) 3515 return -EINVAL; 3516 return arm_smmu_master_disable_sva(master); 3517 default: 3518 return -EINVAL; 3519 } 3520 } 3521 3522 /* 3523 * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the 3524 * PCIe link and save the data to memory by DMA. The hardware is restricted to 3525 * use identity mapping only. 
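 * arm_smmu_def_domain_type() below therefore reports IOMMU_DOMAIN_IDENTITY
 * for it, so the core gives it an identity default domain.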
3526 */ 3527 #define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \ 3528 (pdev)->device == 0xa12e) 3529 3530 static int arm_smmu_def_domain_type(struct device *dev) 3531 { 3532 if (dev_is_pci(dev)) { 3533 struct pci_dev *pdev = to_pci_dev(dev); 3534 3535 if (IS_HISI_PTT_DEVICE(pdev)) 3536 return IOMMU_DOMAIN_IDENTITY; 3537 } 3538 3539 return 0; 3540 } 3541 3542 static struct iommu_ops arm_smmu_ops = { 3543 .identity_domain = &arm_smmu_identity_domain, 3544 .blocked_domain = &arm_smmu_blocked_domain, 3545 .capable = arm_smmu_capable, 3546 .hw_info = arm_smmu_hw_info, 3547 .domain_alloc_paging = arm_smmu_domain_alloc_paging, 3548 .domain_alloc_sva = arm_smmu_sva_domain_alloc, 3549 .domain_alloc_user = arm_smmu_domain_alloc_user, 3550 .probe_device = arm_smmu_probe_device, 3551 .release_device = arm_smmu_release_device, 3552 .device_group = arm_smmu_device_group, 3553 .of_xlate = arm_smmu_of_xlate, 3554 .get_resv_regions = arm_smmu_get_resv_regions, 3555 .remove_dev_pasid = arm_smmu_remove_dev_pasid, 3556 .dev_enable_feat = arm_smmu_dev_enable_feature, 3557 .dev_disable_feat = arm_smmu_dev_disable_feature, 3558 .page_response = arm_smmu_page_response, 3559 .def_domain_type = arm_smmu_def_domain_type, 3560 .viommu_alloc = arm_vsmmu_alloc, 3561 .user_pasid_table = 1, 3562 .pgsize_bitmap = -1UL, /* Restricted during device attach */ 3563 .owner = THIS_MODULE, 3564 .default_domain_ops = &(const struct iommu_domain_ops) { 3565 .attach_dev = arm_smmu_attach_dev, 3566 .enforce_cache_coherency = arm_smmu_enforce_cache_coherency, 3567 .set_dev_pasid = arm_smmu_s1_set_dev_pasid, 3568 .map_pages = arm_smmu_map_pages, 3569 .unmap_pages = arm_smmu_unmap_pages, 3570 .flush_iotlb_all = arm_smmu_flush_iotlb_all, 3571 .iotlb_sync = arm_smmu_iotlb_sync, 3572 .iova_to_phys = arm_smmu_iova_to_phys, 3573 .free = arm_smmu_domain_free_paging, 3574 } 3575 }; 3576 3577 static struct iommu_dirty_ops arm_smmu_dirty_ops = { 3578 .read_and_clear_dirty = arm_smmu_read_and_clear_dirty, 3579 .set_dirty_tracking = arm_smmu_set_dirty_tracking, 3580 }; 3581 3582 /* Probing and initialisation functions */ 3583 int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, 3584 struct arm_smmu_queue *q, void __iomem *page, 3585 unsigned long prod_off, unsigned long cons_off, 3586 size_t dwords, const char *name) 3587 { 3588 size_t qsz; 3589 3590 do { 3591 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3; 3592 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, 3593 GFP_KERNEL); 3594 if (q->base || qsz < PAGE_SIZE) 3595 break; 3596 3597 q->llq.max_n_shift--; 3598 } while (1); 3599 3600 if (!q->base) { 3601 dev_err(smmu->dev, 3602 "failed to allocate queue (0x%zx bytes) for %s\n", 3603 qsz, name); 3604 return -ENOMEM; 3605 } 3606 3607 if (!WARN_ON(q->base_dma & (qsz - 1))) { 3608 dev_info(smmu->dev, "allocated %u entries for %s\n", 3609 1 << q->llq.max_n_shift, name); 3610 } 3611 3612 q->prod_reg = page + prod_off; 3613 q->cons_reg = page + cons_off; 3614 q->ent_dwords = dwords; 3615 3616 q->q_base = Q_BASE_RWA; 3617 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK; 3618 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift); 3619 3620 q->llq.prod = q->llq.cons = 0; 3621 return 0; 3622 } 3623 3624 int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, 3625 struct arm_smmu_cmdq *cmdq) 3626 { 3627 unsigned int nents = 1 << cmdq->q.llq.max_n_shift; 3628 3629 atomic_set(&cmdq->owner_prod, 0); 3630 atomic_set(&cmdq->lock, 0); 3631 3632 cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents, 3633 
static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
{
	int ret;

	/* cmdq */
	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
				      CMDQ_ENT_DWORDS, "cmdq");
	if (ret)
		return ret;

	ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
	if (ret)
		return ret;

	/* evtq */
	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
				      EVTQ_ENT_DWORDS, "evtq");
	if (ret)
		return ret;

	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
		if (!smmu->evtq.iopf)
			return -ENOMEM;
	}

	/* priq */
	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
		return 0;

	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
				       PRIQ_ENT_DWORDS, "priq");
}

static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
{
	u32 l1size;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	unsigned int last_sid_idx =
		arm_smmu_strtab_l1_idx((1ULL << smmu->sid_bits) - 1);

	/* Calculate the L1 size, capped to the SIDSIZE. */
	cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES);
	if (cfg->l2.num_l1_ents <= last_sid_idx)
		dev_warn(smmu->dev,
			 "2-level strtab only covers %u/%u bits of SID\n",
			 ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES),
			 smmu->sid_bits);

	l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1);
	cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma,
					    GFP_KERNEL);
	if (!cfg->l2.l1tab) {
		dev_err(smmu->dev,
			"failed to allocate l1 stream table (%u bytes)\n",
			l1size);
		return -ENOMEM;
	}

	cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents,
				      sizeof(*cfg->l2.l2ptrs), GFP_KERNEL);
	if (!cfg->l2.l2ptrs)
		return -ENOMEM;

	return 0;
}

static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
{
	u32 size;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;

	size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste);
	cfg->linear.table = dmam_alloc_coherent(smmu->dev, size,
						&cfg->linear.ste_dma,
						GFP_KERNEL);
	if (!cfg->linear.table) {
		dev_err(smmu->dev,
			"failed to allocate linear stream table (%u bytes)\n",
			size);
		return -ENOMEM;
	}
	cfg->linear.num_ents = 1 << smmu->sid_bits;

	arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents);
	return 0;
}

static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
{
	int ret;

	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
		ret = arm_smmu_init_strtab_2lvl(smmu);
	else
		ret = arm_smmu_init_strtab_linear(smmu);
	if (ret)
		return ret;

	ida_init(&smmu->vmid_map);

	return 0;
}

static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
	int ret;

	mutex_init(&smmu->streams_mutex);
	smmu->streams = RB_ROOT;

	ret = arm_smmu_init_queues(smmu);
	if (ret)
		return ret;

	ret = arm_smmu_init_strtab(smmu);
	if (ret)
		return ret;

	if (smmu->impl_ops && smmu->impl_ops->init_structures)
		return smmu->impl_ops->init_structures(smmu);

	return 0;
}
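/*
 * Stream table sizing, as set up by the functions above: with a 2-level
 * table the L1 has num_l1_ents descriptors, each covering a block of
 * STRTAB_NUM_L2_STES StreamIDs whose L2 tables are allocated lazily and
 * tracked in cfg->l2.l2ptrs; with a linear table all (1 << sid_bits)
 * STEs are allocated up front and initialised by
 * arm_smmu_init_initial_stes(). Illustrative arithmetic only (assuming
 * the usual STRTAB_SPLIT of 8, i.e. 256 STEs per L2 table): an SMMU with
 * sid_bits == 16 needs a 256-entry L1 table, whereas a linear table for
 * the same SID space would need 65536 STEs in one contiguous allocation.
 */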
static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
				   unsigned int reg_off, unsigned int ack_off)
{
	u32 reg;

	writel_relaxed(val, smmu->base + reg_off);
	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
					  1, ARM_SMMU_POLL_TIMEOUT_US);
}

/* GBPA is "special" */
static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
{
	int ret;
	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;

	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
					 1, ARM_SMMU_POLL_TIMEOUT_US);
	if (ret)
		return ret;

	reg &= ~clr;
	reg |= set;
	writel_relaxed(reg | GBPA_UPDATE, gbpa);
	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
					 1, ARM_SMMU_POLL_TIMEOUT_US);

	if (ret)
		dev_err(smmu->dev, "GBPA not responding to update\n");
	return ret;
}

static void arm_smmu_free_msis(void *data)
{
	struct device *dev = data;

	platform_device_msi_free_irqs_all(dev);
}

static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
	phys_addr_t doorbell;
	struct device *dev = msi_desc_to_dev(desc);
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];

	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
	doorbell &= MSI_CFG0_ADDR_MASK;

	writeq_relaxed(doorbell, smmu->base + cfg[0]);
	writel_relaxed(msg->data, smmu->base + cfg[1]);
	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
}

static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
{
	int ret, nvec = ARM_SMMU_MAX_MSIS;
	struct device *dev = smmu->dev;

	/* Clear the MSI address regs */
	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);

	if (smmu->features & ARM_SMMU_FEAT_PRI)
		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
	else
		nvec--;

	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
		return;

	if (!dev->msi.domain) {
		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
		return;
	}

	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
	ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
	if (ret) {
		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
		return;
	}

	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);

	/* Add callback to free MSIs on teardown */
	devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
}
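/*
 * MSI plumbing: arm_smmu_write_msi_msg() above programs one doorbell per
 * vector by writing the (masked) address to IRQ_CFG0, the payload to
 * IRQ_CFG1 and a Device-nGnRE memory attribute to IRQ_CFG2, using the
 * register triplets in arm_smmu_msi_cfg[]. arm_smmu_setup_msis() only
 * allocates vectors for the event queue, global errors and (when
 * supported) the PRI queue; if the SMMU lacks MSI support or the device
 * has no MSI domain, the driver simply falls back to the wired IRQs
 * requested below.
 */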
static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
{
	int irq, ret;

	arm_smmu_setup_msis(smmu);

	/* Request interrupt lines */
	irq = smmu->evtq.q.irq;
	if (irq) {
		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
						arm_smmu_evtq_thread,
						IRQF_ONESHOT,
						"arm-smmu-v3-evtq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable evtq irq\n");
	} else {
		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
	}

	irq = smmu->gerr_irq;
	if (irq) {
		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
				       0, "arm-smmu-v3-gerror", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable gerror irq\n");
	} else {
		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
	}

	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		irq = smmu->priq.q.irq;
		if (irq) {
			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
							arm_smmu_priq_thread,
							IRQF_ONESHOT,
							"arm-smmu-v3-priq",
							smmu);
			if (ret < 0)
				dev_warn(smmu->dev,
					 "failed to enable priq irq\n");
		} else {
			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
		}
	}
}

static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
{
	int ret, irq;
	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;

	/* Disable IRQs first */
	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
				      ARM_SMMU_IRQ_CTRLACK);
	if (ret) {
		dev_err(smmu->dev, "failed to disable irqs\n");
		return ret;
	}

	irq = smmu->combined_irq;
	if (irq) {
		/*
		 * Cavium ThunderX2 implementation doesn't support unique irq
		 * lines. Use a single irq line for all the SMMUv3 interrupts.
		 */
		ret = devm_request_threaded_irq(smmu->dev, irq,
						arm_smmu_combined_irq_handler,
						arm_smmu_combined_irq_thread,
						IRQF_ONESHOT,
						"arm-smmu-v3-combined-irq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable combined irq\n");
	} else
		arm_smmu_setup_unique_irqs(smmu);

	if (smmu->features & ARM_SMMU_FEAT_PRI)
		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;

	/* Enable interrupt generation on the SMMU */
	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
	if (ret)
		dev_warn(smmu->dev, "failed to enable irqs\n");

	return 0;
}
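/*
 * Interrupt bring-up order used above: IRQ generation is first disabled
 * via IRQ_CTRL (polling IRQ_CTRLACK), then either the single combined
 * line (e.g. Cavium ThunderX2) or the per-queue lines are requested, and
 * only then are the EVTQ/GERROR (and PRIQ, when present) interrupt
 * sources re-enabled. A failure to enable the sources is only warned
 * about and does not fail the reset path.
 */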
static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
{
	int ret;

	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
	if (ret)
		dev_err(smmu->dev, "failed to clear cr0\n");

	return ret;
}

static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
{
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	dma_addr_t dma;
	u32 reg;

	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
		reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
				 STRTAB_BASE_CFG_FMT_2LVL) |
		      FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE,
				 ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) |
		      FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
		dma = cfg->l2.l1_dma;
	} else {
		reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
				 STRTAB_BASE_CFG_FMT_LINEAR) |
		      FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
		dma = cfg->linear.ste_dma;
	}
	writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
		       smmu->base + ARM_SMMU_STRTAB_BASE);
	writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
}
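/*
 * STRTAB_BASE_CFG encoding written above: for a 2-level table the
 * LOG2SIZE field is ilog2(num_l1_ents) + STRTAB_SPLIT (i.e. the total
 * number of SID bits covered) and SPLIT selects the L1/L2 boundary; for
 * a linear table LOG2SIZE is simply sid_bits. STRTAB_BASE itself carries
 * the table's DMA address plus the read-allocate hint (STRTAB_BASE_RA).
 */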
Resetting...\n"); 3992 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); 3993 } 3994 3995 ret = arm_smmu_device_disable(smmu); 3996 if (ret) 3997 return ret; 3998 3999 /* CR1 (table and queue memory attributes) */ 4000 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) | 4001 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) | 4002 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) | 4003 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) | 4004 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) | 4005 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB); 4006 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1); 4007 4008 /* CR2 (random crap) */ 4009 reg = CR2_PTM | CR2_RECINVSID; 4010 4011 if (smmu->features & ARM_SMMU_FEAT_E2H) 4012 reg |= CR2_E2H; 4013 4014 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2); 4015 4016 /* Stream table */ 4017 arm_smmu_write_strtab(smmu); 4018 4019 /* Command queue */ 4020 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE); 4021 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD); 4022 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS); 4023 4024 enables = CR0_CMDQEN; 4025 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 4026 ARM_SMMU_CR0ACK); 4027 if (ret) { 4028 dev_err(smmu->dev, "failed to enable command queue\n"); 4029 return ret; 4030 } 4031 4032 /* Invalidate any cached configuration */ 4033 cmd.opcode = CMDQ_OP_CFGI_ALL; 4034 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); 4035 4036 /* Invalidate any stale TLB entries */ 4037 if (smmu->features & ARM_SMMU_FEAT_HYP) { 4038 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL; 4039 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); 4040 } 4041 4042 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; 4043 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); 4044 4045 /* Event queue */ 4046 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); 4047 writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD); 4048 writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS); 4049 4050 enables |= CR0_EVTQEN; 4051 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 4052 ARM_SMMU_CR0ACK); 4053 if (ret) { 4054 dev_err(smmu->dev, "failed to enable event queue\n"); 4055 return ret; 4056 } 4057 4058 /* PRI queue */ 4059 if (smmu->features & ARM_SMMU_FEAT_PRI) { 4060 writeq_relaxed(smmu->priq.q.q_base, 4061 smmu->base + ARM_SMMU_PRIQ_BASE); 4062 writel_relaxed(smmu->priq.q.llq.prod, 4063 smmu->page1 + ARM_SMMU_PRIQ_PROD); 4064 writel_relaxed(smmu->priq.q.llq.cons, 4065 smmu->page1 + ARM_SMMU_PRIQ_CONS); 4066 4067 enables |= CR0_PRIQEN; 4068 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 4069 ARM_SMMU_CR0ACK); 4070 if (ret) { 4071 dev_err(smmu->dev, "failed to enable PRI queue\n"); 4072 return ret; 4073 } 4074 } 4075 4076 if (smmu->features & ARM_SMMU_FEAT_ATS) { 4077 enables |= CR0_ATSCHK; 4078 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 4079 ARM_SMMU_CR0ACK); 4080 if (ret) { 4081 dev_err(smmu->dev, "failed to enable ATS check\n"); 4082 return ret; 4083 } 4084 } 4085 4086 ret = arm_smmu_setup_irqs(smmu); 4087 if (ret) { 4088 dev_err(smmu->dev, "failed to setup irqs\n"); 4089 return ret; 4090 } 4091 4092 if (is_kdump_kernel()) 4093 enables &= ~(CR0_EVTQEN | CR0_PRIQEN); 4094 4095 /* Enable the SMMU interface */ 4096 enables |= CR0_SMMUEN; 4097 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 4098 ARM_SMMU_CR0ACK); 4099 if (ret) { 4100 dev_err(smmu->dev, "failed to enable SMMU interface\n"); 4101 return ret; 4102 } 4103 4104 if (smmu->impl_ops && smmu->impl_ops->device_reset) { 4105 
#define IIDR_IMPLEMENTER_ARM		0x43b
#define IIDR_PRODUCTID_ARM_MMU_600	0x483
#define IIDR_PRODUCTID_ARM_MMU_700	0x487

static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	unsigned int implementer, productid, variant, revision;

	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
	productid = FIELD_GET(IIDR_PRODUCTID, reg);
	variant = FIELD_GET(IIDR_VARIANT, reg);
	revision = FIELD_GET(IIDR_REVISION, reg);

	switch (implementer) {
	case IIDR_IMPLEMENTER_ARM:
		switch (productid) {
		case IIDR_PRODUCTID_ARM_MMU_600:
			/* Arm erratum 1076982 */
			if (variant == 0 && revision <= 2)
				smmu->features &= ~ARM_SMMU_FEAT_SEV;
			/* Arm erratum 1209401 */
			if (variant < 2)
				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
			break;
		case IIDR_PRODUCTID_ARM_MMU_700:
			/* Arm erratum 2812531 */
			smmu->features &= ~ARM_SMMU_FEAT_BTM;
			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
			/* Arm errata 2268618, 2812531 */
			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
			break;
		}
		break;
	}
}

static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg)
{
	u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD);
	u32 hw_features = 0;

	switch (FIELD_GET(IDR0_HTTU, reg)) {
	case IDR0_HTTU_ACCESS_DIRTY:
		hw_features |= ARM_SMMU_FEAT_HD;
		fallthrough;
	case IDR0_HTTU_ACCESS:
		hw_features |= ARM_SMMU_FEAT_HA;
	}

	if (smmu->dev->of_node)
		smmu->features |= hw_features;
	else if (hw_features != fw_features)
		/* ACPI IORT sets the HTTU bits */
		dev_warn(smmu->dev,
			 "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n",
			 hw_features, fw_features);
}

static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;

	/* IDR0 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);

	/* 2-level structures */
	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;

	if (reg & IDR0_CD2L)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;

	/*
	 * Translation table endianness.
	 * We currently require the same endianness as the CPU, but this
	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
4194 */ 4195 switch (FIELD_GET(IDR0_TTENDIAN, reg)) { 4196 case IDR0_TTENDIAN_MIXED: 4197 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE; 4198 break; 4199 #ifdef __BIG_ENDIAN 4200 case IDR0_TTENDIAN_BE: 4201 smmu->features |= ARM_SMMU_FEAT_TT_BE; 4202 break; 4203 #else 4204 case IDR0_TTENDIAN_LE: 4205 smmu->features |= ARM_SMMU_FEAT_TT_LE; 4206 break; 4207 #endif 4208 default: 4209 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n"); 4210 return -ENXIO; 4211 } 4212 4213 /* Boolean feature flags */ 4214 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI) 4215 smmu->features |= ARM_SMMU_FEAT_PRI; 4216 4217 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS) 4218 smmu->features |= ARM_SMMU_FEAT_ATS; 4219 4220 if (reg & IDR0_SEV) 4221 smmu->features |= ARM_SMMU_FEAT_SEV; 4222 4223 if (reg & IDR0_MSI) { 4224 smmu->features |= ARM_SMMU_FEAT_MSI; 4225 if (coherent && !disable_msipolling) 4226 smmu->options |= ARM_SMMU_OPT_MSIPOLL; 4227 } 4228 4229 if (reg & IDR0_HYP) { 4230 smmu->features |= ARM_SMMU_FEAT_HYP; 4231 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) 4232 smmu->features |= ARM_SMMU_FEAT_E2H; 4233 } 4234 4235 arm_smmu_get_httu(smmu, reg); 4236 4237 /* 4238 * The coherency feature as set by FW is used in preference to the ID 4239 * register, but warn on mismatch. 4240 */ 4241 if (!!(reg & IDR0_COHACC) != coherent) 4242 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n", 4243 coherent ? "true" : "false"); 4244 4245 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) { 4246 case IDR0_STALL_MODEL_FORCE: 4247 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE; 4248 fallthrough; 4249 case IDR0_STALL_MODEL_STALL: 4250 smmu->features |= ARM_SMMU_FEAT_STALLS; 4251 } 4252 4253 if (reg & IDR0_S1P) 4254 smmu->features |= ARM_SMMU_FEAT_TRANS_S1; 4255 4256 if (reg & IDR0_S2P) 4257 smmu->features |= ARM_SMMU_FEAT_TRANS_S2; 4258 4259 if (!(reg & (IDR0_S1P | IDR0_S2P))) { 4260 dev_err(smmu->dev, "no translation support!\n"); 4261 return -ENXIO; 4262 } 4263 4264 /* We only support the AArch64 table format at present */ 4265 switch (FIELD_GET(IDR0_TTF, reg)) { 4266 case IDR0_TTF_AARCH32_64: 4267 smmu->ias = 40; 4268 fallthrough; 4269 case IDR0_TTF_AARCH64: 4270 break; 4271 default: 4272 dev_err(smmu->dev, "AArch64 table format not supported!\n"); 4273 return -ENXIO; 4274 } 4275 4276 /* ASID/VMID sizes */ 4277 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8; 4278 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8; 4279 4280 /* IDR1 */ 4281 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1); 4282 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) { 4283 dev_err(smmu->dev, "embedded implementation not supported\n"); 4284 return -ENXIO; 4285 } 4286 4287 if (reg & IDR1_ATTR_TYPES_OVR) 4288 smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR; 4289 4290 /* Queue sizes, capped to ensure natural alignment */ 4291 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, 4292 FIELD_GET(IDR1_CMDQS, reg)); 4293 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) { 4294 /* 4295 * We don't support splitting up batches, so one batch of 4296 * commands plus an extra sync needs to fit inside the command 4297 * queue. There's also no way we can handle the weird alignment 4298 * restrictions on the base pointer for a unit-length queue. 
4299 */ 4300 dev_err(smmu->dev, "command queue size <= %d entries not supported\n", 4301 CMDQ_BATCH_ENTRIES); 4302 return -ENXIO; 4303 } 4304 4305 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT, 4306 FIELD_GET(IDR1_EVTQS, reg)); 4307 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT, 4308 FIELD_GET(IDR1_PRIQS, reg)); 4309 4310 /* SID/SSID sizes */ 4311 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg); 4312 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg); 4313 smmu->iommu.max_pasids = 1UL << smmu->ssid_bits; 4314 4315 /* 4316 * If the SMMU supports fewer bits than would fill a single L2 stream 4317 * table, use a linear table instead. 4318 */ 4319 if (smmu->sid_bits <= STRTAB_SPLIT) 4320 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB; 4321 4322 /* IDR3 */ 4323 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); 4324 if (FIELD_GET(IDR3_RIL, reg)) 4325 smmu->features |= ARM_SMMU_FEAT_RANGE_INV; 4326 4327 /* IDR5 */ 4328 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); 4329 4330 /* Maximum number of outstanding stalls */ 4331 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg); 4332 4333 /* Page sizes */ 4334 if (reg & IDR5_GRAN64K) 4335 smmu->pgsize_bitmap |= SZ_64K | SZ_512M; 4336 if (reg & IDR5_GRAN16K) 4337 smmu->pgsize_bitmap |= SZ_16K | SZ_32M; 4338 if (reg & IDR5_GRAN4K) 4339 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G; 4340 4341 /* Input address size */ 4342 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT) 4343 smmu->features |= ARM_SMMU_FEAT_VAX; 4344 4345 /* Output address size */ 4346 switch (FIELD_GET(IDR5_OAS, reg)) { 4347 case IDR5_OAS_32_BIT: 4348 smmu->oas = 32; 4349 break; 4350 case IDR5_OAS_36_BIT: 4351 smmu->oas = 36; 4352 break; 4353 case IDR5_OAS_40_BIT: 4354 smmu->oas = 40; 4355 break; 4356 case IDR5_OAS_42_BIT: 4357 smmu->oas = 42; 4358 break; 4359 case IDR5_OAS_44_BIT: 4360 smmu->oas = 44; 4361 break; 4362 case IDR5_OAS_52_BIT: 4363 smmu->oas = 52; 4364 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */ 4365 break; 4366 default: 4367 dev_info(smmu->dev, 4368 "unknown output address size. 
Truncating to 48-bit\n"); 4369 fallthrough; 4370 case IDR5_OAS_48_BIT: 4371 smmu->oas = 48; 4372 } 4373 4374 if (arm_smmu_ops.pgsize_bitmap == -1UL) 4375 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap; 4376 else 4377 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap; 4378 4379 /* Set the DMA mask for our table walker */ 4380 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas))) 4381 dev_warn(smmu->dev, 4382 "failed to set DMA mask for table walker\n"); 4383 4384 smmu->ias = max(smmu->ias, smmu->oas); 4385 4386 if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) && 4387 (smmu->features & ARM_SMMU_FEAT_TRANS_S2)) 4388 smmu->features |= ARM_SMMU_FEAT_NESTING; 4389 4390 arm_smmu_device_iidr_probe(smmu); 4391 4392 if (arm_smmu_sva_supported(smmu)) 4393 smmu->features |= ARM_SMMU_FEAT_SVA; 4394 4395 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n", 4396 smmu->ias, smmu->oas, smmu->features); 4397 return 0; 4398 } 4399 4400 #ifdef CONFIG_ACPI 4401 #ifdef CONFIG_TEGRA241_CMDQV 4402 static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node, 4403 struct arm_smmu_device *smmu) 4404 { 4405 const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier); 4406 struct acpi_device *adev; 4407 4408 /* Look for an NVDA200C node whose _UID matches the SMMU node ID */ 4409 adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1); 4410 if (adev) { 4411 /* Tegra241 CMDQV driver is responsible for put_device() */ 4412 smmu->impl_dev = &adev->dev; 4413 smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV; 4414 dev_info(smmu->dev, "found companion CMDQV device: %s\n", 4415 dev_name(smmu->impl_dev)); 4416 } 4417 kfree(uid); 4418 } 4419 #else 4420 static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node, 4421 struct arm_smmu_device *smmu) 4422 { 4423 } 4424 #endif 4425 4426 static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node, 4427 struct arm_smmu_device *smmu) 4428 { 4429 struct acpi_iort_smmu_v3 *iort_smmu = 4430 (struct acpi_iort_smmu_v3 *)node->node_data; 4431 4432 switch (iort_smmu->model) { 4433 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: 4434 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; 4435 break; 4436 case ACPI_IORT_SMMU_V3_HISILICON_HI161X: 4437 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; 4438 break; 4439 case ACPI_IORT_SMMU_V3_GENERIC: 4440 /* 4441 * Tegra241 implementation stores its SMMU options and impl_dev 4442 * in DSDT. Thus, go through the ACPI tables unconditionally. 
4443 */ 4444 acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu); 4445 break; 4446 } 4447 4448 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); 4449 return 0; 4450 } 4451 4452 static int arm_smmu_device_acpi_probe(struct platform_device *pdev, 4453 struct arm_smmu_device *smmu) 4454 { 4455 struct acpi_iort_smmu_v3 *iort_smmu; 4456 struct device *dev = smmu->dev; 4457 struct acpi_iort_node *node; 4458 4459 node = *(struct acpi_iort_node **)dev_get_platdata(dev); 4460 4461 /* Retrieve SMMUv3 specific data */ 4462 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; 4463 4464 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) 4465 smmu->features |= ARM_SMMU_FEAT_COHERENCY; 4466 4467 switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) { 4468 case IDR0_HTTU_ACCESS_DIRTY: 4469 smmu->features |= ARM_SMMU_FEAT_HD; 4470 fallthrough; 4471 case IDR0_HTTU_ACCESS: 4472 smmu->features |= ARM_SMMU_FEAT_HA; 4473 } 4474 4475 return acpi_smmu_iort_probe_model(node, smmu); 4476 } 4477 #else 4478 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev, 4479 struct arm_smmu_device *smmu) 4480 { 4481 return -ENODEV; 4482 } 4483 #endif 4484 4485 static int arm_smmu_device_dt_probe(struct platform_device *pdev, 4486 struct arm_smmu_device *smmu) 4487 { 4488 struct device *dev = &pdev->dev; 4489 u32 cells; 4490 int ret = -EINVAL; 4491 4492 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells)) 4493 dev_err(dev, "missing #iommu-cells property\n"); 4494 else if (cells != 1) 4495 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells); 4496 else 4497 ret = 0; 4498 4499 parse_driver_options(smmu); 4500 4501 if (of_dma_is_coherent(dev->of_node)) 4502 smmu->features |= ARM_SMMU_FEAT_COHERENCY; 4503 4504 return ret; 4505 } 4506 4507 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu) 4508 { 4509 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY) 4510 return SZ_64K; 4511 else 4512 return SZ_128K; 4513 } 4514 4515 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start, 4516 resource_size_t size) 4517 { 4518 struct resource res = DEFINE_RES_MEM(start, size); 4519 4520 return devm_ioremap_resource(dev, &res); 4521 } 4522 4523 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) 4524 { 4525 struct list_head rmr_list; 4526 struct iommu_resv_region *e; 4527 4528 INIT_LIST_HEAD(&rmr_list); 4529 iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); 4530 4531 list_for_each_entry(e, &rmr_list, list) { 4532 struct iommu_iort_rmr_data *rmr; 4533 int ret, i; 4534 4535 rmr = container_of(e, struct iommu_iort_rmr_data, rr); 4536 for (i = 0; i < rmr->num_sids; i++) { 4537 ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]); 4538 if (ret) { 4539 dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n", 4540 rmr->sids[i]); 4541 continue; 4542 } 4543 4544 /* 4545 * STE table is not programmed to HW, see 4546 * arm_smmu_initial_bypass_stes() 4547 */ 4548 arm_smmu_make_bypass_ste(smmu, 4549 arm_smmu_get_step_for_sid(smmu, rmr->sids[i])); 4550 } 4551 } 4552 4553 iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); 4554 } 4555 4556 static void arm_smmu_impl_remove(void *data) 4557 { 4558 struct arm_smmu_device *smmu = data; 4559 4560 if (smmu->impl_ops && smmu->impl_ops->device_remove) 4561 smmu->impl_ops->device_remove(smmu); 4562 } 4563 4564 /* 4565 * Probe all the compiled in implementations. 
Each one checks to see if it 4566 * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which 4567 * replaces the callers. Otherwise the original is returned or ERR_PTR. 4568 */ 4569 static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) 4570 { 4571 struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV); 4572 int ret; 4573 4574 if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV)) 4575 new_smmu = tegra241_cmdqv_probe(smmu); 4576 4577 if (new_smmu == ERR_PTR(-ENODEV)) 4578 return smmu; 4579 if (IS_ERR(new_smmu)) 4580 return new_smmu; 4581 4582 ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove, 4583 new_smmu); 4584 if (ret) 4585 return ERR_PTR(ret); 4586 return new_smmu; 4587 } 4588 4589 static int arm_smmu_device_probe(struct platform_device *pdev) 4590 { 4591 int irq, ret; 4592 struct resource *res; 4593 resource_size_t ioaddr; 4594 struct arm_smmu_device *smmu; 4595 struct device *dev = &pdev->dev; 4596 4597 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); 4598 if (!smmu) 4599 return -ENOMEM; 4600 smmu->dev = dev; 4601 4602 if (dev->of_node) { 4603 ret = arm_smmu_device_dt_probe(pdev, smmu); 4604 } else { 4605 ret = arm_smmu_device_acpi_probe(pdev, smmu); 4606 } 4607 if (ret) 4608 return ret; 4609 4610 smmu = arm_smmu_impl_probe(smmu); 4611 if (IS_ERR(smmu)) 4612 return PTR_ERR(smmu); 4613 4614 /* Base address */ 4615 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 4616 if (!res) 4617 return -EINVAL; 4618 if (resource_size(res) < arm_smmu_resource_size(smmu)) { 4619 dev_err(dev, "MMIO region too small (%pr)\n", res); 4620 return -EINVAL; 4621 } 4622 ioaddr = res->start; 4623 4624 /* 4625 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain 4626 * the PMCG registers which are reserved by the PMU driver. 4627 */ 4628 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ); 4629 if (IS_ERR(smmu->base)) 4630 return PTR_ERR(smmu->base); 4631 4632 if (arm_smmu_resource_size(smmu) > SZ_64K) { 4633 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K, 4634 ARM_SMMU_REG_SZ); 4635 if (IS_ERR(smmu->page1)) 4636 return PTR_ERR(smmu->page1); 4637 } else { 4638 smmu->page1 = smmu->base; 4639 } 4640 4641 /* Interrupt lines */ 4642 4643 irq = platform_get_irq_byname_optional(pdev, "combined"); 4644 if (irq > 0) 4645 smmu->combined_irq = irq; 4646 else { 4647 irq = platform_get_irq_byname_optional(pdev, "eventq"); 4648 if (irq > 0) 4649 smmu->evtq.q.irq = irq; 4650 4651 irq = platform_get_irq_byname_optional(pdev, "priq"); 4652 if (irq > 0) 4653 smmu->priq.q.irq = irq; 4654 4655 irq = platform_get_irq_byname_optional(pdev, "gerror"); 4656 if (irq > 0) 4657 smmu->gerr_irq = irq; 4658 } 4659 /* Probe the h/w */ 4660 ret = arm_smmu_device_hw_probe(smmu); 4661 if (ret) 4662 return ret; 4663 4664 /* Initialise in-memory data structures */ 4665 ret = arm_smmu_init_structures(smmu); 4666 if (ret) 4667 return ret; 4668 4669 /* Record our private device structure */ 4670 platform_set_drvdata(pdev, smmu); 4671 4672 /* Check for RMRs and install bypass STEs if any */ 4673 arm_smmu_rmr_install_bypass_ste(smmu); 4674 4675 /* Reset the device */ 4676 ret = arm_smmu_device_reset(smmu); 4677 if (ret) 4678 return ret; 4679 4680 /* And we're up. Go go go! 
*/ 4681 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, 4682 "smmu3.%pa", &ioaddr); 4683 if (ret) 4684 return ret; 4685 4686 ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); 4687 if (ret) { 4688 dev_err(dev, "Failed to register iommu\n"); 4689 iommu_device_sysfs_remove(&smmu->iommu); 4690 return ret; 4691 } 4692 4693 return 0; 4694 } 4695 4696 static void arm_smmu_device_remove(struct platform_device *pdev) 4697 { 4698 struct arm_smmu_device *smmu = platform_get_drvdata(pdev); 4699 4700 iommu_device_unregister(&smmu->iommu); 4701 iommu_device_sysfs_remove(&smmu->iommu); 4702 arm_smmu_device_disable(smmu); 4703 iopf_queue_free(smmu->evtq.iopf); 4704 ida_destroy(&smmu->vmid_map); 4705 } 4706 4707 static void arm_smmu_device_shutdown(struct platform_device *pdev) 4708 { 4709 struct arm_smmu_device *smmu = platform_get_drvdata(pdev); 4710 4711 arm_smmu_device_disable(smmu); 4712 } 4713 4714 static const struct of_device_id arm_smmu_of_match[] = { 4715 { .compatible = "arm,smmu-v3", }, 4716 { }, 4717 }; 4718 MODULE_DEVICE_TABLE(of, arm_smmu_of_match); 4719 4720 static void arm_smmu_driver_unregister(struct platform_driver *drv) 4721 { 4722 arm_smmu_sva_notifier_synchronize(); 4723 platform_driver_unregister(drv); 4724 } 4725 4726 static struct platform_driver arm_smmu_driver = { 4727 .driver = { 4728 .name = "arm-smmu-v3", 4729 .of_match_table = arm_smmu_of_match, 4730 .suppress_bind_attrs = true, 4731 }, 4732 .probe = arm_smmu_device_probe, 4733 .remove_new = arm_smmu_device_remove, 4734 .shutdown = arm_smmu_device_shutdown, 4735 }; 4736 module_driver(arm_smmu_driver, platform_driver_register, 4737 arm_smmu_driver_unregister); 4738 4739 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations"); 4740 MODULE_AUTHOR("Will Deacon <will@kernel.org>"); 4741 MODULE_ALIAS("platform:arm-smmu-v3"); 4742 MODULE_LICENSE("GPL v2"); 4743
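/*
 * Probe/remove flow for reference: arm_smmu_device_probe() parses the DT
 * or ACPI (IORT) description, lets an implementation probe (e.g. the
 * Tegra241 CMDQV) wrap the device, maps register page 0 (and page 1 when
 * present, skipping the IMPLEMENTATION DEFINED region reserved for the
 * PMCG), reads the ID registers, allocates the queues and stream table,
 * installs bypass STEs for any RMRs, resets the hardware and finally
 * registers with the IOMMU core; remove/shutdown unregister and disable
 * the SMMU again. A minimal, purely illustrative DT node sketch follows
 * (the address and interrupt specifiers are placeholders, not taken from
 * any real platform; see the arm,smmu-v3 binding for the authoritative
 * format):
 *
 *	smmu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>;
 *		interrupt-names = "eventq", "gerror", "priq";
 *		#iommu-cells = <1>;
 *		dma-coherent;
 *	};
 */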