// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>
#include <linux/sort.h>
#include <linux/string_choices.h>
#include <kunit/visibility.h>
#include <uapi/linux/iommufd.h>

#include "arm-smmu-v3.h"
#include "../../dma-iommu.h"

static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");

static const struct iommu_ops arm_smmu_ops;
static struct iommu_dirty_ops arm_smmu_dirty_ops;

/* Indices into arm_smmu_msi_cfg[] for the three wired-to-MSI event sources */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};

#define NUM_ENTRY_QWORDS 8
static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));

/* Per-MSI register triplet: doorbell address (CFG0), payload (CFG1), attributes (CFG2) */
static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};

/* Maps a DT property name to an ARM_SMMU_OPT_* quirk bit */
struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);

/* NULL-terminated table of DT quirk properties scanned by parse_driver_options() */
static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
	{ 0, NULL},
};

/* Human-readable names for event queue record types, indexed by EVT_ID_* */
static const char * const event_str[] = {
	[EVT_ID_BAD_STREAMID_CONFIG] = "C_BAD_STREAMID",
	[EVT_ID_STE_FETCH_FAULT] = "F_STE_FETCH",
	[EVT_ID_BAD_STE_CONFIG] = "C_BAD_STE",
	[EVT_ID_STREAM_DISABLED_FAULT] = "F_STREAM_DISABLED",
	[EVT_ID_BAD_SUBSTREAMID_CONFIG] = "C_BAD_SUBSTREAMID",
	[EVT_ID_CD_FETCH_FAULT] = "F_CD_FETCH",
	[EVT_ID_BAD_CD_CONFIG] = "C_BAD_CD",
	[EVT_ID_TRANSLATION_FAULT] = "F_TRANSLATION",
	[EVT_ID_ADDR_SIZE_FAULT] = "F_ADDR_SIZE",
	[EVT_ID_ACCESS_FAULT] = "F_ACCESS",
	[EVT_ID_PERMISSION_FAULT] = "F_PERMISSION",
	[EVT_ID_VMS_FETCH_FAULT] = "F_VMS_FETCH",
};

/* Human-readable names for the 2-bit CLASS field of fault events */
static const char * const event_class_str[] = {
	[0] = "CD fetch",
	[1] = "Stage 1 translation table fetch",
	[2] = "Input address caused fault",
	[3] = "Reserved",
};

static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
static bool arm_smmu_ats_supported(struct arm_smmu_master *master);

/*
 * Scan the device tree node for the quirk properties listed in
 * arm_smmu_options[] and accumulate the matching option bits.
 */
static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
					  arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				   arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

/* Low-level queue manipulation functions */

/*
 * Is there room for 'n' more entries? Free space is computed differently
 * depending on whether prod has wrapped past cons (equal wrap bits) or not.
 */
static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
{
	u32 space, prod, cons;

	prod = Q_IDX(q, q->prod);
	cons = Q_IDX(q, q->cons);

	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
		space = (1 << q->max_n_shift) - (prod - cons);
	else
		space = cons - prod;

	return space >= n;
}

/* Full: indices match but the wrap bits differ (prod is a lap ahead) */
static bool queue_full(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}

/* Empty: indices and wrap bits both match */
static bool queue_empty(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}

/* Has the entry at index 'prod' been consumed (i.e. cons moved past it)? */
static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}

static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}

/* Advance the shadow cons by one, preserving the overflow bit */
static void queue_inc_cons(struct arm_smmu_ll_queue *q)
{
	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;

	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}

/* Acknowledge a queue overflow by copying prod's overflow bit into cons */
static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
{
	struct arm_smmu_ll_queue *llq = &q->llq;

	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
		return;

	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
		    Q_IDX(llq, llq->cons);
	queue_sync_cons_out(q);
}

/*
 * Refresh the shadow prod from hardware. Returns -EOVERFLOW if the
 * hardware reports that the queue overflowed since we last looked.
 */
static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
	u32 prod;
	int ret = 0;

	/*
	 * We can't use the _relaxed() variant here, as we must prevent
	 * speculative reads of the queue before we have determined that
	 * prod has indeed moved.
	 */
	prod = readl(q->prod_reg);

	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
		ret = -EOVERFLOW;

	q->llq.prod = prod;
	return ret;
}

/* Return prod advanced by 'n' entries, preserving the overflow bit */
static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
{
	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;

	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
}

/* Initialise poll state: exponential-backoff spin, or WFE if SEV is supported */
static void queue_poll_init(struct arm_smmu_device *smmu,
			    struct arm_smmu_queue_poll *qp)
{
	qp->delay = 1;
	qp->spin_cnt = 0;
	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
}

/*
 * One polling step: wait via WFE, a bounded cpu_relax() spin, or a doubling
 * udelay(). Returns -ETIMEDOUT once qp->timeout has passed.
 */
static int queue_poll(struct arm_smmu_queue_poll *qp)
{
	if (ktime_compare(ktime_get(), qp->timeout) > 0)
		return -ETIMEDOUT;

	if (qp->wfe) {
		wfe();
	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
		cpu_relax();
	} else {
		udelay(qp->delay);
		qp->delay *= 2;
		qp->spin_cnt = 0;
	}

	return 0;
}

/* Copy an entry into the queue, converting each qword to little-endian */
static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = cpu_to_le64(*src++);
}

/* Copy an entry out of the queue, converting each qword to CPU endianness */
static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = le64_to_cpu(*src++);
}

/* Dequeue one entry into 'ent'; -EAGAIN if the queue is empty */
static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
	if (queue_empty(&q->llq))
		return -EAGAIN;

	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
	queue_inc_cons(&q->llq);
	queue_sync_cons_out(q);
	return 0;
}

/* High-level queue accessors */

/*
 * Encode 'ent' into a raw 16-byte command. Returns 0 on success, -EINVAL
 * for an invalid PRI response code, or -ENOENT for an unknown opcode.
 */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		break;
	case CMDQ_OP_CFGI_CD:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		fallthrough;
	case CMDQ_OP_TLBI_EL2_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_NH_ALL:
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_TLBI_EL2_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_RESUME:
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
		break;
	case CMDQ_OP_CMD_SYNC:
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

/*
 * Give the implementation a chance to route 'ent' to a secondary command
 * queue; fall back to the main cmdq otherwise.
 */
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
					       struct arm_smmu_cmdq_ent *ent)
{
	struct arm_smmu_cmdq *cmdq = NULL;

	if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
		cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);

	return cmdq ?: &smmu->cmdq;
}

/*
 * Secondary queues on Tegra241 CMDQV must busy-poll for completion rather
 * than use SEV/MSI; the main cmdq never needs this.
 */
static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
					     struct arm_smmu_cmdq *cmdq)
{
	if (cmdq == &smmu->cmdq)
		return false;

	return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
}

/*
 * Build the CMD_SYNC that terminates a command list. With MSI polling, the
 * MSI payload is aimed back at the CMD_SYNC entry itself so completion
 * zeroes its first word.
 */
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
					 struct arm_smmu_cmdq *cmdq, u32 prod)
{
	struct arm_smmu_queue *q = &cmdq->q;
	struct arm_smmu_cmdq_ent ent = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	/*
	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
	 * payload, so the write will zero the entire command on that platform.
	 */
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
				   q->ent_dwords * 8;
	}

	arm_smmu_cmdq_build_cmd(cmd, &ent);
	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
}

/*
 * Handle a CMDQ error reported via the CONS register: log the error and,
 * for illegal commands, overwrite the offender with a CMD_SYNC so the
 * queue can make progress when unstalled.
 */
void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
			      struct arm_smmu_cmdq *cmdq)
{
	static const char * const cerror_str[] = {
		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
	};
	struct arm_smmu_queue *q = &cmdq->q;

	int i;
	u64 cmd[CMDQ_ENT_DWORDS];
	u32 cons = readl_relaxed(q->cons_reg);
	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
	struct arm_smmu_cmdq_ent cmd_sync = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
		idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");

	switch (idx) {
	case CMDQ_ERR_CERROR_ABT_IDX:
		dev_err(smmu->dev, "retrying command fetch\n");
		return;
	case CMDQ_ERR_CERROR_NONE_IDX:
		return;
	case CMDQ_ERR_CERROR_ATC_INV_IDX:
		/*
		 * ATC Invalidation Completion timeout. CONS is still pointing
		 * at the CMD_SYNC. Attempt to complete other pending commands
		 * by repeating the CMD_SYNC, though we might well end up back
		 * here since the ATC invalidation may still be pending.
		 */
		return;
	case CMDQ_ERR_CERROR_ILL_IDX:
	default:
		break;
	}

	/*
	 * We may have concurrent producers, so we need to be careful
	 * not to touch any of the shadow cmdq state.
	 */
	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
	dev_err(smmu->dev, "skipping command in error state:\n");
	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);

	/* Convert the erroneous command into a CMD_SYNC */
	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);

	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}

/* Convenience wrapper: skip an erroneous command on the main cmdq */
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
}

/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	/*
	 * When held in exclusive state, the lock counter is set to INT_MIN
	 * so these increments won't hurt as the value will remain negative.
	 * The increment will also signal the exclusive locker that there are
	 * shared waiters.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	/*
	 * Someone else is holding the lock in exclusive state, so wait
	 * for them to finish. Since we already incremented the lock counter,
	 * no exclusive lock can be acquired until we finish. We don't need
	 * the return value since we only care that the exclusive lock is
	 * released (i.e. the lock counter is non-negative).
	 * Once the exclusive locker releases the lock, the sign bit will
	 * be cleared and our increment will make the lock counter positive,
	 * allowing us to proceed.
	 */
	atomic_cond_read_relaxed(&cmdq->lock, VAL > 0);
}

/* Drop a shared hold with RELEASE semantics */
static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}

/*
 * Drop a shared hold unless we appear to be the last holder, in which
 * case return false and let the caller do extra work before unlocking.
 */
static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
	if (atomic_read(&cmdq->lock) == 1)
		return false;

	arm_smmu_cmdq_shared_unlock(cmdq);
	return true;
}

#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})

/*
 * Only clear the sign bit when releasing the exclusive lock this will
 * allow any shared_lock() waiters to proceed without the possibility
 * of entering the exclusive lock in a tight loop.
 */
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_fetch_andnot_release(INT_MIN, &cmdq->lock);		\
	local_irq_restore(flags);					\
})


/*
 * Command queue insertion.
 * This is made fiddly by our attempts to achieve some sort of scalability
 * since there is one queue shared amongst all of the CPUs in the system. If
 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 * then you'll *love* this monstrosity.
 *
 * The basic idea is to split the queue up into ranges of commands that are
 * owned by a given CPU; the owner may not have written all of the commands
 * itself, but is responsible for advancing the hardware prod pointer when
 * the time comes. The algorithm is roughly:
 *
 *	1. Allocate some space in the queue. At this point we also discover
 *	   whether the head of the queue is currently owned by another CPU,
 *	   or whether we are the owner.
 *
 *	2. Write our commands into our allocated slots in the queue.
 *
 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 *
 *	4. If we are an owner:
 *		a. Wait for the previous owner to finish.
 *		b. Mark the queue head as unowned, which tells us the range
 *		   that we are responsible for publishing.
 *		c. Wait for all commands in our owned range to become valid.
 *		d. Advance the hardware prod pointer.
 *		e. Tell the next owner we've finished.
 *
 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
 *	   owner), then we need to stick around until it has completed:
 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 *		   to clear the first 4 bytes.
 *		b. Otherwise, we spin waiting for the hardware cons pointer to
 *		   advance past our command.
 *
 * The devil is in the details, particularly the use of locking for handling
 * SYNC completion and freeing up space in the queue before we think that it is
 * full.
 */
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	/* Walk the bitmap one long-word at a time from sprod up to eprod */
	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}

/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}

/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}

/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_cmdq *cmdq,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(cmdq->q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}

/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_cmdq *cmdq,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	/* The SMMU writes the MSI payload over the first word of our CMD_SYNC */
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}

/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_cmdq *cmdq,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(cmdq->q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 *	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 *			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}

/*
 * Wait for our CMD_SYNC to complete, either via MSI write-back (when
 * supported and not forced to busy-poll) or by watching the cons index.
 */
static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
					 struct arm_smmu_cmdq *cmdq,
					 struct arm_smmu_ll_queue *llq)
{
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
	    !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);

	return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
}

/* Copy 'n' commands into the queue starting at index 'prod', handling wrap */
static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
					u32 prod, int n)
{
	int i;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= prod,
	};

	for (i = 0; i < n; ++i) {
		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];

		prod = queue_inc_prod_n(&llq, i);
		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
	}
}

/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 */
int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
				bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
	unsigned long flags;
	bool owner;
	struct arm_smmu_ll_queue llq, head;
	int ret = 0;

	llq.max_n_shift = cmdq->q.llq.max_n_shift;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
			local_irq_save(flags);
		}

		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync) |
					     CMDQ_PROD_OWNED_FLAG;

		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		llq.val = old;
	} while (1);
	/* We are the owner iff the OWNED flag was clear when we claimed space */
	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;

	/*
	 * 2. Write our commands into the queue
	 * Dependency ordering from the cmpxchg() loop above.
	 */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);

		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
		arm_smmu_cmdq_shared_lock(cmdq);
	}

	/* 3. Mark our slots as valid, ensuring commands are visible first */
	dma_wmb();
	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);

	/* 4. If we are the owner, take control of the SMMU hardware */
	if (owner) {
		/* a. Wait for previous owner to finish */
		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);

		/* b. Stop gathering work by clearing the owned flag */
		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
						   &cmdq->q.llq.atomic.prod);
		prod &= ~CMDQ_PROD_OWNED_FLAG;

		/*
		 * c. Wait for any gathered work to be written to the queue.
		 * Note that we read our own entries so that we have the control
		 * dependency required by (d).
		 */
		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);

		/*
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
		writel_relaxed(prod, cmdq->q.prod_reg);

		/*
		 * e. Tell the next owner we're done
		 * Make sure we've updated the hardware first, so that we don't
		 * race to update prod and potentially move it backwards.
		 */
		atomic_set_release(&cmdq->owner_prod, prod);
	}

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
			arm_smmu_cmdq_shared_unlock(cmdq);
		}
	}

	local_irq_restore(flags);
	return ret;
}

/*
 * Build a single command from 'ent' and submit it, optionally followed by
 * a CMD_SYNC. Returns -EINVAL (with a warning) for unbuildable commands.
 */
static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				     struct arm_smmu_cmdq_ent *ent,
				     bool sync)
{
	u64 cmd[CMDQ_ENT_DWORDS];

	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 ent->opcode);
		return -EINVAL;
	}

	return arm_smmu_cmdq_issue_cmdlist(
		smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
}

/* Submit a single command without a trailing CMD_SYNC */
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				   struct arm_smmu_cmdq_ent *ent)
{
	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
}

/* Submit a single command followed by a CMD_SYNC and wait for completion */
static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
					     struct arm_smmu_cmdq_ent *ent)
{
	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}

/* Reset a batch and bind it to the cmdq chosen for 'ent' */
static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
				     struct arm_smmu_cmdq_batch *cmds,
				     struct arm_smmu_cmdq_ent *ent)
{
	cmds->num = 0;
	cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
}

/*
 * Append 'cmd' to the batch, flushing first if the batch is full, if the
 * bound cmdq can't accept this command type, or if the implementation
 * requires a forced sync near the batch limit.
 */
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
				    struct arm_smmu_cmdq_batch *cmds,
				    struct arm_smmu_cmdq_ent *cmd)
{
	bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
	bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
			  (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
	int index;

	if (force_sync || unsupported_cmd) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
					    cmds->num, true);
		arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
	}

	if (cmds->num == CMDQ_BATCH_ENTRIES) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
					    cmds->num, false);
		arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
	}

	index = cmds->num * CMDQ_ENT_DWORDS;
	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 cmd->opcode);
		return;
	}

	cmds->num++;
}

/* Flush any batched commands, terminated with a CMD_SYNC */
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
					   cmds->num, true);
}

/*
 * IOMMU page-response hook: translate the core's response code into a
 * CMDQ_OP_RESUME for the stalled transaction identified by (sid, stag).
 */
static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
				   struct iommu_page_response *resp)
{
	struct arm_smmu_cmdq_ent cmd = {0};
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	int sid = master->streams[0].id;

	if (WARN_ON(!master->stall_enabled))
		return;

	cmd.opcode		= CMDQ_OP_RESUME;
	cmd.resume.sid		= sid;
	cmd.resume.stag		= resp->grpid;
	switch (resp->code) {
	case IOMMU_PAGE_RESP_INVALID:
	case IOMMU_PAGE_RESP_FAILURE:
		cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
		break;
	case IOMMU_PAGE_RESP_SUCCESS:
		cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
		break;
	default:
		break;
	}

	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
	/*
	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
	 * RESUME consumption guarantees that the stalled transaction will be
	 * terminated... at some point in the future. PRI_RESP is fire and
	 * forget.
	 */
}

/* Invalidation array manipulation functions */

/*
 * Advance to the next entry at or after 'next' with a non-zero users count
 * (zero-user entries are "trash" and skipped). Stores the final index in
 * *idx and returns the entry, or NULL when the end of the array is reached.
 */
static inline struct arm_smmu_inv *
arm_smmu_invs_iter_next(struct arm_smmu_invs *invs, size_t next, size_t *idx)
{
	while (true) {
		if (next >= invs->num_invs) {
			*idx = next;
			return NULL;
		}
		if (!READ_ONCE(invs->inv[next].users)) {
			next++;
			continue;
		}
		*idx = next;
		return &invs->inv[next];
	}
}

/**
 * arm_smmu_invs_for_each_entry - Iterate over all non-trash entries in invs
 * @invs: the base invalidation array
 * @idx: a stack variable of 'size_t', to store the array index
 * @cur: a stack variable of 'struct arm_smmu_inv *'
 */
#define arm_smmu_invs_for_each_entry(invs, idx, cur)                  \
	for (cur = arm_smmu_invs_iter_next(invs, 0, &(idx)); cur;     \
	     cur = arm_smmu_invs_iter_next(invs, idx + 1, &(idx)))

/*
 * Total order on invalidation entries: by owning SMMU, then type, then id,
 * and for ATS entries additionally by ssid.
 */
static int arm_smmu_inv_cmp(const struct arm_smmu_inv *inv_l,
			    const struct arm_smmu_inv *inv_r)
{
	if (inv_l->smmu != inv_r->smmu)
		return cmp_int((uintptr_t)inv_l->smmu, (uintptr_t)inv_r->smmu);
	if (inv_l->type != inv_r->type)
		return cmp_int(inv_l->type, inv_r->type);
	if (inv_l->id != inv_r->id)
		return cmp_int(inv_l->id, inv_r->id);
	if (arm_smmu_inv_is_ats(inv_l))
		return cmp_int(inv_l->ssid, inv_r->ssid);
	return 0;
}

/*
 * Advance both sides of a two-array merge walk and compare the current
 * entries. Returns <0/0/>0 to indicate which side's entry sorts first.
 */
static inline int arm_smmu_invs_iter_next_cmp(struct arm_smmu_invs *invs_l,
					      size_t next_l, size_t *idx_l,
					      struct arm_smmu_invs *invs_r,
					      size_t next_r, size_t *idx_r)
{
	struct arm_smmu_inv *cur_l =
		arm_smmu_invs_iter_next(invs_l, next_l, idx_l);

	/*
	 * We have to update the idx_r manually, because the invs_r cannot call
	 * arm_smmu_invs_iter_next() as the invs_r never sets any users counter.
	 */
	*idx_r = next_r;

	/*
	 * Compare of two sorted arrays items. If one side is past the end of
	 * the array, return the other side to let it run out the iteration.
	 *
	 * If the left entry is empty, return 1 to pick the right entry.
	 * If the right entry is empty, return -1 to pick the left entry.
	 */
	if (!cur_l)
		return 1;
	if (next_r >= invs_r->num_invs)
		return -1;
	return arm_smmu_inv_cmp(cur_l, &invs_r->inv[next_r]);
}

/**
 * arm_smmu_invs_for_each_cmp - Iterate over two sorted arrays computing for
 *                              arm_smmu_invs_merge() or arm_smmu_invs_unref()
 * @invs_l: the base invalidation array
 * @idx_l: a stack variable of 'size_t', to store the base array index
 * @invs_r: the build_invs array as to_merge or to_unref
 * @idx_r: a stack variable of 'size_t', to store the build_invs index
 * @cmp: a stack variable of 'int', to store return value (-1, 0, or 1)
 */
#define arm_smmu_invs_for_each_cmp(invs_l, idx_l, invs_r, idx_r, cmp)          \
	for (idx_l = idx_r = 0,                                                \
	    cmp = arm_smmu_invs_iter_next_cmp(invs_l, 0, &(idx_l),             \
					      invs_r, 0, &(idx_r));            \
	     idx_l < invs_l->num_invs || idx_r < invs_r->num_invs;             \
	     cmp = arm_smmu_invs_iter_next_cmp(                                \
		     invs_l, idx_l + (cmp <= 0 ? 1 : 0), &(idx_l),             \
		     invs_r, idx_r + (cmp >= 0 ? 1 : 0), &(idx_r)))

/**
 * arm_smmu_invs_merge() - Merge @to_merge into @invs and generate a new array
 * @invs: the base invalidation array
 * @to_merge: an array of invalidations to merge
 *
 * Return: a newly allocated array on success, or ERR_PTR
 *
 * This function must be locked and serialized with arm_smmu_invs_unref() and
 * arm_smmu_invs_purge(), but do not lockdep on any lock for KUNIT test.
 *
 * Both @invs and @to_merge must be sorted, to ensure the returned array will be
 * sorted as well.
 *
 * Caller is responsible for freeing the @invs and the returned new one.
 *
 * Entries marked as trash will be purged in the returned array.
 */
VISIBLE_IF_KUNIT
struct arm_smmu_invs *arm_smmu_invs_merge(struct arm_smmu_invs *invs,
					  struct arm_smmu_invs *to_merge)
{
	struct arm_smmu_invs *new_invs;
	struct arm_smmu_inv *new;
	size_t num_invs = 0;
	size_t i, j;
	int cmp;

	/* First pass: count the merged entries to size the allocation */
	arm_smmu_invs_for_each_cmp(invs, i, to_merge, j, cmp)
		num_invs++;

	new_invs = arm_smmu_invs_alloc(num_invs);
	if (!new_invs)
		return ERR_PTR(-ENOMEM);

	/* Second pass: copy entries, bumping users for duplicates (cmp == 0) */
	new = new_invs->inv;
	arm_smmu_invs_for_each_cmp(invs, i, to_merge, j, cmp) {
		if (cmp < 0) {
			/* Only in @invs: carry over unchanged */
			*new = invs->inv[i];
		} else if (cmp == 0) {
			/* In both: carry over with one more user */
			*new = invs->inv[i];
			WRITE_ONCE(new->users, READ_ONCE(new->users) + 1);
		} else {
			/* Only in @to_merge: first user */
			*new = to_merge->inv[j];
			WRITE_ONCE(new->users, 1);
		}

		/*
		 * Check that the new array is sorted. This also validates that
		 * to_merge is sorted.
		 */
		if (new != new_invs->inv)
			WARN_ON_ONCE(arm_smmu_inv_cmp(new - 1, new) == 1);
		if (arm_smmu_inv_is_ats(new))
			new_invs->has_ats = true;
		new++;
	}

	WARN_ON(new != new_invs->inv + new_invs->num_invs);

	return new_invs;
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_merge);

/**
 * arm_smmu_invs_unref() - Find in @invs for all entries in @to_unref, decrease
 *                         the user counts without deletions
 * @invs: the base invalidation array
 * @to_unref: an array of invalidations to decrease their user counts
 *
 * Return: the number of trash entries in the array, for arm_smmu_invs_purge()
 *
 * This function will not fail. Any entry with users=0 will be marked as trash,
 * and caller will be notified about the trashed entry via @to_unref by setting
 * a users=0.
 *
 * All trailing trash entries in the array will be dropped. And the size of the
 * array will be trimmed properly. All trash entries in-between will remain in
 * the @invs until being completely deleted by the next arm_smmu_invs_merge()
 * or an arm_smmu_invs_purge() function call.
 *
 * This function must be locked and serialized with arm_smmu_invs_merge() and
 * arm_smmu_invs_purge(), but do not lockdep on any mutex for KUNIT test.
 *
 * Note that the final @invs->num_invs might not reflect the actual number of
 * invalidations due to trash entries. Any reader should take the read lock to
 * iterate each entry and check its users counter till the last entry.
 */
VISIBLE_IF_KUNIT
void arm_smmu_invs_unref(struct arm_smmu_invs *invs,
			 struct arm_smmu_invs *to_unref)
{
	unsigned long flags;
	size_t num_invs = 0;	/* index just past the last live entry */
	size_t i, j;
	int cmp;

	arm_smmu_invs_for_each_cmp(invs, i, to_unref, j, cmp) {
		if (cmp < 0) {
			/* not found in to_unref, leave alone */
			num_invs = i + 1;
		} else if (cmp == 0) {
			int users = READ_ONCE(invs->inv[i].users) - 1;

			if (WARN_ON(users < 0))
				continue;

			/* same item */
			WRITE_ONCE(invs->inv[i].users, users);
			if (users) {
				WRITE_ONCE(to_unref->inv[j].users, 1);
				num_invs = i + 1;
				continue;
			}

			/* Notify the caller about the trash entry */
			WRITE_ONCE(to_unref->inv[j].users, 0);
			invs->num_trashes++;
		} else {
			/* item in to_unref is not in invs or already a trash */
			WARN_ON(true);
		}
	}

	/* Exclude any trailing trash */
	invs->num_trashes -= invs->num_invs - num_invs;

	/* The lock is required to fence concurrent ATS operations. */
	write_lock_irqsave(&invs->rwlock, flags);
	WRITE_ONCE(invs->num_invs, num_invs); /* Remove trailing trash entries */
	write_unlock_irqrestore(&invs->rwlock, flags);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_unref);

/**
 * arm_smmu_invs_purge() - Purge all the trash entries in the @invs
 * @invs: the base invalidation array
 *
 * Return: a newly allocated array on success removing all the trash entries, or
 *         NULL if there is no trash entry in the array or if allocation failed
 *
 * This function must be locked and serialized with arm_smmu_invs_merge() and
 * arm_smmu_invs_unref(), but do not lockdep on any lock for KUNIT test.
 *
 * Caller is responsible for freeing the @invs and the returned new one.
 */
VISIBLE_IF_KUNIT
struct arm_smmu_invs *arm_smmu_invs_purge(struct arm_smmu_invs *invs)
{
	struct arm_smmu_invs *new_invs;
	struct arm_smmu_inv *inv;
	size_t i, num_invs = 0;

	if (WARN_ON(invs->num_invs < invs->num_trashes))
		return NULL;
	if (!invs->num_invs || !invs->num_trashes)
		return NULL;

	new_invs = arm_smmu_invs_alloc(invs->num_invs - invs->num_trashes);
	if (!new_invs)
		return NULL;

	/* Copy only the live entries; the iterator skips trash (users == 0) */
	arm_smmu_invs_for_each_entry(invs, i, inv) {
		new_invs->inv[num_invs] = *inv;
		if (arm_smmu_inv_is_ats(inv))
			new_invs->has_ats = true;
		num_invs++;
	}

	WARN_ON(num_invs != new_invs->num_invs);
	return new_invs;
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_purge);

/* Context descriptor manipulation functions */

/*
 * Based on the value of ent report which bits of the STE the HW will access. It
 * would be nice if this was complete according to the spec, but minimally it
 * has to capture the bits this driver uses.
 */
VISIBLE_IF_KUNIT
void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
{
	unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));

	used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
	if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
		return;

	used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);

	/* S1 translates */
	if (cfg & BIT(0)) {
		used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
					    STRTAB_STE_0_S1CTXPTR_MASK |
					    STRTAB_STE_0_S1CDMAX);
		used_bits[1] |=
			cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
				    STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
				    STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
				    STRTAB_STE_1_EATS | STRTAB_STE_1_MEV);
		used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);

		/*
		 * See 13.5 Summary of attribute/permission configuration fields
		 * for the SHCFG behavior.
		 */
		if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
		    STRTAB_STE_1_S1DSS_BYPASS)
			used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
	}

	/* S2 translates */
	if (cfg & BIT(1)) {
		used_bits[1] |=
			cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
				    STRTAB_STE_1_SHCFG | STRTAB_STE_1_MEV);
		used_bits[2] |=
			cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
				    STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
				    STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S |
				    STRTAB_STE_2_S2R);
		used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
	}

	if (cfg == STRTAB_STE_0_CFG_BYPASS)
		used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);

/*
 * Report bits that are used by both @cur and @target but may nevertheless be
 * updated without a break-before-make sequence ("safe" bits).
 */
VISIBLE_IF_KUNIT
void arm_smmu_get_ste_update_safe(const __le64 *cur, const __le64 *target,
				  __le64 *safe_bits)
{
	const __le64 eats_s1chk =
		FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_S1CHK);
	const __le64 eats_trans =
		FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);

	/*
	 * When an STE changes EATS_TRANS, the sequencing code in the attach
	 * logic already will have the PCI cap for ATS disabled. Thus at this
	 * moment we can expect that the device will not generate ATS queries
	 * and so we don't care about the sequencing of EATS. The purpose of
	 * EATS_TRANS is to protect the system from hostile untrusted devices
	 * that issue ATS when the PCI config space is disabled. However, if
	 * EATS_TRANS is being changed, then we must have already trusted the
	 * device as the EATS_TRANS security block is being disabled.
	 *
	 * Note: now the EATS_TRANS update is moved to the first entry_set().
	 * Changing S2S and EATS might transiently result in S2S=1 and EATS=1
	 * which is a bad STE (see "5.2 Stream Table Entry"). In such a case,
	 * we can't do a hitless update. Also, it should not be added to the
	 * safe bits with STRTAB_STE_1_EATS_S1CHK, because EATS=0b11 would be
	 * effectively an errant 0b00 configuration.
	 */
	if (!((cur[1] | target[1]) & cpu_to_le64(eats_s1chk)) &&
	    !((cur[2] | target[2]) & cpu_to_le64(STRTAB_STE_2_S2S)))
		safe_bits[1] |= cpu_to_le64(eats_trans);

	/*
	 * MEV does not meaningfully impact the operation of the HW, it only
	 * changes how many fault events are generated, thus we can relax it
	 * when computing the ordering. The spec notes the device can act like
	 * MEV=1 anyhow:
	 *
	 *   Note: Software must expect, and be able to deal with, coalesced
	 *   fault records even when MEV == 0.
	 */
	safe_bits[1] |= cpu_to_le64(STRTAB_STE_1_MEV);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_update_safe);

/*
 * Figure out if we can do a hitless update of entry to become target. Returns a
 * bit mask where 1 indicates that qword needs to be set disruptively.
 * unused_update is an intermediate value of entry that has unused bits set to
 * their new values.
 */
static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
				    const __le64 *entry, const __le64 *target,
				    __le64 *unused_update)
{
	__le64 target_used[NUM_ENTRY_QWORDS] = {};
	__le64 cur_used[NUM_ENTRY_QWORDS] = {};
	__le64 safe[NUM_ENTRY_QWORDS] = {};
	u8 used_qword_diff = 0;
	unsigned int i;

	writer->ops->get_used(entry, cur_used);
	writer->ops->get_used(target, target_used);
	if (writer->ops->get_update_safe)
		writer->ops->get_update_safe(entry, target, safe);

	for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
		/*
		 * Safe is only used for bits that are used by both entries,
		 * otherwise it is sequenced according to the unused entry.
		 */
		safe[i] &= target_used[i] & cur_used[i];

		/*
		 * Check that masks are up to date, the make functions are not
		 * allowed to set a bit to 1 if the used function doesn't say it
		 * is used.
		 */
		WARN_ON_ONCE(target[i] & ~target_used[i]);

		/* Bits can change because they are not currently being used */
		cur_used[i] &= ~safe[i];
		unused_update[i] = (entry[i] & cur_used[i]) |
				   (target[i] & ~cur_used[i]);
		/*
		 * Each bit indicates that a used bit in a qword needs to be
		 * changed after unused_update is applied.
		 */
		if ((unused_update[i] & target_used[i]) != target[i])
			used_qword_diff |= 1 << i;
	}
	return used_qword_diff;
}

/*
 * Copy @len qwords of @target into @entry starting at qword @start, then
 * issue one sync via the writer ops if anything actually changed.
 */
static void entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
		      const __le64 *target, unsigned int start,
		      unsigned int len)
{
	bool changed = false;
	unsigned int i;

	for (i = start; len != 0; len--, i++) {
		if (entry[i] != target[i]) {
			WRITE_ONCE(entry[i], target[i]);
			changed = true;
		}
	}

	if (changed)
		writer->ops->sync(writer);
}

/*
 * Update the STE/CD to the target configuration. The transition from the
 * current entry to the target entry takes place over multiple steps that
 * attempts to make the transition hitless if possible. This function takes care
 * not to create a situation where the HW can perceive a corrupted entry. HW is
 * only required to have a 64 bit atomicity with stores from the CPU, while
 * entries are many 64 bit values big.
 *
 * The difference between the current value and the target value is analyzed to
 * determine which of three updates are required - disruptive, hitless or no
 * change.
 *
 * In the most general disruptive case we can make any update in three steps:
 *  - Disrupting the entry (V=0)
 *  - Fill now unused qwords, except qword 0 which contains V
 *  - Make qword 0 have the final value and valid (V=1) with a single 64
 *    bit store
 *
 * However this disrupts the HW while it is happening. There are several
 * interesting cases where a STE/CD can be updated without disturbing the HW
 * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
 * because the used bits don't intersect. We can detect this by calculating how
 * many 64 bit values need update after adjusting the unused bits and skip the
 * V=0 process. This relies on the IGNORED behavior described in the
 * specification.
 */
VISIBLE_IF_KUNIT
void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
			  const __le64 *target)
{
	__le64 unused_update[NUM_ENTRY_QWORDS];
	u8 used_qword_diff;

	/*
	 * Many of the entry structures have pointers to other structures that
	 * need to have their updates be visible before any writes of the entry
	 * happen.
	 */
	dma_wmb();

	used_qword_diff =
		arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
	if (hweight8(used_qword_diff) == 1) {
		/*
		 * Only one qword needs its used bits to be changed. This is a
		 * hitless update, update all bits the current STE/CD is
		 * ignoring to their new values, then update a single "critical
		 * qword" to change the STE/CD and finally 0 out any bits that
		 * are now unused in the target configuration.
		 */
		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;

		/*
		 * Skip writing unused bits in the critical qword since we'll be
		 * writing it in the next step anyways. This can save a sync
		 * when the only change is in that qword.
		 */
		unused_update[critical_qword_index] =
			entry[critical_qword_index];
		entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
		entry_set(writer, entry, target, critical_qword_index, 1);
		entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
	} else if (used_qword_diff) {
		/*
		 * At least two qwords need their inuse bits to be changed. This
		 * requires a breaking update, zero the V bit, write all qwords
		 * but 0, then set qword 0
		 */
		unused_update[0] = 0;
		entry_set(writer, entry, unused_update, 0, 1);
		entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
		entry_set(writer, entry, target, 0, 1);
	} else {
		/*
		 * No inuse bit changed, though safe bits may have changed.
		 */
		entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);

/*
 * Invalidate cached copies of the CD at @ssid by batching a CMDQ_OP_CFGI_CD
 * for every stream ID of @master, then submitting with a CMD_SYNC.
 */
static void arm_smmu_sync_cd(struct arm_smmu_master *master,
			     int ssid, bool leaf)
{
	size_t i;
	struct arm_smmu_cmdq_batch cmds;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
		.cfgi	= {
			.ssid	= ssid,
			.leaf	= leaf,
		},
	};

	arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
	for (i = 0; i < master->num_streams; i++) {
		cmd.cfgi.sid = master->streams[i].id;
		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
	}

	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}

/* Install a valid L1 CD table descriptor pointing at the L2 table at @l2ptr_dma */
static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
				      dma_addr_t l2ptr_dma)
{
	u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V;

	/* The HW has 64 bit atomicity with stores to the L2 CD table */
	WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
}

/* Extract the L2 table DMA address from an L1 CD table descriptor */
static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src)
{
	return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK;
}

/*
 * Return a pointer to the CD for @ssid, or NULL if the CD table (or the
 * required L2 table in the 2-level format) has not been allocated.
 */
struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
					u32 ssid)
{
	struct arm_smmu_cdtab_l2 *l2;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	if (!arm_smmu_cdtab_allocated(cd_table))
		return NULL;

	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
		return &cd_table->linear.table[ssid];

	l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
	if (!l2)
		return NULL;
	return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)];
}

/*
 * As arm_smmu_get_cd_ptr(), but allocate the CD table and (for the 2-level
 * format) the covering L2 table on demand. May sleep; requires the group
 * mutex. Returns NULL on allocation failure.
 */
static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
						 u32 ssid)
{
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
	struct arm_smmu_device *smmu = master->smmu;

	might_sleep();
	iommu_group_mutex_assert(master->dev);

	if (!arm_smmu_cdtab_allocated(cd_table)) {
		if (arm_smmu_alloc_cd_tables(master))
			return NULL;
	}

	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
		unsigned int idx = arm_smmu_cdtab_l1_idx(ssid);
		struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx];

		if (!*l2ptr) {
			dma_addr_t l2ptr_dma;

			*l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr),
						    &l2ptr_dma, GFP_KERNEL);
			if (!*l2ptr)
				return NULL;

			arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx],
						  l2ptr_dma);
			/* An invalid L1CD can be cached */
			arm_smmu_sync_cd(master, ssid, false);
		}
	}
	return arm_smmu_get_cd_ptr(master, ssid);
}

/* Entry writer specialized for CDs; @ssid selects the CD being written */
struct arm_smmu_cd_writer {
	struct arm_smmu_entry_writer writer;
	unsigned int ssid;
};

/*
 * Report which bits of the CD the HW will access, in the same style as
 * arm_smmu_get_ste_used(): only V when invalid, otherwise everything except
 * the TTB0-related fields when EPD0 disables table walks for TTB0.
 */
VISIBLE_IF_KUNIT
void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
{
	used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
	if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
		return;
	memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));

	/*
	 * If EPD0 is set by the make function it means
	 * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
	 */
	if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
		used_bits[0] &= ~cpu_to_le64(
			CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
			CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
			CTXDESC_CD_0_TCR_SH0);
		used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);

/* Sync callback for the CD writer: invalidate the cached leaf CD */
static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
	struct arm_smmu_cd_writer *cd_writer =
		container_of(writer, struct arm_smmu_cd_writer, writer);

	arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
}

static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
	.sync = arm_smmu_cd_writer_sync_entry,
	.get_used = arm_smmu_get_cd_used,
};

/*
 * Program the CD at @cdptr to @target using the hitless-update machinery,
 * keeping the used_ssids count in sync when a non-PASID-0 CD transitions
 * between valid and invalid.
 */
void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
			     struct arm_smmu_cd *cdptr,
			     const struct arm_smmu_cd *target)
{
	bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
	bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
	struct arm_smmu_cd_writer cd_writer = {
		.writer = {
			.ops = &arm_smmu_cd_writer_ops,
			.master = master,
		},
		.ssid = ssid,
	};

	if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) {
		if (cur_valid)
			master->cd_table.used_ssids--;
		else
			master->cd_table.used_ssids++;
	}

	arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
}

/*
 * Build a stage-1 CD for @smmu_domain from its io-pgtable configuration.
 * TTB1 walks are disabled (EPD1); stall (CD.S) is set per the master.
 */
void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
			 struct arm_smmu_master *master,
			 struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
	const struct io_pgtable_cfg *pgtbl_cfg =
		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
		&pgtbl_cfg->arm_lpae_s1_cfg.tcr;

	memset(target, 0, sizeof(*target));

	target->data[0] = cpu_to_le64(
		FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
		FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
		FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
		FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
		FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
#ifdef __BIG_ENDIAN
		CTXDESC_CD_0_ENDI |
#endif
		CTXDESC_CD_0_TCR_EPD1 |
		CTXDESC_CD_0_V |
		FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
		CTXDESC_CD_0_AA64 |
		(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
		CTXDESC_CD_0_R |
		CTXDESC_CD_0_A |
		CTXDESC_CD_0_ASET |
		FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
		);

	/* To enable dirty flag update, set both Access flag and dirty state update */
	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
		target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
					       CTXDESC_CD_0_TCR_HD);

	target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
				      CTXDESC_CD_1_TTB0_MASK);
	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);

/*
 * Invalidate the CD at @ssid by writing an all-zero (V=0) CD. No-op if the
 * CD table was never allocated.
 */
void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
{
	struct arm_smmu_cd target = {};
	struct arm_smmu_cd *cdptr;

	if (!arm_smmu_cdtab_allocated(&master->cd_table))
		return;
	cdptr = arm_smmu_get_cd_ptr(master, ssid);
	if (WARN_ON(!cdptr))
		return;
	arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
}

/*
 * Allocate the CD table for @master: linear when small enough or when the HW
 * lacks 2-level CD table support, otherwise a 2-level (64K L2) layout with
 * L2 tables allocated lazily by arm_smmu_alloc_cd_ptr().
 */
static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
{
	int ret;
	size_t l1size;
	size_t max_contexts;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	cd_table->s1cdmax = master->ssid_bits;
	max_contexts = 1 << cd_table->s1cdmax;

	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
	    max_contexts <= CTXDESC_L2_ENTRIES) {
		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
		cd_table->linear.num_ents = max_contexts;

		l1size = max_contexts * sizeof(struct arm_smmu_cd);
		cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size,
							    &cd_table->cdtab_dma,
							    GFP_KERNEL);
		if (!cd_table->linear.table)
			return -ENOMEM;
	} else {
		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
		cd_table->l2.num_l1_ents =
			DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES);

		cd_table->l2.l2ptrs = kzalloc_objs(*cd_table->l2.l2ptrs,
						   cd_table->l2.num_l1_ents);
		if (!cd_table->l2.l2ptrs)
			return -ENOMEM;

		l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1);
		cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
							&cd_table->cdtab_dma,
							GFP_KERNEL);
		if (!cd_table->l2.l1tab) {
			ret = -ENOMEM;
			goto err_free_l2ptrs;
		}
	}
	return 0;

err_free_l2ptrs:
	kfree(cd_table->l2.l2ptrs);
	cd_table->l2.l2ptrs = NULL;
	return ret;
}

/*
 * Free the CD table allocated by arm_smmu_alloc_cd_tables(), including any
 * lazily-allocated L2 tables in the 2-level format.
 */
static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
{
	int i;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
		for (i = 0; i < cd_table->l2.num_l1_ents; i++) {
			if (!cd_table->l2.l2ptrs[i])
				continue;

			dma_free_coherent(smmu->dev,
					  sizeof(*cd_table->l2.l2ptrs[i]),
					  cd_table->l2.l2ptrs[i],
					  arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i]));
		}
		kfree(cd_table->l2.l2ptrs);

		dma_free_coherent(smmu->dev,
				  cd_table->l2.num_l1_ents *
					  sizeof(struct arm_smmu_cdtab_l1),
				  cd_table->l2.l1tab, cd_table->cdtab_dma);
	} else {
		dma_free_coherent(smmu->dev,
				  cd_table->linear.num_ents *
					  sizeof(struct arm_smmu_cd),
				  cd_table->linear.table, cd_table->cdtab_dma);
	}
}

/* Stream table manipulation functions */

/* Install a valid L1 stream table descriptor pointing at the L2 table */
static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst,
					  dma_addr_t l2ptr_dma)
{
	u64 val = 0;

	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1);
	val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;

	/* The HW has 64 bit atomicity with stores to the L2 STE table */
	WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
}

/* Entry writer specialized for STEs; @sid selects the STE being written */
struct arm_smmu_ste_writer {
	struct arm_smmu_entry_writer writer;
	u32 sid;
};

/* Sync callback for the STE writer: CFGI_STE followed by a CMD_SYNC */
static void
arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
	struct arm_smmu_ste_writer *ste_writer =
		container_of(writer, struct arm_smmu_ste_writer, writer);
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_STE,
		.cfgi	= {
			.sid	= ste_writer->sid,
			.leaf	= true,
		},
	};

	arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
}

static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
	.sync = arm_smmu_ste_writer_sync_entry,
	.get_used = arm_smmu_get_ste_used,
	.get_update_safe = arm_smmu_get_ste_update_safe,
};

/*
 * Program the live STE at @ste to @target via arm_smmu_write_entry(), then
 * optionally issue a PREFETCH_CFG (skipped on HW with the broken-prefetch
 * quirk).
 */
static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
			       struct arm_smmu_ste *ste,
			       const struct arm_smmu_ste *target)
{
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ste_writer ste_writer = {
		.writer = {
			.ops = &arm_smmu_ste_writer_ops,
			.master = master,
		},
		.sid = sid,
	};

	arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);

	/* It's likely that we'll want to use the new STE soon */
	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
		struct arm_smmu_cmdq_ent
			prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
					 .prefetch = {
						 .sid = sid,
					 } };

		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
	}
}

/* Build a valid STE that aborts all incoming transactions */
void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
{
	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);

/*
 * Build a bypass STE; incoming attributes are preserved via SHCFG when the
 * HW supports attribute overrides.
 */
VISIBLE_IF_KUNIT
void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
			      struct arm_smmu_ste *target)
{
	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));

	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
		target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
							 STRTAB_STE_1_SHCFG_INCOMING));
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);

/*
 * Build a stage-1 translating STE pointing at the master's CD table.
 * @s1dss controls the substream-ID-disabled behavior; @ats_enabled selects
 * EATS_TRANS.
 */
VISIBLE_IF_KUNIT
void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
			       struct arm_smmu_master *master, bool ats_enabled,
			       unsigned int s1dss)
{
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
	struct arm_smmu_device *smmu = master->smmu;

	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
		FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
		(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
		FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));

	target->data[1] = cpu_to_le64(
		FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
		FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
		FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
		FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
		((smmu->features & ARM_SMMU_FEAT_STALLS &&
		  !master->stall_enabled) ?
			 STRTAB_STE_1_S1STALLD :
			 0) |
		FIELD_PREP(STRTAB_STE_1_EATS,
			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));

	if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) &&
	    s1dss == STRTAB_STE_1_S1DSS_BYPASS)
		target->data[1] |= cpu_to_le64(FIELD_PREP(
			STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));

	if (smmu->features & ARM_SMMU_FEAT_E2H) {
		/*
		 * To support BTM the streamworld needs to match the
		 * configuration of the CPU so that the ASID broadcasts are
		 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
		 * or NS-EL1 (guest). Since an SVA domain can be installed in a
		 * PASID this should always use a BTM compatible configuration
		 * if the HW supports it.
		 */
		target->data[1] |= cpu_to_le64(
			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
	} else {
		target->data[1] |= cpu_to_le64(
			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));

		/*
		 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
		 * arm_smmu_domain_alloc_id()
		 */
		target->data[2] =
			cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);

/*
 * Build a stage-2 translating STE for @smmu_domain from its io-pgtable
 * configuration; S2S (stall) follows the master and S2FWB/SHCFG follow the
 * HW feature bits.
 */
void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
				 struct arm_smmu_master *master,
				 struct arm_smmu_domain *smmu_domain,
				 bool ats_enabled)
{
	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
	const struct io_pgtable_cfg *pgtbl_cfg =
		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
		&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	u64 vtcr_val;
	struct arm_smmu_device *smmu = master->smmu;

	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));

	target->data[1] = cpu_to_le64(
		FIELD_PREP(STRTAB_STE_1_EATS,
			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));

	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
		target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB);
	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
		target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
							  STRTAB_STE_1_SHCFG_INCOMING));

	vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
	target->data[2] = cpu_to_le64(
		FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
		FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
		STRTAB_STE_2_S2AA64 |
#ifdef __BIG_ENDIAN
		STRTAB_STE_2_S2ENDI |
#endif
		STRTAB_STE_2_S2PTW |
		(master->stall_enabled ? STRTAB_STE_2_S2S : 0) |
		STRTAB_STE_2_S2R);

	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
				      STRTAB_STE_3_S2TTB_MASK);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);

/*
 * This can safely directly manipulate the STE memory without a sync sequence
 * because the STE table has not been installed in the SMMU yet.
 */
/*
 * Point every STE in the (not yet live) table at the "abort" configuration
 * so that unconfigured StreamIDs fault instead of bypassing translation.
 */
static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
				       unsigned int nent)
{
	unsigned int i;

	for (i = 0; i < nent; ++i) {
		arm_smmu_make_abort_ste(strtab);
		strtab++;
	}
}

/*
 * Lazily allocate the level-2 stream table span covering @sid and hook it
 * into the level-1 table. Idempotent: returns 0 immediately if the span is
 * already present. The allocation is devm-managed coherent DMA memory, so
 * it is released with the SMMU device.
 */
static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
	dma_addr_t l2ptr_dma;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	struct arm_smmu_strtab_l2 **l2table;

	l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)];
	if (*l2table)
		return 0;

	*l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table),
				       &l2ptr_dma, GFP_KERNEL);
	if (!*l2table) {
		dev_err(smmu->dev,
			"failed to allocate l2 stream table for SID %u\n",
			sid);
		return -ENOMEM;
	}

	/* New STEs must abort before the table becomes reachable via L1 */
	arm_smmu_init_initial_stes((*l2table)->stes,
				   ARRAY_SIZE((*l2table)->stes));
	arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)],
				      l2ptr_dma);
	return 0;
}

/* rb-tree comparator: @lhs is a bare u32 StreamID key */
static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
{
	struct arm_smmu_stream *stream_rhs =
		rb_entry(rhs, struct arm_smmu_stream, node);
	const u32 *sid_lhs = lhs;

	if (*sid_lhs < stream_rhs->id)
		return -1;
	if (*sid_lhs > stream_rhs->id)
		return 1;
	return 0;
}

/* node-vs-node adapter around arm_smmu_streams_cmp_key() for insertion */
static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
				     const struct rb_node *rhs)
{
	return arm_smmu_streams_cmp_key(
		&rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
}

/*
 * Look up the master owning @sid, or NULL if the SID is not registered.
 * Caller must hold smmu->streams_mutex (enforced via lockdep).
 */
static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
{
	struct rb_node *node;

	lockdep_assert_held(&smmu->streams_mutex);

	node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
	if (!node)
		return NULL;
	return rb_entry(node, struct arm_smmu_stream, node)->master;
}

/* IRQ and event handlers */
static
void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw,
			   struct arm_smmu_event *event)
{
	/*
	 * Unpack one raw EVTQ record into @event. If the SID belongs to a
	 * registered master, take a device reference (event->dev); the
	 * caller is responsible for dropping it with put_device().
	 */
	struct arm_smmu_master *master;

	event->id = FIELD_GET(EVTQ_0_ID, raw[0]);
	event->sid = FIELD_GET(EVTQ_0_SID, raw[0]);
	event->ssv = FIELD_GET(EVTQ_0_SSV, raw[0]);
	/* SSID field is only meaningful when the SSV bit is set */
	event->ssid = event->ssv ? FIELD_GET(EVTQ_0_SSID, raw[0]) : IOMMU_NO_PASID;
	event->privileged = FIELD_GET(EVTQ_1_PnU, raw[1]);
	event->instruction = FIELD_GET(EVTQ_1_InD, raw[1]);
	event->s2 = FIELD_GET(EVTQ_1_S2, raw[1]);
	event->read = FIELD_GET(EVTQ_1_RnW, raw[1]);
	event->stag = FIELD_GET(EVTQ_1_STAG, raw[1]);
	event->stall = FIELD_GET(EVTQ_1_STALL, raw[1]);
	event->class = FIELD_GET(EVTQ_1_CLASS, raw[1]);
	event->iova = FIELD_GET(EVTQ_2_ADDR, raw[2]);
	event->ipa = raw[3] & EVTQ_3_IPA;
	event->fetch_addr = raw[3] & EVTQ_3_FETCH_ADDR;
	event->ttrnw = FIELD_GET(EVTQ_1_TT_READ, raw[1]);
	event->class_tt = false;
	event->dev = NULL;

	if (event->id == EVT_ID_PERMISSION_FAULT)
		event->class_tt = (event->class == EVTQ_1_CLASS_TT);

	mutex_lock(&smmu->streams_mutex);
	master = arm_smmu_find_master(smmu, event->sid);
	if (master)
		event->dev = get_device(master->dev);
	mutex_unlock(&smmu->streams_mutex);
}

/*
 * Try to consume a decoded event. Stall-capable faults are forwarded as I/O
 * page faults; non-stall stage-1 events on a master with a vmaster are
 * forwarded to the VMM. Returns 0 when the event was delivered, -EINVAL for
 * an unknown SID, and -EOPNOTSUPP when nothing could handle it (the caller
 * then dumps the event for diagnosis).
 */
static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt,
				 struct arm_smmu_event *event)
{
	int ret = 0;
	u32 perm = 0;
	struct arm_smmu_master *master;
	struct iopf_fault fault_evt = { };
	struct iommu_fault *flt = &fault_evt.fault;

	/* Only this set of event IDs is eligible for software handling */
	switch (event->id) {
	case EVT_ID_BAD_STE_CONFIG:
	case EVT_ID_STREAM_DISABLED_FAULT:
	case EVT_ID_BAD_SUBSTREAMID_CONFIG:
	case EVT_ID_BAD_CD_CONFIG:
	case EVT_ID_TRANSLATION_FAULT:
	case EVT_ID_ADDR_SIZE_FAULT:
	case EVT_ID_ACCESS_FAULT:
	case EVT_ID_PERMISSION_FAULT:
		break;
	default:
		return -EOPNOTSUPP;
	}

	if (event->stall) {
		/* Build the iommu_fault page request from the event bits */
		if (event->read)
			perm |= IOMMU_FAULT_PERM_READ;
		else
			perm |= IOMMU_FAULT_PERM_WRITE;

		if (event->instruction)
			perm |= IOMMU_FAULT_PERM_EXEC;

		if (event->privileged)
			perm |= IOMMU_FAULT_PERM_PRIV;

		flt->type = IOMMU_FAULT_PAGE_REQ;
		flt->prm = (struct iommu_fault_page_request){
			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
			.grpid = event->stag,
			.perm = perm,
			.addr = event->iova,
		};

		if (event->ssv) {
			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
			flt->prm.pasid = event->ssid;
		}
	}

	mutex_lock(&smmu->streams_mutex);
	master = arm_smmu_find_master(smmu, event->sid);
	if (!master) {
		ret = -EINVAL;
		goto out_unlock;
	}

	if (event->stall)
		ret = iommu_report_device_fault(master->dev, &fault_evt);
	else if (master->vmaster && !event->s2)
		ret = arm_vmaster_report_event(master->vmaster, evt);
	else
		ret = -EOPNOTSUPP; /* Unhandled events should be pinned */
out_unlock:
	mutex_unlock(&smmu->streams_mutex);
	return ret;
}

/* Dump the undecoded event record, one dword per line */
static void arm_smmu_dump_raw_event(struct arm_smmu_device *smmu, u64 *raw,
				    struct arm_smmu_event *event)
{
	int i;

	dev_err(smmu->dev, "event 0x%02x received:\n", event->id);

	for (i = 0; i < EVTQ_ENT_DWORDS; ++i)
		dev_err(smmu->dev, "\t0x%016llx\n", raw[i]);
}

/* Guard both against IDs past the table and against gaps within it */
#define ARM_SMMU_EVT_KNOWN(e) ((e)->id < ARRAY_SIZE(event_str) && event_str[(e)->id])
#define ARM_SMMU_LOG_EVT_STR(e) ARM_SMMU_EVT_KNOWN(e) ? event_str[(e)->id] : "UNKNOWN"
#define ARM_SMMU_LOG_CLIENT(e) (e)->dev ?
dev_name((e)->dev) : "(unassigned sid)" 2217 2218 static void arm_smmu_dump_event(struct arm_smmu_device *smmu, u64 *raw, 2219 struct arm_smmu_event *evt, 2220 struct ratelimit_state *rs) 2221 { 2222 if (!__ratelimit(rs)) 2223 return; 2224 2225 arm_smmu_dump_raw_event(smmu, raw, evt); 2226 2227 switch (evt->id) { 2228 case EVT_ID_TRANSLATION_FAULT: 2229 case EVT_ID_ADDR_SIZE_FAULT: 2230 case EVT_ID_ACCESS_FAULT: 2231 case EVT_ID_PERMISSION_FAULT: 2232 dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x iova: %#llx ipa: %#llx", 2233 ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), 2234 evt->sid, evt->ssid, evt->iova, evt->ipa); 2235 2236 dev_err(smmu->dev, "%s %s %s %s \"%s\"%s%s stag: %#x", 2237 evt->privileged ? "priv" : "unpriv", 2238 evt->instruction ? "inst" : "data", 2239 str_read_write(evt->read), 2240 evt->s2 ? "s2" : "s1", event_class_str[evt->class], 2241 evt->class_tt ? (evt->ttrnw ? " ttd_read" : " ttd_write") : "", 2242 evt->stall ? " stall" : "", evt->stag); 2243 2244 break; 2245 2246 case EVT_ID_STE_FETCH_FAULT: 2247 case EVT_ID_CD_FETCH_FAULT: 2248 case EVT_ID_VMS_FETCH_FAULT: 2249 dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x fetch_addr: %#llx", 2250 ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), 2251 evt->sid, evt->ssid, evt->fetch_addr); 2252 2253 break; 2254 2255 default: 2256 dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x", 2257 ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), 2258 evt->sid, evt->ssid); 2259 } 2260 } 2261 2262 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) 2263 { 2264 u64 evt[EVTQ_ENT_DWORDS]; 2265 struct arm_smmu_event event = {0}; 2266 struct arm_smmu_device *smmu = dev; 2267 struct arm_smmu_queue *q = &smmu->evtq.q; 2268 struct arm_smmu_ll_queue *llq = &q->llq; 2269 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, 2270 DEFAULT_RATELIMIT_BURST); 2271 2272 do { 2273 while (!queue_remove_raw(q, evt)) { 2274 arm_smmu_decode_event(smmu, evt, &event); 2275 
			/* Events nothing consumed are dumped (ratelimited) */
			if (arm_smmu_handle_event(smmu, evt, &event))
				arm_smmu_dump_event(smmu, evt, &event, &rs);

			/* Drop the ref taken by arm_smmu_decode_event() */
			put_device(event.dev);
			cond_resched();
		}

		/*
		 * Not much we can do on overflow, so scream and pretend we're
		 * trying harder.
		 */
		if (queue_sync_prod_in(q) == -EOVERFLOW)
			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
	} while (!queue_empty(llq));

	/* Sync our overflow flag, as we believe we're up to speed */
	queue_sync_cons_ovf(q);
	return IRQ_HANDLED;
}

/*
 * Log an unexpected PRI request. If it is the last request of its group,
 * respond with PRI_RESP_DENY so the endpoint does not wait forever for a
 * page response.
 */
static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
{
	u32 sid, ssid;
	u16 grpid;
	bool ssv, last;

	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);

	dev_info(smmu->dev, "unexpected PRI request received:\n");
	dev_info(smmu->dev,
		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
		 sid, ssid, grpid, last ? "L" : "",
		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
		 evt[1] & PRIQ_1_ADDR_MASK);

	if (last) {
		struct arm_smmu_cmdq_ent cmd = {
			.opcode = CMDQ_OP_PRI_RESP,
			.substream_valid = ssv,
			.pri = {
				.sid = sid,
				.ssid = ssid,
				.grpid = grpid,
				.resp = PRI_RESP_DENY,
			},
		};

		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	}
}

/* PRIQ IRQ thread: drain the queue, mirroring arm_smmu_evtq_thread() */
static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
{
	struct arm_smmu_device *smmu = dev;
	struct arm_smmu_queue *q = &smmu->priq.q;
	struct arm_smmu_ll_queue *llq = &q->llq;
	u64 evt[PRIQ_ENT_DWORDS];

	do {
		while (!queue_remove_raw(q, evt))
			arm_smmu_handle_ppr(smmu, evt);

		if (queue_sync_prod_in(q) == -EOVERFLOW)
			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
	} while (!queue_empty(llq));

	/* Sync our overflow flag, as we believe we're up to speed */
	queue_sync_cons_ovf(q);
	return IRQ_HANDLED;
}

static int arm_smmu_device_disable(struct arm_smmu_device *smmu);

/*
 * Global error IRQ: active errors are the bits where GERROR and GERRORN
 * disagree. Service Failure Mode is fatal enough to disable the device.
 */
static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
{
	u32 gerror, gerrorn, active;
	struct arm_smmu_device *smmu = dev;

	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);

	active = gerror ^ gerrorn;
	if (!(active & GERROR_ERR_MASK))
		return IRQ_NONE; /* No errors pending */

	dev_warn(smmu->dev,
		 "unexpected global error reported (0x%08x), this could be serious\n",
		 active);

	if (active & GERROR_SFM_ERR) {
		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
		arm_smmu_device_disable(smmu);
	}

	if (active & GERROR_MSI_GERROR_ABT_ERR)
		dev_warn(smmu->dev, "GERROR MSI write aborted\n");

	if (active & GERROR_MSI_PRIQ_ABT_ERR)
		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");

	if (active & GERROR_MSI_EVTQ_ABT_ERR)
		dev_warn(smmu->dev, "EVTQ 
MSI write aborted\n"); 2384 2385 if (active & GERROR_MSI_CMDQ_ABT_ERR) 2386 dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); 2387 2388 if (active & GERROR_PRIQ_ABT_ERR) 2389 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); 2390 2391 if (active & GERROR_EVTQ_ABT_ERR) 2392 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n"); 2393 2394 if (active & GERROR_CMDQ_ERR) 2395 arm_smmu_cmdq_skip_err(smmu); 2396 2397 writel(gerror, smmu->base + ARM_SMMU_GERRORN); 2398 return IRQ_HANDLED; 2399 } 2400 2401 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev) 2402 { 2403 struct arm_smmu_device *smmu = dev; 2404 2405 arm_smmu_evtq_thread(irq, dev); 2406 if (smmu->features & ARM_SMMU_FEAT_PRI) 2407 arm_smmu_priq_thread(irq, dev); 2408 2409 return IRQ_HANDLED; 2410 } 2411 2412 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) 2413 { 2414 arm_smmu_gerror_handler(irq, dev); 2415 return IRQ_WAKE_THREAD; 2416 } 2417 2418 static void 2419 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, 2420 struct arm_smmu_cmdq_ent *cmd) 2421 { 2422 size_t log2_span; 2423 size_t span_mask; 2424 /* ATC invalidates are always on 4096-bytes pages */ 2425 size_t inval_grain_shift = 12; 2426 unsigned long page_start, page_end; 2427 2428 /* 2429 * ATS and PASID: 2430 * 2431 * If substream_valid is clear, the PCIe TLP is sent without a PASID 2432 * prefix. In that case all ATC entries within the address range are 2433 * invalidated, including those that were requested with a PASID! There 2434 * is no way to invalidate only entries without PASID. 2435 * 2436 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID 2437 * traffic), translation requests without PASID create ATC entries 2438 * without PASID, which must be invalidated with substream_valid clear. 2439 * This has the unpleasant side-effect of invalidating all PASID-tagged 2440 * ATC entries within the address range. 
2441 */ 2442 *cmd = (struct arm_smmu_cmdq_ent) { 2443 .opcode = CMDQ_OP_ATC_INV, 2444 .substream_valid = (ssid != IOMMU_NO_PASID), 2445 .atc.ssid = ssid, 2446 }; 2447 2448 if (!size) { 2449 cmd->atc.size = ATC_INV_SIZE_ALL; 2450 return; 2451 } 2452 2453 page_start = iova >> inval_grain_shift; 2454 page_end = (iova + size - 1) >> inval_grain_shift; 2455 2456 /* 2457 * In an ATS Invalidate Request, the address must be aligned on the 2458 * range size, which must be a power of two number of page sizes. We 2459 * thus have to choose between grossly over-invalidating the region, or 2460 * splitting the invalidation into multiple commands. For simplicity 2461 * we'll go with the first solution, but should refine it in the future 2462 * if multiple commands are shown to be more efficient. 2463 * 2464 * Find the smallest power of two that covers the range. The most 2465 * significant differing bit between the start and end addresses, 2466 * fls(start ^ end), indicates the required span. For example: 2467 * 2468 * We want to invalidate pages [8; 11]. 
This is already the ideal range: 2469 * x = 0b1000 ^ 0b1011 = 0b11 2470 * span = 1 << fls(x) = 4 2471 * 2472 * To invalidate pages [7; 10], we need to invalidate [0; 15]: 2473 * x = 0b0111 ^ 0b1010 = 0b1101 2474 * span = 1 << fls(x) = 16 2475 */ 2476 log2_span = fls_long(page_start ^ page_end); 2477 span_mask = (1ULL << log2_span) - 1; 2478 2479 page_start &= ~span_mask; 2480 2481 cmd->atc.addr = page_start << inval_grain_shift; 2482 cmd->atc.size = log2_span; 2483 } 2484 2485 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, 2486 ioasid_t ssid) 2487 { 2488 int i; 2489 struct arm_smmu_cmdq_ent cmd; 2490 struct arm_smmu_cmdq_batch cmds; 2491 2492 arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd); 2493 2494 arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd); 2495 for (i = 0; i < master->num_streams; i++) { 2496 cmd.atc.sid = master->streams[i].id; 2497 arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); 2498 } 2499 2500 return arm_smmu_cmdq_batch_submit(master->smmu, &cmds); 2501 } 2502 2503 /* IO_PGTABLE API */ 2504 static void arm_smmu_tlb_inv_context(void *cookie) 2505 { 2506 struct arm_smmu_domain *smmu_domain = cookie; 2507 2508 /* 2509 * If the DMA API is running in non-strict mode then another CPU could 2510 * have changed the page table and not invoked any flush op. Instead the 2511 * other CPU will do an atomic_read() and this CPU will have done an 2512 * atomic_write(). That handshake is enough to acquire the page table 2513 * writes from the other CPU. 2514 * 2515 * All command execution has a dma_wmb() to release all the in-memory 2516 * structures written by this CPU, that barrier must also release the 2517 * writes acquired from all the other CPUs too. 2518 * 2519 * There are other barriers and atomics on this path, but the above is 2520 * the essential mechanism for ensuring that HW sees the page table 2521 * writes from another CPU before it executes the IOTLB invalidation. 
2522 */ 2523 arm_smmu_domain_inv(smmu_domain); 2524 } 2525 2526 static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu, 2527 struct arm_smmu_cmdq_batch *cmds, 2528 struct arm_smmu_cmdq_ent *cmd, 2529 unsigned long iova, size_t size, 2530 size_t granule, size_t pgsize) 2531 { 2532 unsigned long end = iova + size, num_pages = 0, tg = pgsize; 2533 size_t inv_range = granule; 2534 2535 if (WARN_ON_ONCE(!size)) 2536 return; 2537 2538 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { 2539 num_pages = size >> tg; 2540 2541 /* Convert page size of 12,14,16 (log2) to 1,2,3 */ 2542 cmd->tlbi.tg = (tg - 10) / 2; 2543 2544 /* 2545 * Determine what level the granule is at. For non-leaf, both 2546 * io-pgtable and SVA pass a nominal last-level granule because 2547 * they don't know what level(s) actually apply, so ignore that 2548 * and leave TTL=0. However for various errata reasons we still 2549 * want to use a range command, so avoid the SVA corner case 2550 * where both scale and num could be 0 as well. 2551 */ 2552 if (cmd->tlbi.leaf) 2553 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); 2554 else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1) 2555 num_pages++; 2556 } 2557 2558 while (iova < end) { 2559 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { 2560 /* 2561 * On each iteration of the loop, the range is 5 bits 2562 * worth of the aligned size remaining. 
2563 * The range in pages is: 2564 * 2565 * range = (num_pages & (0x1f << __ffs(num_pages))) 2566 */ 2567 unsigned long scale, num; 2568 2569 /* Determine the power of 2 multiple number of pages */ 2570 scale = __ffs(num_pages); 2571 cmd->tlbi.scale = scale; 2572 2573 /* Determine how many chunks of 2^scale size we have */ 2574 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX; 2575 cmd->tlbi.num = num - 1; 2576 2577 /* range is num * 2^scale * pgsize */ 2578 inv_range = num << (scale + tg); 2579 2580 /* Clear out the lower order bits for the next iteration */ 2581 num_pages -= num << scale; 2582 } 2583 2584 cmd->tlbi.addr = iova; 2585 arm_smmu_cmdq_batch_add(smmu, cmds, cmd); 2586 iova += inv_range; 2587 } 2588 } 2589 2590 static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size, 2591 size_t granule) 2592 { 2593 size_t max_tlbi_ops; 2594 2595 /* 0 size means invalidate all */ 2596 if (!size || size == SIZE_MAX) 2597 return true; 2598 2599 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) 2600 return false; 2601 2602 /* 2603 * Borrowed from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, 2604 * this is used as a threshold to replace "size_opcode" commands with a 2605 * single "nsize_opcode" command, when SMMU doesn't implement the range 2606 * invalidation feature, where there can be too many per-granule TLBIs, 2607 * resulting in a soft lockup. 
	 */
	max_tlbi_ops = 1 << (ilog2(granule) - 3);
	return size >= max_tlbi_ops * granule;
}

/* Used by non INV_TYPE_ATS* invalidations */
static void arm_smmu_inv_to_cmdq_batch(struct arm_smmu_inv *inv,
				       struct arm_smmu_cmdq_batch *cmds,
				       struct arm_smmu_cmdq_ent *cmd,
				       unsigned long iova, size_t size,
				       unsigned int granule)
{
	/*
	 * When the range is too costly to invalidate per-granule, fall back
	 * to the single non-size ("invalidate all") opcode for this entry.
	 */
	if (arm_smmu_inv_size_too_big(inv->smmu, size, granule)) {
		cmd->opcode = inv->nsize_opcode;
		arm_smmu_cmdq_batch_add(inv->smmu, cmds, cmd);
		return;
	}

	cmd->opcode = inv->size_opcode;
	arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, iova, size, granule,
				      inv->pgsize);
}

/*
 * Decide whether the current command batch must be submitted (and synced)
 * before @next can be queued, based on inter-entry ordering rules.
 */
static inline bool arm_smmu_invs_end_batch(struct arm_smmu_inv *cur,
					   struct arm_smmu_inv *next)
{
	/* Changing smmu means changing command queue */
	if (cur->smmu != next->smmu)
		return true;
	/* The batch for S2 TLBI must be done before nested S1 ASIDs */
	if (cur->type != INV_TYPE_S2_VMID_S1_CLEAR &&
	    next->type == INV_TYPE_S2_VMID_S1_CLEAR)
		return true;
	/* ATS must be after a sync of the S1/S2 invalidations */
	if (!arm_smmu_inv_is_ats(cur) && arm_smmu_inv_is_ats(next))
		return true;
	return false;
}

/*
 * Walk the domain's invalidation array and emit one command per live entry,
 * batching consecutive commands that may share a submission. Entries with
 * users == 0 are trash and are skipped.
 */
static void __arm_smmu_domain_inv_range(struct arm_smmu_invs *invs,
					unsigned long iova, size_t size,
					unsigned int granule, bool leaf)
{
	struct arm_smmu_cmdq_batch cmds = {};
	struct arm_smmu_inv *cur;
	struct arm_smmu_inv *end;

	cur = invs->inv;
	end = cur + READ_ONCE(invs->num_invs);
	/* Skip any leading entry marked as a trash */
	for (; cur != end; cur++)
		if (READ_ONCE(cur->users))
			break;
	while (cur != end) {
		struct arm_smmu_device *smmu = cur->smmu;
		struct arm_smmu_cmdq_ent cmd = {
			/*
			 * Pick size_opcode to run arm_smmu_get_cmdq().
This can 2666 * be changed to nsize_opcode, which would result in the 2667 * same CMDQ pointer. 2668 */ 2669 .opcode = cur->size_opcode, 2670 }; 2671 struct arm_smmu_inv *next; 2672 2673 if (!cmds.num) 2674 arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd); 2675 2676 switch (cur->type) { 2677 case INV_TYPE_S1_ASID: 2678 cmd.tlbi.asid = cur->id; 2679 cmd.tlbi.leaf = leaf; 2680 arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size, 2681 granule); 2682 break; 2683 case INV_TYPE_S2_VMID: 2684 cmd.tlbi.vmid = cur->id; 2685 cmd.tlbi.leaf = leaf; 2686 arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size, 2687 granule); 2688 break; 2689 case INV_TYPE_S2_VMID_S1_CLEAR: 2690 /* CMDQ_OP_TLBI_S12_VMALL already flushed S1 entries */ 2691 if (arm_smmu_inv_size_too_big(cur->smmu, size, granule)) 2692 break; 2693 cmd.tlbi.vmid = cur->id; 2694 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); 2695 break; 2696 case INV_TYPE_ATS: 2697 arm_smmu_atc_inv_to_cmd(cur->ssid, iova, size, &cmd); 2698 cmd.atc.sid = cur->id; 2699 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); 2700 break; 2701 case INV_TYPE_ATS_FULL: 2702 arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); 2703 cmd.atc.sid = cur->id; 2704 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); 2705 break; 2706 default: 2707 WARN_ON_ONCE(1); 2708 break; 2709 } 2710 2711 /* Skip any trash entry in-between */ 2712 for (next = cur + 1; next != end; next++) 2713 if (READ_ONCE(next->users)) 2714 break; 2715 2716 if (cmds.num && 2717 (next == end || arm_smmu_invs_end_batch(cur, next))) { 2718 arm_smmu_cmdq_batch_submit(smmu, &cmds); 2719 cmds.num = 0; 2720 } 2721 cur = next; 2722 } 2723 } 2724 2725 void arm_smmu_domain_inv_range(struct arm_smmu_domain *smmu_domain, 2726 unsigned long iova, size_t size, 2727 unsigned int granule, bool leaf) 2728 { 2729 struct arm_smmu_invs *invs; 2730 2731 /* 2732 * An invalidation request must follow some IOPTE change and then load 2733 * an invalidation array. 
In the meantime, a domain attachment mutates 2734 * the array and then stores an STE/CD asking SMMU HW to acquire those 2735 * changed IOPTEs. 2736 * 2737 * When running alone, a domain attachment relies on the dma_wmb() in 2738 * arm_smmu_write_entry() used by arm_smmu_install_ste_for_dev(). 2739 * 2740 * But in a race, these two can be interdependent, making it a special 2741 * case requiring an additional smp_mb() for the write->read ordering. 2742 * Pairing with the dma_wmb() in arm_smmu_install_ste_for_dev(), this 2743 * makes sure that IOPTE update prior to this point is visible to SMMU 2744 * hardware before we load the updated invalidation array. 2745 * 2746 * [CPU0] | [CPU1] 2747 * change IOPTE on new domain: | 2748 * arm_smmu_domain_inv_range() { | arm_smmu_install_new_domain_invs() 2749 * smp_mb(); // ensures IOPTE | arm_smmu_install_ste_for_dev { 2750 * // seen by SMMU | dma_wmb(); // ensures invs update 2751 * // load the updated invs | // before updating STE 2752 * invs = rcu_dereference(); | STE = TTB0; 2753 * ... | ... 2754 * } | } 2755 */ 2756 smp_mb(); 2757 2758 rcu_read_lock(); 2759 invs = rcu_dereference(smmu_domain->invs); 2760 2761 /* 2762 * Avoid locking unless ATS is being used. No ATC invalidation can be 2763 * going on after a domain is detached. 
2764 */ 2765 if (invs->has_ats) { 2766 unsigned long flags; 2767 2768 read_lock_irqsave(&invs->rwlock, flags); 2769 __arm_smmu_domain_inv_range(invs, iova, size, granule, leaf); 2770 read_unlock_irqrestore(&invs->rwlock, flags); 2771 } else { 2772 __arm_smmu_domain_inv_range(invs, iova, size, granule, leaf); 2773 } 2774 2775 rcu_read_unlock(); 2776 } 2777 2778 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, 2779 unsigned long iova, size_t granule, 2780 void *cookie) 2781 { 2782 struct arm_smmu_domain *smmu_domain = cookie; 2783 struct iommu_domain *domain = &smmu_domain->domain; 2784 2785 iommu_iotlb_gather_add_page(domain, gather, iova, granule); 2786 } 2787 2788 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, 2789 size_t granule, void *cookie) 2790 { 2791 struct arm_smmu_domain *smmu_domain = cookie; 2792 2793 arm_smmu_domain_inv_range(smmu_domain, iova, size, granule, false); 2794 } 2795 2796 static const struct iommu_flush_ops arm_smmu_flush_ops = { 2797 .tlb_flush_all = arm_smmu_tlb_inv_context, 2798 .tlb_flush_walk = arm_smmu_tlb_inv_walk, 2799 .tlb_add_page = arm_smmu_tlb_inv_page_nosync, 2800 }; 2801 2802 static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu) 2803 { 2804 u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY); 2805 2806 return (smmu->features & features) == features; 2807 } 2808 2809 /* IOMMU API */ 2810 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) 2811 { 2812 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 2813 2814 switch (cap) { 2815 case IOMMU_CAP_CACHE_COHERENCY: 2816 /* Assume that a coherent TCU implies coherent TBUs */ 2817 return master->smmu->features & ARM_SMMU_FEAT_COHERENCY; 2818 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: 2819 return arm_smmu_master_canwbs(master); 2820 case IOMMU_CAP_NOEXEC: 2821 case IOMMU_CAP_DEFERRED_FLUSH: 2822 return true; 2823 case IOMMU_CAP_DIRTY_TRACKING: 2824 return arm_smmu_dbm_capable(master->smmu); 2825 case 
IOMMU_CAP_PCI_ATS_SUPPORTED:
		return arm_smmu_ats_supported(master);
	default:
		return false;
	}
}

/*
 * Returns true only when every master currently attached to this domain can
 * produce cache-coherent (write-back) traffic. The result is also cached in
 * smmu_domain->enforce_cache_coherency, under devices_lock, so that later
 * attaches of non-coherent masters can be refused in arm_smmu_attach_prepare().
 */
static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_master_domain *master_domain;
	unsigned long flags;
	bool ret = true;

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master_domain, &smmu_domain->devices,
			    devices_elm) {
		if (!arm_smmu_master_canwbs(master_domain->master)) {
			ret = false;
			break;
		}
	}
	smmu_domain->enforce_cache_coherency = ret;
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
	return ret;
}

/*
 * Allocate a bare domain with an empty (zero-entry) invalidation array
 * published via RCU. The caller is responsible for selecting the stage and
 * finalising the page table (arm_smmu_domain_finalise()) before use.
 * Returns ERR_PTR(-ENOMEM) on allocation failure.
 */
struct arm_smmu_domain *arm_smmu_domain_alloc(void)
{
	struct arm_smmu_domain *smmu_domain;
	struct arm_smmu_invs *new_invs;

	smmu_domain = kzalloc_obj(*smmu_domain);
	if (!smmu_domain)
		return ERR_PTR(-ENOMEM);

	new_invs = arm_smmu_invs_alloc(0);
	if (!new_invs) {
		kfree(smmu_domain);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&smmu_domain->devices);
	spin_lock_init(&smmu_domain->devices_lock);
	rcu_assign_pointer(smmu_domain->invs, new_invs);

	return smmu_domain;
}

/* Tear down a paging domain: free the io-pgtable, then release its ASID/VMID */
static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);

	/* Free the ASID or VMID */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		/* Prevent SVA from touching the CD while we're freeing it */
		mutex_lock(&arm_smmu_asid_lock);
		xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
		mutex_unlock(&arm_smmu_asid_lock);
	} else {
		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;

		/* VMID 0 is never allocated (reserved for bypass STEs) */
		if (cfg->vmid)
			ida_free(&smmu->vmid_map, cfg->vmid);
	}

	arm_smmu_domain_free(smmu_domain);
}

/*
 * Allocate a stage-1 ASID for the domain. ASID 0 is never handed out; the
 * xarray entry also lets SVA find the domain owning an ASID.
 */
static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
				       struct arm_smmu_domain *smmu_domain)
{
	int ret;
	u32 asid = 0;
	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;

	/* Prevent SVA from modifying the ASID until it is written to the CD */
	mutex_lock(&arm_smmu_asid_lock);
	ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
	cd->asid = (u16)asid;
	mutex_unlock(&arm_smmu_asid_lock);
	return ret;
}

/* Allocate a stage-2 VMID for the domain (VMID 0 is reserved, see below) */
static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
				       struct arm_smmu_domain *smmu_domain)
{
	int vmid;
	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;

	/* Reserve VMID 0 for stage-2 bypass STEs */
	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
			       GFP_KERNEL);
	if (vmid < 0)
		return vmid;

	cfg->vmid = (u16)vmid;
	return 0;
}

/*
 * Build the io-pgtable for the domain according to its stage, set up the
 * iommu_domain geometry, and allocate the ASID/VMID via the per-stage
 * finalise function. On success the domain is bound to @smmu.
 */
static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
				    struct arm_smmu_device *smmu, u32 flags)
{
	int ret;
	enum io_pgtable_fmt fmt;
	struct io_pgtable_cfg pgtbl_cfg;
	struct io_pgtable_ops *pgtbl_ops;
	int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
				 struct arm_smmu_domain *smmu_domain);
	bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
		.tlb		= &arm_smmu_flush_ops,
		.iommu_dev	= smmu->dev,
	};

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1: {
		/* 52-bit VA only with the VAX feature, else 48-bit */
		unsigned long ias = (smmu->features &
				     ARM_SMMU_FEAT_VAX) ? 52 : 48;

		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
		pgtbl_cfg.oas = smmu->oas;
		if (enable_dirty)
			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
		fmt = ARM_64_LPAE_S1;
		finalise_stage_fn = arm_smmu_domain_finalise_s1;
		break;
	}
	case ARM_SMMU_DOMAIN_S2:
		/* Dirty tracking is only wired up for stage 1 */
		if (enable_dirty)
			return -EOPNOTSUPP;
		pgtbl_cfg.ias = smmu->oas;
		pgtbl_cfg.oas = smmu->oas;
		fmt = ARM_64_LPAE_S2;
		finalise_stage_fn = arm_smmu_domain_finalise_s2;
		if ((smmu->features & ARM_SMMU_FEAT_S2FWB) &&
		    (flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB;
		break;
	default:
		return -EINVAL;
	}

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops)
		return -ENOMEM;

	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
	smmu_domain->domain.geometry.force_aperture = true;
	if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;

	ret = finalise_stage_fn(smmu, smmu_domain);
	if (ret < 0) {
		free_io_pgtable_ops(pgtbl_ops);
		return ret;
	}

	smmu_domain->pgtbl_ops = pgtbl_ops;
	smmu_domain->smmu = smmu;
	return 0;
}

/* Locate the STE for @sid in either the 2-level or the linear stream table */
static struct arm_smmu_ste *
arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
{
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;

	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
		/* Two-level walk */
		return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]
				->stes[arm_smmu_strtab_l2_idx(sid)];
	} else {
		/* Simple linear lookup */
		return &cfg->linear.table[sid];
	}
}

void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
				  const struct arm_smmu_ste *target)
{
	int i, j;
	struct
arm_smmu_device *smmu = master->smmu;

	/*
	 * Record what the new STE programs so later PASID attaches can tell
	 * whether a CD table and/or ATS is live in the installed entry.
	 */
	master->cd_table.in_ste =
		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) ==
		STRTAB_STE_0_CFG_S1_TRANS;
	master->ste_ats_enabled =
		FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) ==
		STRTAB_STE_1_EATS_TRANS;

	for (i = 0; i < master->num_streams; ++i) {
		u32 sid = master->streams[i].id;
		struct arm_smmu_ste *step =
			arm_smmu_get_step_for_sid(smmu, sid);

		/* Bridged PCI devices may end up with duplicated IDs */
		for (j = 0; j < i; j++)
			if (master->streams[j].id == sid)
				break;
		if (j < i)
			continue;

		arm_smmu_write_ste(master, sid, step, target);
	}
}

/*
 * ATS is usable only when the SMMU advertises it, the firmware marked the
 * PCI root complex as ATS-capable, and the endpoint itself supports ATS.
 */
static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
{
	struct device *dev = master->dev;
	struct arm_smmu_device *smmu = master->smmu;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
		return false;

	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
		return false;

	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
}

/* Flush the device's ATC and then turn on PCI ATS with the smallest STU */
static void arm_smmu_enable_ats(struct arm_smmu_master *master)
{
	size_t stu;
	struct pci_dev *pdev;
	struct arm_smmu_device *smmu = master->smmu;

	/* Smallest Translation Unit: log2 of the smallest supported granule */
	stu = __ffs(smmu->pgsize_bitmap);
	pdev = to_pci_dev(master->dev);

	/*
	 * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
	 */
	arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
	if (pci_enable_ats(pdev, stu))
		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
}

/*
 * Enable PCI PASID on the device and clamp master->ssid_bits to the lesser
 * of what the endpoint and the SMMU support. Propagates pci_* error codes;
 * returns the (non-positive) pci_max_pasids() value when no PASIDs exist.
 */
static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
{
	int ret;
	int features;
	int num_pasids;
	struct pci_dev *pdev;

	if (!dev_is_pci(master->dev))
		return -ENODEV;

	pdev = to_pci_dev(master->dev);

	features = pci_pasid_features(pdev);
	if (features < 0)
		return features;

	num_pasids = pci_max_pasids(pdev);
	if (num_pasids <= 0)
		return num_pasids;

	ret = pci_enable_pasid(pdev, features);
	if (ret) {
		dev_err(&pdev->dev, "Failed to enable PASID\n");
		return ret;
	}

	master->ssid_bits = min_t(u8, ilog2(num_pasids),
				  master->smmu->ssid_bits);
	return 0;
}

/* Reverse of arm_smmu_enable_pasid(); no-op if PASID was never enabled */
static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(master->dev))
		return;

	pdev = to_pci_dev(master->dev);

	if (!pdev->pasid_enabled)
		return;

	master->ssid_bits = 0;
	pci_disable_pasid(pdev);
}

/*
 * Find the tracking entry for the exact (master, domain, ssid,
 * nested_ats_flush) tuple on the domain's devices list, or NULL.
 * Caller must hold smmu_domain->devices_lock.
 */
static struct arm_smmu_master_domain *
arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
			    struct iommu_domain *domain,
			    struct arm_smmu_master *master,
			    ioasid_t ssid, bool nested_ats_flush)
{
	struct arm_smmu_master_domain *master_domain;

	lockdep_assert_held(&smmu_domain->devices_lock);

	list_for_each_entry(master_domain, &smmu_domain->devices,
			    devices_elm) {
		if (master_domain->master == master &&
		    master_domain->domain == domain &&
		    master_domain->ssid == ssid &&
		    master_domain->nested_ats_flush == nested_ats_flush)
			return master_domain;
	}
	return NULL;
}

/*
 * If the domain uses the smmu_domain->devices list return the arm_smmu_domain
 * structure, otherwise NULL. These domains track attached devices so they can
 * issue invalidations.
 */
static struct arm_smmu_domain *
to_smmu_domain_devices(struct iommu_domain *domain)
{
	/* The domain can be NULL only when processing the first attach */
	if (!domain)
		return NULL;
	if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
	    domain->type == IOMMU_DOMAIN_SVA)
		return to_smmu_domain(domain);
	if (domain->type == IOMMU_DOMAIN_NESTED)
		return to_smmu_nested_domain(domain)->vsmmu->s2_parent;
	return NULL;
}

/*
 * Take an IOPF reference for this master/domain pair, adding the device to
 * the event queue's iopf queue on the first reference.
 */
static int arm_smmu_enable_iopf(struct arm_smmu_master *master,
				struct arm_smmu_master_domain *master_domain)
{
	int ret;

	iommu_group_mutex_assert(master->dev);

	if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
		return -EOPNOTSUPP;

	/*
	 * Drivers for devices supporting PRI or stall require iopf; others have
	 * device-specific fault handlers and don't need IOPF, so this is not a
	 * failure.
	 */
	if (!master->stall_enabled)
		return 0;

	/* We're not keeping track of SIDs in fault events */
	if (master->num_streams != 1)
		return -EOPNOTSUPP;

	if (master->iopf_refcount) {
		master->iopf_refcount++;
		master_domain->using_iopf = true;
		return 0;
	}

	ret = iopf_queue_add_device(master->smmu->evtq.iopf, master->dev);
	if (ret)
		return ret;
	master->iopf_refcount = 1;
	master_domain->using_iopf = true;
	return 0;
}

/*
 * Drop the IOPF reference taken by arm_smmu_enable_iopf() for this
 * master/domain pair; removes the device from the iopf queue on last put.
 * Safe to call with a NULL or non-IOPF master_domain.
 */
static void arm_smmu_disable_iopf(struct arm_smmu_master *master,
				  struct arm_smmu_master_domain *master_domain)
{
	iommu_group_mutex_assert(master->dev);

	if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
		return;

	if (!master_domain || !master_domain->using_iopf)
		return;

	master->iopf_refcount--;
	if (master->iopf_refcount == 0)
		iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev);
}

/*
 * Append one invalidation descriptor to master->build_invs and pick the
 * CMDQ opcodes for it: size_opcode for range-based invalidation,
 * nsize_opcode for invalidating the whole ASID/VMID/ATC space.
 * Returns the new entry, or NULL when the scratch array is full (WARNs).
 */
static struct arm_smmu_inv *
arm_smmu_master_build_inv(struct arm_smmu_master *master,
			  enum arm_smmu_inv_type type, u32 id, ioasid_t ssid,
			  size_t pgsize)
{
	struct arm_smmu_invs *build_invs = master->build_invs;
	struct arm_smmu_inv *cur, inv = {
		.smmu = master->smmu,
		.type = type,
		.id = id,
		.pgsize = pgsize,
	};

	if (WARN_ON(build_invs->num_invs >= build_invs->max_invs))
		return NULL;
	cur = &build_invs->inv[build_invs->num_invs];
	build_invs->num_invs++;

	*cur = inv;
	switch (type) {
	case INV_TYPE_S1_ASID:
		/*
		 * For S1 page tables the driver always uses VMID=0, and the
		 * invalidation logic for this type will set it as well.
		 */
		if (master->smmu->features & ARM_SMMU_FEAT_E2H) {
			cur->size_opcode = CMDQ_OP_TLBI_EL2_VA;
			cur->nsize_opcode = CMDQ_OP_TLBI_EL2_ASID;
		} else {
			cur->size_opcode = CMDQ_OP_TLBI_NH_VA;
			cur->nsize_opcode = CMDQ_OP_TLBI_NH_ASID;
		}
		break;
	case INV_TYPE_S2_VMID:
		cur->size_opcode = CMDQ_OP_TLBI_S2_IPA;
		cur->nsize_opcode = CMDQ_OP_TLBI_S12_VMALL;
		break;
	case INV_TYPE_S2_VMID_S1_CLEAR:
		cur->size_opcode = cur->nsize_opcode = CMDQ_OP_TLBI_NH_ALL;
		break;
	case INV_TYPE_ATS:
	case INV_TYPE_ATS_FULL:
		cur->size_opcode = cur->nsize_opcode = CMDQ_OP_ATC_INV;
		cur->ssid = ssid;
		break;
	}

	return cur;
}

/*
 * Use the preallocated scratch array at master->build_invs, to build a to_merge
 * or to_unref array, to pass into a following arm_smmu_invs_merge/unref() call.
 *
 * Do not free the returned invs array. It is reused, and will be overwritten by
 * the next arm_smmu_master_build_invs() call.
 */
static struct arm_smmu_invs *
arm_smmu_master_build_invs(struct arm_smmu_master *master, bool ats_enabled,
			   ioasid_t ssid, struct arm_smmu_domain *smmu_domain)
{
	const bool nesting = smmu_domain->nest_parent;
	size_t pgsize = 0, i;

	iommu_group_mutex_assert(master->dev);

	/* Restart from an empty scratch array on every build */
	master->build_invs->num_invs = 0;

	/* Range-based invalidation requires the leaf pgsize for calculation */
	if (master->smmu->features & ARM_SMMU_FEAT_RANGE_INV)
		pgsize = __ffs(smmu_domain->domain.pgsize_bitmap);

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_SVA:
	case ARM_SMMU_DOMAIN_S1:
		if (!arm_smmu_master_build_inv(master, INV_TYPE_S1_ASID,
					       smmu_domain->cd.asid,
					       IOMMU_NO_PASID, pgsize))
			return NULL;
		break;
	case ARM_SMMU_DOMAIN_S2:
		if (!arm_smmu_master_build_inv(master, INV_TYPE_S2_VMID,
					       smmu_domain->s2_cfg.vmid,
					       IOMMU_NO_PASID, pgsize))
			return NULL;
		break;
	default:
		WARN_ON(true);
		return NULL;
	}

	/* All the nested S1 ASIDs have to be flushed when S2 parent changes */
	if (nesting) {
		if (!arm_smmu_master_build_inv(
			    master, INV_TYPE_S2_VMID_S1_CLEAR,
			    smmu_domain->s2_cfg.vmid, IOMMU_NO_PASID, 0))
			return NULL;
	}

	for (i = 0; ats_enabled && i < master->num_streams; i++) {
		/*
		 * If an S2 used as a nesting parent is changed we have no
		 * option but to completely flush the ATC.
		 */
		if (!arm_smmu_master_build_inv(
			    master, nesting ?
INV_TYPE_ATS_FULL : INV_TYPE_ATS,
			    master->streams[i].id, ssid, 0))
			return NULL;
	}

	/* Note this build_invs must have been sorted */

	return master->build_invs;
}

/*
 * Remove the (master, domain, ssid) tracking entry from the domain's devices
 * list, drop the nr_ats_masters count if this master had ATS on, release its
 * IOPF reference and free the entry. No-op for domains that don't track
 * devices (to_smmu_domain_devices() returns NULL).
 */
static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
					  struct iommu_domain *domain,
					  ioasid_t ssid)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain);
	struct arm_smmu_master_domain *master_domain;
	bool nested_ats_flush = false;
	unsigned long flags;

	if (!smmu_domain)
		return;

	if (domain->type == IOMMU_DOMAIN_NESTED)
		nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats;

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	master_domain = arm_smmu_find_master_domain(smmu_domain, domain, master,
						    ssid, nested_ats_flush);
	if (master_domain) {
		list_del(&master_domain->devices_elm);
		if (master->ats_enabled)
			atomic_dec(&smmu_domain->nr_ats_masters);
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_disable_iopf(master, master_domain);
	kfree(master_domain);
}

/*
 * During attachment, the updates of the two domain->invs arrays are sequenced:
 * 1. new domain updates its invs array, merging master->build_invs
 * 2. new domain starts to include the master during its invalidation
 * 3. master updates its STE switching from the old domain to the new domain
 * 4. old domain still includes the master during its invalidation
 * 5. old domain updates its invs array, unreferencing master->build_invs
 *
 * For 1 and 5, prepare the two updated arrays in advance, handling any changes
 * that could possibly fail. So the actual update of either 1 or 5 won't fail.
 * arm_smmu_asid_lock ensures that the old invs in the domains are intact while
 * we are sequencing to update them.
 */
static int arm_smmu_attach_prepare_invs(struct arm_smmu_attach_state *state,
					struct iommu_domain *new_domain)
{
	struct arm_smmu_domain *old_smmu_domain =
		to_smmu_domain_devices(state->old_domain);
	struct arm_smmu_domain *new_smmu_domain =
		to_smmu_domain_devices(new_domain);
	struct arm_smmu_master *master = state->master;
	ioasid_t ssid = state->ssid;

	/*
	 * At this point a NULL domain indicates the domain doesn't use the
	 * IOTLB, see to_smmu_domain_devices().
	 */
	if (new_smmu_domain) {
		struct arm_smmu_inv_state *invst = &state->new_domain_invst;
		struct arm_smmu_invs *build_invs;

		invst->invs_ptr = &new_smmu_domain->invs;
		invst->old_invs = rcu_dereference_protected(
			new_smmu_domain->invs,
			lockdep_is_held(&arm_smmu_asid_lock));
		build_invs = arm_smmu_master_build_invs(
			master, state->ats_enabled, ssid, new_smmu_domain);
		if (!build_invs)
			return -EINVAL;

		/* Allocates the merged array now so step 1 cannot fail later */
		invst->new_invs =
			arm_smmu_invs_merge(invst->old_invs, build_invs);
		if (IS_ERR(invst->new_invs))
			return PTR_ERR(invst->new_invs);
	}

	if (old_smmu_domain) {
		struct arm_smmu_inv_state *invst = &state->old_domain_invst;

		invst->invs_ptr = &old_smmu_domain->invs;
		/* A re-attach case might have a different ats_enabled state */
		if (new_smmu_domain == old_smmu_domain)
			invst->old_invs = state->new_domain_invst.new_invs;
		else
			invst->old_invs = rcu_dereference_protected(
				old_smmu_domain->invs,
				lockdep_is_held(&arm_smmu_asid_lock));
		/* For old_smmu_domain, new_invs points to master->build_invs */
		invst->new_invs = arm_smmu_master_build_invs(
			master, master->ats_enabled, ssid, old_smmu_domain);
	}

	return 0;
}

/* Must be installed before arm_smmu_install_ste_for_dev() */
static void
arm_smmu_install_new_domain_invs(struct arm_smmu_attach_state *state)
{
	struct arm_smmu_inv_state *invst = &state->new_domain_invst;

	if (!invst->invs_ptr)
		return;

	/* Publish the pre-built merged array; readers are RCU-protected */
	rcu_assign_pointer(*invst->invs_ptr, invst->new_invs);
	kfree_rcu(invst->old_invs, rcu);
}

/*
 * Issue a synchronous whole-ASID/VMID TLBI for the IOTLB tag carried by
 * @inv. ATS-type entries carry no IOTLB tag and are ignored.
 */
static void arm_smmu_inv_flush_iotlb_tag(struct arm_smmu_inv *inv)
{
	struct arm_smmu_cmdq_ent cmd = {};

	switch (inv->type) {
	case INV_TYPE_S1_ASID:
		cmd.tlbi.asid = inv->id;
		break;
	case INV_TYPE_S2_VMID:
		/* S2_VMID using nsize_opcode covers S2_VMID_S1_CLEAR */
		cmd.tlbi.vmid = inv->id;
		break;
	default:
		return;
	}

	cmd.opcode = inv->nsize_opcode;
	arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, &cmd);
}

/* Should be installed after arm_smmu_install_ste_for_dev() */
static void
arm_smmu_install_old_domain_invs(struct arm_smmu_attach_state *state)
{
	struct arm_smmu_inv_state *invst = &state->old_domain_invst;
	struct arm_smmu_invs *old_invs = invst->old_invs;
	struct arm_smmu_invs *new_invs;

	lockdep_assert_held(&arm_smmu_asid_lock);

	if (!invst->invs_ptr)
		return;

	arm_smmu_invs_unref(old_invs, invst->new_invs);
	/*
	 * When the IOTLB tag (the first entry in invst->new_invs) is no longer
	 * used, it means the ASID or VMID will no longer be invalidated by
	 * map/unmap and must be cleaned right now. The rule is that any
	 * ASID/VMID not in an invs array must be left cleared in the IOTLB.
	 */
	if (!READ_ONCE(invst->new_invs->inv[0].users))
		arm_smmu_inv_flush_iotlb_tag(&invst->new_invs->inv[0]);

	/* Purge is best-effort; keep the old array if allocation fails */
	new_invs = arm_smmu_invs_purge(old_invs);
	if (!new_invs)
		return;

	rcu_assign_pointer(*invst->invs_ptr, new_invs);
	kfree_rcu(old_invs, rcu);
}

/*
 * Start the sequence to attach a domain to a master.
The sequence contains three
 * steps:
 *  arm_smmu_attach_prepare()
 *  arm_smmu_install_ste_for_dev()
 *  arm_smmu_attach_commit()
 *
 * If prepare succeeds then the sequence must be completed. The STE installed
 * must set the STE.EATS field according to state.ats_enabled.
 *
 * If the device supports ATS then this determines if EATS should be enabled
 * in the STE, and starts sequencing EATS disable if required.
 *
 * The change of the EATS in the STE and the PCI ATS config space is managed by
 * this sequence to be in the right order so that if PCI ATS is enabled then
 * STE.EATS is enabled.
 *
 * new_domain can be a non-paging domain. In this case ATS will not be enabled,
 * and invalidations won't be tracked.
 */
int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
			    struct iommu_domain *new_domain)
{
	struct arm_smmu_master *master = state->master;
	struct arm_smmu_master_domain *master_domain;
	struct arm_smmu_domain *smmu_domain =
		to_smmu_domain_devices(new_domain);
	unsigned long flags;
	int ret;

	/*
	 * arm_smmu_share_asid() must not see two domains pointing to the same
	 * arm_smmu_master_domain contents otherwise it could randomly write one
	 * or the other to the CD.
	 */
	lockdep_assert_held(&arm_smmu_asid_lock);

	if (smmu_domain || state->cd_needs_ats) {
		/*
		 * The SMMU does not support enabling ATS with bypass/abort.
		 * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
		 * Translation Requests and Translated transactions are denied
		 * as though ATS is disabled for the stream (STE.EATS == 0b00),
		 * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
		 * (IHI0070Ea 5.2 Stream Table Entry).
		 *
		 * However, if we have installed a CD table and are using S1DSS
		 * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS
		 * skipping stage 1".
		 *
		 * Disable ATS if we are going to create a normal 0b100 bypass
		 * STE.
		 */
		state->ats_enabled = !state->disable_ats &&
				     arm_smmu_ats_supported(master);
	}

	/* Pre-build both domains' invs arrays so later steps cannot fail */
	ret = arm_smmu_attach_prepare_invs(state, new_domain);
	if (ret)
		return ret;

	if (smmu_domain) {
		if (new_domain->type == IOMMU_DOMAIN_NESTED) {
			ret = arm_smmu_attach_prepare_vmaster(
				state, to_smmu_nested_domain(new_domain));
			if (ret)
				goto err_unprepare_invs;
		}

		master_domain = kzalloc_obj(*master_domain);
		if (!master_domain) {
			ret = -ENOMEM;
			goto err_free_vmaster;
		}
		master_domain->domain = new_domain;
		master_domain->master = master;
		master_domain->ssid = state->ssid;
		if (new_domain->type == IOMMU_DOMAIN_NESTED)
			master_domain->nested_ats_flush =
				to_smmu_nested_domain(new_domain)->enable_ats;

		if (new_domain->iopf_handler) {
			ret = arm_smmu_enable_iopf(master, master_domain);
			if (ret)
				goto err_free_master_domain;
		}

		/*
		 * During prepare we want the current smmu_domain and new
		 * smmu_domain to be in the devices list before we change any
		 * HW. This ensures that both domains will send ATS
		 * invalidations to the master until we are done.
		 *
		 * It is tempting to make this list only track masters that are
		 * using ATS, but arm_smmu_share_asid() also uses this to change
		 * the ASID of a domain, unrelated to ATS.
		 *
		 * Notice if we are re-attaching the same domain then the list
		 * will have two identical entries and commit will remove only
		 * one of them.
		 */
		spin_lock_irqsave(&smmu_domain->devices_lock, flags);
		if (smmu_domain->enforce_cache_coherency &&
		    !arm_smmu_master_canwbs(master)) {
			spin_unlock_irqrestore(&smmu_domain->devices_lock,
					       flags);
			ret = -EINVAL;
			goto err_iopf;
		}

		if (state->ats_enabled)
			atomic_inc(&smmu_domain->nr_ats_masters);
		list_add(&master_domain->devices_elm, &smmu_domain->devices);
		spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

		arm_smmu_install_new_domain_invs(state);
	}

	if (!state->ats_enabled && master->ats_enabled) {
		pci_disable_ats(to_pci_dev(master->dev));
		/*
		 * This is probably overkill, but the config write for disabling
		 * ATS should complete before the STE is configured to generate
		 * UR to avoid AER noise.
		 */
		wmb();
	}
	return 0;

err_iopf:
	arm_smmu_disable_iopf(master, master_domain);
err_free_master_domain:
	kfree(master_domain);
err_free_vmaster:
	kfree(state->vmaster);
err_unprepare_invs:
	kfree(state->new_domain_invst.new_invs);
	return ret;
}

/*
 * Commit is done after the STE/CD are configured with the EATS setting. It
 * completes synchronizing the PCI device's ATC and finishes manipulating the
 * smmu_domain->devices list.
 */
void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
{
	struct arm_smmu_master *master = state->master;

	lockdep_assert_held(&arm_smmu_asid_lock);

	arm_smmu_attach_commit_vmaster(state);

	if (state->ats_enabled && !master->ats_enabled) {
		arm_smmu_enable_ats(master);
	} else if (state->ats_enabled && master->ats_enabled) {
		/*
		 * The translation has changed, flush the ATC. At this point the
		 * SMMU is translating for the new domain and both the old&new
		 * domain will issue invalidations.
		 */
		arm_smmu_atc_inv_master(master, state->ssid);
	} else if (!state->ats_enabled && master->ats_enabled) {
		/* ATS is being switched off, invalidate the entire ATC */
		arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
	}

	arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);
	arm_smmu_install_old_domain_invs(state);
	master->ats_enabled = state->ats_enabled;
}

/* Attach a paging domain as the RID (IOMMU_NO_PASID) translation for @dev */
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev,
			       struct iommu_domain *old_domain)
{
	int ret = 0;
	struct arm_smmu_ste target;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_attach_state state = {
		.old_domain = old_domain,
		.ssid = IOMMU_NO_PASID,
	};
	struct arm_smmu_master *master;
	struct arm_smmu_cd *cdptr;

	if (!fwspec)
		return -ENOENT;

	state.master = master = dev_iommu_priv_get(dev);
	smmu = master->smmu;

	/* A domain finalised on one SMMU instance cannot serve another */
	if (smmu_domain->smmu != smmu)
		return -EINVAL;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
		if (!cdptr)
			return -ENOMEM;
	} else if (arm_smmu_ssids_in_use(&master->cd_table))
		return -EBUSY;

	/*
	 * Prevent arm_smmu_share_asid() from trying to change the ASID
	 * of either the old or new domain while we are working on it.
	 * This allows the STE and the smmu_domain->devices list to
	 * be inconsistent during this routine.
	 */
	mutex_lock(&arm_smmu_asid_lock);

	ret = arm_smmu_attach_prepare(&state, domain);
	if (ret) {
		mutex_unlock(&arm_smmu_asid_lock);
		return ret;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1: {
		struct arm_smmu_cd target_cd;

		/* CD first, then an STE that points at the CD table */
		arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
		arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
					&target_cd);
		arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled,
					  STRTAB_STE_1_S1DSS_SSID0);
		arm_smmu_install_ste_for_dev(master, &target);
		break;
	}
	case ARM_SMMU_DOMAIN_S2:
		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain,
					    state.ats_enabled);
		arm_smmu_install_ste_for_dev(master, &target);
		arm_smmu_clear_cd(master, IOMMU_NO_PASID);
		break;
	default:
		WARN_ON(true);
		break;
	}

	arm_smmu_attach_commit(&state);
	mutex_unlock(&arm_smmu_asid_lock);
	return 0;
}

/* set_dev_pasid op for S1 paging domains; defers to arm_smmu_set_pasid() */
static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
				     struct device *dev, ioasid_t id,
				     struct iommu_domain *old)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_cd target_cd;

	if (smmu_domain->smmu != smmu)
		return -EINVAL;

	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
		return -EINVAL;

	/*
	 * We can read cd.asid outside the lock because arm_smmu_set_pasid()
	 * will fix it
	 */
	arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
	return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
				  &target_cd, old);
}

/*
 * Ensure the RID's STE is a CD-table STE with the right S1DSS for the
 * current IDENTITY/BLOCKED sid_domain, updating EATS if it changed.
 */
static void arm_smmu_update_ste(struct arm_smmu_master *master,
				struct iommu_domain *sid_domain,
				bool ats_enabled)
{
	unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE;
	struct arm_smmu_ste ste;

	/* Nothing to do if a CD-table STE with matching EATS is installed */
	if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled)
		return;

	if (sid_domain->type == IOMMU_DOMAIN_IDENTITY)
		s1dss = STRTAB_STE_1_S1DSS_BYPASS;
	else
		WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED);

	/*
	 * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior
	 * using s1dss if necessary. If the cd_table is already installed then
	 * the S1DSS is correct and this will just update the EATS. Otherwise it
	 * installs the entire thing. This will be hitless.
	 */
	arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
	arm_smmu_install_ste_for_dev(master, &ste);
}

/*
 * Attach @smmu_domain to @pasid on @master by writing @cd into the CD table,
 * upgrading the RID STE to a CD-table STE if needed. @cd's ASID field is
 * fixed up under arm_smmu_asid_lock before it is written.
 */
int arm_smmu_set_pasid(struct arm_smmu_master *master,
		       struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
		       struct arm_smmu_cd *cd, struct iommu_domain *old)
{
	struct iommu_domain *sid_domain =
		iommu_driver_get_domain_for_dev(master->dev);
	struct arm_smmu_attach_state state = {
		.master = master,
		.ssid = pasid,
		.old_domain = old,
	};
	struct arm_smmu_cd *cdptr;
	int ret;

	/* The core code validates pasid */

	if (smmu_domain->smmu != master->smmu)
		return -EINVAL;

	if (!master->cd_table.in_ste &&
	    sid_domain->type != IOMMU_DOMAIN_IDENTITY &&
	    sid_domain->type != IOMMU_DOMAIN_BLOCKED)
		return -EINVAL;

	cdptr = arm_smmu_alloc_cd_ptr(master, pasid);
	if (!cdptr)
		return -ENOMEM;

	mutex_lock(&arm_smmu_asid_lock);
	ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain);
	if (ret)
		goto out_unlock;

	/*
	 * We don't want to take the asid_lock too early, so fix up the
	 * caller-set ASID under the lock in case it changed.
	 */
	cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
	cd->data[0] |= cpu_to_le64(
		FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));

	arm_smmu_write_cd_entry(master, pasid, cdptr, cd);
	arm_smmu_update_ste(master, sid_domain, state.ats_enabled);

	arm_smmu_attach_commit(&state);

out_unlock:
	mutex_unlock(&arm_smmu_asid_lock);
	return ret;
}

/*
 * set_dev_pasid op of the blocked domain: detach @pasid from its old domain
 * by clearing the CD, flushing the ATC if needed and dropping the tracking
 * entry/invs references for the old domain.
 */
static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
					   struct device *dev, ioasid_t pasid,
					   struct iommu_domain *old_domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_attach_state state = {
		.master = master,
		.old_domain = old_domain,
		.ssid = pasid,
	};

	mutex_lock(&arm_smmu_asid_lock);
	arm_smmu_attach_prepare_invs(&state, NULL);
	arm_smmu_clear_cd(master, pasid);
	if (master->ats_enabled)
		arm_smmu_atc_inv_master(master, pasid);
	arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);
	arm_smmu_install_old_domain_invs(&state);
	mutex_unlock(&arm_smmu_asid_lock);

	/*
	 * When the last user of the CD table goes away downgrade the STE back
	 * to a non-cd_table one, by re-attaching its sid_domain.
	 */
	if (!arm_smmu_ssids_in_use(&master->cd_table)) {
		struct iommu_domain *sid_domain =
			iommu_driver_get_domain_for_dev(master->dev);

		if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
		    sid_domain->type == IOMMU_DOMAIN_BLOCKED)
			sid_domain->ops->attach_dev(sid_domain, dev,
						    sid_domain);
	}
	return 0;
}

/*
 * Common helper for attaching the IDENTITY/BLOCKED domains: installs the
 * caller-built bypass/abort @ste, or a CD-table STE with @s1dss when PASIDs
 * are still in use, running the full prepare/install/commit sequence.
 */
static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
				    struct iommu_domain *old_domain,
				    struct device *dev,
				    struct arm_smmu_ste *ste,
				    unsigned int s1dss)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_attach_state state = {
		.master = master,
		.old_domain = old_domain,
		.ssid = IOMMU_NO_PASID,
	};

	/*
	 * Do not allow any ASID to be changed while we are working on the STE,
	 * otherwise we could miss invalidations.
	 */
	mutex_lock(&arm_smmu_asid_lock);

	/*
	 * If the CD table is not in use we can use the provided STE, otherwise
	 * we use a cdtable STE with the provided S1DSS.
	 */
	if (arm_smmu_ssids_in_use(&master->cd_table)) {
		/*
		 * If a CD table has to be present then we need to run with ATS
		 * on because we have to assume a PASID is using ATS. For
		 * IDENTITY this will setup things so that S1DSS=bypass which
		 * follows the explanation in "13.6.4 Full ATS skipping stage 1"
		 * and allows for ATS on the RID to work.
		 */
		state.cd_needs_ats = true;
		arm_smmu_attach_prepare(&state, domain);
		arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss);
	} else {
		arm_smmu_attach_prepare(&state, domain);
	}
	arm_smmu_install_ste_for_dev(master, ste);
	arm_smmu_attach_commit(&state);
	mutex_unlock(&arm_smmu_asid_lock);

	/*
	 * This has to be done after removing the master from the
	 * arm_smmu_domain->devices to avoid races updating the same context
	 * descriptor from arm_smmu_share_asid().
	 */
	arm_smmu_clear_cd(master, IOMMU_NO_PASID);
}

/* attach_dev op of the static IDENTITY domain: install a bypass STE */
static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
					struct device *dev,
					struct iommu_domain *old_domain)
{
	struct arm_smmu_ste ste;
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);

	arm_smmu_master_clear_vmaster(master);
	arm_smmu_make_bypass_ste(master->smmu, &ste);
	arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
				STRTAB_STE_1_S1DSS_BYPASS);
	return 0;
}

static const struct iommu_domain_ops arm_smmu_identity_ops = {
	.attach_dev = arm_smmu_attach_dev_identity,
};

/* Singleton identity (bypass) domain shared by all devices */
static struct iommu_domain arm_smmu_identity_domain = {
	.type = IOMMU_DOMAIN_IDENTITY,
	.ops = &arm_smmu_identity_ops,
};

/* attach_dev op of the static BLOCKED domain: install an abort STE */
static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
				       struct device *dev,
				       struct iommu_domain *old_domain)
{
	struct arm_smmu_ste ste;
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);

	arm_smmu_master_clear_vmaster(master);
	arm_smmu_make_abort_ste(&ste);
	arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
				STRTAB_STE_1_S1DSS_TERMINATE);
	return 0;
}

static const struct iommu_domain_ops arm_smmu_blocked_ops = {
	.attach_dev = arm_smmu_attach_dev_blocked,
	.set_dev_pasid = arm_smmu_blocking_set_dev_pasid,
};

/* Singleton blocked (abort) domain shared by all devices */
static struct iommu_domain arm_smmu_blocked_domain = {
	.type = IOMMU_DOMAIN_BLOCKED,
	.ops = &arm_smmu_blocked_ops,
};

/*
 * Allocate an UNMANAGED paging domain. The stage is chosen from @flags:
 * NEST_PARENT forces S2, DIRTY_TRACKING/PASID force S1, and with no flags
 * S1 is preferred when the hardware supports it. user_data is not accepted.
 */
static struct iommu_domain *
arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags,
				   const struct iommu_user_data *user_data)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_device *smmu = master->smmu;
	const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
				 IOMMU_HWPT_ALLOC_PASID |
				 IOMMU_HWPT_ALLOC_NEST_PARENT;
	struct arm_smmu_domain *smmu_domain;
	int ret;

	if (flags & ~PAGING_FLAGS)
		return ERR_PTR(-EOPNOTSUPP);
	if (user_data)
		return ERR_PTR(-EOPNOTSUPP);

	smmu_domain = arm_smmu_domain_alloc();
	if (IS_ERR(smmu_domain))
		return ERR_CAST(smmu_domain);

	/* Only the exact flag combinations below are supported */
	switch (flags) {
	case 0:
		/* Prefer S1 if available */
		if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
		else
			smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
		break;
	case IOMMU_HWPT_ALLOC_NEST_PARENT:
		if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) {
			ret = -EOPNOTSUPP;
			goto err_free;
		}
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
		smmu_domain->nest_parent = true;
		break;
	case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
	case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID:
	case IOMMU_HWPT_ALLOC_PASID:
		if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
			ret = -EOPNOTSUPP;
			goto err_free;
		}
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
		break;
	default:
		ret = -EOPNOTSUPP;
		goto err_free;
	}

	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
	smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
	ret = arm_smmu_domain_finalise(smmu_domain, smmu, flags);
	if (ret)
		goto err_free;
	return &smmu_domain->domain;

err_free:
	arm_smmu_domain_free(smmu_domain);
	return ERR_PTR(ret);
}

/* map_pages op: thin wrapper around the io-pgtable implementation */
static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
			      int prot, gfp_t gfp, size_t *mapped)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;

	if (!ops)
		return -ENODEV;

	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
}

/* unmap_pages op: thin wrapper around the io-pgtable implementation */
static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
				   size_t pgsize, size_t pgcount,
				   struct iommu_iotlb_gather *gather)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return 0;

	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
}

/* Flush the whole IOTLB context for the domain (no-op before finalise) */
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (smmu_domain->smmu)
		arm_smmu_tlb_inv_context(smmu_domain);
}

/* Issue the range invalidation accumulated in @gather after unmap */
static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
				struct iommu_iotlb_gather *gather)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (!gather->pgsize)
		return;

	arm_smmu_domain_inv_range(smmu_domain, gather->start,
				  gather->end - gather->start + 1,
				  gather->pgsize, true);
}

/* iova_to_phys op: walk the io-pgtable; 0 when unmapped or not finalised */
static phys_addr_t
arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;

	if (!ops)
		return 0;

	return ops->iova_to_phys(ops, iova);
}

static struct platform_driver arm_smmu_driver;

static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);

	put_device(dev);
	return dev ?
dev_get_drvdata(dev) : NULL; 4091 } 4092 4093 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid) 4094 { 4095 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) 4096 return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents; 4097 return sid < smmu->strtab_cfg.linear.num_ents; 4098 } 4099 4100 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid) 4101 { 4102 /* Check the SIDs are in range of the SMMU and our stream table */ 4103 if (!arm_smmu_sid_in_range(smmu, sid)) 4104 return -ERANGE; 4105 4106 /* Ensure l2 strtab is initialised */ 4107 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) 4108 return arm_smmu_init_l2_strtab(smmu, sid); 4109 4110 return 0; 4111 } 4112 4113 static int arm_smmu_stream_id_cmp(const void *_l, const void *_r) 4114 { 4115 const typeof_member(struct arm_smmu_stream, id) *l = _l; 4116 const typeof_member(struct arm_smmu_stream, id) *r = _r; 4117 4118 return cmp_int(*l, *r); 4119 } 4120 4121 static int arm_smmu_insert_master(struct arm_smmu_device *smmu, 4122 struct arm_smmu_master *master) 4123 { 4124 int i; 4125 int ret = 0; 4126 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); 4127 bool ats_supported = dev_is_pci(master->dev) && 4128 pci_ats_supported(to_pci_dev(master->dev)); 4129 4130 master->streams = kzalloc_objs(*master->streams, fwspec->num_ids); 4131 if (!master->streams) 4132 return -ENOMEM; 4133 master->num_streams = fwspec->num_ids; 4134 4135 if (!ats_supported) { 4136 /* Base case has 1 ASID entry or maximum 2 VMID entries */ 4137 master->build_invs = arm_smmu_invs_alloc(2); 4138 } else { 4139 /* ATS case adds num_ids of entries, on top of the base case */ 4140 master->build_invs = arm_smmu_invs_alloc(2 + fwspec->num_ids); 4141 } 4142 if (!master->build_invs) { 4143 kfree(master->streams); 4144 return -ENOMEM; 4145 } 4146 4147 for (i = 0; i < fwspec->num_ids; i++) { 4148 struct arm_smmu_stream *new_stream = &master->streams[i]; 4149 4150 new_stream->id = 
fwspec->ids[i]; 4151 new_stream->master = master; 4152 } 4153 4154 /* Put the ids into order for sorted to_merge/to_unref arrays */ 4155 sort_nonatomic(master->streams, master->num_streams, 4156 sizeof(master->streams[0]), arm_smmu_stream_id_cmp, 4157 NULL); 4158 4159 mutex_lock(&smmu->streams_mutex); 4160 for (i = 0; i < fwspec->num_ids; i++) { 4161 struct arm_smmu_stream *new_stream = &master->streams[i]; 4162 struct rb_node *existing; 4163 u32 sid = new_stream->id; 4164 4165 ret = arm_smmu_init_sid_strtab(smmu, sid); 4166 if (ret) 4167 break; 4168 4169 /* Insert into SID tree */ 4170 existing = rb_find_add(&new_stream->node, &smmu->streams, 4171 arm_smmu_streams_cmp_node); 4172 if (existing) { 4173 struct arm_smmu_master *existing_master = 4174 rb_entry(existing, struct arm_smmu_stream, node) 4175 ->master; 4176 4177 /* Bridged PCI devices may end up with duplicated IDs */ 4178 if (existing_master == master) 4179 continue; 4180 4181 dev_warn(master->dev, 4182 "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n", 4183 sid, dev_name(existing_master->dev)); 4184 ret = -ENODEV; 4185 break; 4186 } 4187 } 4188 4189 if (ret) { 4190 for (i--; i >= 0; i--) 4191 rb_erase(&master->streams[i].node, &smmu->streams); 4192 kfree(master->streams); 4193 kfree(master->build_invs); 4194 } 4195 mutex_unlock(&smmu->streams_mutex); 4196 4197 return ret; 4198 } 4199 4200 static void arm_smmu_remove_master(struct arm_smmu_master *master) 4201 { 4202 int i; 4203 struct arm_smmu_device *smmu = master->smmu; 4204 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); 4205 4206 if (!smmu || !master->streams) 4207 return; 4208 4209 mutex_lock(&smmu->streams_mutex); 4210 for (i = 0; i < fwspec->num_ids; i++) 4211 rb_erase(&master->streams[i].node, &smmu->streams); 4212 mutex_unlock(&smmu->streams_mutex); 4213 4214 kfree(master->streams); 4215 kfree(master->build_invs); 4216 } 4217 4218 static struct iommu_device *arm_smmu_probe_device(struct device *dev) 4219 { 
4220 int ret; 4221 struct arm_smmu_device *smmu; 4222 struct arm_smmu_master *master; 4223 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 4224 4225 if (WARN_ON_ONCE(dev_iommu_priv_get(dev))) 4226 return ERR_PTR(-EBUSY); 4227 4228 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); 4229 if (!smmu) 4230 return ERR_PTR(-ENODEV); 4231 4232 master = kzalloc_obj(*master); 4233 if (!master) 4234 return ERR_PTR(-ENOMEM); 4235 4236 master->dev = dev; 4237 master->smmu = smmu; 4238 dev_iommu_priv_set(dev, master); 4239 4240 ret = arm_smmu_insert_master(smmu, master); 4241 if (ret) 4242 goto err_free_master; 4243 4244 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits); 4245 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits); 4246 4247 /* 4248 * Note that PASID must be enabled before, and disabled after ATS: 4249 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register 4250 * 4251 * Behavior is undefined if this bit is Set and the value of the PASID 4252 * Enable, Execute Requested Enable, or Privileged Mode Requested bits 4253 * are changed. 
4254 */ 4255 arm_smmu_enable_pasid(master); 4256 4257 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB)) 4258 master->ssid_bits = min_t(u8, master->ssid_bits, 4259 CTXDESC_LINEAR_CDMAX); 4260 4261 if ((smmu->features & ARM_SMMU_FEAT_STALLS && 4262 device_property_read_bool(dev, "dma-can-stall")) || 4263 smmu->features & ARM_SMMU_FEAT_STALL_FORCE) 4264 master->stall_enabled = true; 4265 4266 if (dev_is_pci(dev)) { 4267 unsigned int stu = __ffs(smmu->pgsize_bitmap); 4268 4269 pci_prepare_ats(to_pci_dev(dev), stu); 4270 } 4271 4272 return &smmu->iommu; 4273 4274 err_free_master: 4275 kfree(master); 4276 return ERR_PTR(ret); 4277 } 4278 4279 static void arm_smmu_release_device(struct device *dev) 4280 { 4281 struct arm_smmu_master *master = dev_iommu_priv_get(dev); 4282 4283 WARN_ON(master->iopf_refcount); 4284 4285 arm_smmu_disable_pasid(master); 4286 arm_smmu_remove_master(master); 4287 if (arm_smmu_cdtab_allocated(&master->cd_table)) 4288 arm_smmu_free_cd_tables(master); 4289 kfree(master); 4290 } 4291 4292 static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain, 4293 unsigned long iova, size_t size, 4294 unsigned long flags, 4295 struct iommu_dirty_bitmap *dirty) 4296 { 4297 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 4298 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; 4299 4300 return ops->read_and_clear_dirty(ops, iova, size, flags, dirty); 4301 } 4302 4303 static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain, 4304 bool enabled) 4305 { 4306 /* 4307 * Always enabled and the dirty bitmap is cleared prior to 4308 * set_dirty_tracking(). 4309 */ 4310 return 0; 4311 } 4312 4313 static struct iommu_group *arm_smmu_device_group(struct device *dev) 4314 { 4315 struct iommu_group *group; 4316 4317 /* 4318 * We don't support devices sharing stream IDs other than PCI RID 4319 * aliases, since the necessary ID-to-device lookup becomes rather 4320 * impractical given a potential sparse 32-bit stream ID space. 
4321 */ 4322 if (dev_is_pci(dev)) 4323 group = pci_device_group(dev); 4324 else 4325 group = generic_device_group(dev); 4326 4327 return group; 4328 } 4329 4330 static int arm_smmu_of_xlate(struct device *dev, 4331 const struct of_phandle_args *args) 4332 { 4333 return iommu_fwspec_add_ids(dev, args->args, 1); 4334 } 4335 4336 static void arm_smmu_get_resv_regions(struct device *dev, 4337 struct list_head *head) 4338 { 4339 struct iommu_resv_region *region; 4340 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; 4341 4342 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, 4343 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL); 4344 if (!region) 4345 return; 4346 4347 list_add_tail(&region->list, head); 4348 4349 iommu_dma_get_resv_regions(dev, head); 4350 } 4351 4352 /* 4353 * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the 4354 * PCIe link and save the data to memory by DMA. The hardware is restricted to 4355 * use identity mapping only. 4356 */ 4357 #define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \ 4358 (pdev)->device == 0xa12e) 4359 4360 static int arm_smmu_def_domain_type(struct device *dev) 4361 { 4362 if (dev_is_pci(dev)) { 4363 struct pci_dev *pdev = to_pci_dev(dev); 4364 4365 if (IS_HISI_PTT_DEVICE(pdev)) 4366 return IOMMU_DOMAIN_IDENTITY; 4367 } 4368 4369 return 0; 4370 } 4371 4372 static const struct iommu_ops arm_smmu_ops = { 4373 .identity_domain = &arm_smmu_identity_domain, 4374 .blocked_domain = &arm_smmu_blocked_domain, 4375 .release_domain = &arm_smmu_blocked_domain, 4376 .capable = arm_smmu_capable, 4377 .hw_info = arm_smmu_hw_info, 4378 .domain_alloc_sva = arm_smmu_sva_domain_alloc, 4379 .domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags, 4380 .probe_device = arm_smmu_probe_device, 4381 .release_device = arm_smmu_release_device, 4382 .device_group = arm_smmu_device_group, 4383 .of_xlate = arm_smmu_of_xlate, 4384 .get_resv_regions = arm_smmu_get_resv_regions, 4385 .page_response = 
arm_smmu_page_response, 4386 .def_domain_type = arm_smmu_def_domain_type, 4387 .get_viommu_size = arm_smmu_get_viommu_size, 4388 .viommu_init = arm_vsmmu_init, 4389 .user_pasid_table = 1, 4390 .owner = THIS_MODULE, 4391 .default_domain_ops = &(const struct iommu_domain_ops) { 4392 .attach_dev = arm_smmu_attach_dev, 4393 .enforce_cache_coherency = arm_smmu_enforce_cache_coherency, 4394 .set_dev_pasid = arm_smmu_s1_set_dev_pasid, 4395 .map_pages = arm_smmu_map_pages, 4396 .unmap_pages = arm_smmu_unmap_pages, 4397 .flush_iotlb_all = arm_smmu_flush_iotlb_all, 4398 .iotlb_sync = arm_smmu_iotlb_sync, 4399 .iova_to_phys = arm_smmu_iova_to_phys, 4400 .free = arm_smmu_domain_free_paging, 4401 } 4402 }; 4403 4404 static struct iommu_dirty_ops arm_smmu_dirty_ops = { 4405 .read_and_clear_dirty = arm_smmu_read_and_clear_dirty, 4406 .set_dirty_tracking = arm_smmu_set_dirty_tracking, 4407 }; 4408 4409 /* Probing and initialisation functions */ 4410 int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, 4411 struct arm_smmu_queue *q, void __iomem *page, 4412 unsigned long prod_off, unsigned long cons_off, 4413 size_t dwords, const char *name) 4414 { 4415 size_t qsz; 4416 4417 do { 4418 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3; 4419 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, 4420 GFP_KERNEL); 4421 if (q->base || qsz < PAGE_SIZE) 4422 break; 4423 4424 q->llq.max_n_shift--; 4425 } while (1); 4426 4427 if (!q->base) { 4428 dev_err(smmu->dev, 4429 "failed to allocate queue (0x%zx bytes) for %s\n", 4430 qsz, name); 4431 return -ENOMEM; 4432 } 4433 4434 if (!WARN_ON(q->base_dma & (qsz - 1))) { 4435 dev_info(smmu->dev, "allocated %u entries for %s\n", 4436 1 << q->llq.max_n_shift, name); 4437 } 4438 4439 q->prod_reg = page + prod_off; 4440 q->cons_reg = page + cons_off; 4441 q->ent_dwords = dwords; 4442 4443 q->q_base = Q_BASE_RWA; 4444 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK; 4445 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift); 4446 4447 
q->llq.prod = q->llq.cons = 0; 4448 return 0; 4449 } 4450 4451 int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, 4452 struct arm_smmu_cmdq *cmdq) 4453 { 4454 unsigned int nents = 1 << cmdq->q.llq.max_n_shift; 4455 4456 atomic_set(&cmdq->owner_prod, 0); 4457 atomic_set(&cmdq->lock, 0); 4458 4459 cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents, 4460 GFP_KERNEL); 4461 if (!cmdq->valid_map) 4462 return -ENOMEM; 4463 4464 return 0; 4465 } 4466 4467 static int arm_smmu_init_queues(struct arm_smmu_device *smmu) 4468 { 4469 int ret; 4470 4471 /* cmdq */ 4472 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base, 4473 ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS, 4474 CMDQ_ENT_DWORDS, "cmdq"); 4475 if (ret) 4476 return ret; 4477 4478 ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq); 4479 if (ret) 4480 return ret; 4481 4482 /* evtq */ 4483 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1, 4484 ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS, 4485 EVTQ_ENT_DWORDS, "evtq"); 4486 if (ret) 4487 return ret; 4488 4489 if ((smmu->features & ARM_SMMU_FEAT_SVA) && 4490 (smmu->features & ARM_SMMU_FEAT_STALLS)) { 4491 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev)); 4492 if (!smmu->evtq.iopf) 4493 return -ENOMEM; 4494 } 4495 4496 /* priq */ 4497 if (!(smmu->features & ARM_SMMU_FEAT_PRI)) 4498 return 0; 4499 4500 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1, 4501 ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS, 4502 PRIQ_ENT_DWORDS, "priq"); 4503 } 4504 4505 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) 4506 { 4507 u32 l1size; 4508 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 4509 unsigned int last_sid_idx = 4510 arm_smmu_strtab_l1_idx((1ULL << smmu->sid_bits) - 1); 4511 4512 /* Calculate the L1 size, capped to the SIDSIZE. 
*/ 4513 cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES); 4514 if (cfg->l2.num_l1_ents <= last_sid_idx) 4515 dev_warn(smmu->dev, 4516 "2-level strtab only covers %u/%u bits of SID\n", 4517 ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES), 4518 smmu->sid_bits); 4519 4520 l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1); 4521 cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma, 4522 GFP_KERNEL); 4523 if (!cfg->l2.l1tab) { 4524 dev_err(smmu->dev, 4525 "failed to allocate l1 stream table (%u bytes)\n", 4526 l1size); 4527 return -ENOMEM; 4528 } 4529 4530 cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents, 4531 sizeof(*cfg->l2.l2ptrs), GFP_KERNEL); 4532 if (!cfg->l2.l2ptrs) 4533 return -ENOMEM; 4534 4535 return 0; 4536 } 4537 4538 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) 4539 { 4540 u32 size; 4541 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 4542 4543 size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste); 4544 cfg->linear.table = dmam_alloc_coherent(smmu->dev, size, 4545 &cfg->linear.ste_dma, 4546 GFP_KERNEL); 4547 if (!cfg->linear.table) { 4548 dev_err(smmu->dev, 4549 "failed to allocate linear stream table (%u bytes)\n", 4550 size); 4551 return -ENOMEM; 4552 } 4553 cfg->linear.num_ents = 1 << smmu->sid_bits; 4554 4555 arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents); 4556 return 0; 4557 } 4558 4559 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) 4560 { 4561 int ret; 4562 4563 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) 4564 ret = arm_smmu_init_strtab_2lvl(smmu); 4565 else 4566 ret = arm_smmu_init_strtab_linear(smmu); 4567 if (ret) 4568 return ret; 4569 4570 ida_init(&smmu->vmid_map); 4571 4572 return 0; 4573 } 4574 4575 static int arm_smmu_init_structures(struct arm_smmu_device *smmu) 4576 { 4577 int ret; 4578 4579 mutex_init(&smmu->streams_mutex); 4580 smmu->streams = RB_ROOT; 4581 4582 ret = arm_smmu_init_queues(smmu); 4583 if 
(ret) 4584 return ret; 4585 4586 ret = arm_smmu_init_strtab(smmu); 4587 if (ret) 4588 return ret; 4589 4590 if (smmu->impl_ops && smmu->impl_ops->init_structures) 4591 return smmu->impl_ops->init_structures(smmu); 4592 4593 return 0; 4594 } 4595 4596 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, 4597 unsigned int reg_off, unsigned int ack_off) 4598 { 4599 u32 reg; 4600 4601 writel_relaxed(val, smmu->base + reg_off); 4602 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val, 4603 1, ARM_SMMU_POLL_TIMEOUT_US); 4604 } 4605 4606 /* GBPA is "special" */ 4607 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr) 4608 { 4609 int ret; 4610 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA; 4611 4612 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE), 4613 1, ARM_SMMU_POLL_TIMEOUT_US); 4614 if (ret) 4615 return ret; 4616 4617 reg &= ~clr; 4618 reg |= set; 4619 writel_relaxed(reg | GBPA_UPDATE, gbpa); 4620 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE), 4621 1, ARM_SMMU_POLL_TIMEOUT_US); 4622 4623 if (ret) 4624 dev_err(smmu->dev, "GBPA not responding to update\n"); 4625 return ret; 4626 } 4627 4628 static void arm_smmu_free_msis(void *data) 4629 { 4630 struct device *dev = data; 4631 4632 platform_device_msi_free_irqs_all(dev); 4633 } 4634 4635 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) 4636 { 4637 phys_addr_t doorbell; 4638 struct device *dev = msi_desc_to_dev(desc); 4639 struct arm_smmu_device *smmu = dev_get_drvdata(dev); 4640 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index]; 4641 4642 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo; 4643 doorbell &= MSI_CFG0_ADDR_MASK; 4644 4645 writeq_relaxed(doorbell, smmu->base + cfg[0]); 4646 writel_relaxed(msg->data, smmu->base + cfg[1]); 4647 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]); 4648 } 4649 4650 static void arm_smmu_setup_msis(struct 
arm_smmu_device *smmu) 4651 { 4652 int ret, nvec = ARM_SMMU_MAX_MSIS; 4653 struct device *dev = smmu->dev; 4654 4655 /* Clear the MSI address regs */ 4656 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); 4657 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); 4658 4659 if (smmu->features & ARM_SMMU_FEAT_PRI) 4660 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); 4661 else 4662 nvec--; 4663 4664 if (!(smmu->features & ARM_SMMU_FEAT_MSI)) 4665 return; 4666 4667 if (!dev->msi.domain) { 4668 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n"); 4669 return; 4670 } 4671 4672 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */ 4673 ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); 4674 if (ret) { 4675 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n"); 4676 return; 4677 } 4678 4679 smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX); 4680 smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX); 4681 smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX); 4682 4683 /* Add callback to free MSIs on teardown */ 4684 devm_add_action_or_reset(dev, arm_smmu_free_msis, dev); 4685 } 4686 4687 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) 4688 { 4689 int irq, ret; 4690 4691 arm_smmu_setup_msis(smmu); 4692 4693 /* Request interrupt lines */ 4694 irq = smmu->evtq.q.irq; 4695 if (irq) { 4696 ret = devm_request_threaded_irq(smmu->dev, irq, NULL, 4697 arm_smmu_evtq_thread, 4698 IRQF_ONESHOT, 4699 "arm-smmu-v3-evtq", smmu); 4700 if (ret < 0) 4701 dev_warn(smmu->dev, "failed to enable evtq irq\n"); 4702 } else { 4703 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n"); 4704 } 4705 4706 irq = smmu->gerr_irq; 4707 if (irq) { 4708 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler, 4709 0, "arm-smmu-v3-gerror", smmu); 4710 if (ret < 0) 4711 dev_warn(smmu->dev, "failed to enable gerror irq\n"); 4712 } else { 4713 dev_warn(smmu->dev, "no gerr irq - 
errors will not be reported!\n"); 4714 } 4715 4716 if (smmu->features & ARM_SMMU_FEAT_PRI) { 4717 irq = smmu->priq.q.irq; 4718 if (irq) { 4719 ret = devm_request_threaded_irq(smmu->dev, irq, NULL, 4720 arm_smmu_priq_thread, 4721 IRQF_ONESHOT, 4722 "arm-smmu-v3-priq", 4723 smmu); 4724 if (ret < 0) 4725 dev_warn(smmu->dev, 4726 "failed to enable priq irq\n"); 4727 } else { 4728 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n"); 4729 } 4730 } 4731 } 4732 4733 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) 4734 { 4735 int ret, irq; 4736 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; 4737 4738 /* Disable IRQs first */ 4739 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, 4740 ARM_SMMU_IRQ_CTRLACK); 4741 if (ret) { 4742 dev_err(smmu->dev, "failed to disable irqs\n"); 4743 return ret; 4744 } 4745 4746 irq = smmu->combined_irq; 4747 if (irq) { 4748 /* 4749 * Cavium ThunderX2 implementation doesn't support unique irq 4750 * lines. Use a single irq line for all the SMMUv3 interrupts. 
4751 */ 4752 ret = devm_request_threaded_irq(smmu->dev, irq, 4753 arm_smmu_combined_irq_handler, 4754 arm_smmu_combined_irq_thread, 4755 IRQF_ONESHOT, 4756 "arm-smmu-v3-combined-irq", smmu); 4757 if (ret < 0) 4758 dev_warn(smmu->dev, "failed to enable combined irq\n"); 4759 } else 4760 arm_smmu_setup_unique_irqs(smmu); 4761 4762 if (smmu->features & ARM_SMMU_FEAT_PRI) 4763 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; 4764 4765 /* Enable interrupt generation on the SMMU */ 4766 ret = arm_smmu_write_reg_sync(smmu, irqen_flags, 4767 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK); 4768 if (ret) 4769 dev_warn(smmu->dev, "failed to enable irqs\n"); 4770 4771 return 0; 4772 } 4773 4774 static int arm_smmu_device_disable(struct arm_smmu_device *smmu) 4775 { 4776 int ret; 4777 4778 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK); 4779 if (ret) 4780 dev_err(smmu->dev, "failed to clear cr0\n"); 4781 4782 return ret; 4783 } 4784 4785 static void arm_smmu_write_strtab(struct arm_smmu_device *smmu) 4786 { 4787 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 4788 dma_addr_t dma; 4789 u32 reg; 4790 4791 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { 4792 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, 4793 STRTAB_BASE_CFG_FMT_2LVL) | 4794 FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, 4795 ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) | 4796 FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); 4797 dma = cfg->l2.l1_dma; 4798 } else { 4799 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, 4800 STRTAB_BASE_CFG_FMT_LINEAR) | 4801 FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); 4802 dma = cfg->linear.ste_dma; 4803 } 4804 writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA, 4805 smmu->base + ARM_SMMU_STRTAB_BASE); 4806 writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG); 4807 } 4808 4809 static int arm_smmu_device_reset(struct arm_smmu_device *smmu) 4810 { 4811 int ret; 4812 u32 reg, enables; 4813 struct arm_smmu_cmdq_ent cmd; 4814 4815 /* Clear CR0 and sync (disables SMMU and queue 
processing) */ 4816 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0); 4817 if (reg & CR0_SMMUEN) { 4818 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n"); 4819 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); 4820 } 4821 4822 ret = arm_smmu_device_disable(smmu); 4823 if (ret) 4824 return ret; 4825 4826 /* CR1 (table and queue memory attributes) */ 4827 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) | 4828 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) | 4829 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) | 4830 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) | 4831 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) | 4832 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB); 4833 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1); 4834 4835 /* CR2 (random crap) */ 4836 reg = CR2_PTM | CR2_RECINVSID; 4837 4838 if (smmu->features & ARM_SMMU_FEAT_E2H) 4839 reg |= CR2_E2H; 4840 4841 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2); 4842 4843 /* Stream table */ 4844 arm_smmu_write_strtab(smmu); 4845 4846 /* Command queue */ 4847 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE); 4848 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD); 4849 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS); 4850 4851 enables = CR0_CMDQEN; 4852 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 4853 ARM_SMMU_CR0ACK); 4854 if (ret) { 4855 dev_err(smmu->dev, "failed to enable command queue\n"); 4856 return ret; 4857 } 4858 4859 /* Invalidate any cached configuration */ 4860 cmd.opcode = CMDQ_OP_CFGI_ALL; 4861 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); 4862 4863 /* Invalidate any stale TLB entries */ 4864 if (smmu->features & ARM_SMMU_FEAT_HYP) { 4865 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL; 4866 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); 4867 } 4868 4869 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; 4870 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); 4871 4872 /* Event queue */ 4873 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); 4874 
writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD); 4875 writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS); 4876 4877 enables |= CR0_EVTQEN; 4878 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 4879 ARM_SMMU_CR0ACK); 4880 if (ret) { 4881 dev_err(smmu->dev, "failed to enable event queue\n"); 4882 return ret; 4883 } 4884 4885 /* PRI queue */ 4886 if (smmu->features & ARM_SMMU_FEAT_PRI) { 4887 writeq_relaxed(smmu->priq.q.q_base, 4888 smmu->base + ARM_SMMU_PRIQ_BASE); 4889 writel_relaxed(smmu->priq.q.llq.prod, 4890 smmu->page1 + ARM_SMMU_PRIQ_PROD); 4891 writel_relaxed(smmu->priq.q.llq.cons, 4892 smmu->page1 + ARM_SMMU_PRIQ_CONS); 4893 4894 enables |= CR0_PRIQEN; 4895 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 4896 ARM_SMMU_CR0ACK); 4897 if (ret) { 4898 dev_err(smmu->dev, "failed to enable PRI queue\n"); 4899 return ret; 4900 } 4901 } 4902 4903 if (smmu->features & ARM_SMMU_FEAT_ATS) { 4904 enables |= CR0_ATSCHK; 4905 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 4906 ARM_SMMU_CR0ACK); 4907 if (ret) { 4908 dev_err(smmu->dev, "failed to enable ATS check\n"); 4909 return ret; 4910 } 4911 } 4912 4913 ret = arm_smmu_setup_irqs(smmu); 4914 if (ret) { 4915 dev_err(smmu->dev, "failed to setup irqs\n"); 4916 return ret; 4917 } 4918 4919 if (is_kdump_kernel()) 4920 enables &= ~(CR0_EVTQEN | CR0_PRIQEN); 4921 4922 /* Enable the SMMU interface */ 4923 enables |= CR0_SMMUEN; 4924 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 4925 ARM_SMMU_CR0ACK); 4926 if (ret) { 4927 dev_err(smmu->dev, "failed to enable SMMU interface\n"); 4928 return ret; 4929 } 4930 4931 if (smmu->impl_ops && smmu->impl_ops->device_reset) { 4932 ret = smmu->impl_ops->device_reset(smmu); 4933 if (ret) { 4934 dev_err(smmu->dev, "failed to reset impl\n"); 4935 return ret; 4936 } 4937 } 4938 4939 return 0; 4940 } 4941 4942 #define IIDR_IMPLEMENTER_ARM 0x43b 4943 #define IIDR_PRODUCTID_ARM_MMU_600 0x483 4944 #define 
IIDR_PRODUCTID_ARM_MMU_700	0x487
#define IIDR_PRODUCTID_ARM_MMU_L1	0x48a
#define IIDR_PRODUCTID_ARM_MMU_S3	0x498

/*
 * Read the IIDR register and mask off features/options for implementations
 * with known errata. Only Arm Ltd. products are matched; variant/revision
 * gate the workarounds where an erratum is fixed in later silicon.
 */
static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	unsigned int implementer, productid, variant, revision;

	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
	productid = FIELD_GET(IIDR_PRODUCTID, reg);
	variant = FIELD_GET(IIDR_VARIANT, reg);
	revision = FIELD_GET(IIDR_REVISION, reg);

	switch (implementer) {
	case IIDR_IMPLEMENTER_ARM:
		switch (productid) {
		case IIDR_PRODUCTID_ARM_MMU_600:
			/* Arm erratum 1076982 */
			if (variant == 0 && revision <= 2)
				smmu->features &= ~ARM_SMMU_FEAT_SEV;
			/* Arm erratum 1209401 */
			if (variant < 2)
				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
			break;
		case IIDR_PRODUCTID_ARM_MMU_700:
			/* Many errata... */
			smmu->features &= ~ARM_SMMU_FEAT_BTM;
			if (variant < 1 || revision < 1) {
				/* Arm erratum 2812531 */
				smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
				/* Arm errata 2268618, 2812531 */
				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
			}
			break;
		case IIDR_PRODUCTID_ARM_MMU_L1:
		case IIDR_PRODUCTID_ARM_MMU_S3:
			/* Arm errata 3878312/3995052 */
			smmu->features &= ~ARM_SMMU_FEAT_BTM;
			break;
		}
		break;
	}
}

/*
 * Reconcile the HTTU (hardware translation table update: Access/Dirty flag
 * management) capability advertised in IDR0 with what firmware declared.
 * On DT systems the IDR0 value is adopted directly; on ACPI systems the
 * IORT-provided HA/HD bits (already in smmu->features) take precedence and
 * a mismatch is only warned about.
 */
static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg)
{
	u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD);
	u32 hw_features = 0;

	switch (FIELD_GET(IDR0_HTTU, reg)) {
	case IDR0_HTTU_ACCESS_DIRTY:
		hw_features |= ARM_SMMU_FEAT_HD;
		fallthrough;
	case IDR0_HTTU_ACCESS:
		hw_features |= ARM_SMMU_FEAT_HA;
	}

	if (smmu->dev->of_node)
		smmu->features |= hw_features;
	else if (hw_features != fw_features)
		/* ACPI IORT sets the HTTU bits */
		dev_warn(smmu->dev,
			 "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n",
			 hw_features, fw_features);
}

/*
 * Probe the SMMU ID registers (IDR0/1/3/5) and populate smmu->features,
 * smmu->options, queue sizes and address-size limits accordingly.
 *
 * Returns 0 on success, or -ENXIO when the hardware lacks a capability the
 * driver cannot work without (usable TT endianness, any translation stage,
 * AArch64 table format, an externally-owned register file, or a command
 * queue large enough for one command batch plus a sync).
 */
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;

	/* IDR0 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);

	/* 2-level structures */
	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;

	if (reg & IDR0_CD2L)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;

	/*
	 * Translation table endianness.
	 * We currently require the same endianness as the CPU, but this
	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
	 */
	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
	case IDR0_TTENDIAN_MIXED:
		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
		break;
#ifdef __BIG_ENDIAN
	case IDR0_TTENDIAN_BE:
		smmu->features |= ARM_SMMU_FEAT_TT_BE;
		break;
#else
	case IDR0_TTENDIAN_LE:
		smmu->features |= ARM_SMMU_FEAT_TT_LE;
		break;
#endif
	default:
		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
		return -ENXIO;
	}

	/* Boolean feature flags */
	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
		smmu->features |= ARM_SMMU_FEAT_PRI;

	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
		smmu->features |= ARM_SMMU_FEAT_ATS;

	if (reg & IDR0_SEV)
		smmu->features |= ARM_SMMU_FEAT_SEV;

	if (reg & IDR0_MSI) {
		smmu->features |= ARM_SMMU_FEAT_MSI;
		/* MSI-based CMD_SYNC polling also needs coherent MSI writes */
		if (coherent && !disable_msipolling)
			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
	}

	if (reg & IDR0_HYP) {
		smmu->features |= ARM_SMMU_FEAT_HYP;
		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
			smmu->features |= ARM_SMMU_FEAT_E2H;
	}

	arm_smmu_get_httu(smmu, reg);

	/*
	 * The coherency feature as set by FW is used in preference to the ID
	 * register, but warn on mismatch.
	 */
	if (!!(reg & IDR0_COHACC) != coherent)
		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
			 str_true_false(coherent));

	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
	case IDR0_STALL_MODEL_FORCE:
		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
		fallthrough;
	case IDR0_STALL_MODEL_STALL:
		smmu->features |= ARM_SMMU_FEAT_STALLS;
	}

	if (reg & IDR0_S1P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;

	if (reg & IDR0_S2P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;

	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
		dev_err(smmu->dev, "no translation support!\n");
		return -ENXIO;
	}

	/* We only support the AArch64 table format at present */
	if (!(FIELD_GET(IDR0_TTF, reg) & IDR0_TTF_AARCH64)) {
		dev_err(smmu->dev, "AArch64 table format not supported!\n");
		return -ENXIO;
	}

	/* ASID/VMID sizes */
	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;

	/* IDR1 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
		dev_err(smmu->dev, "embedded implementation not supported\n");
		return -ENXIO;
	}

	if (reg & IDR1_ATTR_TYPES_OVR)
		smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;

	/* Queue sizes, capped to ensure natural alignment */
	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_CMDQS, reg));
	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
		/*
		 * We don't support splitting up batches, so one batch of
		 * commands plus an extra sync needs to fit inside the command
		 * queue. There's also no way we can handle the weird alignment
		 * restrictions on the base pointer for a unit-length queue.
		 */
		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
			CMDQ_BATCH_ENTRIES);
		return -ENXIO;
	}

	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_EVTQS, reg));
	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_PRIQS, reg));

	/* SID/SSID sizes */
	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;

	/*
	 * If the SMMU supports fewer bits than would fill a single L2 stream
	 * table, use a linear table instead.
	 */
	if (smmu->sid_bits <= STRTAB_SPLIT)
		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* IDR3 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
	if (FIELD_GET(IDR3_RIL, reg))
		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
	if (FIELD_GET(IDR3_FWB, reg))
		smmu->features |= ARM_SMMU_FEAT_S2FWB;

	/* Level 2 break-before-make (value 2) is the only BBM level used */
	if (FIELD_GET(IDR3_BBM, reg) == 2)
		smmu->features |= ARM_SMMU_FEAT_BBML2;

	/* IDR5 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

	/* Maximum number of outstanding stalls */
	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);

	/* Page sizes */
	if (reg & IDR5_GRAN64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (reg & IDR5_GRAN16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (reg & IDR5_GRAN4K)
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;

	/* Input address size */
	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
		smmu->features |= ARM_SMMU_FEAT_VAX;

	/* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		dev_info(smmu->dev,
			"unknown output address size. Truncating to 48-bit\n");
		fallthrough;
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}

	/* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	/* Nested translation needs both stage 1 and stage 2 */
	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu->features |= ARM_SMMU_FEAT_NESTING;

	/* Apply errata last so they can strip features set above */
	arm_smmu_device_iidr_probe(smmu);

	if (arm_smmu_sva_supported(smmu))
		smmu->features |= ARM_SMMU_FEAT_SVA;

	dev_info(smmu->dev, "oas %lu-bit (features 0x%08x)\n",
		 smmu->oas, smmu->features);
	return 0;
}

#ifdef CONFIG_TEGRA241_CMDQV
/*
 * On DT systems, locate the companion Tegra CMDQV device referenced via the
 * "nvidia,cmdqv" phandle and record it in smmu->impl_dev, setting the
 * corresponding option flag. Silently does nothing if the phandle or device
 * is absent.
 */
static void tegra_cmdqv_dt_probe(struct device_node *smmu_node,
				 struct arm_smmu_device *smmu)
{
	struct platform_device *pdev;
	struct device_node *np;

	np = of_parse_phandle(smmu_node, "nvidia,cmdqv", 0);
	if (!np)
		return;

	/* Tegra241 CMDQV driver is responsible for put_device() */
	pdev = of_find_device_by_node(np);
	of_node_put(np);
	if (!pdev)
		return;

	smmu->impl_dev = &pdev->dev;
	smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
	dev_dbg(smmu->dev, "found companion CMDQV device: %s\n",
		dev_name(smmu->impl_dev));
}
#else
/* Stub when the Tegra241 CMDQV driver is not built */
static void tegra_cmdqv_dt_probe(struct device_node *smmu_node,
				 struct arm_smmu_device *smmu)
{
}
#endif

#ifdef CONFIG_ACPI
#ifdef CONFIG_TEGRA241_CMDQV
/*
 * On ACPI systems, find the companion Tegra241 CMDQV device by matching an
 * "NVDA200C" ACPI node whose _UID equals the IORT node identifier, and
 * record its physical device in smmu->impl_dev.
 */
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
						struct arm_smmu_device *smmu)
{
	const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
	struct acpi_device *adev;

	/* Look for an NVDA200C node whose _UID matches the SMMU node ID */
	adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
	if (adev) {
		/* Tegra241 CMDQV driver is responsible for put_device() */
		smmu->impl_dev = get_device(acpi_get_first_physical_node(adev));
		smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
		dev_info(smmu->dev, "found companion CMDQV device: %s\n",
			 dev_name(smmu->impl_dev));
		acpi_dev_put(adev);
	}
	kfree(uid);
}
#else
/* Stub when the Tegra241 CMDQV driver is not built */
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
						struct arm_smmu_device *smmu)
{
}
#endif

/*
 * Translate the IORT SMMUv3 model field into driver option flags for known
 * quirky implementations. Always returns 0.
 */
static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
				      struct arm_smmu_device *smmu)
{
	struct acpi_iort_smmu_v3 *iort_smmu =
		(struct acpi_iort_smmu_v3 *)node->node_data;

	switch (iort_smmu->model) {
	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
		break;
	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
		break;
	case ACPI_IORT_SMMU_V3_GENERIC:
		/*
		 * Tegra241 implementation stores its SMMU options and impl_dev
		 * in DSDT. Thus, go through the ACPI tables unconditionally.
		 */
		acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
		break;
	}

	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
	return 0;
}

/*
 * ACPI probe path: pull the IORT node from platform data and apply the
 * firmware-described coherency and HTTU overrides before model-specific
 * option parsing.
 */
static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct acpi_iort_smmu_v3 *iort_smmu;
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node;

	node = *(struct acpi_iort_node **)dev_get_platdata(dev);

	/* Retrieve SMMUv3 specific data */
	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;

	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) {
	case IDR0_HTTU_ACCESS_DIRTY:
		smmu->features |= ARM_SMMU_FEAT_HD;
		fallthrough;
	case IDR0_HTTU_ACCESS:
		smmu->features |= ARM_SMMU_FEAT_HA;
	}

	return acpi_smmu_iort_probe_model(node, smmu);
}
#else
/* ACPI disabled: the DT path is the only way to probe */
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif

/*
 * DT probe path: validate #iommu-cells, parse driver option properties,
 * pick up DMA coherency from the node, and look for a companion CMDQV
 * device on Tegra264. Note option/coherency parsing still runs even when
 * #iommu-cells is missing or invalid; the error is returned at the end.
 */
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	struct device *dev = &pdev->dev;
	u32 cells;
	int ret = -EINVAL;

	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
		dev_err(dev, "missing #iommu-cells property\n");
	else if (cells != 1)
		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
	else
		ret = 0;

	parse_driver_options(smmu);

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	if (of_device_is_compatible(dev->of_node, "nvidia,tegra264-smmu"))
		tegra_cmdqv_dt_probe(dev->of_node, smmu);

	return ret;
}

/*
 * Size of the MMIO register region: implementations with the broken-page1
 * quirk expose only the 64K page 0; otherwise both 64K pages are present.
 */
static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
{
	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
		return SZ_64K;
	else
		return SZ_128K;
}

/* Managed ioremap of an arbitrary sub-range of the SMMU register space */
static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
				      resource_size_t size)
{
	struct resource res = DEFINE_RES_MEM(start, size);

	return devm_ioremap_resource(dev, &res);
}

/*
 * Walk the IORT RMR (reserved memory range) entries for this SMMU and write
 * bypass STEs for each listed StreamID, so ongoing firmware/device traffic
 * keeps working across SMMU reset. A failing SID is logged and skipped.
 */
static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
{
	struct list_head rmr_list;
	struct iommu_resv_region *e;

	INIT_LIST_HEAD(&rmr_list);
	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);

	list_for_each_entry(e, &rmr_list, list) {
		struct iommu_iort_rmr_data *rmr;
		int ret, i;

		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
		for (i = 0; i < rmr->num_sids; i++) {
			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
			if (ret) {
				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
					rmr->sids[i]);
				continue;
			}

			/*
			 * STE table is not programmed to HW, see
			 * arm_smmu_initial_bypass_stes()
			 */
			arm_smmu_make_bypass_ste(smmu,
				arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
		}
	}

	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}

/* devm action callback: tear down the implementation backend, if any */
static void arm_smmu_impl_remove(void *data)
{
	struct arm_smmu_device *smmu = data;

	if (smmu->impl_ops && smmu->impl_ops->device_remove)
		smmu->impl_ops->device_remove(smmu);
}

/*
 * Probe all the compiled in implementations. Each one checks to see if it
 * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which
 * replaces the callers. Otherwise the original is returned or ERR_PTR.
 */
static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
{
	struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
	const struct arm_smmu_impl_ops *ops;
	int ret;

	if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
		new_smmu = tegra241_cmdqv_probe(smmu);

	/* -ENODEV means "no implementation matched": keep the generic device */
	if (new_smmu == ERR_PTR(-ENODEV))
		return smmu;
	if (IS_ERR(new_smmu))
		return new_smmu;

	ops = new_smmu->impl_ops;
	if (ops) {
		/* get_viommu_size and vsmmu_init ops must be paired */
		if (WARN_ON(!ops->get_viommu_size != !ops->vsmmu_init)) {
			ret = -EINVAL;
			goto err_remove;
		}
	}

	ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
				       new_smmu);
	if (ret)
		return ERR_PTR(ret);
	return new_smmu;

err_remove:
	arm_smmu_impl_remove(new_smmu);
	return ERR_PTR(ret);
}

/*
 * Platform driver probe: firmware probe (DT or ACPI), implementation
 * quirk probe, MMIO mapping, IRQ discovery, hardware/structure init,
 * reset, and finally IOMMU core registration. Resources mapped/allocated
 * via devm_* need no explicit unwinding here; only sysfs registration,
 * device enablement, and the IOPF queue are undone on failure.
 */
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	int irq, ret;
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu)
		return -ENOMEM;
	smmu->dev = dev;

	if (dev->of_node) {
		ret = arm_smmu_device_dt_probe(pdev, smmu);
	} else {
		ret = arm_smmu_device_acpi_probe(pdev, smmu);
	}
	if (ret)
		return ret;

	/* May replace smmu with an implementation-specific enlargement */
	smmu = arm_smmu_impl_probe(smmu);
	if (IS_ERR(smmu))
		return PTR_ERR(smmu);

	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
		dev_err(dev, "MMIO region too small (%pr)\n", res);
		return -EINVAL;
	}
	ioaddr = res->start;

	/*
	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
	 * the PMCG registers which are reserved by the PMU driver.
	 */
	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);

	if (arm_smmu_resource_size(smmu) > SZ_64K) {
		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
					       ARM_SMMU_REG_SZ);
		if (IS_ERR(smmu->page1))
			return PTR_ERR(smmu->page1);
	} else {
		/* PAGE0_REGS_ONLY quirk: page 1 registers alias page 0 */
		smmu->page1 = smmu->base;
	}

	/* Interrupt lines: either one combined IRQ, or per-queue IRQs */

	irq = platform_get_irq_byname_optional(pdev, "combined");
	if (irq > 0)
		smmu->combined_irq = irq;
	else {
		irq = platform_get_irq_byname_optional(pdev, "eventq");
		if (irq > 0)
			smmu->evtq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "priq");
		if (irq > 0)
			smmu->priq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "gerror");
		if (irq > 0)
			smmu->gerr_irq = irq;
	}
	/* Probe the h/w */
	ret = arm_smmu_device_hw_probe(smmu);
	if (ret)
		return ret;

	/* Initialise in-memory data structures */
	ret = arm_smmu_init_structures(smmu);
	if (ret)
		goto err_free_iopf;

	/* Record our private device structure */
	platform_set_drvdata(pdev, smmu);

	/* Check for RMRs and install bypass STEs if any */
	arm_smmu_rmr_install_bypass_ste(smmu);

	/* Reset the device */
	ret = arm_smmu_device_reset(smmu);
	if (ret)
		goto err_disable;

	/* And we're up. Go go go! */
	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
				     "smmu3.%pa", &ioaddr);
	if (ret)
		goto err_disable;

	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
	if (ret) {
		dev_err(dev, "Failed to register iommu\n");
		goto err_free_sysfs;
	}

	return 0;

err_free_sysfs:
	iommu_device_sysfs_remove(&smmu->iommu);
err_disable:
	arm_smmu_device_disable(smmu);
err_free_iopf:
	iopf_queue_free(smmu->evtq.iopf);
	return ret;
}

/* Platform driver remove: unwind arm_smmu_device_probe in reverse order */
static void arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);
	arm_smmu_device_disable(smmu);
	iopf_queue_free(smmu->evtq.iopf);
	ida_destroy(&smmu->vmid_map);
}

/* Quiesce the hardware on shutdown/kexec without full teardown */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_device_disable(smmu);
}

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

/*
 * Module-exit unregister hook: wait for in-flight SVA notifiers before
 * tearing down the platform driver.
 */
static void arm_smmu_driver_unregister(struct platform_driver *drv)
{
	arm_smmu_sva_notifier_synchronize();
	platform_driver_unregister(drv);
}

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove = arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_driver(arm_smmu_driver, platform_driver_register,
	      arm_smmu_driver_unregister);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");