// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
 * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <linux/cleanup.h>
#include <linux/device.h>
#include <linux/interconnect.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/iopoll.h>
#include <linux/list.h>
#include <linux/mod_devicetable.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/ratelimit.h>
#include <linux/spinlock.h>

#include "arm-smmu.h"
#include "arm-smmu-qcom.h"

#define TBU_DBG_TIMEOUT_US		100
#define DEBUG_AXUSER_REG		0x30
#define DEBUG_AXUSER_CDMID		GENMASK_ULL(43, 36)
#define DEBUG_AXUSER_CDMID_VAL		0xff
#define DEBUG_PAR_REG			0x28
#define DEBUG_PAR_FAULT_VAL		BIT(0)
#define DEBUG_PAR_PA			GENMASK_ULL(47, 12)
#define DEBUG_SID_HALT_REG		0x0
#define DEBUG_SID_HALT_VAL		BIT(16)
#define DEBUG_SID_HALT_SID		GENMASK(9, 0)
#define DEBUG_SR_HALT_ACK_REG		0x20
#define DEBUG_SR_HALT_ACK_VAL		BIT(1)
#define DEBUG_SR_ECATS_RUNNING_VAL	BIT(0)
#define DEBUG_TXN_AXCACHE		GENMASK(5, 2)
#define DEBUG_TXN_AXPROT		GENMASK(8, 6)
#define DEBUG_TXN_AXPROT_PRIV		0x1
#define DEBUG_TXN_AXPROT_NSEC		0x2
#define DEBUG_TXN_TRIGG_REG		0x18
#define DEBUG_TXN_TRIGGER		BIT(0)
#define DEBUG_VA_ADDR_REG		0x8

static LIST_HEAD(tbu_list);
static DEFINE_MUTEX(tbu_list_lock);
static DEFINE_SPINLOCK(atos_lock);

struct qcom_tbu {
	struct device *dev;
	struct device_node *smmu_np;
	u32 sid_range[2];
	struct list_head list;
	struct clk *clk;
	struct icc_path *path;
	void __iomem *base;
	spinlock_t halt_lock; /* multiple halt or resume can't execute concurrently */
	int halt_count;
};

static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
{
	return container_of(smmu, struct qcom_smmu, smmu);
}

void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
{
	int ret;
	u32 tbu_pwr_status, sync_inv_ack, sync_inv_progress;
	struct qcom_smmu *qsmmu = container_of(smmu, struct qcom_smmu, smmu);
	const struct qcom_smmu_config *cfg;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	if (__ratelimit(&rs)) {
		dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n");

		cfg = qsmmu->cfg;
		if (!cfg)
			return;

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
					&tbu_pwr_status);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TBU power status: %d\n", ret);

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
					&sync_inv_ack);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TBU sync/inv ack status: %d\n", ret);

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
					&sync_inv_progress);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TCU sync/inv progress: %d\n", ret);

		dev_err(smmu->dev,
			"TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
			tbu_pwr_status, sync_inv_ack, sync_inv_progress);
	}
}

static struct qcom_tbu *qcom_find_tbu(struct qcom_smmu *qsmmu, u32 sid)
{
	struct qcom_tbu *tbu;
	u32 start, end;

	guard(mutex)(&tbu_list_lock);

	if (list_empty(&tbu_list))
		return NULL;

	list_for_each_entry(tbu, &tbu_list, list) {
		start = tbu->sid_range[0];
		end = start + tbu->sid_range[1];

		if (qsmmu->smmu.dev->of_node == tbu->smmu_np &&
		    start <= sid && sid < end)
			return tbu;
	}
	dev_err(qsmmu->smmu.dev, "Unable to find TBU for sid 0x%x\n", sid);

	return NULL;
}

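/*
 * Ask the TBU to stop accepting new transactions. Halt requests are
 * reference counted via halt_count, so nested halt/resume pairs are safe.
 * If the context bank is stalled on a fault, the halt cannot complete until
 * that transaction has been dealt with, so fault reporting is briefly
 * disabled (SCTLR.CFCFG/CFIE) and the stalled transaction is terminated
 * before polling for the halt acknowledgement.
 */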
static int qcom_tbu_halt(struct qcom_tbu *tbu, struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int ret = 0, idx = smmu_domain->cfg.cbndx;
	u32 val, fsr, status;

	guard(spinlock_irqsave)(&tbu->halt_lock);
	if (tbu->halt_count) {
		tbu->halt_count++;
		return ret;
	}

	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val |= DEBUG_SID_HALT_VAL;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if ((fsr & ARM_SMMU_CB_FSR_FAULT) && (fsr & ARM_SMMU_CB_FSR_SS)) {
		u32 sctlr_orig, sctlr;

		/*
		 * We are in a fault. Our request to halt the bus will not
		 * complete until transactions in front of us (such as the fault
		 * itself) have completed. Disable iommu faults and terminate
		 * any existing transactions.
		 */
		sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
		sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
	}

	if (readl_poll_timeout_atomic(tbu->base + DEBUG_SR_HALT_ACK_REG, status,
				      (status & DEBUG_SR_HALT_ACK_VAL),
				      0, TBU_DBG_TIMEOUT_US)) {
		dev_err(tbu->dev, "Timeout while trying to halt TBU!\n");
		ret = -ETIMEDOUT;

		val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
		val &= ~DEBUG_SID_HALT_VAL;
		writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

		return ret;
	}

	tbu->halt_count = 1;

	return ret;
}

static void qcom_tbu_resume(struct qcom_tbu *tbu)
{
	u32 val;

	guard(spinlock_irqsave)(&tbu->halt_lock);
	if (!tbu->halt_count) {
		WARN(1, "%s: halt_count is 0", dev_name(tbu->dev));
		return;
	}

	if (tbu->halt_count > 1) {
		tbu->halt_count--;
		return;
	}

	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_VAL;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

	tbu->halt_count = 0;
}

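/*
 * Perform a single debug (ECATS) translation through the TBU: program the
 * stream-id and input address, tag the software-initiated transaction with
 * the debug CDMID, request a cacheable, privileged, non-secure lookup and
 * set the trigger bit. The loop then polls until the ECATS running bit
 * clears, a fault is flagged in the PAR, or TBU_DBG_TIMEOUT_US expires; on
 * success the physical address is extracted from DEBUG_PAR_REG, and the
 * debug registers are cleared again before returning.
 */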
static phys_addr_t qcom_tbu_trigger_atos(struct arm_smmu_domain *smmu_domain,
					 struct qcom_tbu *tbu, dma_addr_t iova, u32 sid)
{
	bool atos_timedout = false;
	phys_addr_t phys = 0;
	ktime_t timeout;
	u64 val;

	/* Set address and stream-id */
	val = readq_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_SID;
	val |= FIELD_PREP(DEBUG_SID_HALT_SID, sid);
	writeq_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
	writeq_relaxed(iova, tbu->base + DEBUG_VA_ADDR_REG);
	val = FIELD_PREP(DEBUG_AXUSER_CDMID, DEBUG_AXUSER_CDMID_VAL);
	writeq_relaxed(val, tbu->base + DEBUG_AXUSER_REG);

	/* Write-back read and write-allocate */
	val = FIELD_PREP(DEBUG_TXN_AXCACHE, 0xf);

	/* Non-secure access */
	val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_NSEC);

	/* Privileged access */
	val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_PRIV);

	val |= DEBUG_TXN_TRIGGER;
	writeq_relaxed(val, tbu->base + DEBUG_TXN_TRIGG_REG);

	timeout = ktime_add_us(ktime_get(), TBU_DBG_TIMEOUT_US);
	for (;;) {
		val = readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);
		if (!(val & DEBUG_SR_ECATS_RUNNING_VAL))
			break;
		val = readl_relaxed(tbu->base + DEBUG_PAR_REG);
		if (val & DEBUG_PAR_FAULT_VAL)
			break;
		if (ktime_compare(ktime_get(), timeout) > 0) {
			atos_timedout = true;
			break;
		}
	}

	val = readq_relaxed(tbu->base + DEBUG_PAR_REG);
	if (val & DEBUG_PAR_FAULT_VAL)
		dev_err(tbu->dev, "ATOS generated a fault interrupt! PAR = %llx, SID=0x%x\n",
			val, sid);
	else if (atos_timedout)
		dev_err_ratelimited(tbu->dev, "ATOS translation timed out!\n");
	else
		phys = FIELD_GET(DEBUG_PAR_PA, val);

	/* Reset hardware */
	writeq_relaxed(0, tbu->base + DEBUG_TXN_TRIGG_REG);
	writeq_relaxed(0, tbu->base + DEBUG_VA_ADDR_REG);
	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_SID;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

	return phys;
}

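/*
 * Translate an IOVA for the given stream-id through the TBU debug interface.
 * The matching TBU is looked up, interconnect bandwidth is voted, the TBU
 * clock is enabled and the TBU is halted for the duration of the lookup.
 * Context faults are masked while ATOS runs (the lookup can raise the fault
 * interrupt itself) and the translation is retried up to three times under
 * atos_lock before the original SCTLR value is restored and the TBU is
 * resumed.
 */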
static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain,
				     dma_addr_t iova, u32 sid)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
	int idx = smmu_domain->cfg.cbndx;
	struct qcom_tbu *tbu;
	u32 sctlr_orig, sctlr;
	phys_addr_t phys = 0;
	int attempt = 0;
	int ret;
	u64 fsr;

	tbu = qcom_find_tbu(qsmmu, sid);
	if (!tbu)
		return 0;

	ret = icc_set_bw(tbu->path, 0, UINT_MAX);
	if (ret)
		return ret;

	ret = clk_prepare_enable(tbu->clk);
	if (ret)
		goto disable_icc;

	ret = qcom_tbu_halt(tbu, smmu_domain);
	if (ret)
		goto disable_clk;

	/*
	 * ATOS/ECATS can trigger the fault interrupt, so disable it temporarily
	 * and check for an interrupt manually.
	 */
	sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
	sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if (fsr & ARM_SMMU_CB_FSR_FAULT) {
		/* Clear pending interrupts */
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);

		/*
		 * TBU halt takes care of resuming any stalled transaction.
		 * Kept here for completeness' sake.
		 */
		if (fsr & ARM_SMMU_CB_FSR_SS)
			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
					  ARM_SMMU_RESUME_TERMINATE);
	}

	/* Only one concurrent atos operation */
	scoped_guard(spinlock_irqsave, &atos_lock) {
		/* If the translation fails, attempt the lookup a few more times. */
		do {
			phys = qcom_tbu_trigger_atos(smmu_domain, tbu, iova, sid);

			fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
			if (fsr & ARM_SMMU_CB_FSR_FAULT) {
				/* Clear pending interrupts */
				arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);

				if (fsr & ARM_SMMU_CB_FSR_SS)
					arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
							  ARM_SMMU_RESUME_TERMINATE);
			}
		} while (!phys && attempt++ < 2);

		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
	}
	qcom_tbu_resume(tbu);

	/* Read to complete prior write transactions */
	readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);

disable_clk:
	clk_disable_unprepare(tbu->clk);
disable_icc:
	icc_set_bw(tbu->path, 0, 0);

	return phys;
}

static phys_addr_t qcom_smmu_iova_to_phys_hard(struct arm_smmu_domain *smmu_domain, dma_addr_t iova)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;
	u32 frsynra;
	u16 sid;

	frsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
	sid = FIELD_GET(ARM_SMMU_CBFRSYNRA_SID, frsynra);

	return qcom_iova_to_phys(smmu_domain, iova, sid);
}

static phys_addr_t qcom_smmu_verify_fault(struct arm_smmu_domain *smmu_domain, dma_addr_t iova, u32 fsr)
{
	struct io_pgtable *iop = io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	phys_addr_t phys_post_tlbiall;
	phys_addr_t phys;

	phys = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);
	io_pgtable_tlb_flush_all(iop);
	phys_post_tlbiall = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);

	if (phys != phys_post_tlbiall) {
		dev_err(smmu->dev,
			"ATOS results differed across TLBIALL... (before: %pa after: %pa)\n",
			&phys, &phys_post_tlbiall);
	}

	return (phys == 0 ? phys_post_tlbiall : phys);
}

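/*
 * Context fault handler used on Qualcomm platforms with TBU debug support.
 * Without any registered TBUs the fault is simply reported and acknowledged.
 * If the client does not handle the fault, the IOVA is resolved both through
 * a software page table walk and through ATOS, and both results are printed
 * (rate limited) to help distinguish TLB or page-table inconsistencies from
 * genuinely unmapped addresses. A client returning -EBUSY from
 * report_iommu_fault() keeps the transaction stalled: FSR is left uncleared
 * and no RESUME is written, as described in the comment further down.
 */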
irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
{
	struct arm_smmu_domain *smmu_domain = dev;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_context_fault_info cfi;
	u32 resume = 0;
	int idx = smmu_domain->cfg.cbndx;
	phys_addr_t phys_soft;
	int ret, tmp;

	static DEFINE_RATELIMIT_STATE(_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	arm_smmu_read_context_fault_info(smmu, idx, &cfi);

	if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT))
		return IRQ_NONE;

	if (list_empty(&tbu_list)) {
		ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
					 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);

		if (ret == -ENOSYS)
			arm_smmu_print_context_fault_info(smmu, idx, &cfi);

		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);
		return IRQ_HANDLED;
	}

	phys_soft = ops->iova_to_phys(ops, cfi.iova);

	tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
				 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
	if (!tmp || tmp == -EBUSY) {
		ret = IRQ_HANDLED;
		resume = ARM_SMMU_RESUME_TERMINATE;
	} else {
		phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr);

		if (__ratelimit(&_rs)) {
			arm_smmu_print_context_fault_info(smmu, idx, &cfi);

			dev_err(smmu->dev,
				"soft iova-to-phys=%pa\n", &phys_soft);
			if (!phys_soft)
				dev_err(smmu->dev,
					"SOFTWARE TABLE WALK FAILED! Looks like %s accessed an unmapped address!\n",
					dev_name(smmu->dev));
			if (phys_atos)
				dev_err(smmu->dev, "hard iova-to-phys (ATOS)=%pa\n",
					&phys_atos);
			else
				dev_err(smmu->dev, "hard iova-to-phys (ATOS) failed\n");
		}
		ret = IRQ_NONE;
		resume = ARM_SMMU_RESUME_TERMINATE;
	}

	/*
	 * If the client returns -EBUSY, do not clear FSR and do not RESUME
	 * if stalled. This is required to keep the IOMMU client stalled on
	 * the outstanding fault. This gives the client a chance to take any
	 * debug action and then terminate the stalled transaction.
	 * So, the sequence in case of stall on fault should be:
	 * 1) Do not clear FSR or write to RESUME here
	 * 2) Client takes any debug action
	 * 3) Client terminates the stalled transaction and resumes the IOMMU
	 * 4) Client clears FSR. The FSR should only be cleared after 3) and
	 *    not before so that the fault remains outstanding. This ensures
	 *    SCTLR.HUPCF has the desired effect if subsequent transactions also
	 *    need to be terminated.
	 */
	if (tmp != -EBUSY) {
		/* Clear the faulting FSR */
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);

		/* Retry or terminate any stalled transactions */
		if (cfi.fsr & ARM_SMMU_CB_FSR_SS)
			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume);
	}

	return ret;
}

int qcom_tbu_probe(struct platform_device *pdev)
{
	struct of_phandle_args args = { .args_count = 2 };
	struct device_node *np = pdev->dev.of_node;
	struct device *dev = &pdev->dev;
	struct qcom_tbu *tbu;

	tbu = devm_kzalloc(dev, sizeof(*tbu), GFP_KERNEL);
	if (!tbu)
		return -ENOMEM;

	tbu->dev = dev;
	INIT_LIST_HEAD(&tbu->list);
	spin_lock_init(&tbu->halt_lock);

	if (of_parse_phandle_with_args(np, "qcom,stream-id-range", "#iommu-cells", 0, &args)) {
		dev_err(dev, "Cannot parse the 'qcom,stream-id-range' DT property\n");
		return -EINVAL;
	}

	tbu->smmu_np = args.np;
	tbu->sid_range[0] = args.args[0];
	tbu->sid_range[1] = args.args[1];
	of_node_put(args.np);

	tbu->base = devm_of_iomap(dev, np, 0, NULL);
	if (IS_ERR(tbu->base))
		return PTR_ERR(tbu->base);

	tbu->clk = devm_clk_get_optional(dev, NULL);
	if (IS_ERR(tbu->clk))
		return PTR_ERR(tbu->clk);

	tbu->path = devm_of_icc_get(dev, NULL);
	if (IS_ERR(tbu->path))
		return PTR_ERR(tbu->path);

	guard(mutex)(&tbu_list_lock);
	list_add_tail(&tbu->list, &tbu_list);

	return 0;
}
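
/*
 * For reference, qcom_tbu_probe() above expects a TBU node roughly along the
 * lines of the illustrative sketch below. This is not an authoritative
 * example: the unit address, reg value, clock and interconnect specifiers and
 * the SID numbers are placeholders, and the real description lives in the
 * devicetree bindings (qcom,qsmmuv500-tbu). The two argument cells of
 * "qcom,stream-id-range" supply sid_range[0] (first SID) and sid_range[1]
 * (number of SIDs) used by qcom_find_tbu(); the clock and interconnect path
 * are optional and are looked up with devm_clk_get_optional() and
 * devm_of_icc_get().
 *
 *	tbu@150e1000 {
 *		compatible = "qcom,sdm845-tbu";
 *		reg = <0x150e1000 0x1000>;
 *		clocks = <&gcc EXAMPLE_TBU_CLK>;
 *		interconnects = <&example_noc EXAMPLE_MASTER 0 &mem_noc SLAVE_EBI1 0>;
 *		qcom,stream-id-range = <&apps_smmu 0x1c00 0x400>;
 *	};
 */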