// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
 * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <linux/cleanup.h>
#include <linux/device.h>
#include <linux/interconnect.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/iopoll.h>
#include <linux/list.h>
#include <linux/mod_devicetable.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/ratelimit.h>
#include <linux/spinlock.h>

#include "arm-smmu.h"
#include "arm-smmu-qcom.h"

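/*
 * Offsets and fields of the per-TBU debug register block used to drive
 * ECATS ("ATOS") debug translations. Only the bits this driver touches
 * are defined here; the full register layout is hardware specific.
 */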
#define TBU_DBG_TIMEOUT_US		100
#define DEBUG_AXUSER_REG		0x30
#define DEBUG_AXUSER_CDMID		GENMASK_ULL(43, 36)
#define DEBUG_AXUSER_CDMID_VAL		0xff
#define DEBUG_PAR_REG			0x28
#define DEBUG_PAR_FAULT_VAL		BIT(0)
#define DEBUG_PAR_PA			GENMASK_ULL(47, 12)
#define DEBUG_SID_HALT_REG		0x0
#define DEBUG_SID_HALT_VAL		BIT(16)
#define DEBUG_SID_HALT_SID		GENMASK(9, 0)
#define DEBUG_SR_HALT_ACK_REG		0x20
#define DEBUG_SR_HALT_ACK_VAL		BIT(1)
#define DEBUG_SR_ECATS_RUNNING_VAL	BIT(0)
#define DEBUG_TXN_AXCACHE		GENMASK(5, 2)
#define DEBUG_TXN_AXPROT		GENMASK(8, 6)
#define DEBUG_TXN_AXPROT_PRIV		0x1
#define DEBUG_TXN_AXPROT_NSEC		0x2
#define DEBUG_TXN_TRIGG_REG		0x18
#define DEBUG_TXN_TRIGGER		BIT(0)
#define DEBUG_VA_ADDR_REG		0x8

static LIST_HEAD(tbu_list);
static DEFINE_MUTEX(tbu_list_lock);
static DEFINE_SPINLOCK(atos_lock);

struct qcom_tbu {
	struct device *dev;
	struct device_node *smmu_np;
	u32 sid_range[2];
	struct list_head list;
	struct clk *clk;
	struct icc_path *path;
	void __iomem *base;
	spinlock_t halt_lock; /* multiple halt or resume can't execute concurrently */
	int halt_count;
};

static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
{
	return container_of(smmu, struct qcom_smmu, smmu);
}

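/*
 * Dump TBU power and sync/invalidate status (read via SCM calls) when a
 * TLB sync times out, to help diagnose an SMMU that appears deadlocked.
 * Rate limited so a stuck sync loop does not flood the log.
 */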
void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
{
	int ret;
	u32 tbu_pwr_status, sync_inv_ack, sync_inv_progress;
	struct qcom_smmu *qsmmu = container_of(smmu, struct qcom_smmu, smmu);
	const struct qcom_smmu_config *cfg;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	if (__ratelimit(&rs)) {
		dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n");

		cfg = qsmmu->cfg;
		if (!cfg)
			return;

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
					&tbu_pwr_status);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TBU power status: %d\n", ret);

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
					&sync_inv_ack);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TBU sync/inv ack status: %d\n", ret);

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
					&sync_inv_progress);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TCU sync/inv progress: %d\n", ret);

		dev_err(smmu->dev,
			"TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
			tbu_pwr_status, sync_inv_ack, sync_inv_progress);
	}
}

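/* Find the TBU whose stream ID window on this SMMU instance covers @sid. */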
static struct qcom_tbu *qcom_find_tbu(struct qcom_smmu *qsmmu, u32 sid)
{
	struct qcom_tbu *tbu;
	u32 start, end;

	guard(mutex)(&tbu_list_lock);

	if (list_empty(&tbu_list))
		return NULL;

	list_for_each_entry(tbu, &tbu_list, list) {
		start = tbu->sid_range[0];
		end = start + tbu->sid_range[1];

		if (qsmmu->smmu.dev->of_node == tbu->smmu_np &&
		    start <= sid && sid < end)
			return tbu;
	}
	dev_err(qsmmu->smmu.dev, "Unable to find TBU for sid 0x%x\n", sid);

	return NULL;
}

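/*
 * Halt the TBU before an ECATS lookup. Halt requests are reference
 * counted under halt_lock; if a stalled context fault is pending it is
 * terminated first, since the halt cannot complete behind it.
 */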
static int qcom_tbu_halt(struct qcom_tbu *tbu, struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int ret = 0, idx = smmu_domain->cfg.cbndx;
	u32 val, fsr, status;

	guard(spinlock_irqsave)(&tbu->halt_lock);
	if (tbu->halt_count) {
		tbu->halt_count++;
		return ret;
	}

	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val |= DEBUG_SID_HALT_VAL;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if ((fsr & ARM_SMMU_CB_FSR_FAULT) && (fsr & ARM_SMMU_CB_FSR_SS)) {
		u32 sctlr_orig, sctlr;

		/*
		 * We are in a fault. Our request to halt the bus will not
		 * complete until transactions in front of us (such as the fault
		 * itself) have completed. Disable iommu faults and terminate
		 * any existing transactions.
		 */
		sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
		sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
	}

	if (readl_poll_timeout_atomic(tbu->base + DEBUG_SR_HALT_ACK_REG, status,
				      (status & DEBUG_SR_HALT_ACK_VAL),
				      0, TBU_DBG_TIMEOUT_US)) {
		dev_err(tbu->dev, "Timeout while trying to halt TBU!\n");
		ret = -ETIMEDOUT;

		val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
		val &= ~DEBUG_SID_HALT_VAL;
		writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

		return ret;
	}

	tbu->halt_count = 1;

	return ret;
}

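/* Drop one halt reference and, on the last one, let the TBU run again. */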
static void qcom_tbu_resume(struct qcom_tbu *tbu)
{
	u32 val;

	guard(spinlock_irqsave)(&tbu->halt_lock);
	if (!tbu->halt_count) {
		WARN(1, "%s: halt_count is 0", dev_name(tbu->dev));
		return;
	}

	if (tbu->halt_count > 1) {
		tbu->halt_count--;
		return;
	}

	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_VAL;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

	tbu->halt_count = 0;
}

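/*
 * Issue a single ECATS translation through the TBU debug registers:
 * program the stream ID and input address, request a cacheable,
 * privileged, non-secure transaction, pulse the trigger, then poll
 * until the lookup completes, faults or times out. Returns the
 * translated physical address, or 0 on fault/timeout. Callers halt the
 * TBU and hold atos_lock around this sequence.
 */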
static phys_addr_t qcom_tbu_trigger_atos(struct arm_smmu_domain *smmu_domain,
					 struct qcom_tbu *tbu, dma_addr_t iova, u32 sid)
{
	bool atos_timedout = false;
	phys_addr_t phys = 0;
	ktime_t timeout;
	u64 val;

	/* Set address and stream-id */
	val = readq_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_SID;
	val |= FIELD_PREP(DEBUG_SID_HALT_SID, sid);
	writeq_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
	writeq_relaxed(iova, tbu->base + DEBUG_VA_ADDR_REG);
	val = FIELD_PREP(DEBUG_AXUSER_CDMID, DEBUG_AXUSER_CDMID_VAL);
	writeq_relaxed(val, tbu->base + DEBUG_AXUSER_REG);

	/* Write-back read and write-allocate */
	val = FIELD_PREP(DEBUG_TXN_AXCACHE, 0xf);

	/* Non-secure access */
	val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_NSEC);

	/* Privileged access */
	val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_PRIV);

	val |= DEBUG_TXN_TRIGGER;
	writeq_relaxed(val, tbu->base + DEBUG_TXN_TRIGG_REG);

	timeout = ktime_add_us(ktime_get(), TBU_DBG_TIMEOUT_US);
	for (;;) {
		val = readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);
		if (!(val & DEBUG_SR_ECATS_RUNNING_VAL))
			break;
		val = readl_relaxed(tbu->base + DEBUG_PAR_REG);
		if (val & DEBUG_PAR_FAULT_VAL)
			break;
		if (ktime_compare(ktime_get(), timeout) > 0) {
			atos_timedout = true;
			break;
		}
	}

	val = readq_relaxed(tbu->base + DEBUG_PAR_REG);
	if (val & DEBUG_PAR_FAULT_VAL)
		dev_err(tbu->dev, "ATOS generated a fault interrupt! PAR = %llx, SID=0x%x\n",
			val, sid);
	else if (atos_timedout)
		dev_err_ratelimited(tbu->dev, "ATOS translation timed out!\n");
	else
		phys = FIELD_GET(DEBUG_PAR_PA, val);

	/* Reset hardware */
	writeq_relaxed(0, tbu->base + DEBUG_TXN_TRIGG_REG);
	writeq_relaxed(0, tbu->base + DEBUG_VA_ADDR_REG);
	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_SID;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

	return phys;
}

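/*
 * Hardware-assisted iova-to-phys lookup for @sid: find the owning TBU,
 * vote for its interconnect bandwidth and clock, halt it, temporarily
 * mask context-bank fault reporting, then run ECATS (retrying a couple
 * of times on failure) before restoring SCTLR and resuming the TBU.
 */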
static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain,
				     dma_addr_t iova, u32 sid)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
	int idx = smmu_domain->cfg.cbndx;
	struct qcom_tbu *tbu;
	u32 sctlr_orig, sctlr;
	phys_addr_t phys = 0;
	int attempt = 0;
	int ret;
	u64 fsr;

	tbu = qcom_find_tbu(qsmmu, sid);
	if (!tbu)
		return 0;

	ret = icc_set_bw(tbu->path, 0, UINT_MAX);
	if (ret)
		return ret;

	ret = clk_prepare_enable(tbu->clk);
	if (ret)
		goto disable_icc;

	ret = qcom_tbu_halt(tbu, smmu_domain);
	if (ret)
		goto disable_clk;

	/*
	 * ATOS/ECATS can trigger the fault interrupt, so disable it temporarily
	 * and check for an interrupt manually.
	 */
	sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
	sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if (fsr & ARM_SMMU_CB_FSR_FAULT) {
		/* Clear pending interrupts */
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);

		/*
		 * TBU halt takes care of resuming any stalled transaction.
		 * Kept here for completeness' sake.
		 */
		if (fsr & ARM_SMMU_CB_FSR_SS)
			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
					  ARM_SMMU_RESUME_TERMINATE);
	}

	/* Only one concurrent atos operation */
	scoped_guard(spinlock_irqsave, &atos_lock) {
		/*
		 * If the translation fails, attempt the lookup a few more
		 * times.
		 */
		do {
			phys = qcom_tbu_trigger_atos(smmu_domain, tbu, iova, sid);

			fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
			if (fsr & ARM_SMMU_CB_FSR_FAULT) {
				/* Clear pending interrupts */
				arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);

				if (fsr & ARM_SMMU_CB_FSR_SS)
					arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
							  ARM_SMMU_RESUME_TERMINATE);
			}
		} while (!phys && attempt++ < 2);

		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
	}
	qcom_tbu_resume(tbu);

	/* Read to complete prior write transactions */
	readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);

disable_clk:
	clk_disable_unprepare(tbu->clk);
disable_icc:
	icc_set_bw(tbu->path, 0, 0);

	return phys;
}

static phys_addr_t qcom_smmu_iova_to_phys_hard(struct arm_smmu_domain *smmu_domain, dma_addr_t iova)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;
	u32 frsynra;
	u16 sid;

	frsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
	sid = FIELD_GET(ARM_SMMU_CBFRSYNRA_SID, frsynra);

	return qcom_iova_to_phys(smmu_domain, iova, sid);
}

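/*
 * Cross-check a faulting translation: do the hardware lookup, flush the
 * TLBs, then look it up again. Differing results suggest a stale TLB
 * entry rather than a genuinely missing mapping.
 */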
static phys_addr_t qcom_smmu_verify_fault(struct arm_smmu_domain *smmu_domain, dma_addr_t iova, u32 fsr)
{
	struct io_pgtable *iop = io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	phys_addr_t phys_post_tlbiall;
	phys_addr_t phys;

	phys = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);
	io_pgtable_tlb_flush_all(iop);
	phys_post_tlbiall = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);

	if (phys != phys_post_tlbiall) {
		dev_err(smmu->dev,
			"ATOS results differed across TLBIALL... (before: %pa after: %pa)\n",
			&phys, &phys_post_tlbiall);
	}

	return (phys == 0 ? phys_post_tlbiall : phys);
}

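/*
 * Qualcomm-specific context fault handler. When TBU debug support is
 * available, the generic fault report is augmented with a software page
 * table walk and an ECATS lookup, and the -EBUSY stall protocol
 * described in the comment further down is honoured.
 */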
irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
{
	struct arm_smmu_domain *smmu_domain = dev;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_context_fault_info cfi;
	u32 resume = 0;
	int idx = smmu_domain->cfg.cbndx;
	phys_addr_t phys_soft;
	int ret, tmp;

	static DEFINE_RATELIMIT_STATE(_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	arm_smmu_read_context_fault_info(smmu, idx, &cfi);

	if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT))
		return IRQ_NONE;

	if (list_empty(&tbu_list)) {
		ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
					 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);

		if (ret == -ENOSYS)
			arm_smmu_print_context_fault_info(smmu, idx, &cfi);

		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);
		return IRQ_HANDLED;
	}

	phys_soft = ops->iova_to_phys(ops, cfi.iova);

	tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
				 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
	if (!tmp || tmp == -EBUSY) {
		ret = IRQ_HANDLED;
		resume = ARM_SMMU_RESUME_TERMINATE;
	} else {
		phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr);

		if (__ratelimit(&_rs)) {
			arm_smmu_print_context_fault_info(smmu, idx, &cfi);

			dev_err(smmu->dev,
				"soft iova-to-phys=%pa\n", &phys_soft);
			if (!phys_soft)
				dev_err(smmu->dev,
					"SOFTWARE TABLE WALK FAILED! Looks like %s accessed an unmapped address!\n",
					dev_name(smmu->dev));
			if (phys_atos)
				dev_err(smmu->dev, "hard iova-to-phys (ATOS)=%pa\n",
					&phys_atos);
			else
				dev_err(smmu->dev, "hard iova-to-phys (ATOS) failed\n");
		}
		ret = IRQ_NONE;
		resume = ARM_SMMU_RESUME_TERMINATE;
	}

	/*
	 * If the client returns -EBUSY, do not clear FSR and do not RESUME
	 * if stalled. This is required to keep the IOMMU client stalled on
	 * the outstanding fault. This gives the client a chance to take any
	 * debug action and then terminate the stalled transaction.
	 * So, the sequence in case of stall on fault should be:
	 * 1) Do not clear FSR or write to RESUME here
	 * 2) Client takes any debug action
	 * 3) Client terminates the stalled transaction and resumes the IOMMU
	 * 4) Client clears FSR. The FSR should only be cleared after 3) and
	 *    not before so that the fault remains outstanding. This ensures
	 *    SCTLR.HUPCF has the desired effect if subsequent transactions also
	 *    need to be terminated.
	 */
	if (tmp != -EBUSY) {
		/* Clear the faulting FSR */
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);

		/* Retry or terminate any stalled transactions */
		if (cfi.fsr & ARM_SMMU_CB_FSR_SS)
			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume);
	}

	return ret;
}

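/*
 * Probe one TBU node: record the SMMU and stream ID range it serves
 * (from the "qcom,stream-id-range" property), map its debug registers,
 * get the optional clock and interconnect path, and add it to the
 * global tbu_list used by the lookup helpers above.
 */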
int qcom_tbu_probe(struct platform_device *pdev)
{
	struct of_phandle_args args = { .args_count = 2 };
	struct device_node *np = pdev->dev.of_node;
	struct device *dev = &pdev->dev;
	struct qcom_tbu *tbu;

	tbu = devm_kzalloc(dev, sizeof(*tbu), GFP_KERNEL);
	if (!tbu)
		return -ENOMEM;

	tbu->dev = dev;
	INIT_LIST_HEAD(&tbu->list);
	spin_lock_init(&tbu->halt_lock);

	if (of_parse_phandle_with_args(np, "qcom,stream-id-range", "#iommu-cells", 0, &args)) {
		dev_err(dev, "Cannot parse the 'qcom,stream-id-range' DT property\n");
		return -EINVAL;
	}

	tbu->smmu_np = args.np;
	tbu->sid_range[0] = args.args[0];
	tbu->sid_range[1] = args.args[1];
	of_node_put(args.np);

	tbu->base = devm_of_iomap(dev, np, 0, NULL);
	if (IS_ERR(tbu->base))
		return PTR_ERR(tbu->base);

	tbu->clk = devm_clk_get_optional(dev, NULL);
	if (IS_ERR(tbu->clk))
		return PTR_ERR(tbu->clk);

	tbu->path = devm_of_icc_get(dev, NULL);
	if (IS_ERR(tbu->path))
		return PTR_ERR(tbu->path);

	guard(mutex)(&tbu_list_lock);
	list_add_tail(&tbu->list, &tbu_list);

	return 0;
}