xref: /linux/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c (revision 9fd2da71c301184d98fe37674ca8d017d1ce6600)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
4  * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
5  */
6 
7 #include <linux/cleanup.h>
8 #include <linux/device.h>
9 #include <linux/interconnect.h>
10 #include <linux/firmware/qcom/qcom_scm.h>
11 #include <linux/iopoll.h>
12 #include <linux/list.h>
13 #include <linux/mod_devicetable.h>
14 #include <linux/mutex.h>
15 #include <linux/platform_device.h>
16 #include <linux/ratelimit.h>
17 #include <linux/spinlock.h>
18 
19 #include "arm-smmu.h"
20 #include "arm-smmu-qcom.h"
21 
/* Timeout, in microseconds, used for the TBU halt poll and the ATOS busy-wait */
#define TBU_DBG_TIMEOUT_US		100
/*
 * The DEBUG_*_REG values below are byte offsets from a TBU's mapped register
 * base (tbu->base); the other DEBUG_* values are bits/fields in those
 * registers or in the transaction trigger word.
 */
#define DEBUG_AXUSER_REG		0x30
#define DEBUG_AXUSER_CDMID		GENMASK_ULL(43, 36)
#define DEBUG_AXUSER_CDMID_VAL		0xff
#define DEBUG_PAR_REG			0x28
#define DEBUG_PAR_FAULT_VAL		BIT(0)
#define DEBUG_PAR_PA			GENMASK_ULL(47, 12)
#define DEBUG_SID_HALT_REG		0x0
#define DEBUG_SID_HALT_VAL		BIT(16)
#define DEBUG_SID_HALT_SID		GENMASK(9, 0)
#define DEBUG_SR_HALT_ACK_REG		0x20
#define DEBUG_SR_HALT_ACK_VAL		BIT(1)
#define DEBUG_SR_ECATS_RUNNING_VAL	BIT(0)
#define DEBUG_TXN_AXCACHE		GENMASK(5, 2)
#define DEBUG_TXN_AXPROT		GENMASK(8, 6)
#define DEBUG_TXN_AXPROT_PRIV		0x1
#define DEBUG_TXN_AXPROT_NSEC		0x2
#define DEBUG_TXN_TRIGG_REG		0x18
#define DEBUG_TXN_TRIGGER		BIT(0)
#define DEBUG_VA_ADDR_REG		0x8

/*
 * Every TBU registered by qcom_tbu_probe(). Insertions and lookups take
 * tbu_list_lock; the fault handler only peeks at list_empty() without it.
 */
static LIST_HEAD(tbu_list);
static DEFINE_MUTEX(tbu_list_lock);
/* Held around the ATOS retry loop so only one ATOS operation runs at a time */
static DEFINE_SPINLOCK(atos_lock);
46 
/**
 * struct qcom_tbu - state for one TBU registered by qcom_tbu_probe()
 * @dev: the TBU's own device; used for logging
 * @smmu_np: node referenced by the "qcom,stream-id-range" phandle; compared
 *           against the SMMU's of_node when looking up a TBU
 * @sid_range: [0] is the first stream ID served, [1] the number of stream
 *             IDs, i.e. the TBU matches sid in [[0], [0] + [1])
 * @list: link in the file-scope tbu_list
 * @clk: optional clock, enabled around an ATOS lookup
 * @path: interconnect path, voted around an ATOS lookup
 * @base: mapped register block accessed with the DEBUG_*_REG offsets
 * @halt_lock: serialises halt/resume and protects @halt_count
 * @halt_count: number of outstanding halt requests; the halt bit is cleared
 *              when the last one is released
 */
struct qcom_tbu {
	struct device *dev;
	struct device_node *smmu_np;
	u32 sid_range[2];
	struct list_head list;
	struct clk *clk;
	struct icc_path	*path;
	void __iomem *base;
	spinlock_t halt_lock; /* multiple halt or resume can't execute concurrently */
	int halt_count;
};
58 
59 static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
60 {
61 	return container_of(smmu, struct qcom_smmu, smmu);
62 }
63 
64 void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
65 {
66 	int ret;
67 	u32 tbu_pwr_status, sync_inv_ack, sync_inv_progress;
68 	struct qcom_smmu *qsmmu = container_of(smmu, struct qcom_smmu, smmu);
69 	const struct qcom_smmu_config *cfg;
70 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
71 				      DEFAULT_RATELIMIT_BURST);
72 
73 	if (__ratelimit(&rs)) {
74 		dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n");
75 
76 		cfg = qsmmu->data->cfg;
77 		if (!cfg)
78 			return;
79 
80 		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
81 					&tbu_pwr_status);
82 		if (ret)
83 			dev_err(smmu->dev,
84 				"Failed to read TBU power status: %d\n", ret);
85 
86 		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
87 					&sync_inv_ack);
88 		if (ret)
89 			dev_err(smmu->dev,
90 				"Failed to read TBU sync/inv ack status: %d\n", ret);
91 
92 		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
93 					&sync_inv_progress);
94 		if (ret)
95 			dev_err(smmu->dev,
96 				"Failed to read TCU syn/inv progress: %d\n", ret);
97 
98 		dev_err(smmu->dev,
99 			"TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
100 			tbu_pwr_status, sync_inv_ack, sync_inv_progress);
101 	}
102 }
103 
104 static struct qcom_tbu *qcom_find_tbu(struct qcom_smmu *qsmmu, u32 sid)
105 {
106 	struct qcom_tbu *tbu;
107 	u32 start, end;
108 
109 	guard(mutex)(&tbu_list_lock);
110 
111 	if (list_empty(&tbu_list))
112 		return NULL;
113 
114 	list_for_each_entry(tbu, &tbu_list, list) {
115 		start = tbu->sid_range[0];
116 		end = start + tbu->sid_range[1];
117 
118 		if (qsmmu->smmu.dev->of_node == tbu->smmu_np &&
119 		    start <= sid && sid < end)
120 			return tbu;
121 	}
122 	dev_err(qsmmu->smmu.dev, "Unable to find TBU for sid 0x%x\n", sid);
123 
124 	return NULL;
125 }
126 
127 static int qcom_tbu_halt(struct qcom_tbu *tbu, struct arm_smmu_domain *smmu_domain)
128 {
129 	struct arm_smmu_device *smmu = smmu_domain->smmu;
130 	int ret = 0, idx = smmu_domain->cfg.cbndx;
131 	u32 val, fsr, status;
132 
133 	guard(spinlock_irqsave)(&tbu->halt_lock);
134 	if (tbu->halt_count) {
135 		tbu->halt_count++;
136 		return ret;
137 	}
138 
139 	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
140 	val |= DEBUG_SID_HALT_VAL;
141 	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
142 
143 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
144 	if ((fsr & ARM_SMMU_CB_FSR_FAULT) && (fsr & ARM_SMMU_CB_FSR_SS)) {
145 		u32 sctlr_orig, sctlr;
146 
147 		/*
148 		 * We are in a fault. Our request to halt the bus will not
149 		 * complete until transactions in front of us (such as the fault
150 		 * itself) have completed. Disable iommu faults and terminate
151 		 * any existing transactions.
152 		 */
153 		sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
154 		sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
155 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);
156 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
157 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE);
158 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
159 	}
160 
161 	if (readl_poll_timeout_atomic(tbu->base + DEBUG_SR_HALT_ACK_REG, status,
162 				      (status & DEBUG_SR_HALT_ACK_VAL),
163 				      0, TBU_DBG_TIMEOUT_US)) {
164 		dev_err(tbu->dev, "Timeout while trying to halt TBU!\n");
165 		ret = -ETIMEDOUT;
166 
167 		val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
168 		val &= ~DEBUG_SID_HALT_VAL;
169 		writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
170 
171 		return ret;
172 	}
173 
174 	tbu->halt_count = 1;
175 
176 	return ret;
177 }
178 
179 static void qcom_tbu_resume(struct qcom_tbu *tbu)
180 {
181 	u32 val;
182 
183 	guard(spinlock_irqsave)(&tbu->halt_lock);
184 	if (!tbu->halt_count) {
185 		WARN(1, "%s: halt_count is 0", dev_name(tbu->dev));
186 		return;
187 	}
188 
189 	if (tbu->halt_count > 1) {
190 		tbu->halt_count--;
191 		return;
192 	}
193 
194 	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
195 	val &= ~DEBUG_SID_HALT_VAL;
196 	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
197 
198 	tbu->halt_count = 0;
199 }
200 
201 static phys_addr_t qcom_tbu_trigger_atos(struct arm_smmu_domain *smmu_domain,
202 					 struct qcom_tbu *tbu, dma_addr_t iova, u32 sid)
203 {
204 	bool atos_timedout = false;
205 	phys_addr_t phys = 0;
206 	ktime_t timeout;
207 	u64 val;
208 
209 	/* Set address and stream-id */
210 	val = readq_relaxed(tbu->base + DEBUG_SID_HALT_REG);
211 	val &= ~DEBUG_SID_HALT_SID;
212 	val |= FIELD_PREP(DEBUG_SID_HALT_SID, sid);
213 	writeq_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
214 	writeq_relaxed(iova, tbu->base + DEBUG_VA_ADDR_REG);
215 	val = FIELD_PREP(DEBUG_AXUSER_CDMID, DEBUG_AXUSER_CDMID_VAL);
216 	writeq_relaxed(val, tbu->base + DEBUG_AXUSER_REG);
217 
218 	/* Write-back read and write-allocate */
219 	val = FIELD_PREP(DEBUG_TXN_AXCACHE, 0xf);
220 
221 	/* Non-secure access */
222 	val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_NSEC);
223 
224 	/* Privileged access */
225 	val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_PRIV);
226 
227 	val |= DEBUG_TXN_TRIGGER;
228 	writeq_relaxed(val, tbu->base + DEBUG_TXN_TRIGG_REG);
229 
230 	timeout = ktime_add_us(ktime_get(), TBU_DBG_TIMEOUT_US);
231 	for (;;) {
232 		val = readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);
233 		if (!(val & DEBUG_SR_ECATS_RUNNING_VAL))
234 			break;
235 		val = readl_relaxed(tbu->base + DEBUG_PAR_REG);
236 		if (val & DEBUG_PAR_FAULT_VAL)
237 			break;
238 		if (ktime_compare(ktime_get(), timeout) > 0) {
239 			atos_timedout = true;
240 			break;
241 		}
242 	}
243 
244 	val = readq_relaxed(tbu->base + DEBUG_PAR_REG);
245 	if (val & DEBUG_PAR_FAULT_VAL)
246 		dev_err(tbu->dev, "ATOS generated a fault interrupt! PAR = %llx, SID=0x%x\n",
247 			val, sid);
248 	else if (atos_timedout)
249 		dev_err_ratelimited(tbu->dev, "ATOS translation timed out!\n");
250 	else
251 		phys = FIELD_GET(DEBUG_PAR_PA, val);
252 
253 	/* Reset hardware */
254 	writeq_relaxed(0, tbu->base + DEBUG_TXN_TRIGG_REG);
255 	writeq_relaxed(0, tbu->base + DEBUG_VA_ADDR_REG);
256 	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
257 	val &= ~DEBUG_SID_HALT_SID;
258 	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
259 
260 	return phys;
261 }
262 
263 static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain,
264 				     dma_addr_t iova, u32 sid)
265 {
266 	struct arm_smmu_device *smmu = smmu_domain->smmu;
267 	struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
268 	int idx = smmu_domain->cfg.cbndx;
269 	struct qcom_tbu *tbu;
270 	u32 sctlr_orig, sctlr;
271 	phys_addr_t phys = 0;
272 	int attempt = 0;
273 	int ret;
274 	u64 fsr;
275 
276 	tbu = qcom_find_tbu(qsmmu, sid);
277 	if (!tbu)
278 		return 0;
279 
280 	ret = icc_set_bw(tbu->path, 0, UINT_MAX);
281 	if (ret)
282 		return ret;
283 
284 	ret = clk_prepare_enable(tbu->clk);
285 	if (ret)
286 		goto disable_icc;
287 
288 	ret = qcom_tbu_halt(tbu, smmu_domain);
289 	if (ret)
290 		goto disable_clk;
291 
292 	/*
293 	 * ATOS/ECATS can trigger the fault interrupt, so disable it temporarily
294 	 * and check for an interrupt manually.
295 	 */
296 	sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
297 	sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
298 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);
299 
300 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
301 	if (fsr & ARM_SMMU_CB_FSR_FAULT) {
302 		/* Clear pending interrupts */
303 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
304 
305 		/*
306 		 * TBU halt takes care of resuming any stalled transcation.
307 		 * Kept it here for completeness sake.
308 		 */
309 		if (fsr & ARM_SMMU_CB_FSR_SS)
310 			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
311 					  ARM_SMMU_RESUME_TERMINATE);
312 	}
313 
314 	/* Only one concurrent atos operation */
315 	scoped_guard(spinlock_irqsave, &atos_lock) {
316 		/*
317 		 * If the translation fails, attempt the lookup more time."
318 		 */
319 		do {
320 			phys = qcom_tbu_trigger_atos(smmu_domain, tbu, iova, sid);
321 
322 			fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
323 			if (fsr & ARM_SMMU_CB_FSR_FAULT) {
324 				/* Clear pending interrupts */
325 				arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
326 
327 				if (fsr & ARM_SMMU_CB_FSR_SS)
328 					arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
329 							  ARM_SMMU_RESUME_TERMINATE);
330 			}
331 		} while (!phys && attempt++ < 2);
332 
333 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
334 	}
335 	qcom_tbu_resume(tbu);
336 
337 	/* Read to complete prior write transcations */
338 	readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);
339 
340 disable_clk:
341 	clk_disable_unprepare(tbu->clk);
342 disable_icc:
343 	icc_set_bw(tbu->path, 0, 0);
344 
345 	return phys;
346 }
347 
348 static phys_addr_t qcom_smmu_iova_to_phys_hard(struct arm_smmu_domain *smmu_domain, dma_addr_t iova)
349 {
350 	struct arm_smmu_device *smmu = smmu_domain->smmu;
351 	int idx = smmu_domain->cfg.cbndx;
352 	u32 frsynra;
353 	u16 sid;
354 
355 	frsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
356 	sid = FIELD_GET(ARM_SMMU_CBFRSYNRA_SID, frsynra);
357 
358 	return qcom_iova_to_phys(smmu_domain, iova, sid);
359 }
360 
361 static phys_addr_t qcom_smmu_verify_fault(struct arm_smmu_domain *smmu_domain, dma_addr_t iova, u32 fsr)
362 {
363 	struct io_pgtable *iop = io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops);
364 	struct arm_smmu_device *smmu = smmu_domain->smmu;
365 	phys_addr_t phys_post_tlbiall;
366 	phys_addr_t phys;
367 
368 	phys = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);
369 	io_pgtable_tlb_flush_all(iop);
370 	phys_post_tlbiall = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);
371 
372 	if (phys != phys_post_tlbiall) {
373 		dev_err(smmu->dev,
374 			"ATOS results differed across TLBIALL... (before: %pa after: %pa)\n",
375 			&phys, &phys_post_tlbiall);
376 	}
377 
378 	return (phys == 0 ? phys_post_tlbiall : phys);
379 }
380 
381 irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
382 {
383 	struct arm_smmu_domain *smmu_domain = dev;
384 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
385 	struct arm_smmu_device *smmu = smmu_domain->smmu;
386 	struct arm_smmu_context_fault_info cfi;
387 	u32 resume = 0;
388 	int idx = smmu_domain->cfg.cbndx;
389 	phys_addr_t phys_soft;
390 	int ret, tmp;
391 
392 	static DEFINE_RATELIMIT_STATE(_rs,
393 				      DEFAULT_RATELIMIT_INTERVAL,
394 				      DEFAULT_RATELIMIT_BURST);
395 
396 	arm_smmu_read_context_fault_info(smmu, idx, &cfi);
397 
398 	if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT))
399 		return IRQ_NONE;
400 
401 	if (list_empty(&tbu_list)) {
402 		ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
403 					 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
404 
405 		if (ret == -ENOSYS)
406 			arm_smmu_print_context_fault_info(smmu, idx, &cfi);
407 
408 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);
409 
410 		if (cfi.fsr & ARM_SMMU_CB_FSR_SS) {
411 			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
412 					  ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE);
413 		}
414 
415 		return IRQ_HANDLED;
416 	}
417 
418 	phys_soft = ops->iova_to_phys(ops, cfi.iova);
419 
420 	tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
421 				 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
422 	if (!tmp || tmp == -EBUSY) {
423 		ret = IRQ_HANDLED;
424 		resume = ARM_SMMU_RESUME_TERMINATE;
425 	} else if (tmp == -EAGAIN) {
426 		ret = IRQ_HANDLED;
427 		resume = 0;
428 	} else {
429 		phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr);
430 
431 		if (__ratelimit(&_rs)) {
432 			arm_smmu_print_context_fault_info(smmu, idx, &cfi);
433 
434 			dev_err(smmu->dev,
435 				"soft iova-to-phys=%pa\n", &phys_soft);
436 			if (!phys_soft)
437 				dev_err(smmu->dev,
438 					"SOFTWARE TABLE WALK FAILED! Looks like %s accessed an unmapped address!\n",
439 					dev_name(smmu->dev));
440 			if (phys_atos)
441 				dev_err(smmu->dev, "hard iova-to-phys (ATOS)=%pa\n",
442 					&phys_atos);
443 			else
444 				dev_err(smmu->dev, "hard iova-to-phys (ATOS) failed\n");
445 		}
446 		ret = IRQ_NONE;
447 		resume = ARM_SMMU_RESUME_TERMINATE;
448 	}
449 
450 	/*
451 	 * If the client returns -EBUSY, do not clear FSR and do not RESUME
452 	 * if stalled. This is required to keep the IOMMU client stalled on
453 	 * the outstanding fault. This gives the client a chance to take any
454 	 * debug action and then terminate the stalled transaction.
455 	 * So, the sequence in case of stall on fault should be:
456 	 * 1) Do not clear FSR or write to RESUME here
457 	 * 2) Client takes any debug action
458 	 * 3) Client terminates the stalled transaction and resumes the IOMMU
459 	 * 4) Client clears FSR. The FSR should only be cleared after 3) and
460 	 *    not before so that the fault remains outstanding. This ensures
461 	 *    SCTLR.HUPCF has the desired effect if subsequent transactions also
462 	 *    need to be terminated.
463 	 */
464 	if (tmp != -EBUSY) {
465 		/* Clear the faulting FSR */
466 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);
467 
468 		/* Retry or terminate any stalled transactions */
469 		if (cfi.fsr & ARM_SMMU_CB_FSR_SS)
470 			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume);
471 	}
472 
473 	return ret;
474 }
475 
476 int qcom_tbu_probe(struct platform_device *pdev)
477 {
478 	struct of_phandle_args args = { .args_count = 2 };
479 	struct device_node *np = pdev->dev.of_node;
480 	struct device *dev = &pdev->dev;
481 	struct qcom_tbu *tbu;
482 
483 	tbu = devm_kzalloc(dev, sizeof(*tbu), GFP_KERNEL);
484 	if (!tbu)
485 		return -ENOMEM;
486 
487 	tbu->dev = dev;
488 	INIT_LIST_HEAD(&tbu->list);
489 	spin_lock_init(&tbu->halt_lock);
490 
491 	if (of_parse_phandle_with_args(np, "qcom,stream-id-range", "#iommu-cells", 0, &args)) {
492 		dev_err(dev, "Cannot parse the 'qcom,stream-id-range' DT property\n");
493 		return -EINVAL;
494 	}
495 
496 	tbu->smmu_np =  args.np;
497 	tbu->sid_range[0] = args.args[0];
498 	tbu->sid_range[1] = args.args[1];
499 	of_node_put(args.np);
500 
501 	tbu->base = devm_of_iomap(dev, np, 0, NULL);
502 	if (IS_ERR(tbu->base))
503 		return PTR_ERR(tbu->base);
504 
505 	tbu->clk = devm_clk_get_optional(dev, NULL);
506 	if (IS_ERR(tbu->clk))
507 		return PTR_ERR(tbu->clk);
508 
509 	tbu->path = devm_of_icc_get(dev, NULL);
510 	if (IS_ERR(tbu->path))
511 		return PTR_ERR(tbu->path);
512 
513 	guard(mutex)(&tbu_list_lock);
514 	list_add_tail(&tbu->list, &tbu_list);
515 
516 	return 0;
517 }
518