1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 #include <linux/string_choices.h>
30 #include <kunit/visibility.h>
31 #include <uapi/linux/iommufd.h>
32 
33 #include "arm-smmu-v3.h"
34 #include "../../dma-iommu.h"
35 
36 static bool disable_msipolling;
37 module_param(disable_msipolling, bool, 0444);
38 MODULE_PARM_DESC(disable_msipolling,
39 	"Disable MSI-based polling for CMD_SYNC completion.");
40 
41 static const struct iommu_ops arm_smmu_ops;
42 static struct iommu_dirty_ops arm_smmu_dirty_ops;
43 
44 enum arm_smmu_msi_index {
45 	EVTQ_MSI_INDEX,
46 	GERROR_MSI_INDEX,
47 	PRIQ_MSI_INDEX,
48 	ARM_SMMU_MAX_MSIS,
49 };
50 
51 #define NUM_ENTRY_QWORDS 8
52 static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
53 static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));
54 
55 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
56 	[EVTQ_MSI_INDEX] = {
57 		ARM_SMMU_EVTQ_IRQ_CFG0,
58 		ARM_SMMU_EVTQ_IRQ_CFG1,
59 		ARM_SMMU_EVTQ_IRQ_CFG2,
60 	},
61 	[GERROR_MSI_INDEX] = {
62 		ARM_SMMU_GERROR_IRQ_CFG0,
63 		ARM_SMMU_GERROR_IRQ_CFG1,
64 		ARM_SMMU_GERROR_IRQ_CFG2,
65 	},
66 	[PRIQ_MSI_INDEX] = {
67 		ARM_SMMU_PRIQ_IRQ_CFG0,
68 		ARM_SMMU_PRIQ_IRQ_CFG1,
69 		ARM_SMMU_PRIQ_IRQ_CFG2,
70 	},
71 };
72 
73 struct arm_smmu_option_prop {
74 	u32 opt;
75 	const char *prop;
76 };
77 
78 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
79 DEFINE_MUTEX(arm_smmu_asid_lock);
80 
81 static struct arm_smmu_option_prop arm_smmu_options[] = {
82 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
83 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
84 	{ 0, NULL},
85 };
86 
87 static const char * const event_str[] = {
88 	[EVT_ID_BAD_STREAMID_CONFIG] = "C_BAD_STREAMID",
89 	[EVT_ID_STE_FETCH_FAULT] = "F_STE_FETCH",
90 	[EVT_ID_BAD_STE_CONFIG] = "C_BAD_STE",
91 	[EVT_ID_STREAM_DISABLED_FAULT] = "F_STREAM_DISABLED",
92 	[EVT_ID_BAD_SUBSTREAMID_CONFIG] = "C_BAD_SUBSTREAMID",
93 	[EVT_ID_CD_FETCH_FAULT] = "F_CD_FETCH",
94 	[EVT_ID_BAD_CD_CONFIG] = "C_BAD_CD",
95 	[EVT_ID_TRANSLATION_FAULT] = "F_TRANSLATION",
96 	[EVT_ID_ADDR_SIZE_FAULT] = "F_ADDR_SIZE",
97 	[EVT_ID_ACCESS_FAULT] = "F_ACCESS",
98 	[EVT_ID_PERMISSION_FAULT] = "F_PERMISSION",
99 	[EVT_ID_VMS_FETCH_FAULT] = "F_VMS_FETCH",
100 };
101 
102 static const char * const event_class_str[] = {
103 	[0] = "CD fetch",
104 	[1] = "Stage 1 translation table fetch",
105 	[2] = "Input address caused fault",
106 	[3] = "Reserved",
107 };
108 
109 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
110 
111 static void parse_driver_options(struct arm_smmu_device *smmu)
112 {
113 	int i = 0;
114 
115 	do {
116 		if (of_property_read_bool(smmu->dev->of_node,
117 						arm_smmu_options[i].prop)) {
118 			smmu->options |= arm_smmu_options[i].opt;
119 			dev_notice(smmu->dev, "option %s\n",
120 				arm_smmu_options[i].prop);
121 		}
122 	} while (arm_smmu_options[++i].opt);
123 }
124 
125 /* Low-level queue manipulation functions */
126 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
127 {
128 	u32 space, prod, cons;
129 
130 	prod = Q_IDX(q, q->prod);
131 	cons = Q_IDX(q, q->cons);
132 
133 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
134 		space = (1 << q->max_n_shift) - (prod - cons);
135 	else
136 		space = cons - prod;
137 
138 	return space >= n;
139 }
140 
141 static bool queue_full(struct arm_smmu_ll_queue *q)
142 {
143 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
144 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
145 }
146 
147 static bool queue_empty(struct arm_smmu_ll_queue *q)
148 {
149 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
150 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
151 }
152 
153 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
154 {
155 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
156 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
157 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
158 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
159 }
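/*
 * Worked example of the prod/cons encoding used by the helpers above
 * (numbers invented for illustration): with max_n_shift == 8 the queue has
 * 256 slots; bits [7:0] of prod/cons are the index (Q_IDX), bit 8 is the
 * wrap flag (Q_WRP) and bit 31 is the overflow flag (Q_OVF). So prod ==
 * 0x105 with cons == 0x005 means "same index, different wrap": the producer
 * has lapped the consumer and queue_full() is true, whereas equal indices
 * with equal wrap bits make queue_empty() true.
 */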
160 
161 static void queue_sync_cons_out(struct arm_smmu_queue *q)
162 {
163 	/*
164 	 * Ensure that all CPU accesses (reads and writes) to the queue
165 	 * are complete before we update the cons pointer.
166 	 */
167 	__iomb();
168 	writel_relaxed(q->llq.cons, q->cons_reg);
169 }
170 
171 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
172 {
173 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
174 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
175 }
176 
177 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
178 {
179 	struct arm_smmu_ll_queue *llq = &q->llq;
180 
181 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
182 		return;
183 
184 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
185 		      Q_IDX(llq, llq->cons);
186 	queue_sync_cons_out(q);
187 }
188 
189 static int queue_sync_prod_in(struct arm_smmu_queue *q)
190 {
191 	u32 prod;
192 	int ret = 0;
193 
194 	/*
195 	 * We can't use the _relaxed() variant here, as we must prevent
196 	 * speculative reads of the queue before we have determined that
197 	 * prod has indeed moved.
198 	 */
199 	prod = readl(q->prod_reg);
200 
201 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
202 		ret = -EOVERFLOW;
203 
204 	q->llq.prod = prod;
205 	return ret;
206 }
207 
208 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
209 {
210 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
211 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
212 }
213 
214 static void queue_poll_init(struct arm_smmu_device *smmu,
215 			    struct arm_smmu_queue_poll *qp)
216 {
217 	qp->delay = 1;
218 	qp->spin_cnt = 0;
219 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
220 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
221 }
222 
223 static int queue_poll(struct arm_smmu_queue_poll *qp)
224 {
225 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
226 		return -ETIMEDOUT;
227 
228 	if (qp->wfe) {
229 		wfe();
230 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
231 		cpu_relax();
232 	} else {
233 		udelay(qp->delay);
234 		qp->delay *= 2;
235 		qp->spin_cnt = 0;
236 	}
237 
238 	return 0;
239 }
240 
241 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
242 {
243 	int i;
244 
245 	for (i = 0; i < n_dwords; ++i)
246 		*dst++ = cpu_to_le64(*src++);
247 }
248 
249 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
250 {
251 	int i;
252 
253 	for (i = 0; i < n_dwords; ++i)
254 		*dst++ = le64_to_cpu(*src++);
255 }
256 
257 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
258 {
259 	if (queue_empty(&q->llq))
260 		return -EAGAIN;
261 
262 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
263 	queue_inc_cons(&q->llq);
264 	queue_sync_cons_out(q);
265 	return 0;
266 }
267 
268 /* High-level queue accessors */
269 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
270 {
271 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
272 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
273 
274 	switch (ent->opcode) {
275 	case CMDQ_OP_TLBI_EL2_ALL:
276 	case CMDQ_OP_TLBI_NSNH_ALL:
277 		break;
278 	case CMDQ_OP_PREFETCH_CFG:
279 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
280 		break;
281 	case CMDQ_OP_CFGI_CD:
282 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
283 		fallthrough;
284 	case CMDQ_OP_CFGI_STE:
285 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
286 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
287 		break;
288 	case CMDQ_OP_CFGI_CD_ALL:
289 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
290 		break;
291 	case CMDQ_OP_CFGI_ALL:
292 		/* Cover the entire SID range */
293 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
294 		break;
295 	case CMDQ_OP_TLBI_NH_VA:
296 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
297 		fallthrough;
298 	case CMDQ_OP_TLBI_EL2_VA:
299 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
300 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
301 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
302 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
303 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
304 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
305 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
306 		break;
307 	case CMDQ_OP_TLBI_S2_IPA:
308 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
309 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
310 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
311 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
312 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
313 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
314 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
315 		break;
316 	case CMDQ_OP_TLBI_NH_ASID:
317 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
318 		fallthrough;
319 	case CMDQ_OP_TLBI_NH_ALL:
320 	case CMDQ_OP_TLBI_S12_VMALL:
321 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
322 		break;
323 	case CMDQ_OP_TLBI_EL2_ASID:
324 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
325 		break;
326 	case CMDQ_OP_ATC_INV:
327 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
328 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
329 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
330 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
331 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
332 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
333 		break;
334 	case CMDQ_OP_PRI_RESP:
335 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
336 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
337 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
338 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
339 		switch (ent->pri.resp) {
340 		case PRI_RESP_DENY:
341 		case PRI_RESP_FAIL:
342 		case PRI_RESP_SUCC:
343 			break;
344 		default:
345 			return -EINVAL;
346 		}
347 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
348 		break;
349 	case CMDQ_OP_RESUME:
350 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
351 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
352 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
353 		break;
354 	case CMDQ_OP_CMD_SYNC:
355 		if (ent->sync.msiaddr) {
356 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
357 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
358 		} else {
359 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
360 		}
361 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
362 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
363 		break;
364 	default:
365 		return -ENOENT;
366 	}
367 
368 	return 0;
369 }
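/*
 * Illustrative sketch (not a call site in this driver; "smmu", "asid" and
 * "iova" are assumed to come from the caller): build a leaf TLBI_NH_VA
 * invalidation and submit it with a CMD_SYNC.
 *
 *	u64 cmd[CMDQ_ENT_DWORDS];
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode		= CMDQ_OP_TLBI_NH_VA,
 *		.tlbi.asid	= asid,
 *		.tlbi.leaf	= true,
 *		.tlbi.addr	= iova,
 *	};
 *
 *	if (!arm_smmu_cmdq_build_cmd(cmd, &ent))
 *		arm_smmu_cmdq_issue_cmdlist(smmu, arm_smmu_get_cmdq(smmu, &ent),
 *					    cmd, 1, true);
 */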
370 
371 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
372 					       struct arm_smmu_cmdq_ent *ent)
373 {
374 	struct arm_smmu_cmdq *cmdq = NULL;
375 
376 	if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
377 		cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
378 
379 	return cmdq ?: &smmu->cmdq;
380 }
381 
382 static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
383 					     struct arm_smmu_cmdq *cmdq)
384 {
385 	if (cmdq == &smmu->cmdq)
386 		return false;
387 
388 	return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
389 }
390 
391 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
392 					 struct arm_smmu_cmdq *cmdq, u32 prod)
393 {
394 	struct arm_smmu_queue *q = &cmdq->q;
395 	struct arm_smmu_cmdq_ent ent = {
396 		.opcode = CMDQ_OP_CMD_SYNC,
397 	};
398 
399 	/*
400 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
401 	 * payload, so the write will zero the entire command on that platform.
402 	 */
403 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
404 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
405 				   q->ent_dwords * 8;
406 	}
407 
408 	arm_smmu_cmdq_build_cmd(cmd, &ent);
409 	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
410 		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
411 }
412 
413 void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
414 			      struct arm_smmu_cmdq *cmdq)
415 {
416 	static const char * const cerror_str[] = {
417 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
418 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
419 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
420 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
421 	};
422 	struct arm_smmu_queue *q = &cmdq->q;
423 
424 	int i;
425 	u64 cmd[CMDQ_ENT_DWORDS];
426 	u32 cons = readl_relaxed(q->cons_reg);
427 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
428 	struct arm_smmu_cmdq_ent cmd_sync = {
429 		.opcode = CMDQ_OP_CMD_SYNC,
430 	};
431 
432 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
433 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
434 
435 	switch (idx) {
436 	case CMDQ_ERR_CERROR_ABT_IDX:
437 		dev_err(smmu->dev, "retrying command fetch\n");
438 		return;
439 	case CMDQ_ERR_CERROR_NONE_IDX:
440 		return;
441 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
442 		/*
443 		 * ATC Invalidation Completion timeout. CONS is still pointing
444 		 * at the CMD_SYNC. Attempt to complete other pending commands
445 		 * by repeating the CMD_SYNC, though we might well end up back
446 		 * here since the ATC invalidation may still be pending.
447 		 */
448 		return;
449 	case CMDQ_ERR_CERROR_ILL_IDX:
450 	default:
451 		break;
452 	}
453 
454 	/*
455 	 * We may have concurrent producers, so we need to be careful
456 	 * not to touch any of the shadow cmdq state.
457 	 */
458 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
459 	dev_err(smmu->dev, "skipping command in error state:\n");
460 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
461 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
462 
463 	/* Convert the erroneous command into a CMD_SYNC */
464 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
465 	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
466 		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
467 
468 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
469 }
470 
471 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
472 {
473 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
474 }
475 
476 /*
477  * Command queue locking.
478  * This is a form of bastardised rwlock with the following major changes:
479  *
480  * - The only LOCK routines are exclusive_trylock() and shared_lock().
481  *   Neither have barrier semantics, and instead provide only a control
482  *   dependency.
483  *
484  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
485  *   fails if the caller appears to be the last lock holder (yes, this is
486  *   racy). All successful UNLOCK routines have RELEASE semantics.
487  */
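/*
 * Condensed usage sketch (both patterns appear in the real code further
 * down; "llq" stands for the caller's local queue snapshot): the exclusive
 * trylock is only taken opportunistically to refresh the cached cons
 * pointer, while the shared lock brackets CMD_SYNC completion tracking.
 *
 *	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
 *		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
 *		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
 *	}
 *
 *	arm_smmu_cmdq_shared_lock(cmdq);
 *	... insert a CMD_SYNC and wait for it to be consumed ...
 *	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 *		arm_smmu_cmdq_shared_unlock(cmdq);
 *	}
 */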
488 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
489 {
490 	int val;
491 
492 	/*
493 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
494 	 * lock counter. When held in exclusive state, the lock counter is set
495 	 * to INT_MIN so these increments won't hurt as the value will remain
496 	 * negative.
497 	 */
498 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
499 		return;
500 
501 	do {
502 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
503 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
504 }
505 
506 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
507 {
508 	(void)atomic_dec_return_release(&cmdq->lock);
509 }
510 
511 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
512 {
513 	if (atomic_read(&cmdq->lock) == 1)
514 		return false;
515 
516 	arm_smmu_cmdq_shared_unlock(cmdq);
517 	return true;
518 }
519 
520 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
521 ({									\
522 	bool __ret;							\
523 	local_irq_save(flags);						\
524 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
525 	if (!__ret)							\
526 		local_irq_restore(flags);				\
527 	__ret;								\
528 })
529 
530 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
531 ({									\
532 	atomic_set_release(&cmdq->lock, 0);				\
533 	local_irq_restore(flags);					\
534 })
535 
536 
537 /*
538  * Command queue insertion.
539  * This is made fiddly by our attempts to achieve some sort of scalability
540  * since there is one queue shared amongst all of the CPUs in the system.  If
541  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
542  * then you'll *love* this monstrosity.
543  *
544  * The basic idea is to split the queue up into ranges of commands that are
545  * owned by a given CPU; the owner may not have written all of the commands
546  * itself, but is responsible for advancing the hardware prod pointer when
547  * the time comes. The algorithm is roughly:
548  *
549  * 	1. Allocate some space in the queue. At this point we also discover
550  *	   whether the head of the queue is currently owned by another CPU,
551  *	   or whether we are the owner.
552  *
553  *	2. Write our commands into our allocated slots in the queue.
554  *
555  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
556  *
557  *	4. If we are an owner:
558  *		a. Wait for the previous owner to finish.
559  *		b. Mark the queue head as unowned, which tells us the range
560  *		   that we are responsible for publishing.
561  *		c. Wait for all commands in our owned range to become valid.
562  *		d. Advance the hardware prod pointer.
563  *		e. Tell the next owner we've finished.
564  *
565  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
566  *	   owner), then we need to stick around until it has completed:
567  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
568  *		   to clear the first 4 bytes.
569  *		b. Otherwise, we spin waiting for the hardware cons pointer to
570  *		   advance past our command.
571  *
572  * The devil is in the details, particularly the use of locking for handling
573  * SYNC completion and freeing up space in the queue before we think that it is
574  * full.
575  */
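/*
 * Worked example of the ownership hand-off (CPU names and slot numbers are
 * purely illustrative): CPU A CAS-allocates slots 0-2 while the queue head
 * is unowned, so A becomes owner and sets CMDQ_PROD_OWNED_FLAG; CPU B then
 * CAS-allocates slots 3-4, sees the flag and so only writes its commands
 * and marks them valid. A waits for the previous owner, clears the owned
 * flag (freezing the gathered range at prod == 5), polls the valid bitmap
 * over slots 0-4 and finally writes prod == 5 to the hardware, publishing
 * B's commands along with its own.
 */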
576 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
577 					       u32 sprod, u32 eprod, bool set)
578 {
579 	u32 swidx, sbidx, ewidx, ebidx;
580 	struct arm_smmu_ll_queue llq = {
581 		.max_n_shift	= cmdq->q.llq.max_n_shift,
582 		.prod		= sprod,
583 	};
584 
585 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
586 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
587 
588 	while (llq.prod != eprod) {
589 		unsigned long mask;
590 		atomic_long_t *ptr;
591 		u32 limit = BITS_PER_LONG;
592 
593 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
594 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
595 
596 		ptr = &cmdq->valid_map[swidx];
597 
598 		if ((swidx == ewidx) && (sbidx < ebidx))
599 			limit = ebidx;
600 
601 		mask = GENMASK(limit - 1, sbidx);
602 
603 		/*
604 		 * The valid bit is the inverse of the wrap bit. This means
605 		 * that a zero-initialised queue is invalid and, after marking
606 		 * all entries as valid, they become invalid again when we
607 		 * wrap.
608 		 */
609 		if (set) {
610 			atomic_long_xor(mask, ptr);
611 		} else { /* Poll */
612 			unsigned long valid;
613 
614 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
615 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
616 		}
617 
618 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
619 	}
620 }
621 
622 /* Mark all entries in the range [sprod, eprod) as valid */
623 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
624 					u32 sprod, u32 eprod)
625 {
626 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
627 }
628 
629 /* Wait for all entries in the range [sprod, eprod) to become valid */
630 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
631 					 u32 sprod, u32 eprod)
632 {
633 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
634 }
635 
636 /* Wait for the command queue to become non-full */
637 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
638 					     struct arm_smmu_cmdq *cmdq,
639 					     struct arm_smmu_ll_queue *llq)
640 {
641 	unsigned long flags;
642 	struct arm_smmu_queue_poll qp;
643 	int ret = 0;
644 
645 	/*
646 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
647 	 * that fails, spin until somebody else updates it for us.
648 	 */
649 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
650 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
651 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
652 		llq->val = READ_ONCE(cmdq->q.llq.val);
653 		return 0;
654 	}
655 
656 	queue_poll_init(smmu, &qp);
657 	do {
658 		llq->val = READ_ONCE(cmdq->q.llq.val);
659 		if (!queue_full(llq))
660 			break;
661 
662 		ret = queue_poll(&qp);
663 	} while (!ret);
664 
665 	return ret;
666 }
667 
668 /*
669  * Wait until the SMMU signals a CMD_SYNC completion MSI.
670  * Must be called with the cmdq lock held in some capacity.
671  */
672 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
673 					  struct arm_smmu_cmdq *cmdq,
674 					  struct arm_smmu_ll_queue *llq)
675 {
676 	int ret = 0;
677 	struct arm_smmu_queue_poll qp;
678 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
679 
680 	queue_poll_init(smmu, &qp);
681 
682 	/*
683 	 * The MSI won't generate an event, since it's being written back
684 	 * into the command queue.
685 	 */
686 	qp.wfe = false;
687 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
688 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
689 	return ret;
690 }
691 
692 /*
693  * Wait until the SMMU cons index passes llq->prod.
694  * Must be called with the cmdq lock held in some capacity.
695  */
696 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
697 					       struct arm_smmu_cmdq *cmdq,
698 					       struct arm_smmu_ll_queue *llq)
699 {
700 	struct arm_smmu_queue_poll qp;
701 	u32 prod = llq->prod;
702 	int ret = 0;
703 
704 	queue_poll_init(smmu, &qp);
705 	llq->val = READ_ONCE(cmdq->q.llq.val);
706 	do {
707 		if (queue_consumed(llq, prod))
708 			break;
709 
710 		ret = queue_poll(&qp);
711 
712 		/*
713 		 * This needs to be a readl() so that our subsequent call
714 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
715 		 *
716 		 * Specifically, we need to ensure that we observe all
717 		 * shared_lock()s by other CMD_SYNCs that share our owner,
718 		 * so that a failing call to tryunlock() means that we're
719 		 * the last one out and therefore we can safely advance
720 		 * cmdq->q.llq.cons. Roughly speaking:
721 		 *
722 		 * CPU 0		CPU1			CPU2 (us)
723 		 *
724 		 * if (sync)
725 		 * 	shared_lock();
726 		 *
727 		 * dma_wmb();
728 		 * set_valid_map();
729 		 *
730 		 * 			if (owner) {
731 		 *				poll_valid_map();
732 		 *				<control dependency>
733 		 *				writel(prod_reg);
734 		 *
735 		 *						readl(cons_reg);
736 		 *						tryunlock();
737 		 *
738 		 * Requires us to see CPU 0's shared_lock() acquisition.
739 		 */
740 		llq->cons = readl(cmdq->q.cons_reg);
741 	} while (!ret);
742 
743 	return ret;
744 }
745 
746 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
747 					 struct arm_smmu_cmdq *cmdq,
748 					 struct arm_smmu_ll_queue *llq)
749 {
750 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
751 	    !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
752 		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
753 
754 	return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
755 }
756 
757 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
758 					u32 prod, int n)
759 {
760 	int i;
761 	struct arm_smmu_ll_queue llq = {
762 		.max_n_shift	= cmdq->q.llq.max_n_shift,
763 		.prod		= prod,
764 	};
765 
766 	for (i = 0; i < n; ++i) {
767 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
768 
769 		prod = queue_inc_prod_n(&llq, i);
770 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
771 	}
772 }
773 
774 /*
775  * This is the actual insertion function, and provides the following
776  * ordering guarantees to callers:
777  *
778  * - There is a dma_wmb() before publishing any commands to the queue.
779  *   This can be relied upon to order prior writes to data structures
780  *   in memory (such as a CD or an STE) before the command.
781  *
782  * - On completion of a CMD_SYNC, there is a control dependency.
783  *   This can be relied upon to order subsequent writes to memory (e.g.
784  *   freeing an IOVA) after completion of the CMD_SYNC.
785  *
786  * - Command insertion is totally ordered, so if two CPUs each race to
787  *   insert their own list of commands then all of the commands from one
788  *   CPU will appear before any of the commands from the other CPU.
789  */
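/*
 * Sketch of leaning on these guarantees from an unmap path (clear_pte(),
 * free_iova(), "tlbi_ent" and the surrounding variables are hypothetical
 * stand-ins, not driver APIs): the PTE clear is ordered before the TLBI by
 * the dma_wmb(), and the IOVA free is ordered after invalidation by the
 * CMD_SYNC completion's control dependency.
 *
 *	clear_pte(ptep);
 *	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &tlbi_ent);
 *	free_iova(iovad, iova);
 */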
790 int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
791 				struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
792 				bool sync)
793 {
794 	u64 cmd_sync[CMDQ_ENT_DWORDS];
795 	u32 prod;
796 	unsigned long flags;
797 	bool owner;
798 	struct arm_smmu_ll_queue llq, head;
799 	int ret = 0;
800 
801 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
802 
803 	/* 1. Allocate some space in the queue */
804 	local_irq_save(flags);
805 	llq.val = READ_ONCE(cmdq->q.llq.val);
806 	do {
807 		u64 old;
808 
809 		while (!queue_has_space(&llq, n + sync)) {
810 			local_irq_restore(flags);
811 			if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
812 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
813 			local_irq_save(flags);
814 		}
815 
816 		head.cons = llq.cons;
817 		head.prod = queue_inc_prod_n(&llq, n + sync) |
818 					     CMDQ_PROD_OWNED_FLAG;
819 
820 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
821 		if (old == llq.val)
822 			break;
823 
824 		llq.val = old;
825 	} while (1);
826 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
827 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
828 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
829 
830 	/*
831 	 * 2. Write our commands into the queue
832 	 * Dependency ordering from the cmpxchg() loop above.
833 	 */
834 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
835 	if (sync) {
836 		prod = queue_inc_prod_n(&llq, n);
837 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
838 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
839 
840 		/*
841 		 * In order to determine completion of our CMD_SYNC, we must
842 		 * ensure that the queue can't wrap twice without us noticing.
843 		 * We achieve that by taking the cmdq lock as shared before
844 		 * marking our slot as valid.
845 		 */
846 		arm_smmu_cmdq_shared_lock(cmdq);
847 	}
848 
849 	/* 3. Mark our slots as valid, ensuring commands are visible first */
850 	dma_wmb();
851 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
852 
853 	/* 4. If we are the owner, take control of the SMMU hardware */
854 	if (owner) {
855 		/* a. Wait for previous owner to finish */
856 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
857 
858 		/* b. Stop gathering work by clearing the owned flag */
859 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
860 						   &cmdq->q.llq.atomic.prod);
861 		prod &= ~CMDQ_PROD_OWNED_FLAG;
862 
863 		/*
864 		 * c. Wait for any gathered work to be written to the queue.
865 		 * Note that we read our own entries so that we have the control
866 		 * dependency required by (d).
867 		 */
868 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
869 
870 		/*
871 		 * d. Advance the hardware prod pointer
872 		 * Control dependency ordering from the entries becoming valid.
873 		 */
874 		writel_relaxed(prod, cmdq->q.prod_reg);
875 
876 		/*
877 		 * e. Tell the next owner we're done
878 		 * Make sure we've updated the hardware first, so that we don't
879 		 * race to update prod and potentially move it backwards.
880 		 */
881 		atomic_set_release(&cmdq->owner_prod, prod);
882 	}
883 
884 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
885 	if (sync) {
886 		llq.prod = queue_inc_prod_n(&llq, n);
887 		ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
888 		if (ret) {
889 			dev_err_ratelimited(smmu->dev,
890 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
891 					    llq.prod,
892 					    readl_relaxed(cmdq->q.prod_reg),
893 					    readl_relaxed(cmdq->q.cons_reg));
894 		}
895 
896 		/*
897 		 * Try to unlock the cmdq lock. This will fail if we're the last
898 		 * reader, in which case we can safely update cmdq->q.llq.cons
899 		 */
900 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
901 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
902 			arm_smmu_cmdq_shared_unlock(cmdq);
903 		}
904 	}
905 
906 	local_irq_restore(flags);
907 	return ret;
908 }
909 
910 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
911 				     struct arm_smmu_cmdq_ent *ent,
912 				     bool sync)
913 {
914 	u64 cmd[CMDQ_ENT_DWORDS];
915 
916 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
917 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
918 			 ent->opcode);
919 		return -EINVAL;
920 	}
921 
922 	return arm_smmu_cmdq_issue_cmdlist(
923 		smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
924 }
925 
926 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
927 				   struct arm_smmu_cmdq_ent *ent)
928 {
929 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
930 }
931 
932 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
933 					     struct arm_smmu_cmdq_ent *ent)
934 {
935 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
936 }
937 
938 static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
939 				     struct arm_smmu_cmdq_batch *cmds,
940 				     struct arm_smmu_cmdq_ent *ent)
941 {
942 	cmds->num = 0;
943 	cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
944 }
945 
946 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
947 				    struct arm_smmu_cmdq_batch *cmds,
948 				    struct arm_smmu_cmdq_ent *cmd)
949 {
950 	bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
951 	bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
952 			  (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
953 	int index;
954 
955 	if (force_sync || unsupported_cmd) {
956 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
957 					    cmds->num, true);
958 		arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
959 	}
960 
961 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
962 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
963 					    cmds->num, false);
964 		arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
965 	}
966 
967 	index = cmds->num * CMDQ_ENT_DWORDS;
968 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
969 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
970 			 cmd->opcode);
971 		return;
972 	}
973 
974 	cmds->num++;
975 }
976 
977 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
978 				      struct arm_smmu_cmdq_batch *cmds)
979 {
980 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
981 					   cmds->num, true);
982 }
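/*
 * Minimal usage sketch for the batch API (assumes a caller-provided "smmu",
 * a template command "cmd" and an array of stream IDs; none of these names
 * are taken from real call sites): add commands one by one and finish with
 * a single syncing submission. arm_smmu_sync_cd() below follows this
 * pattern for real.
 *
 *	struct arm_smmu_cmdq_batch cmds;
 *	size_t i;
 *
 *	arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
 *	for (i = 0; i < num_sids; i++) {
 *		cmd.cfgi.sid = sids[i];
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 */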
983 
984 static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
985 				   struct iommu_page_response *resp)
986 {
987 	struct arm_smmu_cmdq_ent cmd = {0};
988 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
989 	int sid = master->streams[0].id;
990 
991 	if (WARN_ON(!master->stall_enabled))
992 		return;
993 
994 	cmd.opcode		= CMDQ_OP_RESUME;
995 	cmd.resume.sid		= sid;
996 	cmd.resume.stag		= resp->grpid;
997 	switch (resp->code) {
998 	case IOMMU_PAGE_RESP_INVALID:
999 	case IOMMU_PAGE_RESP_FAILURE:
1000 		cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
1001 		break;
1002 	case IOMMU_PAGE_RESP_SUCCESS:
1003 		cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
1004 		break;
1005 	default:
1006 		break;
1007 	}
1008 
1009 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1010 	/*
1011 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
1012 	 * RESUME consumption guarantees that the stalled transaction will be
1013 	 * terminated... at some point in the future. PRI_RESP is fire and
1014 	 * forget.
1015 	 */
1016 }
1017 
1018 /* Context descriptor manipulation functions */
1019 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
1020 {
1021 	struct arm_smmu_cmdq_ent cmd = {
1022 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
1023 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
1024 		.tlbi.asid = asid,
1025 	};
1026 
1027 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1028 }
1029 
1030 /*
1031  * Based on the value of ent report which bits of the STE the HW will access. It
1032  * would be nice if this was complete according to the spec, but minimally it
1033  * has to capture the bits this driver uses.
1034  */
1035 VISIBLE_IF_KUNIT
1036 void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
1037 {
1038 	unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
1039 
1040 	used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
1041 	if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
1042 		return;
1043 
1044 	used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
1045 
1046 	/* S1 translates */
1047 	if (cfg & BIT(0)) {
1048 		used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
1049 					    STRTAB_STE_0_S1CTXPTR_MASK |
1050 					    STRTAB_STE_0_S1CDMAX);
1051 		used_bits[1] |=
1052 			cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
1053 				    STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
1054 				    STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
1055 				    STRTAB_STE_1_EATS | STRTAB_STE_1_MEV);
1056 		used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
1057 
1058 		/*
1059 		 * See 13.5 Summary of attribute/permission configuration fields
1060 		 * for the SHCFG behavior.
1061 		 */
1062 		if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
1063 		    STRTAB_STE_1_S1DSS_BYPASS)
1064 			used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1065 	}
1066 
1067 	/* S2 translates */
1068 	if (cfg & BIT(1)) {
1069 		used_bits[1] |=
1070 			cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
1071 				    STRTAB_STE_1_SHCFG | STRTAB_STE_1_MEV);
1072 		used_bits[2] |=
1073 			cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
1074 				    STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
1075 				    STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S |
1076 				    STRTAB_STE_2_S2R);
1077 		used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
1078 	}
1079 
1080 	if (cfg == STRTAB_STE_0_CFG_BYPASS)
1081 		used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1082 }
1083 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);
1084 
1085 /*
1086  * Figure out if we can do a hitless update of entry to become target. Returns a
1087  * bit mask where 1 indicates that qword needs to be set disruptively.
1088  * unused_update is an intermediate value of entry that has unused bits set to
1089  * their new values.
1090  */
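/*
 * Worked example (bit positions invented for illustration): if the current
 * and target STE disagree only in used bits of qword 1 (say S1DSS), then
 * used_qword_diff == 0b0010, hweight8() == 1 and the update can be done
 * hitlessly via a single critical-qword write. If qword 0 (CFG) and qword 3
 * (S2TTB) both need to change, used_qword_diff == 0b1001 and a breaking
 * V=0 update is required.
 */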
1091 static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
1092 				    const __le64 *entry, const __le64 *target,
1093 				    __le64 *unused_update)
1094 {
1095 	__le64 target_used[NUM_ENTRY_QWORDS] = {};
1096 	__le64 cur_used[NUM_ENTRY_QWORDS] = {};
1097 	u8 used_qword_diff = 0;
1098 	unsigned int i;
1099 
1100 	writer->ops->get_used(entry, cur_used);
1101 	writer->ops->get_used(target, target_used);
1102 
1103 	for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
1104 		/*
1105 		 * Check that masks are up to date, the make functions are not
1106 		 * Check that masks are up to date: the make functions are not
1107 		 * is used.
1108 		 */
1109 		WARN_ON_ONCE(target[i] & ~target_used[i]);
1110 
1111 		/* Bits can change because they are not currently being used */
1112 		unused_update[i] = (entry[i] & cur_used[i]) |
1113 				   (target[i] & ~cur_used[i]);
1114 		/*
1115 		 * Each bit indicates that a used bit in a qword needs to be
1116 		 * changed after unused_update is applied.
1117 		 */
1118 		if ((unused_update[i] & target_used[i]) != target[i])
1119 			used_qword_diff |= 1 << i;
1120 	}
1121 	return used_qword_diff;
1122 }
1123 
1124 static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
1125 		      const __le64 *target, unsigned int start,
1126 		      unsigned int len)
1127 {
1128 	bool changed = false;
1129 	unsigned int i;
1130 
1131 	for (i = start; len != 0; len--, i++) {
1132 		if (entry[i] != target[i]) {
1133 			WRITE_ONCE(entry[i], target[i]);
1134 			changed = true;
1135 		}
1136 	}
1137 
1138 	if (changed)
1139 		writer->ops->sync(writer);
1140 	return changed;
1141 }
1142 
1143 /*
1144  * Update the STE/CD to the target configuration. The transition from the
1145  * current entry to the target entry takes place over multiple steps that
1146  * attempts to make the transition hitless if possible. This function takes care
1147  * attempt to make the transition hitless if possible. This function takes care
1148  * only required to have a 64 bit atomicity with stores from the CPU, while
1149  * entries are many 64 bit values big.
1150  *
1151  * The difference between the current value and the target value is analyzed to
1152  * determine which of three updates are required - disruptive, hitless or no
1153  * change.
1154  *
1155  * In the most general disruptive case we can make any update in three steps:
1156  *  - Disrupting the entry (V=0)
1157  *  - Fill now unused qwords, except qword 0 which contains V
1158  *  - Make qword 0 have the final value and valid (V=1) with a single 64
1159  *    bit store
1160  *
1161  * However this disrupts the HW while it is happening. There are several
1162  * interesting cases where a STE/CD can be updated without disturbing the HW
1163  * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
1164  * because the used bits don't intersect. We can detect this by calculating how
1165  * many 64 bit values need update after adjusting the unused bits and skip the
1166  * V=0 process. This relies on the IGNORED behavior described in the
1167  * specification.
1168  */
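/*
 * Caller-level sketch ("sid" and "ste" stand in for a real stream table
 * slot): callers just build the desired target and write it; this function
 * decides whether the transition can be made hitlessly.
 *
 *	struct arm_smmu_ste target;
 *
 *	arm_smmu_make_abort_ste(&target);
 *	arm_smmu_write_ste(master, sid, ste, &target);
 */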
1169 VISIBLE_IF_KUNIT
1170 void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
1171 			  const __le64 *target)
1172 {
1173 	__le64 unused_update[NUM_ENTRY_QWORDS];
1174 	u8 used_qword_diff;
1175 
1176 	used_qword_diff =
1177 		arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
1178 	if (hweight8(used_qword_diff) == 1) {
1179 		/*
1180 		 * Only one qword needs its used bits to be changed. This is a
1181 		 * hitless update, update all bits the current STE/CD is
1182 		 * ignoring to their new values, then update a single "critical
1183 		 * qword" to change the STE/CD and finally 0 out any bits that
1184 		 * are now unused in the target configuration.
1185 		 */
1186 		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
1187 
1188 		/*
1189 		 * Skip writing unused bits in the critical qword since we'll be
1190 		 * writing it in the next step anyways. This can save a sync
1191 		 * when the only change is in that qword.
1192 		 */
1193 		unused_update[critical_qword_index] =
1194 			entry[critical_qword_index];
1195 		entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
1196 		entry_set(writer, entry, target, critical_qword_index, 1);
1197 		entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
1198 	} else if (used_qword_diff) {
1199 		/*
1200 		 * At least two qwords need their inuse bits to be changed. This
1201 		 * requires a breaking update, zero the V bit, write all qwords
1202 		 * but 0, then set qword 0
1203 		 */
1204 		unused_update[0] = 0;
1205 		entry_set(writer, entry, unused_update, 0, 1);
1206 		entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
1207 		entry_set(writer, entry, target, 0, 1);
1208 	} else {
1209 		/*
1210 		 * No inuse bit changed. Sanity check that all unused bits are 0
1211 		 * in the entry. The target was already sanity checked by
1212 		 * arm_smmu_entry_qword_diff().
1213 		 */
1214 		WARN_ON_ONCE(
1215 			entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
1216 	}
1217 }
1218 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
1219 
1220 static void arm_smmu_sync_cd(struct arm_smmu_master *master,
1221 			     int ssid, bool leaf)
1222 {
1223 	size_t i;
1224 	struct arm_smmu_cmdq_batch cmds;
1225 	struct arm_smmu_device *smmu = master->smmu;
1226 	struct arm_smmu_cmdq_ent cmd = {
1227 		.opcode	= CMDQ_OP_CFGI_CD,
1228 		.cfgi	= {
1229 			.ssid	= ssid,
1230 			.leaf	= leaf,
1231 		},
1232 	};
1233 
1234 	arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
1235 	for (i = 0; i < master->num_streams; i++) {
1236 		cmd.cfgi.sid = master->streams[i].id;
1237 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1238 	}
1239 
1240 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1241 }
1242 
1243 static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
1244 				      dma_addr_t l2ptr_dma)
1245 {
1246 	u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V;
1247 
1248 	/* The HW has 64 bit atomicity with stores to the L2 CD table */
1249 	WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
1250 }
1251 
1252 static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src)
1253 {
1254 	return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK;
1255 }
1256 
1257 struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
1258 					u32 ssid)
1259 {
1260 	struct arm_smmu_cdtab_l2 *l2;
1261 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1262 
1263 	if (!arm_smmu_cdtab_allocated(cd_table))
1264 		return NULL;
1265 
1266 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1267 		return &cd_table->linear.table[ssid];
1268 
1269 	l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
1270 	if (!l2)
1271 		return NULL;
1272 	return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)];
1273 }
1274 
1275 static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
1276 						 u32 ssid)
1277 {
1278 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1279 	struct arm_smmu_device *smmu = master->smmu;
1280 
1281 	might_sleep();
1282 	iommu_group_mutex_assert(master->dev);
1283 
1284 	if (!arm_smmu_cdtab_allocated(cd_table)) {
1285 		if (arm_smmu_alloc_cd_tables(master))
1286 			return NULL;
1287 	}
1288 
1289 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
1290 		unsigned int idx = arm_smmu_cdtab_l1_idx(ssid);
1291 		struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx];
1292 
1293 		if (!*l2ptr) {
1294 			dma_addr_t l2ptr_dma;
1295 
1296 			*l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr),
1297 						    &l2ptr_dma, GFP_KERNEL);
1298 			if (!*l2ptr)
1299 				return NULL;
1300 
1301 			arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx],
1302 						  l2ptr_dma);
1303 			/* An invalid L1CD can be cached */
1304 			arm_smmu_sync_cd(master, ssid, false);
1305 		}
1306 	}
1307 	return arm_smmu_get_cd_ptr(master, ssid);
1308 }
1309 
1310 struct arm_smmu_cd_writer {
1311 	struct arm_smmu_entry_writer writer;
1312 	unsigned int ssid;
1313 };
1314 
1315 VISIBLE_IF_KUNIT
1316 void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
1317 {
1318 	used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
1319 	if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
1320 		return;
1321 	memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
1322 
1323 	/*
1324 	 * If EPD0 is set by the make function it means
1325 	 * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
1326 	 */
1327 	if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
1328 		used_bits[0] &= ~cpu_to_le64(
1329 			CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
1330 			CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
1331 			CTXDESC_CD_0_TCR_SH0);
1332 		used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
1333 	}
1334 }
1335 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);
1336 
1337 static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1338 {
1339 	struct arm_smmu_cd_writer *cd_writer =
1340 		container_of(writer, struct arm_smmu_cd_writer, writer);
1341 
1342 	arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
1343 }
1344 
1345 static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
1346 	.sync = arm_smmu_cd_writer_sync_entry,
1347 	.get_used = arm_smmu_get_cd_used,
1348 };
1349 
1350 void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
1351 			     struct arm_smmu_cd *cdptr,
1352 			     const struct arm_smmu_cd *target)
1353 {
1354 	bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1355 	bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1356 	struct arm_smmu_cd_writer cd_writer = {
1357 		.writer = {
1358 			.ops = &arm_smmu_cd_writer_ops,
1359 			.master = master,
1360 		},
1361 		.ssid = ssid,
1362 	};
1363 
1364 	if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) {
1365 		if (cur_valid)
1366 			master->cd_table.used_ssids--;
1367 		else
1368 			master->cd_table.used_ssids++;
1369 	}
1370 
1371 	arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
1372 }
1373 
1374 void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
1375 			 struct arm_smmu_master *master,
1376 			 struct arm_smmu_domain *smmu_domain)
1377 {
1378 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
1379 	const struct io_pgtable_cfg *pgtbl_cfg =
1380 		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1381 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
1382 		&pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1383 
1384 	memset(target, 0, sizeof(*target));
1385 
1386 	target->data[0] = cpu_to_le64(
1387 		FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1388 		FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1389 		FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1390 		FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1391 		FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1392 #ifdef __BIG_ENDIAN
1393 		CTXDESC_CD_0_ENDI |
1394 #endif
1395 		CTXDESC_CD_0_TCR_EPD1 |
1396 		CTXDESC_CD_0_V |
1397 		FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1398 		CTXDESC_CD_0_AA64 |
1399 		(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
1400 		CTXDESC_CD_0_R |
1401 		CTXDESC_CD_0_A |
1402 		CTXDESC_CD_0_ASET |
1403 		FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
1404 		);
1405 
1406 	/* To enable dirty flag update, set both Access flag and dirty state update */
1407 	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
1408 		target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
1409 					       CTXDESC_CD_0_TCR_HD);
1410 
1411 	target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
1412 				      CTXDESC_CD_1_TTB0_MASK);
1413 	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
1414 }
1415 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);
1416 
1417 void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
1418 {
1419 	struct arm_smmu_cd target = {};
1420 	struct arm_smmu_cd *cdptr;
1421 
1422 	if (!arm_smmu_cdtab_allocated(&master->cd_table))
1423 		return;
1424 	cdptr = arm_smmu_get_cd_ptr(master, ssid);
1425 	if (WARN_ON(!cdptr))
1426 		return;
1427 	arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
1428 }
1429 
1430 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1431 {
1432 	int ret;
1433 	size_t l1size;
1434 	size_t max_contexts;
1435 	struct arm_smmu_device *smmu = master->smmu;
1436 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1437 
1438 	cd_table->s1cdmax = master->ssid_bits;
1439 	max_contexts = 1 << cd_table->s1cdmax;
1440 
1441 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1442 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1443 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1444 		cd_table->linear.num_ents = max_contexts;
1445 
1446 		l1size = max_contexts * sizeof(struct arm_smmu_cd);
1447 		cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size,
1448 							    &cd_table->cdtab_dma,
1449 							    GFP_KERNEL);
1450 		if (!cd_table->linear.table)
1451 			return -ENOMEM;
1452 	} else {
1453 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1454 		cd_table->l2.num_l1_ents =
1455 			DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES);
1456 
1457 		cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents,
1458 					     sizeof(*cd_table->l2.l2ptrs),
1459 					     GFP_KERNEL);
1460 		if (!cd_table->l2.l2ptrs)
1461 			return -ENOMEM;
1462 
1463 		l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1);
1464 		cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
1465 							&cd_table->cdtab_dma,
1466 							GFP_KERNEL);
1467 		if (!cd_table->l2.l1tab) {
1468 			ret = -ENOMEM;
1469 			goto err_free_l2ptrs;
1470 		}
1471 	}
1472 	return 0;
1473 
1474 err_free_l2ptrs:
1475 	kfree(cd_table->l2.l2ptrs);
1476 	cd_table->l2.l2ptrs = NULL;
1477 	return ret;
1478 }
1479 
1480 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1481 {
1482 	int i;
1483 	struct arm_smmu_device *smmu = master->smmu;
1484 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1485 
1486 	if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
1487 		for (i = 0; i < cd_table->l2.num_l1_ents; i++) {
1488 			if (!cd_table->l2.l2ptrs[i])
1489 				continue;
1490 
1491 			dma_free_coherent(smmu->dev,
1492 					  sizeof(*cd_table->l2.l2ptrs[i]),
1493 					  cd_table->l2.l2ptrs[i],
1494 					  arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i]));
1495 		}
1496 		kfree(cd_table->l2.l2ptrs);
1497 
1498 		dma_free_coherent(smmu->dev,
1499 				  cd_table->l2.num_l1_ents *
1500 					  sizeof(struct arm_smmu_cdtab_l1),
1501 				  cd_table->l2.l1tab, cd_table->cdtab_dma);
1502 	} else {
1503 		dma_free_coherent(smmu->dev,
1504 				  cd_table->linear.num_ents *
1505 					  sizeof(struct arm_smmu_cd),
1506 				  cd_table->linear.table, cd_table->cdtab_dma);
1507 	}
1508 }
1509 
1510 /* Stream table manipulation functions */
1511 static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst,
1512 					  dma_addr_t l2ptr_dma)
1513 {
1514 	u64 val = 0;
1515 
1516 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1);
1517 	val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1518 
1519 	/* The HW has 64 bit atomicity with stores to the L2 STE table */
1520 	WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
1521 }
1522 
1523 struct arm_smmu_ste_writer {
1524 	struct arm_smmu_entry_writer writer;
1525 	u32 sid;
1526 };
1527 
1528 static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1529 {
1530 	struct arm_smmu_ste_writer *ste_writer =
1531 		container_of(writer, struct arm_smmu_ste_writer, writer);
1532 	struct arm_smmu_cmdq_ent cmd = {
1533 		.opcode	= CMDQ_OP_CFGI_STE,
1534 		.cfgi	= {
1535 			.sid	= ste_writer->sid,
1536 			.leaf	= true,
1537 		},
1538 	};
1539 
1540 	arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
1541 }
1542 
1543 static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
1544 	.sync = arm_smmu_ste_writer_sync_entry,
1545 	.get_used = arm_smmu_get_ste_used,
1546 };
1547 
1548 static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
1549 			       struct arm_smmu_ste *ste,
1550 			       const struct arm_smmu_ste *target)
1551 {
1552 	struct arm_smmu_device *smmu = master->smmu;
1553 	struct arm_smmu_ste_writer ste_writer = {
1554 		.writer = {
1555 			.ops = &arm_smmu_ste_writer_ops,
1556 			.master = master,
1557 		},
1558 		.sid = sid,
1559 	};
1560 
1561 	arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);
1562 
1563 	/* It's likely that we'll want to use the new STE soon */
1564 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
1565 		struct arm_smmu_cmdq_ent
1566 			prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
1567 					 .prefetch = {
1568 						 .sid = sid,
1569 					 } };
1570 
1571 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1572 	}
1573 }
1574 
1575 void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
1576 {
1577 	memset(target, 0, sizeof(*target));
1578 	target->data[0] = cpu_to_le64(
1579 		STRTAB_STE_0_V |
1580 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
1581 }
1582 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);
1583 
1584 VISIBLE_IF_KUNIT
1585 void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
1586 			      struct arm_smmu_ste *target)
1587 {
1588 	memset(target, 0, sizeof(*target));
1589 	target->data[0] = cpu_to_le64(
1590 		STRTAB_STE_0_V |
1591 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
1592 
1593 	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1594 		target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1595 							 STRTAB_STE_1_SHCFG_INCOMING));
1596 }
1597 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);
1598 
1599 VISIBLE_IF_KUNIT
1600 void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
1601 			       struct arm_smmu_master *master, bool ats_enabled,
1602 			       unsigned int s1dss)
1603 {
1604 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1605 	struct arm_smmu_device *smmu = master->smmu;
1606 
1607 	memset(target, 0, sizeof(*target));
1608 	target->data[0] = cpu_to_le64(
1609 		STRTAB_STE_0_V |
1610 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1611 		FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
1612 		(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1613 		FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
1614 
1615 	target->data[1] = cpu_to_le64(
1616 		FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
1617 		FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1618 		FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1619 		FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1620 		((smmu->features & ARM_SMMU_FEAT_STALLS &&
1621 		  !master->stall_enabled) ?
1622 			 STRTAB_STE_1_S1STALLD :
1623 			 0) |
1624 		FIELD_PREP(STRTAB_STE_1_EATS,
1625 			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1626 
1627 	if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) &&
1628 	    s1dss == STRTAB_STE_1_S1DSS_BYPASS)
1629 		target->data[1] |= cpu_to_le64(FIELD_PREP(
1630 			STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
1631 
1632 	if (smmu->features & ARM_SMMU_FEAT_E2H) {
1633 		/*
1634 		 * To support BTM the streamworld needs to match the
1635 		 * configuration of the CPU so that the ASID broadcasts are
1636 		 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
1637 		 * or NS-EL1 (guest). Since an SVA domain can be installed in a
1638 		 * PASID this should always use a BTM compatible configuration
1639 		 * if the HW supports it.
1640 		 */
1641 		target->data[1] |= cpu_to_le64(
1642 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
1643 	} else {
1644 		target->data[1] |= cpu_to_le64(
1645 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1646 
1647 		/*
1648 		 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
1649 		 * arm_smmu_domain_alloc_id()
1650 		 */
1651 		target->data[2] =
1652 			cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
1653 	}
1654 }
1655 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
1656 
1657 void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
1658 				 struct arm_smmu_master *master,
1659 				 struct arm_smmu_domain *smmu_domain,
1660 				 bool ats_enabled)
1661 {
1662 	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
1663 	const struct io_pgtable_cfg *pgtbl_cfg =
1664 		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1665 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
1666 		&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1667 	u64 vtcr_val;
1668 	struct arm_smmu_device *smmu = master->smmu;
1669 
1670 	memset(target, 0, sizeof(*target));
1671 	target->data[0] = cpu_to_le64(
1672 		STRTAB_STE_0_V |
1673 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
1674 
1675 	target->data[1] = cpu_to_le64(
1676 		FIELD_PREP(STRTAB_STE_1_EATS,
1677 			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1678 
1679 	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
1680 		target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB);
1681 	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1682 		target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1683 							  STRTAB_STE_1_SHCFG_INCOMING));
1684 
1685 	vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1686 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1687 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1688 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1689 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1690 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1691 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1692 	target->data[2] = cpu_to_le64(
1693 		FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1694 		FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
1695 		STRTAB_STE_2_S2AA64 |
1696 #ifdef __BIG_ENDIAN
1697 		STRTAB_STE_2_S2ENDI |
1698 #endif
1699 		STRTAB_STE_2_S2PTW |
1700 		(master->stall_enabled ? STRTAB_STE_2_S2S : 0) |
1701 		STRTAB_STE_2_S2R);
1702 
1703 	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
1704 				      STRTAB_STE_3_S2TTB_MASK);
1705 }
1706 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);
1707 
1708 /*
1709  * This can safely directly manipulate the STE memory without a sync sequence
1710  * because the STE table has not been installed in the SMMU yet.
1711  */
1712 static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
1713 				       unsigned int nent)
1714 {
1715 	unsigned int i;
1716 
1717 	for (i = 0; i < nent; ++i) {
1718 		arm_smmu_make_abort_ste(strtab);
1719 		strtab++;
1720 	}
1721 }
1722 
1723 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1724 {
1725 	dma_addr_t l2ptr_dma;
1726 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1727 	struct arm_smmu_strtab_l2 **l2table;
1728 
1729 	l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)];
1730 	if (*l2table)
1731 		return 0;
1732 
1733 	*l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table),
1734 				       &l2ptr_dma, GFP_KERNEL);
1735 	if (!*l2table) {
1736 		dev_err(smmu->dev,
1737 			"failed to allocate l2 stream table for SID %u\n",
1738 			sid);
1739 		return -ENOMEM;
1740 	}
1741 
1742 	arm_smmu_init_initial_stes((*l2table)->stes,
1743 				   ARRAY_SIZE((*l2table)->stes));
1744 	arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)],
1745 				      l2ptr_dma);
1746 	return 0;
1747 }
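/*
 * Example SID decomposition, again assuming STRTAB_SPLIT == 8: for
 * sid == 0x1234, arm_smmu_strtab_l1_idx() selects L1 entry 0x12 and
 * arm_smmu_strtab_l2_idx() selects STE 0x34 within the L2 table installed
 * above.
 */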
1748 
1749 static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
1750 {
1751 	struct arm_smmu_stream *stream_rhs =
1752 		rb_entry(rhs, struct arm_smmu_stream, node);
1753 	const u32 *sid_lhs = lhs;
1754 
1755 	if (*sid_lhs < stream_rhs->id)
1756 		return -1;
1757 	if (*sid_lhs > stream_rhs->id)
1758 		return 1;
1759 	return 0;
1760 }
1761 
1762 static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
1763 				     const struct rb_node *rhs)
1764 {
1765 	return arm_smmu_streams_cmp_key(
1766 		&rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
1767 }
1768 
1769 static struct arm_smmu_master *
1770 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1771 {
1772 	struct rb_node *node;
1773 
1774 	lockdep_assert_held(&smmu->streams_mutex);
1775 
1776 	node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
1777 	if (!node)
1778 		return NULL;
1779 	return rb_entry(node, struct arm_smmu_stream, node)->master;
1780 }
1781 
1782 /* IRQ and event handlers */
1783 static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw,
1784 				  struct arm_smmu_event *event)
1785 {
1786 	struct arm_smmu_master *master;
1787 
1788 	event->id = FIELD_GET(EVTQ_0_ID, raw[0]);
1789 	event->sid = FIELD_GET(EVTQ_0_SID, raw[0]);
1790 	event->ssv = FIELD_GET(EVTQ_0_SSV, raw[0]);
1791 	event->ssid = event->ssv ? FIELD_GET(EVTQ_0_SSID, raw[0]) : IOMMU_NO_PASID;
1792 	event->privileged = FIELD_GET(EVTQ_1_PnU, raw[1]);
1793 	event->instruction = FIELD_GET(EVTQ_1_InD, raw[1]);
1794 	event->s2 = FIELD_GET(EVTQ_1_S2, raw[1]);
1795 	event->read = FIELD_GET(EVTQ_1_RnW, raw[1]);
1796 	event->stag = FIELD_GET(EVTQ_1_STAG, raw[1]);
1797 	event->stall = FIELD_GET(EVTQ_1_STALL, raw[1]);
1798 	event->class = FIELD_GET(EVTQ_1_CLASS, raw[1]);
1799 	event->iova = FIELD_GET(EVTQ_2_ADDR, raw[2]);
1800 	event->ipa = raw[3] & EVTQ_3_IPA;
1801 	event->fetch_addr = raw[3] & EVTQ_3_FETCH_ADDR;
1802 	event->ttrnw = FIELD_GET(EVTQ_1_TT_READ, raw[1]);
1803 	event->class_tt = false;
1804 	event->dev = NULL;
1805 
1806 	if (event->id == EVT_ID_PERMISSION_FAULT)
1807 		event->class_tt = (event->class == EVTQ_1_CLASS_TT);
1808 
1809 	mutex_lock(&smmu->streams_mutex);
1810 	master = arm_smmu_find_master(smmu, event->sid);
1811 	if (master)
1812 		event->dev = get_device(master->dev);
1813 	mutex_unlock(&smmu->streams_mutex);
1814 }
1815 
1816 static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt,
1817 				 struct arm_smmu_event *event)
1818 {
1819 	int ret = 0;
1820 	u32 perm = 0;
1821 	struct arm_smmu_master *master;
1822 	struct iopf_fault fault_evt = { };
1823 	struct iommu_fault *flt = &fault_evt.fault;
1824 
1825 	switch (event->id) {
1826 	case EVT_ID_BAD_STE_CONFIG:
1827 	case EVT_ID_STREAM_DISABLED_FAULT:
1828 	case EVT_ID_BAD_SUBSTREAMID_CONFIG:
1829 	case EVT_ID_BAD_CD_CONFIG:
1830 	case EVT_ID_TRANSLATION_FAULT:
1831 	case EVT_ID_ADDR_SIZE_FAULT:
1832 	case EVT_ID_ACCESS_FAULT:
1833 	case EVT_ID_PERMISSION_FAULT:
1834 		break;
1835 	default:
1836 		return -EOPNOTSUPP;
1837 	}
1838 
1839 	if (event->stall) {
1840 		if (event->read)
1841 			perm |= IOMMU_FAULT_PERM_READ;
1842 		else
1843 			perm |= IOMMU_FAULT_PERM_WRITE;
1844 
1845 		if (event->instruction)
1846 			perm |= IOMMU_FAULT_PERM_EXEC;
1847 
1848 		if (event->privileged)
1849 			perm |= IOMMU_FAULT_PERM_PRIV;
1850 
1851 		flt->type = IOMMU_FAULT_PAGE_REQ;
1852 		flt->prm = (struct iommu_fault_page_request){
1853 			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1854 			.grpid = event->stag,
1855 			.perm = perm,
1856 			.addr = event->iova,
1857 		};
1858 
1859 		if (event->ssv) {
1860 			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1861 			flt->prm.pasid = event->ssid;
1862 		}
1863 	}
1864 
1865 	mutex_lock(&smmu->streams_mutex);
1866 	master = arm_smmu_find_master(smmu, event->sid);
1867 	if (!master) {
1868 		ret = -EINVAL;
1869 		goto out_unlock;
1870 	}
1871 
1872 	if (event->stall)
1873 		ret = iommu_report_device_fault(master->dev, &fault_evt);
1874 	else if (master->vmaster && !event->s2)
1875 		ret = arm_vmaster_report_event(master->vmaster, evt);
1876 	else
1877 		ret = -EOPNOTSUPP; /* Unhandled events should be pinned */
1878 out_unlock:
1879 	mutex_unlock(&smmu->streams_mutex);
1880 	return ret;
1881 }
1882 
1883 static void arm_smmu_dump_raw_event(struct arm_smmu_device *smmu, u64 *raw,
1884 				    struct arm_smmu_event *event)
1885 {
1886 	int i;
1887 
1888 	dev_err(smmu->dev, "event 0x%02x received:\n", event->id);
1889 
1890 	for (i = 0; i < EVTQ_ENT_DWORDS; ++i)
1891 		dev_err(smmu->dev, "\t0x%016llx\n", raw[i]);
1892 }
1893 
1894 #define ARM_SMMU_EVT_KNOWN(e)	((e)->id < ARRAY_SIZE(event_str) && event_str[(e)->id])
1895 #define ARM_SMMU_LOG_EVT_STR(e) ARM_SMMU_EVT_KNOWN(e) ? event_str[(e)->id] : "UNKNOWN"
1896 #define ARM_SMMU_LOG_CLIENT(e)	(e)->dev ? dev_name((e)->dev) : "(unassigned sid)"
1897 
1898 static void arm_smmu_dump_event(struct arm_smmu_device *smmu, u64 *raw,
1899 				struct arm_smmu_event *evt,
1900 				struct ratelimit_state *rs)
1901 {
1902 	if (!__ratelimit(rs))
1903 		return;
1904 
1905 	arm_smmu_dump_raw_event(smmu, raw, evt);
1906 
1907 	switch (evt->id) {
1908 	case EVT_ID_TRANSLATION_FAULT:
1909 	case EVT_ID_ADDR_SIZE_FAULT:
1910 	case EVT_ID_ACCESS_FAULT:
1911 	case EVT_ID_PERMISSION_FAULT:
1912 		dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x iova: %#llx ipa: %#llx",
1913 			ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
1914 			evt->sid, evt->ssid, evt->iova, evt->ipa);
1915 
1916 		dev_err(smmu->dev, "%s %s %s %s \"%s\"%s%s stag: %#x",
1917 			evt->privileged ? "priv" : "unpriv",
1918 			evt->instruction ? "inst" : "data",
1919 			str_read_write(evt->read),
1920 			evt->s2 ? "s2" : "s1", event_class_str[evt->class],
1921 			evt->class_tt ? (evt->ttrnw ? " ttd_read" : " ttd_write") : "",
1922 			evt->stall ? " stall" : "", evt->stag);
1923 
1924 		break;
1925 
1926 	case EVT_ID_STE_FETCH_FAULT:
1927 	case EVT_ID_CD_FETCH_FAULT:
1928 	case EVT_ID_VMS_FETCH_FAULT:
1929 		dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x fetch_addr: %#llx",
1930 			ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
1931 			evt->sid, evt->ssid, evt->fetch_addr);
1932 
1933 		break;
1934 
1935 	default:
1936 		dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x",
1937 			ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
1938 			evt->sid, evt->ssid);
1939 	}
1940 }
1941 
1942 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1943 {
1944 	u64 evt[EVTQ_ENT_DWORDS];
1945 	struct arm_smmu_event event = {0};
1946 	struct arm_smmu_device *smmu = dev;
1947 	struct arm_smmu_queue *q = &smmu->evtq.q;
1948 	struct arm_smmu_ll_queue *llq = &q->llq;
1949 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1950 				      DEFAULT_RATELIMIT_BURST);
1951 
1952 	do {
1953 		while (!queue_remove_raw(q, evt)) {
1954 			arm_smmu_decode_event(smmu, evt, &event);
1955 			if (arm_smmu_handle_event(smmu, evt, &event))
1956 				arm_smmu_dump_event(smmu, evt, &event, &rs);
1957 
1958 			put_device(event.dev);
1959 			cond_resched();
1960 		}
1961 
1962 		/*
1963 		 * Not much we can do on overflow, so scream and pretend we're
1964 		 * trying harder.
1965 		 */
1966 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1967 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1968 	} while (!queue_empty(llq));
1969 
1970 	/* Sync our overflow flag, as we believe we're up to speed */
1971 	queue_sync_cons_ovf(q);
1972 	return IRQ_HANDLED;
1973 }
1974 
1975 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1976 {
1977 	u32 sid, ssid;
1978 	u16 grpid;
1979 	bool ssv, last;
1980 
1981 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1982 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1983 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1984 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1985 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1986 
1987 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1988 	dev_info(smmu->dev,
1989 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1990 		 sid, ssid, grpid, last ? "L" : "",
1991 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1992 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1993 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1994 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1995 		 evt[1] & PRIQ_1_ADDR_MASK);
1996 
1997 	if (last) {
1998 		struct arm_smmu_cmdq_ent cmd = {
1999 			.opcode			= CMDQ_OP_PRI_RESP,
2000 			.substream_valid	= ssv,
2001 			.pri			= {
2002 				.sid	= sid,
2003 				.ssid	= ssid,
2004 				.grpid	= grpid,
2005 				.resp	= PRI_RESP_DENY,
2006 			},
2007 		};
2008 
2009 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2010 	}
2011 }
2012 
2013 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
2014 {
2015 	struct arm_smmu_device *smmu = dev;
2016 	struct arm_smmu_queue *q = &smmu->priq.q;
2017 	struct arm_smmu_ll_queue *llq = &q->llq;
2018 	u64 evt[PRIQ_ENT_DWORDS];
2019 
2020 	do {
2021 		while (!queue_remove_raw(q, evt))
2022 			arm_smmu_handle_ppr(smmu, evt);
2023 
2024 		if (queue_sync_prod_in(q) == -EOVERFLOW)
2025 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
2026 	} while (!queue_empty(llq));
2027 
2028 	/* Sync our overflow flag, as we believe we're up to speed */
2029 	queue_sync_cons_ovf(q);
2030 	return IRQ_HANDLED;
2031 }
2032 
2033 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
2034 
2035 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
2036 {
2037 	u32 gerror, gerrorn, active;
2038 	struct arm_smmu_device *smmu = dev;
2039 
2040 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
2041 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
2042 
2043 	active = gerror ^ gerrorn;
2044 	if (!(active & GERROR_ERR_MASK))
2045 		return IRQ_NONE; /* No errors pending */
2046 
2047 	dev_warn(smmu->dev,
2048 		 "unexpected global error reported (0x%08x), this could be serious\n",
2049 		 active);
2050 
2051 	if (active & GERROR_SFM_ERR) {
2052 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
2053 		arm_smmu_device_disable(smmu);
2054 	}
2055 
2056 	if (active & GERROR_MSI_GERROR_ABT_ERR)
2057 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
2058 
2059 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
2060 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
2061 
2062 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
2063 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
2064 
2065 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
2066 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
2067 
2068 	if (active & GERROR_PRIQ_ABT_ERR)
2069 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
2070 
2071 	if (active & GERROR_EVTQ_ABT_ERR)
2072 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
2073 
2074 	if (active & GERROR_CMDQ_ERR)
2075 		arm_smmu_cmdq_skip_err(smmu);
2076 
2077 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
2078 	return IRQ_HANDLED;
2079 }
2080 
2081 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
2082 {
2083 	struct arm_smmu_device *smmu = dev;
2084 
2085 	arm_smmu_evtq_thread(irq, dev);
2086 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2087 		arm_smmu_priq_thread(irq, dev);
2088 
2089 	return IRQ_HANDLED;
2090 }
2091 
2092 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
2093 {
2094 	arm_smmu_gerror_handler(irq, dev);
2095 	return IRQ_WAKE_THREAD;
2096 }
2097 
2098 static void
2099 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
2100 			struct arm_smmu_cmdq_ent *cmd)
2101 {
2102 	size_t log2_span;
2103 	size_t span_mask;
2104 	/* ATC invalidates are always on 4096-bytes pages */
2105 	size_t inval_grain_shift = 12;
2106 	unsigned long page_start, page_end;
2107 
2108 	/*
2109 	 * ATS and PASID:
2110 	 *
2111 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
2112 	 * prefix. In that case all ATC entries within the address range are
2113 	 * invalidated, including those that were requested with a PASID! There
2114 	 * is no way to invalidate only entries without PASID.
2115 	 *
2116 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
2117 	 * traffic), translation requests without PASID create ATC entries
2118 	 * without PASID, which must be invalidated with substream_valid clear.
2119 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
2120 	 * ATC entries within the address range.
2121 	 */
2122 	*cmd = (struct arm_smmu_cmdq_ent) {
2123 		.opcode			= CMDQ_OP_ATC_INV,
2124 		.substream_valid	= (ssid != IOMMU_NO_PASID),
2125 		.atc.ssid		= ssid,
2126 	};
2127 
2128 	if (!size) {
2129 		cmd->atc.size = ATC_INV_SIZE_ALL;
2130 		return;
2131 	}
2132 
2133 	page_start	= iova >> inval_grain_shift;
2134 	page_end	= (iova + size - 1) >> inval_grain_shift;
2135 
2136 	/*
2137 	 * In an ATS Invalidate Request, the address must be aligned on the
2138 	 * range size, which must be a power of two number of page sizes. We
2139 	 * thus have to choose between grossly over-invalidating the region, or
2140 	 * splitting the invalidation into multiple commands. For simplicity
2141 	 * we'll go with the first solution, but should refine it in the future
2142 	 * if multiple commands are shown to be more efficient.
2143 	 *
2144 	 * Find the smallest power of two that covers the range. The most
2145 	 * significant differing bit between the start and end addresses,
2146 	 * fls(start ^ end), indicates the required span. For example:
2147 	 *
2148 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
2149 	 *		x = 0b1000 ^ 0b1011 = 0b11
2150 	 *		span = 1 << fls(x) = 4
2151 	 *
2152 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
2153 	 *		x = 0b0111 ^ 0b1010 = 0b1101
2154 	 *		span = 1 << fls(x) = 16
2155 	 */
2156 	log2_span	= fls_long(page_start ^ page_end);
2157 	span_mask	= (1ULL << log2_span) - 1;
2158 
2159 	page_start	&= ~span_mask;
2160 
2161 	cmd->atc.addr	= page_start << inval_grain_shift;
2162 	cmd->atc.size	= log2_span;
2163 }
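/*
 * Worked example of the span calculation above: invalidating iova 0x7000
 * with size 0x4000 touches pages [7; 10], so
 *
 *	log2_span = fls_long(0x7 ^ 0xa) = fls_long(0b1101) = 4
 *	page_start &= ~0xf  ->  0
 *
 * giving cmd->atc.addr = 0x0 and cmd->atc.size = 4, i.e. the 16 pages
 * [0; 15] are over-invalidated exactly as described in the comment.
 */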
2164 
2165 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
2166 				   ioasid_t ssid)
2167 {
2168 	int i;
2169 	struct arm_smmu_cmdq_ent cmd;
2170 	struct arm_smmu_cmdq_batch cmds;
2171 
2172 	arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
2173 
2174 	arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
2175 	for (i = 0; i < master->num_streams; i++) {
2176 		cmd.atc.sid = master->streams[i].id;
2177 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
2178 	}
2179 
2180 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
2181 }
2182 
2183 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
2184 			    unsigned long iova, size_t size)
2185 {
2186 	struct arm_smmu_master_domain *master_domain;
2187 	int i;
2188 	unsigned long flags;
2189 	struct arm_smmu_cmdq_ent cmd = {
2190 		.opcode = CMDQ_OP_ATC_INV,
2191 	};
2192 	struct arm_smmu_cmdq_batch cmds;
2193 
2194 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
2195 		return 0;
2196 
2197 	/*
2198 	 * Ensure that we've completed prior invalidation of the main TLBs
2199 	 * before we read 'nr_ats_masters' in case of a concurrent call to
2200 	 * arm_smmu_enable_ats():
2201 	 *
2202 	 *	// unmap()			// arm_smmu_enable_ats()
2203 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
2204 	 *	smp_mb();			[...]
2205 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
2206 	 *
2207 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
2208 	 * ATS was enabled at the PCI device before completion of the TLBI.
2209 	 */
2210 	smp_mb();
2211 	if (!atomic_read(&smmu_domain->nr_ats_masters))
2212 		return 0;
2213 
2214 	arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
2215 
2216 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2217 	list_for_each_entry(master_domain, &smmu_domain->devices,
2218 			    devices_elm) {
2219 		struct arm_smmu_master *master = master_domain->master;
2220 
2221 		if (!master->ats_enabled)
2222 			continue;
2223 
2224 		if (master_domain->nested_ats_flush) {
2225 			/*
2226 			 * If a S2 used as a nesting parent is changed we have
2227 			 * no option but to completely flush the ATC.
2228 			 */
2229 			arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
2230 		} else {
2231 			arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
2232 						&cmd);
2233 		}
2234 
2235 		for (i = 0; i < master->num_streams; i++) {
2236 			cmd.atc.sid = master->streams[i].id;
2237 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
2238 		}
2239 	}
2240 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2241 
2242 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
2243 }
2244 
2245 /* IO_PGTABLE API */
2246 static void arm_smmu_tlb_inv_context(void *cookie)
2247 {
2248 	struct arm_smmu_domain *smmu_domain = cookie;
2249 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2250 	struct arm_smmu_cmdq_ent cmd;
2251 
2252 	/*
2253 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2254 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
2255 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2256 	 * insertion to guarantee those are observed before the TLBI. Do be
2257 	 * careful, 007.
2258 	 */
2259 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2260 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
2261 	} else {
2262 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
2263 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2264 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
2265 	}
2266 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
2267 }
2268 
2269 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
2270 				     unsigned long iova, size_t size,
2271 				     size_t granule,
2272 				     struct arm_smmu_domain *smmu_domain)
2273 {
2274 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2275 	unsigned long end = iova + size, num_pages = 0, tg = 0;
2276 	size_t inv_range = granule;
2277 	struct arm_smmu_cmdq_batch cmds;
2278 
2279 	if (!size)
2280 		return;
2281 
2282 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2283 		/* Get the leaf page size */
2284 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2285 
2286 		num_pages = size >> tg;
2287 
2288 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
2289 		cmd->tlbi.tg = (tg - 10) / 2;
2290 
2291 		/*
2292 		 * Determine what level the granule is at. For non-leaf, both
2293 		 * io-pgtable and SVA pass a nominal last-level granule because
2294 		 * they don't know what level(s) actually apply, so ignore that
2295 		 * and leave TTL=0. However for various errata reasons we still
2296 		 * want to use a range command, so avoid the SVA corner case
2297 		 * where both scale and num could be 0 as well.
2298 		 */
2299 		if (cmd->tlbi.leaf)
2300 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
2301 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
2302 			num_pages++;
2303 	}
2304 
2305 	arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
2306 
2307 	while (iova < end) {
2308 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2309 			/*
2310 			 * On each iteration of the loop, the range is 5 bits
2311 			 * worth of the aligned size remaining.
2312 			 * The range in pages is:
2313 			 *
2314 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
2315 			 */
2316 			unsigned long scale, num;
2317 
2318 			/* Determine the power of 2 multiple number of pages */
2319 			scale = __ffs(num_pages);
2320 			cmd->tlbi.scale = scale;
2321 
2322 			/* Determine how many chunks of 2^scale size we have */
2323 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2324 			cmd->tlbi.num = num - 1;
2325 
2326 			/* range is num * 2^scale * pgsize */
2327 			inv_range = num << (scale + tg);
2328 
2329 			/* Clear out the lower order bits for the next iteration */
2330 			num_pages -= num << scale;
2331 		}
2332 
2333 		cmd->tlbi.addr = iova;
2334 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
2335 		iova += inv_range;
2336 	}
2337 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
2338 }
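/*
 * Worked example of the range-based path above, with tg = 12 (4K pages):
 * invalidating 51 pages (num_pages = 0b110011) issues two commands:
 *
 *	1st: scale = __ffs(51) = 0, num = 51 & 31 = 19  ->  19 pages
 *	2nd: scale = __ffs(32) = 5, num = 1             ->  32 pages
 *
 * For a leaf invalidation of a 2MB block with 4K granules the TTL hint is
 * 4 - ((ilog2(SZ_2M) - 3) / (12 - 3)) = 4 - 18 / 9 = 2, i.e. a level-2
 * leaf entry.
 */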
2339 
2340 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
2341 					  size_t granule, bool leaf,
2342 					  struct arm_smmu_domain *smmu_domain)
2343 {
2344 	struct arm_smmu_cmdq_ent cmd = {
2345 		.tlbi = {
2346 			.leaf	= leaf,
2347 		},
2348 	};
2349 
2350 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2351 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2352 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
2353 		cmd.tlbi.asid	= smmu_domain->cd.asid;
2354 	} else {
2355 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
2356 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2357 	}
2358 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2359 
2360 	if (smmu_domain->nest_parent) {
2361 		/*
2362 		 * When the S2 domain changes all the nested S1 ASIDs have to be
2363 		 * flushed too.
2364 		 */
2365 		cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
2366 		arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
2367 	}
2368 
2369 	/*
2370 	 * Unfortunately, this can't be leaf-only since we may have
2371 	 * zapped an entire table.
2372 	 */
2373 	arm_smmu_atc_inv_domain(smmu_domain, iova, size);
2374 }
2375 
2376 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
2377 				 size_t granule, bool leaf,
2378 				 struct arm_smmu_domain *smmu_domain)
2379 {
2380 	struct arm_smmu_cmdq_ent cmd = {
2381 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2382 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
2383 		.tlbi = {
2384 			.asid	= asid,
2385 			.leaf	= leaf,
2386 		},
2387 	};
2388 
2389 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2390 }
2391 
2392 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2393 					 unsigned long iova, size_t granule,
2394 					 void *cookie)
2395 {
2396 	struct arm_smmu_domain *smmu_domain = cookie;
2397 	struct iommu_domain *domain = &smmu_domain->domain;
2398 
2399 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2400 }
2401 
2402 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2403 				  size_t granule, void *cookie)
2404 {
2405 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2406 }
2407 
2408 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2409 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2410 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2411 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2412 };
2413 
2414 static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
2415 {
2416 	u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
2417 
2418 	return (smmu->features & features) == features;
2419 }
2420 
2421 /* IOMMU API */
2422 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2423 {
2424 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2425 
2426 	switch (cap) {
2427 	case IOMMU_CAP_CACHE_COHERENCY:
2428 		/* Assume that a coherent TCU implies coherent TBUs */
2429 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2430 	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
2431 		return arm_smmu_master_canwbs(master);
2432 	case IOMMU_CAP_NOEXEC:
2433 	case IOMMU_CAP_DEFERRED_FLUSH:
2434 		return true;
2435 	case IOMMU_CAP_DIRTY_TRACKING:
2436 		return arm_smmu_dbm_capable(master->smmu);
2437 	default:
2438 		return false;
2439 	}
2440 }
2441 
2442 static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
2443 {
2444 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2445 	struct arm_smmu_master_domain *master_domain;
2446 	unsigned long flags;
2447 	bool ret = true;
2448 
2449 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2450 	list_for_each_entry(master_domain, &smmu_domain->devices,
2451 			    devices_elm) {
2452 		if (!arm_smmu_master_canwbs(master_domain->master)) {
2453 			ret = false;
2454 			break;
2455 		}
2456 	}
2457 	smmu_domain->enforce_cache_coherency = ret;
2458 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2459 	return ret;
2460 }
2461 
2462 struct arm_smmu_domain *arm_smmu_domain_alloc(void)
2463 {
2464 	struct arm_smmu_domain *smmu_domain;
2465 
2466 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2467 	if (!smmu_domain)
2468 		return ERR_PTR(-ENOMEM);
2469 
2470 	INIT_LIST_HEAD(&smmu_domain->devices);
2471 	spin_lock_init(&smmu_domain->devices_lock);
2472 
2473 	return smmu_domain;
2474 }
2475 
2476 static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
2477 {
2478 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2479 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2480 
2481 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2482 
2483 	/* Free the ASID or VMID */
2484 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2485 		/* Prevent SVA from touching the CD while we're freeing it */
2486 		mutex_lock(&arm_smmu_asid_lock);
2487 		xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
2488 		mutex_unlock(&arm_smmu_asid_lock);
2489 	} else {
2490 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2491 		if (cfg->vmid)
2492 			ida_free(&smmu->vmid_map, cfg->vmid);
2493 	}
2494 
2495 	kfree(smmu_domain);
2496 }
2497 
2498 static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
2499 				       struct arm_smmu_domain *smmu_domain)
2500 {
2501 	int ret;
2502 	u32 asid = 0;
2503 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2504 
2505 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2506 	mutex_lock(&arm_smmu_asid_lock);
2507 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
2508 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2509 	cd->asid	= (u16)asid;
2510 	mutex_unlock(&arm_smmu_asid_lock);
2511 	return ret;
2512 }
2513 
2514 static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
2515 				       struct arm_smmu_domain *smmu_domain)
2516 {
2517 	int vmid;
2518 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2519 
2520 	/* Reserve VMID 0 for stage-2 bypass STEs */
2521 	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2522 			       GFP_KERNEL);
2523 	if (vmid < 0)
2524 		return vmid;
2525 
2526 	cfg->vmid	= (u16)vmid;
2527 	return 0;
2528 }
2529 
2530 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
2531 				    struct arm_smmu_device *smmu, u32 flags)
2532 {
2533 	int ret;
2534 	enum io_pgtable_fmt fmt;
2535 	struct io_pgtable_cfg pgtbl_cfg;
2536 	struct io_pgtable_ops *pgtbl_ops;
2537 	int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
2538 				 struct arm_smmu_domain *smmu_domain);
2539 	bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
2540 
2541 	pgtbl_cfg = (struct io_pgtable_cfg) {
2542 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2543 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2544 		.tlb		= &arm_smmu_flush_ops,
2545 		.iommu_dev	= smmu->dev,
2546 	};
2547 
2548 	switch (smmu_domain->stage) {
2549 	case ARM_SMMU_DOMAIN_S1: {
2550 		unsigned long ias = (smmu->features &
2551 				     ARM_SMMU_FEAT_VAX) ? 52 : 48;
2552 
2553 		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
2554 		pgtbl_cfg.oas = smmu->ias;
2555 		if (enable_dirty)
2556 			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
2557 		fmt = ARM_64_LPAE_S1;
2558 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2559 		break;
2560 	}
2561 	case ARM_SMMU_DOMAIN_S2:
2562 		if (enable_dirty)
2563 			return -EOPNOTSUPP;
2564 		pgtbl_cfg.ias = smmu->ias;
2565 		pgtbl_cfg.oas = smmu->oas;
2566 		fmt = ARM_64_LPAE_S2;
2567 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2568 		if ((smmu->features & ARM_SMMU_FEAT_S2FWB) &&
2569 		    (flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
2570 			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB;
2571 		break;
2572 	default:
2573 		return -EINVAL;
2574 	}
2575 
2576 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2577 	if (!pgtbl_ops)
2578 		return -ENOMEM;
2579 
2580 	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2581 	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2582 	smmu_domain->domain.geometry.force_aperture = true;
2583 	if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
2584 		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
2585 
2586 	ret = finalise_stage_fn(smmu, smmu_domain);
2587 	if (ret < 0) {
2588 		free_io_pgtable_ops(pgtbl_ops);
2589 		return ret;
2590 	}
2591 
2592 	smmu_domain->pgtbl_ops = pgtbl_ops;
2593 	smmu_domain->smmu = smmu;
2594 	return 0;
2595 }
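/*
 * For illustration: a stage-1 domain on a 48-bit VA kernel without
 * ARM_SMMU_FEAT_VAX ends up with pgtbl_cfg.ias = min(48, VA_BITS) = 48 and
 * pgtbl_cfg.oas = smmu->ias, so the aperture is [0, 2^48 - 1]; a stage-2
 * domain takes the SMMU's input/output sizes directly (ias = smmu->ias,
 * oas = smmu->oas).
 */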
2596 
2597 static struct arm_smmu_ste *
2598 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2599 {
2600 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2601 
2602 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2603 		/* Two-level walk */
2604 		return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]
2605 				->stes[arm_smmu_strtab_l2_idx(sid)];
2606 	} else {
2607 		/* Simple linear lookup */
2608 		return &cfg->linear.table[sid];
2609 	}
2610 }
2611 
2612 void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
2613 				  const struct arm_smmu_ste *target)
2614 {
2615 	int i, j;
2616 	struct arm_smmu_device *smmu = master->smmu;
2617 
2618 	master->cd_table.in_ste =
2619 		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) ==
2620 		STRTAB_STE_0_CFG_S1_TRANS;
2621 	master->ste_ats_enabled =
2622 		FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) ==
2623 		STRTAB_STE_1_EATS_TRANS;
2624 
2625 	for (i = 0; i < master->num_streams; ++i) {
2626 		u32 sid = master->streams[i].id;
2627 		struct arm_smmu_ste *step =
2628 			arm_smmu_get_step_for_sid(smmu, sid);
2629 
2630 		/* Bridged PCI devices may end up with duplicated IDs */
2631 		for (j = 0; j < i; j++)
2632 			if (master->streams[j].id == sid)
2633 				break;
2634 		if (j < i)
2635 			continue;
2636 
2637 		arm_smmu_write_ste(master, sid, step, target);
2638 	}
2639 }
2640 
2641 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2642 {
2643 	struct device *dev = master->dev;
2644 	struct arm_smmu_device *smmu = master->smmu;
2645 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2646 
2647 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2648 		return false;
2649 
2650 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2651 		return false;
2652 
2653 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2654 }
2655 
2656 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2657 {
2658 	size_t stu;
2659 	struct pci_dev *pdev;
2660 	struct arm_smmu_device *smmu = master->smmu;
2661 
2662 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2663 	stu = __ffs(smmu->pgsize_bitmap);
2664 	pdev = to_pci_dev(master->dev);
2665 
2666 	/*
2667 	 * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
2668 	 */
2669 	arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2670 	if (pci_enable_ats(pdev, stu))
2671 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2672 }
2673 
2674 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2675 {
2676 	int ret;
2677 	int features;
2678 	int num_pasids;
2679 	struct pci_dev *pdev;
2680 
2681 	if (!dev_is_pci(master->dev))
2682 		return -ENODEV;
2683 
2684 	pdev = to_pci_dev(master->dev);
2685 
2686 	features = pci_pasid_features(pdev);
2687 	if (features < 0)
2688 		return features;
2689 
2690 	num_pasids = pci_max_pasids(pdev);
2691 	if (num_pasids <= 0)
2692 		return num_pasids;
2693 
2694 	ret = pci_enable_pasid(pdev, features);
2695 	if (ret) {
2696 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2697 		return ret;
2698 	}
2699 
2700 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2701 				  master->smmu->ssid_bits);
2702 	return 0;
2703 }
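/*
 * Example: a PCI function advertising 2^16 PASIDs behind an SMMU with 10-bit
 * SubstreamIDs ends up with master->ssid_bits = min(ilog2(65536), 10) = 10,
 * i.e. the usable PASID space is capped by the SMMU rather than the endpoint.
 */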
2704 
2705 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2706 {
2707 	struct pci_dev *pdev;
2708 
2709 	if (!dev_is_pci(master->dev))
2710 		return;
2711 
2712 	pdev = to_pci_dev(master->dev);
2713 
2714 	if (!pdev->pasid_enabled)
2715 		return;
2716 
2717 	master->ssid_bits = 0;
2718 	pci_disable_pasid(pdev);
2719 }
2720 
2721 static struct arm_smmu_master_domain *
2722 arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
2723 			    struct iommu_domain *domain,
2724 			    struct arm_smmu_master *master,
2725 			    ioasid_t ssid, bool nested_ats_flush)
2726 {
2727 	struct arm_smmu_master_domain *master_domain;
2728 
2729 	lockdep_assert_held(&smmu_domain->devices_lock);
2730 
2731 	list_for_each_entry(master_domain, &smmu_domain->devices,
2732 			    devices_elm) {
2733 		if (master_domain->master == master &&
2734 		    master_domain->domain == domain &&
2735 		    master_domain->ssid == ssid &&
2736 		    master_domain->nested_ats_flush == nested_ats_flush)
2737 			return master_domain;
2738 	}
2739 	return NULL;
2740 }
2741 
2742 /*
2743  * If the domain uses the smmu_domain->devices list return the arm_smmu_domain
2744  * structure, otherwise NULL. These domains track attached devices so they can
2745  * issue invalidations.
2746  */
2747 static struct arm_smmu_domain *
2748 to_smmu_domain_devices(struct iommu_domain *domain)
2749 {
2750 	/* The domain can be NULL only when processing the first attach */
2751 	if (!domain)
2752 		return NULL;
2753 	if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
2754 	    domain->type == IOMMU_DOMAIN_SVA)
2755 		return to_smmu_domain(domain);
2756 	if (domain->type == IOMMU_DOMAIN_NESTED)
2757 		return to_smmu_nested_domain(domain)->vsmmu->s2_parent;
2758 	return NULL;
2759 }
2760 
2761 static int arm_smmu_enable_iopf(struct arm_smmu_master *master,
2762 				struct arm_smmu_master_domain *master_domain)
2763 {
2764 	int ret;
2765 
2766 	iommu_group_mutex_assert(master->dev);
2767 
2768 	if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
2769 		return -EOPNOTSUPP;
2770 
2771 	/*
2772 	 * Drivers for devices supporting PRI or stall require iopf; others have
2773 	 * device-specific fault handlers and don't need IOPF, so this is not a
2774 	 * failure.
2775 	 */
2776 	if (!master->stall_enabled)
2777 		return 0;
2778 
2779 	/* We're not keeping track of SIDs in fault events */
2780 	if (master->num_streams != 1)
2781 		return -EOPNOTSUPP;
2782 
2783 	if (master->iopf_refcount) {
2784 		master->iopf_refcount++;
2785 		master_domain->using_iopf = true;
2786 		return 0;
2787 	}
2788 
2789 	ret = iopf_queue_add_device(master->smmu->evtq.iopf, master->dev);
2790 	if (ret)
2791 		return ret;
2792 	master->iopf_refcount = 1;
2793 	master_domain->using_iopf = true;
2794 	return 0;
2795 }
2796 
2797 static void arm_smmu_disable_iopf(struct arm_smmu_master *master,
2798 				  struct arm_smmu_master_domain *master_domain)
2799 {
2800 	iommu_group_mutex_assert(master->dev);
2801 
2802 	if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
2803 		return;
2804 
2805 	if (!master_domain || !master_domain->using_iopf)
2806 		return;
2807 
2808 	master->iopf_refcount--;
2809 	if (master->iopf_refcount == 0)
2810 		iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev);
2811 }
2812 
2813 static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
2814 					  struct iommu_domain *domain,
2815 					  ioasid_t ssid)
2816 {
2817 	struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain);
2818 	struct arm_smmu_master_domain *master_domain;
2819 	bool nested_ats_flush = false;
2820 	unsigned long flags;
2821 
2822 	if (!smmu_domain)
2823 		return;
2824 
2825 	if (domain->type == IOMMU_DOMAIN_NESTED)
2826 		nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats;
2827 
2828 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2829 	master_domain = arm_smmu_find_master_domain(smmu_domain, domain, master,
2830 						    ssid, nested_ats_flush);
2831 	if (master_domain) {
2832 		list_del(&master_domain->devices_elm);
2833 		if (master->ats_enabled)
2834 			atomic_dec(&smmu_domain->nr_ats_masters);
2835 	}
2836 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2837 
2838 	arm_smmu_disable_iopf(master, master_domain);
2839 	kfree(master_domain);
2840 }
2841 
2842 /*
2843  * Start the sequence to attach a domain to a master. The sequence contains three
2844  * steps:
2845  *  arm_smmu_attach_prepare()
2846  *  arm_smmu_install_ste_for_dev()
2847  *  arm_smmu_attach_commit()
2848  *
2849  * If prepare succeeds then the sequence must be completed. The STE installed
2850  * must set the STE.EATS field according to state.ats_enabled.
2851  *
2852  * If the device supports ATS then this determines if EATS should be enabled
2853  * in the STE, and starts sequencing EATS disable if required.
2854  *
2855  * The change of the EATS in the STE and the PCI ATS config space is managed by
2856  * this sequence to be in the right order so that if PCI ATS is enabled then
2857  * STE.EATS is enabled.
2858  *
2859  * new_domain can be a non-paging domain. In this case ATS will not be enabled,
2860  * and invalidations won't be tracked.
2861  */
2862 int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
2863 			    struct iommu_domain *new_domain)
2864 {
2865 	struct arm_smmu_master *master = state->master;
2866 	struct arm_smmu_master_domain *master_domain;
2867 	struct arm_smmu_domain *smmu_domain =
2868 		to_smmu_domain_devices(new_domain);
2869 	unsigned long flags;
2870 	int ret;
2871 
2872 	/*
2873 	 * arm_smmu_share_asid() must not see two domains pointing to the same
2874 	 * arm_smmu_master_domain contents otherwise it could randomly write one
2875 	 * or the other to the CD.
2876 	 */
2877 	lockdep_assert_held(&arm_smmu_asid_lock);
2878 
2879 	if (smmu_domain || state->cd_needs_ats) {
2880 		/*
2881 		 * The SMMU does not support enabling ATS with bypass/abort.
2882 		 * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
2883 		 * Translation Requests and Translated transactions are denied
2884 		 * as though ATS is disabled for the stream (STE.EATS == 0b00),
2885 		 * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
2886 		 * (IHI0070Ea 5.2 Stream Table Entry).
2887 		 *
2888 		 * However, if we have installed a CD table and are using S1DSS
2889 		 * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS
2890 		 * skipping stage 1".
2891 		 *
2892 		 * Disable ATS if we are going to create a normal 0b100 bypass
2893 		 * STE.
2894 		 */
2895 		state->ats_enabled = !state->disable_ats &&
2896 				     arm_smmu_ats_supported(master);
2897 	}
2898 
2899 	if (smmu_domain) {
2900 		if (new_domain->type == IOMMU_DOMAIN_NESTED) {
2901 			ret = arm_smmu_attach_prepare_vmaster(
2902 				state, to_smmu_nested_domain(new_domain));
2903 			if (ret)
2904 				return ret;
2905 		}
2906 
2907 		master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
2908 		if (!master_domain) {
2909 			ret = -ENOMEM;
2910 			goto err_free_vmaster;
2911 		}
2912 		master_domain->domain = new_domain;
2913 		master_domain->master = master;
2914 		master_domain->ssid = state->ssid;
2915 		if (new_domain->type == IOMMU_DOMAIN_NESTED)
2916 			master_domain->nested_ats_flush =
2917 				to_smmu_nested_domain(new_domain)->enable_ats;
2918 
2919 		if (new_domain->iopf_handler) {
2920 			ret = arm_smmu_enable_iopf(master, master_domain);
2921 			if (ret)
2922 				goto err_free_master_domain;
2923 		}
2924 
2925 		/*
2926 		 * During prepare we want the current smmu_domain and new
2927 		 * smmu_domain to be in the devices list before we change any
2928 		 * HW. This ensures that both domains will send ATS
2929 		 * invalidations to the master until we are done.
2930 		 *
2931 		 * It is tempting to make this list only track masters that are
2932 		 * using ATS, but arm_smmu_share_asid() also uses this to change
2933 		 * the ASID of a domain, unrelated to ATS.
2934 		 *
2935 		 * Notice if we are re-attaching the same domain then the list
2936 		 * will have two identical entries and commit will remove only
2937 		 * one of them.
2938 		 */
2939 		spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2940 		if (smmu_domain->enforce_cache_coherency &&
2941 		    !arm_smmu_master_canwbs(master)) {
2942 			spin_unlock_irqrestore(&smmu_domain->devices_lock,
2943 					       flags);
2944 			ret = -EINVAL;
2945 			goto err_iopf;
2946 		}
2947 
2948 		if (state->ats_enabled)
2949 			atomic_inc(&smmu_domain->nr_ats_masters);
2950 		list_add(&master_domain->devices_elm, &smmu_domain->devices);
2951 		spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2952 	}
2953 
2954 	if (!state->ats_enabled && master->ats_enabled) {
2955 		pci_disable_ats(to_pci_dev(master->dev));
2956 		/*
2957 		 * This is probably overkill, but the config write for disabling
2958 		 * ATS should complete before the STE is configured to generate
2959 		 * UR to avoid AER noise.
2960 		 */
2961 		wmb();
2962 	}
2963 	return 0;
2964 
2965 err_iopf:
2966 	arm_smmu_disable_iopf(master, master_domain);
2967 err_free_master_domain:
2968 	kfree(master_domain);
2969 err_free_vmaster:
2970 	kfree(state->vmaster);
2971 	return ret;
2972 }
2973 
2974 /*
2975  * Commit is done after the STE/CD are configured with the EATS setting. It
2976  * completes synchronizing the PCI device's ATC and finishes manipulating the
2977  * smmu_domain->devices list.
2978  */
2979 void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
2980 {
2981 	struct arm_smmu_master *master = state->master;
2982 
2983 	lockdep_assert_held(&arm_smmu_asid_lock);
2984 
2985 	arm_smmu_attach_commit_vmaster(state);
2986 
2987 	if (state->ats_enabled && !master->ats_enabled) {
2988 		arm_smmu_enable_ats(master);
2989 	} else if (state->ats_enabled && master->ats_enabled) {
2990 		/*
2991 		 * The translation has changed, flush the ATC. At this point the
2992 		 * SMMU is translating for the new domain and both the old&new
2993 		 * domain will issue invalidations.
2994 		 */
2995 		arm_smmu_atc_inv_master(master, state->ssid);
2996 	} else if (!state->ats_enabled && master->ats_enabled) {
2997 		/* ATS is being switched off, invalidate the entire ATC */
2998 		arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2999 	}
3000 
3001 	arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);
3002 	master->ats_enabled = state->ats_enabled;
3003 }
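/*
 * Minimal sketch of how a caller uses the attach sequence above
 * (arm_smmu_attach_dev() below is the canonical RID example):
 *
 *	mutex_lock(&arm_smmu_asid_lock);
 *	ret = arm_smmu_attach_prepare(&state, new_domain);
 *	if (!ret) {
 *		... build the target STE/CD using state.ats_enabled ...
 *		arm_smmu_install_ste_for_dev(master, &target);
 *		arm_smmu_attach_commit(&state);
 *	}
 *	mutex_unlock(&arm_smmu_asid_lock);
 */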
3004 
3005 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
3006 {
3007 	int ret = 0;
3008 	struct arm_smmu_ste target;
3009 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
3010 	struct arm_smmu_device *smmu;
3011 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3012 	struct arm_smmu_attach_state state = {
3013 		.old_domain = iommu_get_domain_for_dev(dev),
3014 		.ssid = IOMMU_NO_PASID,
3015 	};
3016 	struct arm_smmu_master *master;
3017 	struct arm_smmu_cd *cdptr;
3018 
3019 	if (!fwspec)
3020 		return -ENOENT;
3021 
3022 	state.master = master = dev_iommu_priv_get(dev);
3023 	smmu = master->smmu;
3024 
3025 	if (smmu_domain->smmu != smmu)
3026 		return -EINVAL;
3027 
3028 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
3029 		cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
3030 		if (!cdptr)
3031 			return -ENOMEM;
3032 	} else if (arm_smmu_ssids_in_use(&master->cd_table))
3033 		return -EBUSY;
3034 
3035 	/*
3036 	 * Prevent arm_smmu_share_asid() from trying to change the ASID
3037 	 * of either the old or new domain while we are working on it.
3038 	 * This allows the STE and the smmu_domain->devices list to
3039 	 * be inconsistent during this routine.
3040 	 */
3041 	mutex_lock(&arm_smmu_asid_lock);
3042 
3043 	ret = arm_smmu_attach_prepare(&state, domain);
3044 	if (ret) {
3045 		mutex_unlock(&arm_smmu_asid_lock);
3046 		return ret;
3047 	}
3048 
3049 	switch (smmu_domain->stage) {
3050 	case ARM_SMMU_DOMAIN_S1: {
3051 		struct arm_smmu_cd target_cd;
3052 
3053 		arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
3054 		arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
3055 					&target_cd);
3056 		arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled,
3057 					  STRTAB_STE_1_S1DSS_SSID0);
3058 		arm_smmu_install_ste_for_dev(master, &target);
3059 		break;
3060 	}
3061 	case ARM_SMMU_DOMAIN_S2:
3062 		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain,
3063 					    state.ats_enabled);
3064 		arm_smmu_install_ste_for_dev(master, &target);
3065 		arm_smmu_clear_cd(master, IOMMU_NO_PASID);
3066 		break;
3067 	}
3068 
3069 	arm_smmu_attach_commit(&state);
3070 	mutex_unlock(&arm_smmu_asid_lock);
3071 	return 0;
3072 }
3073 
3074 static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
3075 				     struct device *dev, ioasid_t id,
3076 				     struct iommu_domain *old)
3077 {
3078 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3079 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3080 	struct arm_smmu_device *smmu = master->smmu;
3081 	struct arm_smmu_cd target_cd;
3082 
3083 	if (smmu_domain->smmu != smmu)
3084 		return -EINVAL;
3085 
3086 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
3087 		return -EINVAL;
3088 
3089 	/*
3090 	 * We can read cd.asid outside the lock because arm_smmu_set_pasid()
3091 	 * will fix it
3092 	 */
3093 	arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
3094 	return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
3095 				  &target_cd, old);
3096 }
3097 
3098 static void arm_smmu_update_ste(struct arm_smmu_master *master,
3099 				struct iommu_domain *sid_domain,
3100 				bool ats_enabled)
3101 {
3102 	unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE;
3103 	struct arm_smmu_ste ste;
3104 
3105 	if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled)
3106 		return;
3107 
3108 	if (sid_domain->type == IOMMU_DOMAIN_IDENTITY)
3109 		s1dss = STRTAB_STE_1_S1DSS_BYPASS;
3110 	else
3111 		WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED);
3112 
3113 	/*
3114 	 * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior
3115 	 * using s1dss if necessary. If the cd_table is already installed then
3116 	 * the S1DSS is correct and this will just update the EATS. Otherwise it
3117 	 * installs the entire thing. This will be hitless.
3118 	 */
3119 	arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
3120 	arm_smmu_install_ste_for_dev(master, &ste);
3121 }
3122 
3123 int arm_smmu_set_pasid(struct arm_smmu_master *master,
3124 		       struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
3125 		       struct arm_smmu_cd *cd, struct iommu_domain *old)
3126 {
3127 	struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
3128 	struct arm_smmu_attach_state state = {
3129 		.master = master,
3130 		.ssid = pasid,
3131 		.old_domain = old,
3132 	};
3133 	struct arm_smmu_cd *cdptr;
3134 	int ret;
3135 
3136 	/* The core code validates pasid */
3137 
3138 	if (smmu_domain->smmu != master->smmu)
3139 		return -EINVAL;
3140 
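	/*
	 * A PASID can only be installed behind an STE that already carries a
	 * CD table, or behind an IDENTITY/BLOCKED RID domain that
	 * arm_smmu_update_ste() can convert to a CD table STE via S1DSS.
	 */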
3141 	if (!master->cd_table.in_ste &&
3142 	    sid_domain->type != IOMMU_DOMAIN_IDENTITY &&
3143 	    sid_domain->type != IOMMU_DOMAIN_BLOCKED)
3144 		return -EINVAL;
3145 
3146 	cdptr = arm_smmu_alloc_cd_ptr(master, pasid);
3147 	if (!cdptr)
3148 		return -ENOMEM;
3149 
3150 	mutex_lock(&arm_smmu_asid_lock);
3151 	ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain);
3152 	if (ret)
3153 		goto out_unlock;
3154 
3155 	/*
3156 	 * We don't want to take the asid_lock too early, so fix up the
3157 	 * caller-provided ASID under the lock in case it changed.
3158 	 */
3159 	cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
3160 	cd->data[0] |= cpu_to_le64(
3161 		FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));
3162 
3163 	arm_smmu_write_cd_entry(master, pasid, cdptr, cd);
3164 	arm_smmu_update_ste(master, sid_domain, state.ats_enabled);
3165 
3166 	arm_smmu_attach_commit(&state);
3167 
3168 out_unlock:
3169 	mutex_unlock(&arm_smmu_asid_lock);
3170 	return ret;
3171 }
3172 
3173 static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
3174 					   struct device *dev, ioasid_t pasid,
3175 					   struct iommu_domain *old_domain)
3176 {
3177 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain);
3178 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3179 
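	/*
	 * Tear down the CD entry and its ATC entries under the asid_lock so
	 * that this is serialised against concurrent ASID updates (e.g. from
	 * arm_smmu_share_asid()).
	 */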
3180 	mutex_lock(&arm_smmu_asid_lock);
3181 	arm_smmu_clear_cd(master, pasid);
3182 	if (master->ats_enabled)
3183 		arm_smmu_atc_inv_master(master, pasid);
3184 	arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);
3185 	mutex_unlock(&arm_smmu_asid_lock);
3186 
3187 	/*
3188 	 * When the last user of the CD table goes away, downgrade the STE back
3189 	 * to a non-cd_table one.
3190 	 */
3191 	if (!arm_smmu_ssids_in_use(&master->cd_table)) {
3192 		struct iommu_domain *sid_domain =
3193 			iommu_get_domain_for_dev(master->dev);
3194 
3195 		if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
3196 		    sid_domain->type == IOMMU_DOMAIN_BLOCKED)
3197 			sid_domain->ops->attach_dev(sid_domain, dev);
3198 	}
3199 	return 0;
3200 }
3201 
3202 static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
3203 				    struct device *dev,
3204 				    struct arm_smmu_ste *ste,
3205 				    unsigned int s1dss)
3206 {
3207 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3208 	struct arm_smmu_attach_state state = {
3209 		.master = master,
3210 		.old_domain = iommu_get_domain_for_dev(dev),
3211 		.ssid = IOMMU_NO_PASID,
3212 	};
3213 
3214 	/*
3215 	 * Do not allow any ASID to be changed while we are working on the STE,
3216 	 * otherwise we could miss invalidations.
3217 	 */
3218 	mutex_lock(&arm_smmu_asid_lock);
3219 
3220 	/*
3221 	 * If the CD table is not in use we can use the provided STE, otherwise
3222 	 * we use a cdtable STE with the provided S1DSS.
3223 	 */
3224 	if (arm_smmu_ssids_in_use(&master->cd_table)) {
3225 		/*
3226 		 * If a CD table has to be present then we need to run with ATS
3227 		 * on because we have to assume a PASID is using ATS. For
3228 		 * IDENTITY this will set things up so that S1DSS=bypass, which
3229 		 * follows the explanation in "13.6.4 Full ATS skipping stage 1"
3230 		 * and allows for ATS on the RID to work.
3231 		 */
3232 		state.cd_needs_ats = true;
3233 		arm_smmu_attach_prepare(&state, domain);
3234 		arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss);
3235 	} else {
3236 		arm_smmu_attach_prepare(&state, domain);
3237 	}
3238 	arm_smmu_install_ste_for_dev(master, ste);
3239 	arm_smmu_attach_commit(&state);
3240 	mutex_unlock(&arm_smmu_asid_lock);
3241 
3242 	/*
3243 	 * This has to be done after removing the master from the
3244 	 * arm_smmu_domain->devices to avoid races updating the same context
3245 	 * descriptor from arm_smmu_share_asid().
3246 	 */
3247 	arm_smmu_clear_cd(master, IOMMU_NO_PASID);
3248 }
3249 
3250 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
3251 					struct device *dev)
3252 {
3253 	struct arm_smmu_ste ste;
3254 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3255 
3256 	arm_smmu_master_clear_vmaster(master);
3257 	arm_smmu_make_bypass_ste(master->smmu, &ste);
3258 	arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
3259 	return 0;
3260 }
3261 
3262 static const struct iommu_domain_ops arm_smmu_identity_ops = {
3263 	.attach_dev = arm_smmu_attach_dev_identity,
3264 };
3265 
3266 static struct iommu_domain arm_smmu_identity_domain = {
3267 	.type = IOMMU_DOMAIN_IDENTITY,
3268 	.ops = &arm_smmu_identity_ops,
3269 };
3270 
3271 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
3272 					struct device *dev)
3273 {
3274 	struct arm_smmu_ste ste;
3275 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3276 
3277 	arm_smmu_master_clear_vmaster(master);
3278 	arm_smmu_make_abort_ste(&ste);
3279 	arm_smmu_attach_dev_ste(domain, dev, &ste,
3280 				STRTAB_STE_1_S1DSS_TERMINATE);
3281 	return 0;
3282 }
3283 
3284 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
3285 	.attach_dev = arm_smmu_attach_dev_blocked,
3286 	.set_dev_pasid = arm_smmu_blocking_set_dev_pasid,
3287 };
3288 
3289 static struct iommu_domain arm_smmu_blocked_domain = {
3290 	.type = IOMMU_DOMAIN_BLOCKED,
3291 	.ops = &arm_smmu_blocked_ops,
3292 };
3293 
3294 static struct iommu_domain *
3295 arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags,
3296 				   const struct iommu_user_data *user_data)
3297 {
3298 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3299 	struct arm_smmu_device *smmu = master->smmu;
3300 	const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
3301 				 IOMMU_HWPT_ALLOC_PASID |
3302 				 IOMMU_HWPT_ALLOC_NEST_PARENT;
3303 	struct arm_smmu_domain *smmu_domain;
3304 	int ret;
3305 
3306 	if (flags & ~PAGING_FLAGS)
3307 		return ERR_PTR(-EOPNOTSUPP);
3308 	if (user_data)
3309 		return ERR_PTR(-EOPNOTSUPP);
3310 
3311 	smmu_domain = arm_smmu_domain_alloc();
3312 	if (IS_ERR(smmu_domain))
3313 		return ERR_CAST(smmu_domain);
3314 
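	/*
	 * Map the requested flag combination onto a translation stage:
	 * NEST_PARENT requires stage-2 nesting support, while dirty tracking
	 * and PASID attachment are only supported with stage-1 tables.
	 */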
3315 	switch (flags) {
3316 	case 0:
3317 		/* Prefer S1 if available */
3318 		if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
3319 			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
3320 		else
3321 			smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
3322 		break;
3323 	case IOMMU_HWPT_ALLOC_NEST_PARENT:
3324 		if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) {
3325 			ret = -EOPNOTSUPP;
3326 			goto err_free;
3327 		}
3328 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
3329 		smmu_domain->nest_parent = true;
3330 		break;
3331 	case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
3332 	case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID:
3333 	case IOMMU_HWPT_ALLOC_PASID:
3334 		if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
3335 			ret = -EOPNOTSUPP;
3336 			goto err_free;
3337 		}
3338 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
3339 		break;
3340 	default:
3341 		ret = -EOPNOTSUPP;
3342 		goto err_free;
3343 	}
3344 
3345 	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
3346 	smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
3347 	ret = arm_smmu_domain_finalise(smmu_domain, smmu, flags);
3348 	if (ret)
3349 		goto err_free;
3350 	return &smmu_domain->domain;
3351 
3352 err_free:
3353 	kfree(smmu_domain);
3354 	return ERR_PTR(ret);
3355 }
3356 
3357 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
3358 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
3359 			      int prot, gfp_t gfp, size_t *mapped)
3360 {
3361 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
3362 
3363 	if (!ops)
3364 		return -ENODEV;
3365 
3366 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
3367 }
3368 
3369 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
3370 				   size_t pgsize, size_t pgcount,
3371 				   struct iommu_iotlb_gather *gather)
3372 {
3373 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3374 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
3375 
3376 	if (!ops)
3377 		return 0;
3378 
3379 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
3380 }
3381 
3382 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
3383 {
3384 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3385 
3386 	if (smmu_domain->smmu)
3387 		arm_smmu_tlb_inv_context(smmu_domain);
3388 }
3389 
3390 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
3391 				struct iommu_iotlb_gather *gather)
3392 {
3393 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3394 
3395 	if (!gather->pgsize)
3396 		return;
3397 
3398 	arm_smmu_tlb_inv_range_domain(gather->start,
3399 				      gather->end - gather->start + 1,
3400 				      gather->pgsize, true, smmu_domain);
3401 }
3402 
3403 static phys_addr_t
3404 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
3405 {
3406 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
3407 
3408 	if (!ops)
3409 		return 0;
3410 
3411 	return ops->iova_to_phys(ops, iova);
3412 }
3413 
3414 static struct platform_driver arm_smmu_driver;
3415 
3416 static
3417 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
3418 {
3419 	struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
3420 
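	/*
	 * The reference taken by bus_find_device_by_fwnode() is not needed
	 * here; only the drvdata of the matched SMMU device is returned.
	 */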
3421 	put_device(dev);
3422 	return dev ? dev_get_drvdata(dev) : NULL;
3423 }
3424 
3425 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
3426 {
3427 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3428 		return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents;
3429 	return sid < smmu->strtab_cfg.linear.num_ents;
3430 }
3431 
3432 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
3433 {
3434 	/* Check the SIDs are in range of the SMMU and our stream table */
3435 	if (!arm_smmu_sid_in_range(smmu, sid))
3436 		return -ERANGE;
3437 
3438 	/* Ensure l2 strtab is initialised */
3439 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3440 		return arm_smmu_init_l2_strtab(smmu, sid);
3441 
3442 	return 0;
3443 }
3444 
3445 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
3446 				  struct arm_smmu_master *master)
3447 {
3448 	int i;
3449 	int ret = 0;
3450 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
3451 
3452 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
3453 				  GFP_KERNEL);
3454 	if (!master->streams)
3455 		return -ENOMEM;
3456 	master->num_streams = fwspec->num_ids;
3457 
3458 	mutex_lock(&smmu->streams_mutex);
3459 	for (i = 0; i < fwspec->num_ids; i++) {
3460 		struct arm_smmu_stream *new_stream = &master->streams[i];
3461 		struct rb_node *existing;
3462 		u32 sid = fwspec->ids[i];
3463 
3464 		new_stream->id = sid;
3465 		new_stream->master = master;
3466 
3467 		ret = arm_smmu_init_sid_strtab(smmu, sid);
3468 		if (ret)
3469 			break;
3470 
3471 		/* Insert into SID tree */
3472 		existing = rb_find_add(&new_stream->node, &smmu->streams,
3473 				       arm_smmu_streams_cmp_node);
3474 		if (existing) {
3475 			struct arm_smmu_master *existing_master =
3476 				rb_entry(existing, struct arm_smmu_stream, node)
3477 					->master;
3478 
3479 			/* Bridged PCI devices may end up with duplicated IDs */
3480 			if (existing_master == master)
3481 				continue;
3482 
3483 			dev_warn(master->dev,
3484 				 "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n",
3485 				 sid, dev_name(existing_master->dev));
3486 			ret = -ENODEV;
3487 			break;
3488 		}
3489 	}
3490 
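	/* On error, remove the streams inserted so far and free the array. */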
3491 	if (ret) {
3492 		for (i--; i >= 0; i--)
3493 			rb_erase(&master->streams[i].node, &smmu->streams);
3494 		kfree(master->streams);
3495 	}
3496 	mutex_unlock(&smmu->streams_mutex);
3497 
3498 	return ret;
3499 }
3500 
3501 static void arm_smmu_remove_master(struct arm_smmu_master *master)
3502 {
3503 	int i;
3504 	struct arm_smmu_device *smmu = master->smmu;
3505 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
3506 
3507 	if (!smmu || !master->streams)
3508 		return;
3509 
3510 	mutex_lock(&smmu->streams_mutex);
3511 	for (i = 0; i < fwspec->num_ids; i++)
3512 		rb_erase(&master->streams[i].node, &smmu->streams);
3513 	mutex_unlock(&smmu->streams_mutex);
3514 
3515 	kfree(master->streams);
3516 }
3517 
3518 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
3519 {
3520 	int ret;
3521 	struct arm_smmu_device *smmu;
3522 	struct arm_smmu_master *master;
3523 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
3524 
3525 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
3526 		return ERR_PTR(-EBUSY);
3527 
3528 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
3529 	if (!smmu)
3530 		return ERR_PTR(-ENODEV);
3531 
3532 	master = kzalloc(sizeof(*master), GFP_KERNEL);
3533 	if (!master)
3534 		return ERR_PTR(-ENOMEM);
3535 
3536 	master->dev = dev;
3537 	master->smmu = smmu;
3538 	dev_iommu_priv_set(dev, master);
3539 
3540 	ret = arm_smmu_insert_master(smmu, master);
3541 	if (ret)
3542 		goto err_free_master;
3543 
3544 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
3545 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
3546 
3547 	/*
3548 	 * Note that PASID must be enabled before, and disabled after ATS:
3549 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
3550 	 *
3551 	 *   Behavior is undefined if this bit is Set and the value of the PASID
3552 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
3553 	 *   are changed.
3554 	 */
3555 	arm_smmu_enable_pasid(master);
3556 
3557 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
3558 		master->ssid_bits = min_t(u8, master->ssid_bits,
3559 					  CTXDESC_LINEAR_CDMAX);
3560 
3561 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
3562 	     device_property_read_bool(dev, "dma-can-stall")) ||
3563 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
3564 		master->stall_enabled = true;
3565 
3566 	if (dev_is_pci(dev)) {
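		/* Smallest Translation Unit: log2 of the smallest supported granule */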
3567 		unsigned int stu = __ffs(smmu->pgsize_bitmap);
3568 
3569 		pci_prepare_ats(to_pci_dev(dev), stu);
3570 	}
3571 
3572 	return &smmu->iommu;
3573 
3574 err_free_master:
3575 	kfree(master);
3576 	return ERR_PTR(ret);
3577 }
3578 
3579 static void arm_smmu_release_device(struct device *dev)
3580 {
3581 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3582 
3583 	WARN_ON(master->iopf_refcount);
3584 
3585 	/* Put the STE back to what arm_smmu_init_strtab() sets */
3586 	if (dev->iommu->require_direct)
3587 		arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
3588 	else
3589 		arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
3590 
3591 	arm_smmu_disable_pasid(master);
3592 	arm_smmu_remove_master(master);
3593 	if (arm_smmu_cdtab_allocated(&master->cd_table))
3594 		arm_smmu_free_cd_tables(master);
3595 	kfree(master);
3596 }
3597 
3598 static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
3599 					 unsigned long iova, size_t size,
3600 					 unsigned long flags,
3601 					 struct iommu_dirty_bitmap *dirty)
3602 {
3603 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3604 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
3605 
3606 	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
3607 }
3608 
3609 static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
3610 				       bool enabled)
3611 {
3612 	/*
3613 	 * Dirty tracking is always enabled and the dirty bitmap is cleared
3614 	 * prior to set_dirty_tracking(), so there is nothing to do here.
3615 	 */
3616 	return 0;
3617 }
3618 
3619 static struct iommu_group *arm_smmu_device_group(struct device *dev)
3620 {
3621 	struct iommu_group *group;
3622 
3623 	/*
3624 	 * We don't support devices sharing stream IDs other than PCI RID
3625 	 * aliases, since the necessary ID-to-device lookup becomes rather
3626 	 * impractical given a potentially sparse 32-bit stream ID space.
3627 	 */
3628 	if (dev_is_pci(dev))
3629 		group = pci_device_group(dev);
3630 	else
3631 		group = generic_device_group(dev);
3632 
3633 	return group;
3634 }
3635 
3636 static int arm_smmu_of_xlate(struct device *dev,
3637 			     const struct of_phandle_args *args)
3638 {
3639 	return iommu_fwspec_add_ids(dev, args->args, 1);
3640 }
3641 
3642 static void arm_smmu_get_resv_regions(struct device *dev,
3643 				      struct list_head *head)
3644 {
3645 	struct iommu_resv_region *region;
3646 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
3647 
3648 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
3649 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
3650 	if (!region)
3651 		return;
3652 
3653 	list_add_tail(&region->list, head);
3654 
3655 	iommu_dma_get_resv_regions(dev, head);
3656 }
3657 
3658 /*
3659  * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
3660  * PCIe link and save the data to memory by DMA. The hardware is restricted to
3661  * use identity mapping only.
3662  */
3663 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
3664 					 (pdev)->device == 0xa12e)
3665 
3666 static int arm_smmu_def_domain_type(struct device *dev)
3667 {
3668 	if (dev_is_pci(dev)) {
3669 		struct pci_dev *pdev = to_pci_dev(dev);
3670 
3671 		if (IS_HISI_PTT_DEVICE(pdev))
3672 			return IOMMU_DOMAIN_IDENTITY;
3673 	}
3674 
3675 	return 0;
3676 }
3677 
3678 static const struct iommu_ops arm_smmu_ops = {
3679 	.identity_domain	= &arm_smmu_identity_domain,
3680 	.blocked_domain		= &arm_smmu_blocked_domain,
3681 	.capable		= arm_smmu_capable,
3682 	.hw_info		= arm_smmu_hw_info,
3683 	.domain_alloc_sva       = arm_smmu_sva_domain_alloc,
3684 	.domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags,
3685 	.probe_device		= arm_smmu_probe_device,
3686 	.release_device		= arm_smmu_release_device,
3687 	.device_group		= arm_smmu_device_group,
3688 	.of_xlate		= arm_smmu_of_xlate,
3689 	.get_resv_regions	= arm_smmu_get_resv_regions,
3690 	.page_response		= arm_smmu_page_response,
3691 	.def_domain_type	= arm_smmu_def_domain_type,
3692 	.get_viommu_size	= arm_smmu_get_viommu_size,
3693 	.viommu_init		= arm_vsmmu_init,
3694 	.user_pasid_table	= 1,
3695 	.owner			= THIS_MODULE,
3696 	.default_domain_ops = &(const struct iommu_domain_ops) {
3697 		.attach_dev		= arm_smmu_attach_dev,
3698 		.enforce_cache_coherency = arm_smmu_enforce_cache_coherency,
3699 		.set_dev_pasid		= arm_smmu_s1_set_dev_pasid,
3700 		.map_pages		= arm_smmu_map_pages,
3701 		.unmap_pages		= arm_smmu_unmap_pages,
3702 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
3703 		.iotlb_sync		= arm_smmu_iotlb_sync,
3704 		.iova_to_phys		= arm_smmu_iova_to_phys,
3705 		.free			= arm_smmu_domain_free_paging,
3706 	}
3707 };
3708 
3709 static struct iommu_dirty_ops arm_smmu_dirty_ops = {
3710 	.read_and_clear_dirty	= arm_smmu_read_and_clear_dirty,
3711 	.set_dirty_tracking     = arm_smmu_set_dirty_tracking,
3712 };
3713 
3714 /* Probing and initialisation functions */
3715 int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3716 			    struct arm_smmu_queue *q, void __iomem *page,
3717 			    unsigned long prod_off, unsigned long cons_off,
3718 			    size_t dwords, const char *name)
3719 {
3720 	size_t qsz;
3721 
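	/*
	 * Try to allocate the queue, halving the size on failure until the
	 * allocation succeeds or the queue is smaller than a page, at which
	 * point failure is fatal.
	 */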
3722 	do {
3723 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3724 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3725 					      GFP_KERNEL);
3726 		if (q->base || qsz < PAGE_SIZE)
3727 			break;
3728 
3729 		q->llq.max_n_shift--;
3730 	} while (1);
3731 
3732 	if (!q->base) {
3733 		dev_err(smmu->dev,
3734 			"failed to allocate queue (0x%zx bytes) for %s\n",
3735 			qsz, name);
3736 		return -ENOMEM;
3737 	}
3738 
3739 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
3740 		dev_info(smmu->dev, "allocated %u entries for %s\n",
3741 			 1 << q->llq.max_n_shift, name);
3742 	}
3743 
3744 	q->prod_reg	= page + prod_off;
3745 	q->cons_reg	= page + cons_off;
3746 	q->ent_dwords	= dwords;
3747 
3748 	q->q_base  = Q_BASE_RWA;
3749 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3750 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3751 
3752 	q->llq.prod = q->llq.cons = 0;
3753 	return 0;
3754 }
3755 
3756 int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
3757 		       struct arm_smmu_cmdq *cmdq)
3758 {
3759 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3760 
3761 	atomic_set(&cmdq->owner_prod, 0);
3762 	atomic_set(&cmdq->lock, 0);
3763 
3764 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
3765 							      GFP_KERNEL);
3766 	if (!cmdq->valid_map)
3767 		return -ENOMEM;
3768 
3769 	return 0;
3770 }
3771 
3772 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3773 {
3774 	int ret;
3775 
3776 	/* cmdq */
3777 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
3778 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
3779 				      CMDQ_ENT_DWORDS, "cmdq");
3780 	if (ret)
3781 		return ret;
3782 
3783 	ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
3784 	if (ret)
3785 		return ret;
3786 
3787 	/* evtq */
3788 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
3789 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
3790 				      EVTQ_ENT_DWORDS, "evtq");
3791 	if (ret)
3792 		return ret;
3793 
3794 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
3795 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
3796 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
3797 		if (!smmu->evtq.iopf)
3798 			return -ENOMEM;
3799 	}
3800 
3801 	/* priq */
3802 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3803 		return 0;
3804 
3805 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
3806 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
3807 				       PRIQ_ENT_DWORDS, "priq");
3808 }
3809 
3810 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3811 {
3812 	u32 l1size;
3813 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3814 	unsigned int last_sid_idx =
3815 		arm_smmu_strtab_l1_idx((1ULL << smmu->sid_bits) - 1);
3816 
3817 	/* Calculate the L1 size, capped to the SIDSIZE. */
3818 	cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES);
3819 	if (cfg->l2.num_l1_ents <= last_sid_idx)
3820 		dev_warn(smmu->dev,
3821 			 "2-level strtab only covers %u/%u bits of SID\n",
3822 			 ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES),
3823 			 smmu->sid_bits);
3824 
3825 	l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1);
3826 	cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma,
3827 					    GFP_KERNEL);
3828 	if (!cfg->l2.l1tab) {
3829 		dev_err(smmu->dev,
3830 			"failed to allocate l1 stream table (%u bytes)\n",
3831 			l1size);
3832 		return -ENOMEM;
3833 	}
3834 
3835 	cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents,
3836 				      sizeof(*cfg->l2.l2ptrs), GFP_KERNEL);
3837 	if (!cfg->l2.l2ptrs)
3838 		return -ENOMEM;
3839 
3840 	return 0;
3841 }
3842 
3843 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3844 {
3845 	u32 size;
3846 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3847 
3848 	size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste);
3849 	cfg->linear.table = dmam_alloc_coherent(smmu->dev, size,
3850 						&cfg->linear.ste_dma,
3851 						GFP_KERNEL);
3852 	if (!cfg->linear.table) {
3853 		dev_err(smmu->dev,
3854 			"failed to allocate linear stream table (%u bytes)\n",
3855 			size);
3856 		return -ENOMEM;
3857 	}
3858 	cfg->linear.num_ents = 1 << smmu->sid_bits;
3859 
3860 	arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents);
3861 	return 0;
3862 }
3863 
3864 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3865 {
3866 	int ret;
3867 
3868 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3869 		ret = arm_smmu_init_strtab_2lvl(smmu);
3870 	else
3871 		ret = arm_smmu_init_strtab_linear(smmu);
3872 	if (ret)
3873 		return ret;
3874 
3875 	ida_init(&smmu->vmid_map);
3876 
3877 	return 0;
3878 }
3879 
3880 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3881 {
3882 	int ret;
3883 
3884 	mutex_init(&smmu->streams_mutex);
3885 	smmu->streams = RB_ROOT;
3886 
3887 	ret = arm_smmu_init_queues(smmu);
3888 	if (ret)
3889 		return ret;
3890 
3891 	ret = arm_smmu_init_strtab(smmu);
3892 	if (ret)
3893 		return ret;
3894 
3895 	if (smmu->impl_ops && smmu->impl_ops->init_structures)
3896 		return smmu->impl_ops->init_structures(smmu);
3897 
3898 	return 0;
3899 }
3900 
3901 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3902 				   unsigned int reg_off, unsigned int ack_off)
3903 {
3904 	u32 reg;
3905 
3906 	writel_relaxed(val, smmu->base + reg_off);
3907 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3908 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3909 }
3910 
3911 /* GBPA is "special" */
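/*
 * The register may only be written while GBPA.UPDATE is clear: poll for that,
 * write the new value with UPDATE set, then poll for the bit to clear again,
 * which signals that the SMMU has taken the new value.
 */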
3912 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3913 {
3914 	int ret;
3915 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3916 
3917 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3918 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3919 	if (ret)
3920 		return ret;
3921 
3922 	reg &= ~clr;
3923 	reg |= set;
3924 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3925 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3926 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3927 
3928 	if (ret)
3929 		dev_err(smmu->dev, "GBPA not responding to update\n");
3930 	return ret;
3931 }
3932 
3933 static void arm_smmu_free_msis(void *data)
3934 {
3935 	struct device *dev = data;
3936 
3937 	platform_device_msi_free_irqs_all(dev);
3938 }
3939 
3940 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3941 {
3942 	phys_addr_t doorbell;
3943 	struct device *dev = msi_desc_to_dev(desc);
3944 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3945 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3946 
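	/* Program the doorbell address, payload and memory attributes for this vector. */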
3947 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3948 	doorbell &= MSI_CFG0_ADDR_MASK;
3949 
3950 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3951 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3952 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3953 }
3954 
3955 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3956 {
3957 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3958 	struct device *dev = smmu->dev;
3959 
3960 	/* Clear the MSI address regs */
3961 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3962 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3963 
3964 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3965 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3966 	else
3967 		nvec--;
3968 
3969 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3970 		return;
3971 
3972 	if (!dev->msi.domain) {
3973 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3974 		return;
3975 	}
3976 
3977 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3978 	ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3979 	if (ret) {
3980 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3981 		return;
3982 	}
3983 
3984 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3985 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3986 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3987 
3988 	/* Add callback to free MSIs on teardown */
3989 	devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
3990 }
3991 
3992 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3993 {
3994 	int irq, ret;
3995 
3996 	arm_smmu_setup_msis(smmu);
3997 
3998 	/* Request interrupt lines */
3999 	irq = smmu->evtq.q.irq;
4000 	if (irq) {
4001 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
4002 						arm_smmu_evtq_thread,
4003 						IRQF_ONESHOT,
4004 						"arm-smmu-v3-evtq", smmu);
4005 		if (ret < 0)
4006 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
4007 	} else {
4008 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
4009 	}
4010 
4011 	irq = smmu->gerr_irq;
4012 	if (irq) {
4013 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
4014 				       0, "arm-smmu-v3-gerror", smmu);
4015 		if (ret < 0)
4016 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
4017 	} else {
4018 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
4019 	}
4020 
4021 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
4022 		irq = smmu->priq.q.irq;
4023 		if (irq) {
4024 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
4025 							arm_smmu_priq_thread,
4026 							IRQF_ONESHOT,
4027 							"arm-smmu-v3-priq",
4028 							smmu);
4029 			if (ret < 0)
4030 				dev_warn(smmu->dev,
4031 					 "failed to enable priq irq\n");
4032 		} else {
4033 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
4034 		}
4035 	}
4036 }
4037 
4038 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
4039 {
4040 	int ret, irq;
4041 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
4042 
4043 	/* Disable IRQs first */
4044 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
4045 				      ARM_SMMU_IRQ_CTRLACK);
4046 	if (ret) {
4047 		dev_err(smmu->dev, "failed to disable irqs\n");
4048 		return ret;
4049 	}
4050 
4051 	irq = smmu->combined_irq;
4052 	if (irq) {
4053 		/*
4054 		 * Cavium ThunderX2 implementation doesn't support unique irq
4055 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
4056 		 */
4057 		ret = devm_request_threaded_irq(smmu->dev, irq,
4058 					arm_smmu_combined_irq_handler,
4059 					arm_smmu_combined_irq_thread,
4060 					IRQF_ONESHOT,
4061 					"arm-smmu-v3-combined-irq", smmu);
4062 		if (ret < 0)
4063 			dev_warn(smmu->dev, "failed to enable combined irq\n");
4064 	} else
4065 		arm_smmu_setup_unique_irqs(smmu);
4066 
4067 	if (smmu->features & ARM_SMMU_FEAT_PRI)
4068 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
4069 
4070 	/* Enable interrupt generation on the SMMU */
4071 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
4072 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
4073 	if (ret)
4074 		dev_warn(smmu->dev, "failed to enable irqs\n");
4075 
4076 	return 0;
4077 }
4078 
4079 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
4080 {
4081 	int ret;
4082 
4083 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
4084 	if (ret)
4085 		dev_err(smmu->dev, "failed to clear cr0\n");
4086 
4087 	return ret;
4088 }
4089 
4090 static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
4091 {
4092 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
4093 	dma_addr_t dma;
4094 	u32 reg;
4095 
4096 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
4097 		reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
4098 				 STRTAB_BASE_CFG_FMT_2LVL) |
4099 		      FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE,
4100 				 ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) |
4101 		      FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
4102 		dma = cfg->l2.l1_dma;
4103 	} else {
4104 		reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
4105 				 STRTAB_BASE_CFG_FMT_LINEAR) |
4106 		      FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
4107 		dma = cfg->linear.ste_dma;
4108 	}
4109 	writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
4110 		       smmu->base + ARM_SMMU_STRTAB_BASE);
4111 	writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
4112 }
4113 
4114 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
4115 {
4116 	int ret;
4117 	u32 reg, enables;
4118 	struct arm_smmu_cmdq_ent cmd;
4119 
4120 	/* Clear CR0 and sync (disables SMMU and queue processing) */
4121 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
4122 	if (reg & CR0_SMMUEN) {
4123 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
4124 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
4125 	}
4126 
4127 	ret = arm_smmu_device_disable(smmu);
4128 	if (ret)
4129 		return ret;
4130 
4131 	/* CR1 (table and queue memory attributes) */
4132 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
4133 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
4134 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
4135 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
4136 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
4137 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
4138 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
4139 
4140 	/* CR2 (miscellaneous configuration: PTM, RECINVSID and, if supported, E2H) */
4141 	reg = CR2_PTM | CR2_RECINVSID;
4142 
4143 	if (smmu->features & ARM_SMMU_FEAT_E2H)
4144 		reg |= CR2_E2H;
4145 
4146 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
4147 
4148 	/* Stream table */
4149 	arm_smmu_write_strtab(smmu);
4150 
4151 	/* Command queue */
4152 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
4153 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
4154 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
4155 
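	/*
	 * Enable the command queue first so that the configuration and TLB
	 * invalidation commands below can be issued; the event/PRI queues
	 * and SMMUEN itself are only enabled afterwards.
	 */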
4156 	enables = CR0_CMDQEN;
4157 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4158 				      ARM_SMMU_CR0ACK);
4159 	if (ret) {
4160 		dev_err(smmu->dev, "failed to enable command queue\n");
4161 		return ret;
4162 	}
4163 
4164 	/* Invalidate any cached configuration */
4165 	cmd.opcode = CMDQ_OP_CFGI_ALL;
4166 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
4167 
4168 	/* Invalidate any stale TLB entries */
4169 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
4170 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
4171 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
4172 	}
4173 
4174 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
4175 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
4176 
4177 	/* Event queue */
4178 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
4179 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
4180 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
4181 
4182 	enables |= CR0_EVTQEN;
4183 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4184 				      ARM_SMMU_CR0ACK);
4185 	if (ret) {
4186 		dev_err(smmu->dev, "failed to enable event queue\n");
4187 		return ret;
4188 	}
4189 
4190 	/* PRI queue */
4191 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
4192 		writeq_relaxed(smmu->priq.q.q_base,
4193 			       smmu->base + ARM_SMMU_PRIQ_BASE);
4194 		writel_relaxed(smmu->priq.q.llq.prod,
4195 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
4196 		writel_relaxed(smmu->priq.q.llq.cons,
4197 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
4198 
4199 		enables |= CR0_PRIQEN;
4200 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4201 					      ARM_SMMU_CR0ACK);
4202 		if (ret) {
4203 			dev_err(smmu->dev, "failed to enable PRI queue\n");
4204 			return ret;
4205 		}
4206 	}
4207 
4208 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
4209 		enables |= CR0_ATSCHK;
4210 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4211 					      ARM_SMMU_CR0ACK);
4212 		if (ret) {
4213 			dev_err(smmu->dev, "failed to enable ATS check\n");
4214 			return ret;
4215 		}
4216 	}
4217 
4218 	ret = arm_smmu_setup_irqs(smmu);
4219 	if (ret) {
4220 		dev_err(smmu->dev, "failed to setup irqs\n");
4221 		return ret;
4222 	}
4223 
4224 	if (is_kdump_kernel())
4225 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
4226 
4227 	/* Enable the SMMU interface */
4228 	enables |= CR0_SMMUEN;
4229 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4230 				      ARM_SMMU_CR0ACK);
4231 	if (ret) {
4232 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
4233 		return ret;
4234 	}
4235 
4236 	if (smmu->impl_ops && smmu->impl_ops->device_reset) {
4237 		ret = smmu->impl_ops->device_reset(smmu);
4238 		if (ret) {
4239 			dev_err(smmu->dev, "failed to reset impl\n");
4240 			return ret;
4241 		}
4242 	}
4243 
4244 	return 0;
4245 }
4246 
4247 #define IIDR_IMPLEMENTER_ARM		0x43b
4248 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
4249 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
4250 
4251 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
4252 {
4253 	u32 reg;
4254 	unsigned int implementer, productid, variant, revision;
4255 
4256 	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
4257 	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
4258 	productid = FIELD_GET(IIDR_PRODUCTID, reg);
4259 	variant = FIELD_GET(IIDR_VARIANT, reg);
4260 	revision = FIELD_GET(IIDR_REVISION, reg);
4261 
4262 	switch (implementer) {
4263 	case IIDR_IMPLEMENTER_ARM:
4264 		switch (productid) {
4265 		case IIDR_PRODUCTID_ARM_MMU_600:
4266 			/* Arm erratum 1076982 */
4267 			if (variant == 0 && revision <= 2)
4268 				smmu->features &= ~ARM_SMMU_FEAT_SEV;
4269 			/* Arm erratum 1209401 */
4270 			if (variant < 2)
4271 				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
4272 			break;
4273 		case IIDR_PRODUCTID_ARM_MMU_700:
4274 			/* Arm erratum 2812531 */
4275 			smmu->features &= ~ARM_SMMU_FEAT_BTM;
4276 			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
4277 			/* Arm errata 2268618, 2812531 */
4278 			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
4279 			break;
4280 		}
4281 		break;
4282 	}
4283 }
4284 
4285 static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg)
4286 {
4287 	u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD);
4288 	u32 hw_features = 0;
4289 
4290 	switch (FIELD_GET(IDR0_HTTU, reg)) {
4291 	case IDR0_HTTU_ACCESS_DIRTY:
4292 		hw_features |= ARM_SMMU_FEAT_HD;
4293 		fallthrough;
4294 	case IDR0_HTTU_ACCESS:
4295 		hw_features |= ARM_SMMU_FEAT_HA;
4296 	}
4297 
4298 	if (smmu->dev->of_node)
4299 		smmu->features |= hw_features;
4300 	else if (hw_features != fw_features)
4301 		/* ACPI IORT sets the HTTU bits */
4302 		dev_warn(smmu->dev,
4303 			 "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n",
4304 			  hw_features, fw_features);
4305 }
4306 
4307 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
4308 {
4309 	u32 reg;
4310 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
4311 
4312 	/* IDR0 */
4313 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
4314 
4315 	/* 2-level structures */
4316 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
4317 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
4318 
4319 	if (reg & IDR0_CD2L)
4320 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
4321 
4322 	/*
4323 	 * Translation table endianness.
4324 	 * We currently require the same endianness as the CPU, but this
4325 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
4326 	 */
4327 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
4328 	case IDR0_TTENDIAN_MIXED:
4329 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
4330 		break;
4331 #ifdef __BIG_ENDIAN
4332 	case IDR0_TTENDIAN_BE:
4333 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
4334 		break;
4335 #else
4336 	case IDR0_TTENDIAN_LE:
4337 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
4338 		break;
4339 #endif
4340 	default:
4341 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
4342 		return -ENXIO;
4343 	}
4344 
4345 	/* Boolean feature flags */
4346 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
4347 		smmu->features |= ARM_SMMU_FEAT_PRI;
4348 
4349 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
4350 		smmu->features |= ARM_SMMU_FEAT_ATS;
4351 
4352 	if (reg & IDR0_SEV)
4353 		smmu->features |= ARM_SMMU_FEAT_SEV;
4354 
4355 	if (reg & IDR0_MSI) {
4356 		smmu->features |= ARM_SMMU_FEAT_MSI;
4357 		if (coherent && !disable_msipolling)
4358 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
4359 	}
4360 
4361 	if (reg & IDR0_HYP) {
4362 		smmu->features |= ARM_SMMU_FEAT_HYP;
4363 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
4364 			smmu->features |= ARM_SMMU_FEAT_E2H;
4365 	}
4366 
4367 	arm_smmu_get_httu(smmu, reg);
4368 
4369 	/*
4370 	 * The coherency feature as set by FW is used in preference to the ID
4371 	 * register, but warn on mismatch.
4372 	 */
4373 	if (!!(reg & IDR0_COHACC) != coherent)
4374 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
4375 			 str_true_false(coherent));
4376 
4377 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
4378 	case IDR0_STALL_MODEL_FORCE:
4379 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
4380 		fallthrough;
4381 	case IDR0_STALL_MODEL_STALL:
4382 		smmu->features |= ARM_SMMU_FEAT_STALLS;
4383 	}
4384 
4385 	if (reg & IDR0_S1P)
4386 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
4387 
4388 	if (reg & IDR0_S2P)
4389 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
4390 
4391 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
4392 		dev_err(smmu->dev, "no translation support!\n");
4393 		return -ENXIO;
4394 	}
4395 
4396 	/* We only support the AArch64 table format at present */
4397 	switch (FIELD_GET(IDR0_TTF, reg)) {
4398 	case IDR0_TTF_AARCH32_64:
4399 		smmu->ias = 40;
4400 		fallthrough;
4401 	case IDR0_TTF_AARCH64:
4402 		break;
4403 	default:
4404 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
4405 		return -ENXIO;
4406 	}
4407 
4408 	/* ASID/VMID sizes */
4409 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
4410 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
4411 
4412 	/* IDR1 */
4413 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
4414 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
4415 		dev_err(smmu->dev, "embedded implementation not supported\n");
4416 		return -ENXIO;
4417 	}
4418 
4419 	if (reg & IDR1_ATTR_TYPES_OVR)
4420 		smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;
4421 
4422 	/* Queue sizes, capped to ensure natural alignment */
4423 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
4424 					     FIELD_GET(IDR1_CMDQS, reg));
4425 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
4426 		/*
4427 		 * We don't support splitting up batches, so one batch of
4428 		 * commands plus an extra sync needs to fit inside the command
4429 		 * queue. There's also no way we can handle the weird alignment
4430 		 * restrictions on the base pointer for a unit-length queue.
4431 		 */
4432 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
4433 			CMDQ_BATCH_ENTRIES);
4434 		return -ENXIO;
4435 	}
4436 
4437 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
4438 					     FIELD_GET(IDR1_EVTQS, reg));
4439 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
4440 					     FIELD_GET(IDR1_PRIQS, reg));
4441 
4442 	/* SID/SSID sizes */
4443 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
4444 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
4445 	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
4446 
4447 	/*
4448 	 * If the SMMU supports fewer bits than would fill a single L2 stream
4449 	 * table, use a linear table instead.
4450 	 */
4451 	if (smmu->sid_bits <= STRTAB_SPLIT)
4452 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
4453 
4454 	/* IDR3 */
4455 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
4456 	if (FIELD_GET(IDR3_RIL, reg))
4457 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
4458 	if (FIELD_GET(IDR3_FWB, reg))
4459 		smmu->features |= ARM_SMMU_FEAT_S2FWB;
4460 
4461 	if (FIELD_GET(IDR3_BBM, reg) == 2)
4462 		smmu->features |= ARM_SMMU_FEAT_BBML2;
4463 
4464 	/* IDR5 */
4465 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
4466 
4467 	/* Maximum number of outstanding stalls */
4468 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
4469 
4470 	/* Page sizes */
4471 	if (reg & IDR5_GRAN64K)
4472 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
4473 	if (reg & IDR5_GRAN16K)
4474 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
4475 	if (reg & IDR5_GRAN4K)
4476 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
4477 
4478 	/* Input address size */
4479 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
4480 		smmu->features |= ARM_SMMU_FEAT_VAX;
4481 
4482 	/* Output address size */
4483 	switch (FIELD_GET(IDR5_OAS, reg)) {
4484 	case IDR5_OAS_32_BIT:
4485 		smmu->oas = 32;
4486 		break;
4487 	case IDR5_OAS_36_BIT:
4488 		smmu->oas = 36;
4489 		break;
4490 	case IDR5_OAS_40_BIT:
4491 		smmu->oas = 40;
4492 		break;
4493 	case IDR5_OAS_42_BIT:
4494 		smmu->oas = 42;
4495 		break;
4496 	case IDR5_OAS_44_BIT:
4497 		smmu->oas = 44;
4498 		break;
4499 	case IDR5_OAS_52_BIT:
4500 		smmu->oas = 52;
4501 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
4502 		break;
4503 	default:
4504 		dev_info(smmu->dev,
4505 			"unknown output address size. Truncating to 48-bit\n");
4506 		fallthrough;
4507 	case IDR5_OAS_48_BIT:
4508 		smmu->oas = 48;
4509 	}
4510 
4511 	/* Set the DMA mask for our table walker */
4512 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
4513 		dev_warn(smmu->dev,
4514 			 "failed to set DMA mask for table walker\n");
4515 
4516 	smmu->ias = max(smmu->ias, smmu->oas);
4517 
4518 	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
4519 	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
4520 		smmu->features |= ARM_SMMU_FEAT_NESTING;
4521 
4522 	arm_smmu_device_iidr_probe(smmu);
4523 
4524 	if (arm_smmu_sva_supported(smmu))
4525 		smmu->features |= ARM_SMMU_FEAT_SVA;
4526 
4527 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
4528 		 smmu->ias, smmu->oas, smmu->features);
4529 	return 0;
4530 }
4531 
4532 #ifdef CONFIG_ACPI
4533 #ifdef CONFIG_TEGRA241_CMDQV
4534 static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
4535 						struct arm_smmu_device *smmu)
4536 {
4537 	const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
4538 	struct acpi_device *adev;
4539 
4540 	/* Look for an NVDA200C node whose _UID matches the SMMU node ID */
4541 	adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
4542 	if (adev) {
4543 		/* Tegra241 CMDQV driver is responsible for put_device() */
4544 		smmu->impl_dev = &adev->dev;
4545 		smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
4546 		dev_info(smmu->dev, "found companion CMDQV device: %s\n",
4547 			 dev_name(smmu->impl_dev));
4548 	}
4549 	kfree(uid);
4550 }
4551 #else
4552 static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
4553 						struct arm_smmu_device *smmu)
4554 {
4555 }
4556 #endif
4557 
4558 static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
4559 				      struct arm_smmu_device *smmu)
4560 {
4561 	struct acpi_iort_smmu_v3 *iort_smmu =
4562 		(struct acpi_iort_smmu_v3 *)node->node_data;
4563 
4564 	switch (iort_smmu->model) {
4565 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
4566 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
4567 		break;
4568 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
4569 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
4570 		break;
4571 	case ACPI_IORT_SMMU_V3_GENERIC:
4572 		/*
4573 		 * Tegra241 implementation stores its SMMU options and impl_dev
4574 		 * in DSDT. Thus, go through the ACPI tables unconditionally.
4575 		 */
4576 		acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
4577 		break;
4578 	}
4579 
4580 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
4581 	return 0;
4582 }
4583 
4584 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
4585 				      struct arm_smmu_device *smmu)
4586 {
4587 	struct acpi_iort_smmu_v3 *iort_smmu;
4588 	struct device *dev = smmu->dev;
4589 	struct acpi_iort_node *node;
4590 
4591 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
4592 
4593 	/* Retrieve SMMUv3 specific data */
4594 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
4595 
4596 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
4597 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
4598 
4599 	switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) {
4600 	case IDR0_HTTU_ACCESS_DIRTY:
4601 		smmu->features |= ARM_SMMU_FEAT_HD;
4602 		fallthrough;
4603 	case IDR0_HTTU_ACCESS:
4604 		smmu->features |= ARM_SMMU_FEAT_HA;
4605 	}
4606 
4607 	return acpi_smmu_iort_probe_model(node, smmu);
4608 }
4609 #else
4610 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
4611 					     struct arm_smmu_device *smmu)
4612 {
4613 	return -ENODEV;
4614 }
4615 #endif
4616 
4617 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
4618 				    struct arm_smmu_device *smmu)
4619 {
4620 	struct device *dev = &pdev->dev;
4621 	u32 cells;
4622 	int ret = -EINVAL;
4623 
4624 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
4625 		dev_err(dev, "missing #iommu-cells property\n");
4626 	else if (cells != 1)
4627 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
4628 	else
4629 		ret = 0;
4630 
4631 	parse_driver_options(smmu);
4632 
4633 	if (of_dma_is_coherent(dev->of_node))
4634 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
4635 
4636 	return ret;
4637 }
4638 
4639 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
4640 {
4641 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
4642 		return SZ_64K;
4643 	else
4644 		return SZ_128K;
4645 }
4646 
4647 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
4648 				      resource_size_t size)
4649 {
4650 	struct resource res = DEFINE_RES_MEM(start, size);
4651 
4652 	return devm_ioremap_resource(dev, &res);
4653 }
4654 
4655 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
4656 {
4657 	struct list_head rmr_list;
4658 	struct iommu_resv_region *e;
4659 
4660 	INIT_LIST_HEAD(&rmr_list);
4661 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4662 
4663 	list_for_each_entry(e, &rmr_list, list) {
4664 		struct iommu_iort_rmr_data *rmr;
4665 		int ret, i;
4666 
4667 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
4668 		for (i = 0; i < rmr->num_sids; i++) {
4669 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
4670 			if (ret) {
4671 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
4672 					rmr->sids[i]);
4673 				continue;
4674 			}
4675 
4676 			/*
4677 			 * The STE table is not programmed to HW yet, see
4678 			 * arm_smmu_init_initial_stes()
4679 			 */
4680 			arm_smmu_make_bypass_ste(smmu,
4681 				arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
4682 		}
4683 	}
4684 
4685 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4686 }
4687 
4688 static void arm_smmu_impl_remove(void *data)
4689 {
4690 	struct arm_smmu_device *smmu = data;
4691 
4692 	if (smmu->impl_ops && smmu->impl_ops->device_remove)
4693 		smmu->impl_ops->device_remove(smmu);
4694 }
4695 
4696 /*
4697  * Probe all the compiled-in implementations. Each one checks whether it
4698  * matches this HW and, if so, returns a devm_krealloc'd arm_smmu_device that
4699  * replaces the caller's. Otherwise the original is returned, or an ERR_PTR.
4700  */
4701 static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
4702 {
4703 	struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
4704 	const struct arm_smmu_impl_ops *ops;
4705 	int ret;
4706 
4707 	if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
4708 		new_smmu = tegra241_cmdqv_probe(smmu);
4709 
4710 	if (new_smmu == ERR_PTR(-ENODEV))
4711 		return smmu;
4712 	if (IS_ERR(new_smmu))
4713 		return new_smmu;
4714 
4715 	ops = new_smmu->impl_ops;
4716 	if (ops) {
4717 		/* get_viommu_size and vsmmu_init ops must be paired */
4718 		if (WARN_ON(!ops->get_viommu_size != !ops->vsmmu_init)) {
4719 			ret = -EINVAL;
4720 			goto err_remove;
4721 		}
4722 	}
4723 
4724 	ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
4725 				       new_smmu);
4726 	if (ret)
4727 		return ERR_PTR(ret);
4728 	return new_smmu;
4729 
4730 err_remove:
4731 	arm_smmu_impl_remove(new_smmu);
4732 	return ERR_PTR(ret);
4733 }
4734 
4735 static int arm_smmu_device_probe(struct platform_device *pdev)
4736 {
4737 	int irq, ret;
4738 	struct resource *res;
4739 	resource_size_t ioaddr;
4740 	struct arm_smmu_device *smmu;
4741 	struct device *dev = &pdev->dev;
4742 
4743 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4744 	if (!smmu)
4745 		return -ENOMEM;
4746 	smmu->dev = dev;
4747 
4748 	if (dev->of_node) {
4749 		ret = arm_smmu_device_dt_probe(pdev, smmu);
4750 	} else {
4751 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
4752 	}
4753 	if (ret)
4754 		return ret;
4755 
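	/*
	 * An implementation may replace 'smmu' here with a larger,
	 * implementation-specific structure devm_krealloc'd over the generic
	 * one allocated above (see arm_smmu_impl_probe()).
	 */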
4756 	smmu = arm_smmu_impl_probe(smmu);
4757 	if (IS_ERR(smmu))
4758 		return PTR_ERR(smmu);
4759 
4760 	/* Base address */
4761 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
4762 	if (!res)
4763 		return -EINVAL;
4764 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4765 		dev_err(dev, "MMIO region too small (%pr)\n", res);
4766 		return -EINVAL;
4767 	}
4768 	ioaddr = res->start;
4769 
4770 	/*
4771 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4772 	 * the PMCG registers which are reserved by the PMU driver.
4773 	 */
4774 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4775 	if (IS_ERR(smmu->base))
4776 		return PTR_ERR(smmu->base);
4777 
4778 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
4779 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4780 					       ARM_SMMU_REG_SZ);
4781 		if (IS_ERR(smmu->page1))
4782 			return PTR_ERR(smmu->page1);
4783 	} else {
4784 		smmu->page1 = smmu->base;
4785 	}
4786 
4787 	/* Interrupt lines */
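	/*
	 * A single "combined" IRQ, when provided, stands in for the separate
	 * eventq/priq/gerror lines. All of these are optional at this point;
	 * the actual handlers are requested later, from the device reset path.
	 */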
4788 
4789 	irq = platform_get_irq_byname_optional(pdev, "combined");
4790 	if (irq > 0)
4791 		smmu->combined_irq = irq;
4792 	else {
4793 		irq = platform_get_irq_byname_optional(pdev, "eventq");
4794 		if (irq > 0)
4795 			smmu->evtq.q.irq = irq;
4796 
4797 		irq = platform_get_irq_byname_optional(pdev, "priq");
4798 		if (irq > 0)
4799 			smmu->priq.q.irq = irq;
4800 
4801 		irq = platform_get_irq_byname_optional(pdev, "gerror");
4802 		if (irq > 0)
4803 			smmu->gerr_irq = irq;
4804 	}
4805 	/* Probe the h/w */
4806 	ret = arm_smmu_device_hw_probe(smmu);
4807 	if (ret)
4808 		return ret;
4809 
4810 	/* Initialise in-memory data structures */
4811 	ret = arm_smmu_init_structures(smmu);
4812 	if (ret)
4813 		goto err_free_iopf;
4814 
4815 	/* Record our private device structure */
4816 	platform_set_drvdata(pdev, smmu);
4817 
4818 	/* Check for RMRs and install bypass STEs if any */
4819 	arm_smmu_rmr_install_bypass_ste(smmu);
4820 
4821 	/* Reset the device */
4822 	ret = arm_smmu_device_reset(smmu);
4823 	if (ret)
4824 		goto err_disable;
4825 
4826 	/* And we're up. Go go go! */
4827 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4828 				     "smmu3.%pa", &ioaddr);
4829 	if (ret)
4830 		goto err_disable;
4831 
4832 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
4833 	if (ret) {
4834 		dev_err(dev, "Failed to register iommu\n");
4835 		goto err_free_sysfs;
4836 	}
4837 
4838 	return 0;
4839 
4840 err_free_sysfs:
4841 	iommu_device_sysfs_remove(&smmu->iommu);
4842 err_disable:
4843 	arm_smmu_device_disable(smmu);
4844 err_free_iopf:
4845 	iopf_queue_free(smmu->evtq.iopf);
4846 	return ret;
4847 }
4848 
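/*
 * Tear down in roughly the reverse order of probe: unregister from the IOMMU
 * core first so that no new masters can attach while the hardware is being
 * disabled and the remaining resources are freed.
 */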
4849 static void arm_smmu_device_remove(struct platform_device *pdev)
4850 {
4851 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4852 
4853 	iommu_device_unregister(&smmu->iommu);
4854 	iommu_device_sysfs_remove(&smmu->iommu);
4855 	arm_smmu_device_disable(smmu);
4856 	iopf_queue_free(smmu->evtq.iopf);
4857 	ida_destroy(&smmu->vmid_map);
4858 }
4859 
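/*
 * For shutdown (e.g. reboot or kexec) it is enough to disable the SMMU; the
 * in-memory structures are deliberately left in place.
 */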
4860 static void arm_smmu_device_shutdown(struct platform_device *pdev)
4861 {
4862 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4863 
4864 	arm_smmu_device_disable(smmu);
4865 }
4866 
4867 static const struct of_device_id arm_smmu_of_match[] = {
4868 	{ .compatible = "arm,smmu-v3", },
4869 	{ },
4870 };
4871 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4872 
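/*
 * Make sure any outstanding SVA mmu_notifier callbacks have finished before
 * the module text can go away, then unregister the platform driver.
 */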
4873 static void arm_smmu_driver_unregister(struct platform_driver *drv)
4874 {
4875 	arm_smmu_sva_notifier_synchronize();
4876 	platform_driver_unregister(drv);
4877 }
4878 
4879 static struct platform_driver arm_smmu_driver = {
4880 	.driver	= {
4881 		.name			= "arm-smmu-v3",
4882 		.of_match_table		= arm_smmu_of_match,
4883 		.suppress_bind_attrs	= true,
4884 	},
4885 	.probe	= arm_smmu_device_probe,
4886 	.remove = arm_smmu_device_remove,
4887 	.shutdown = arm_smmu_device_shutdown,
4888 };
4889 module_driver(arm_smmu_driver, platform_driver_register,
4890 	      arm_smmu_driver_unregister);
4891 
4892 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4893 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4894 MODULE_ALIAS("platform:arm-smmu-v3");
4895 MODULE_LICENSE("GPL v2");
4896