1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_platform.h>
27 #include <linux/pci.h>
28 #include <linux/pci-ats.h>
29 #include <linux/platform_device.h>
30 
31 #include <linux/amba/bus.h>
32 
33 #include "arm-smmu-v3.h"
34 #include "../../iommu-sva-lib.h"
35 
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40 
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 	"Disable MSI-based polling for CMD_SYNC completion.");
45 
46 enum arm_smmu_msi_index {
47 	EVTQ_MSI_INDEX,
48 	GERROR_MSI_INDEX,
49 	PRIQ_MSI_INDEX,
50 	ARM_SMMU_MAX_MSIS,
51 };
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
84 
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88 	{ 0, NULL},
89 };
90 
91 static void parse_driver_options(struct arm_smmu_device *smmu)
92 {
93 	int i = 0;
94 
95 	do {
96 		if (of_property_read_bool(smmu->dev->of_node,
97 						arm_smmu_options[i].prop)) {
98 			smmu->options |= arm_smmu_options[i].opt;
99 			dev_notice(smmu->dev, "option %s\n",
100 				arm_smmu_options[i].prop);
101 		}
102 	} while (arm_smmu_options[++i].opt);
103 }
104 
105 /* Low-level queue manipulation functions */
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 {
108 	u32 space, prod, cons;
109 
110 	prod = Q_IDX(q, q->prod);
111 	cons = Q_IDX(q, q->cons);
112 
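	/*
	 * Compare the indexes without their wrap bits: if the wrap bits are
	 * equal, prod has not lapped cons and the free space is the queue
	 * size minus the outstanding entries; if they differ, prod has
	 * wrapped once and only the cons - prod slots remain free.
	 */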
113 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114 		space = (1 << q->max_n_shift) - (prod - cons);
115 	else
116 		space = cons - prod;
117 
118 	return space >= n;
119 }
120 
121 static bool queue_full(struct arm_smmu_ll_queue *q)
122 {
123 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 }
126 
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 {
129 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 }
132 
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
134 {
135 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
139 }
140 
141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
142 {
143 	/*
144 	 * Ensure that all CPU accesses (reads and writes) to the queue
145 	 * are complete before we update the cons pointer.
146 	 */
147 	__iomb();
148 	writel_relaxed(q->llq.cons, q->cons_reg);
149 }
150 
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 {
153 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 }
156 
157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
158 {
159 	u32 prod;
160 	int ret = 0;
161 
162 	/*
163 	 * We can't use the _relaxed() variant here, as we must prevent
164 	 * speculative reads of the queue before we have determined that
165 	 * prod has indeed moved.
166 	 */
167 	prod = readl(q->prod_reg);
168 
169 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
170 		ret = -EOVERFLOW;
171 
172 	q->llq.prod = prod;
173 	return ret;
174 }
175 
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
177 {
178 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
180 }
181 
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183 			    struct arm_smmu_queue_poll *qp)
184 {
185 	qp->delay = 1;
186 	qp->spin_cnt = 0;
187 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
189 }
190 
191 static int queue_poll(struct arm_smmu_queue_poll *qp)
192 {
193 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
194 		return -ETIMEDOUT;
195 
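	/*
	 * Prefer wfe() when the SMMU can send wake-up events
	 * (ARM_SMMU_FEAT_SEV); otherwise spin briefly and then back off
	 * with an exponentially growing udelay().
	 */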
196 	if (qp->wfe) {
197 		wfe();
198 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
199 		cpu_relax();
200 	} else {
201 		udelay(qp->delay);
202 		qp->delay *= 2;
203 		qp->spin_cnt = 0;
204 	}
205 
206 	return 0;
207 }
208 
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
210 {
211 	int i;
212 
213 	for (i = 0; i < n_dwords; ++i)
214 		*dst++ = cpu_to_le64(*src++);
215 }
216 
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
218 {
219 	int i;
220 
221 	for (i = 0; i < n_dwords; ++i)
222 		*dst++ = le64_to_cpu(*src++);
223 }
224 
225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
226 {
227 	if (queue_empty(&q->llq))
228 		return -EAGAIN;
229 
230 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
231 	queue_inc_cons(&q->llq);
232 	queue_sync_cons_out(q);
233 	return 0;
234 }
235 
236 /* High-level queue accessors */
237 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
238 {
239 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
240 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
241 
242 	switch (ent->opcode) {
243 	case CMDQ_OP_TLBI_EL2_ALL:
244 	case CMDQ_OP_TLBI_NSNH_ALL:
245 		break;
246 	case CMDQ_OP_PREFETCH_CFG:
247 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
248 		break;
249 	case CMDQ_OP_CFGI_CD:
250 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
251 		fallthrough;
252 	case CMDQ_OP_CFGI_STE:
253 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
254 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
255 		break;
256 	case CMDQ_OP_CFGI_CD_ALL:
257 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
258 		break;
259 	case CMDQ_OP_CFGI_ALL:
260 		/* Cover the entire SID range */
261 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
262 		break;
263 	case CMDQ_OP_TLBI_NH_VA:
264 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
265 		fallthrough;
266 	case CMDQ_OP_TLBI_EL2_VA:
267 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
268 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
269 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
270 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
271 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
272 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
273 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
274 		break;
275 	case CMDQ_OP_TLBI_S2_IPA:
276 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
277 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
279 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
283 		break;
284 	case CMDQ_OP_TLBI_NH_ASID:
285 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
286 		fallthrough;
287 	case CMDQ_OP_TLBI_S12_VMALL:
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 		break;
290 	case CMDQ_OP_TLBI_EL2_ASID:
291 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
292 		break;
293 	case CMDQ_OP_ATC_INV:
294 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
295 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
296 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
297 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
298 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
299 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
300 		break;
301 	case CMDQ_OP_PRI_RESP:
302 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
303 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
304 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
305 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
306 		switch (ent->pri.resp) {
307 		case PRI_RESP_DENY:
308 		case PRI_RESP_FAIL:
309 		case PRI_RESP_SUCC:
310 			break;
311 		default:
312 			return -EINVAL;
313 		}
314 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
315 		break;
316 	case CMDQ_OP_RESUME:
317 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
318 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
319 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
320 		break;
321 	case CMDQ_OP_CMD_SYNC:
322 		if (ent->sync.msiaddr) {
323 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
324 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
325 		} else {
326 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
327 		}
328 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
329 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
330 		break;
331 	default:
332 		return -ENOENT;
333 	}
334 
335 	return 0;
336 }
337 
338 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
339 					 u32 prod)
340 {
341 	struct arm_smmu_queue *q = &smmu->cmdq.q;
342 	struct arm_smmu_cmdq_ent ent = {
343 		.opcode = CMDQ_OP_CMD_SYNC,
344 	};
345 
346 	/*
347 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
348 	 * payload, so the write will zero the entire command on that platform.
349 	 */
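	/*
	 * The completion MSI is pointed at the CMD_SYNC entry itself, so
	 * __arm_smmu_cmdq_poll_until_msi() can simply wait for the SMMU to
	 * write the (zero) MSI payload over the first word of the command.
	 */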
350 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
351 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
352 				   q->ent_dwords * 8;
353 	}
354 
355 	arm_smmu_cmdq_build_cmd(cmd, &ent);
356 }
357 
358 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
359 {
360 	static const char * const cerror_str[] = {
361 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
362 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
363 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
364 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
365 	};
366 
367 	int i;
368 	u64 cmd[CMDQ_ENT_DWORDS];
369 	struct arm_smmu_queue *q = &smmu->cmdq.q;
370 	u32 cons = readl_relaxed(q->cons_reg);
371 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
372 	struct arm_smmu_cmdq_ent cmd_sync = {
373 		.opcode = CMDQ_OP_CMD_SYNC,
374 	};
375 
376 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
377 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
378 
379 	switch (idx) {
380 	case CMDQ_ERR_CERROR_ABT_IDX:
381 		dev_err(smmu->dev, "retrying command fetch\n");
382 	case CMDQ_ERR_CERROR_NONE_IDX:
383 		return;
384 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
385 		/*
386 		 * ATC Invalidation Completion timeout. CONS is still pointing
387 		 * at the CMD_SYNC. Attempt to complete other pending commands
388 		 * by repeating the CMD_SYNC, though we might well end up back
389 		 * here since the ATC invalidation may still be pending.
390 		 */
391 		return;
392 	case CMDQ_ERR_CERROR_ILL_IDX:
393 	default:
394 		break;
395 	}
396 
397 	/*
398 	 * We may have concurrent producers, so we need to be careful
399 	 * not to touch any of the shadow cmdq state.
400 	 */
401 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
402 	dev_err(smmu->dev, "skipping command in error state:\n");
403 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
404 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
405 
406 	/* Convert the erroneous command into a CMD_SYNC */
407 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
408 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
409 		return;
410 	}
411 
412 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
413 }
414 
415 /*
416  * Command queue locking.
417  * This is a form of bastardised rwlock with the following major changes:
418  *
419  * - The only LOCK routines are exclusive_trylock() and shared_lock().
420  *   Neither have barrier semantics, and instead provide only a control
421  *   dependency.
422  *
423  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
424  *   fails if the caller appears to be the last lock holder (yes, this is
425  *   racy). All successful UNLOCK routines have RELEASE semantics.
426  */
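/*
 * In practice: anybody waiting on a CMD_SYNC holds the lock shared so that
 * the cached cons pointer cannot be refreshed underneath them, while the
 * exclusive form is only trylock()ed by code wanting to update
 * cmdq->q.llq.cons from the hardware CONS register.
 */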
427 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
428 {
429 	int val;
430 
431 	/*
432 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
433 	 * lock counter. When held in exclusive state, the lock counter is set
434 	 * to INT_MIN so these increments won't hurt as the value will remain
435 	 * negative.
436 	 */
437 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
438 		return;
439 
440 	do {
441 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
442 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
443 }
444 
445 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
446 {
447 	(void)atomic_dec_return_release(&cmdq->lock);
448 }
449 
450 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
451 {
452 	if (atomic_read(&cmdq->lock) == 1)
453 		return false;
454 
455 	arm_smmu_cmdq_shared_unlock(cmdq);
456 	return true;
457 }
458 
459 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
460 ({									\
461 	bool __ret;							\
462 	local_irq_save(flags);						\
463 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
464 	if (!__ret)							\
465 		local_irq_restore(flags);				\
466 	__ret;								\
467 })
468 
469 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
470 ({									\
471 	atomic_set_release(&cmdq->lock, 0);				\
472 	local_irq_restore(flags);					\
473 })
474 
475 
476 /*
477  * Command queue insertion.
478  * This is made fiddly by our attempts to achieve some sort of scalability
479  * since there is one queue shared amongst all of the CPUs in the system.  If
480  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
481  * then you'll *love* this monstrosity.
482  *
483  * The basic idea is to split the queue up into ranges of commands that are
484  * owned by a given CPU; the owner may not have written all of the commands
485  * itself, but is responsible for advancing the hardware prod pointer when
486  * the time comes. The algorithm is roughly:
487  *
488  * 	1. Allocate some space in the queue. At this point we also discover
489  *	   whether the head of the queue is currently owned by another CPU,
490  *	   or whether we are the owner.
491  *
492  *	2. Write our commands into our allocated slots in the queue.
493  *
494  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
495  *
496  *	4. If we are an owner:
497  *		a. Wait for the previous owner to finish.
498  *		b. Mark the queue head as unowned, which tells us the range
499  *		   that we are responsible for publishing.
500  *		c. Wait for all commands in our owned range to become valid.
501  *		d. Advance the hardware prod pointer.
502  *		e. Tell the next owner we've finished.
503  *
504  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
505  *	   owner), then we need to stick around until it has completed:
506  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
507  *		   to clear the first 4 bytes.
508  *		b. Otherwise, we spin waiting for the hardware cons pointer to
509  *		   advance past our command.
510  *
511  * The devil is in the details, particularly the use of locking for handling
512  * SYNC completion and freeing up space in the queue before we think that it is
513  * full.
514  */
515 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
516 					       u32 sprod, u32 eprod, bool set)
517 {
518 	u32 swidx, sbidx, ewidx, ebidx;
519 	struct arm_smmu_ll_queue llq = {
520 		.max_n_shift	= cmdq->q.llq.max_n_shift,
521 		.prod		= sprod,
522 	};
523 
524 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
525 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
526 
527 	while (llq.prod != eprod) {
528 		unsigned long mask;
529 		atomic_long_t *ptr;
530 		u32 limit = BITS_PER_LONG;
531 
532 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
533 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
534 
535 		ptr = &cmdq->valid_map[swidx];
536 
537 		if ((swidx == ewidx) && (sbidx < ebidx))
538 			limit = ebidx;
539 
540 		mask = GENMASK(limit - 1, sbidx);
541 
542 		/*
543 		 * The valid bit is the inverse of the wrap bit. This means
544 		 * that a zero-initialised queue is invalid and, after marking
545 		 * all entries as valid, they become invalid again when we
546 		 * wrap.
547 		 */
548 		if (set) {
549 			atomic_long_xor(mask, ptr);
550 		} else { /* Poll */
551 			unsigned long valid;
552 
553 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
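			/*
			 * (ULONG_MAX + !!wrap) evaluates to all-ones when the
			 * wrap bit is clear and to zero when it is set, i.e.
			 * the bit pattern a valid entry has on this lap.
			 */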
554 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
555 		}
556 
557 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
558 	}
559 }
560 
561 /* Mark all entries in the range [sprod, eprod) as valid */
562 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
563 					u32 sprod, u32 eprod)
564 {
565 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
566 }
567 
568 /* Wait for all entries in the range [sprod, eprod) to become valid */
569 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
570 					 u32 sprod, u32 eprod)
571 {
572 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
573 }
574 
575 /* Wait for the command queue to become non-full */
576 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
577 					     struct arm_smmu_ll_queue *llq)
578 {
579 	unsigned long flags;
580 	struct arm_smmu_queue_poll qp;
581 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
582 	int ret = 0;
583 
584 	/*
585 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
586 	 * that fails, spin until somebody else updates it for us.
587 	 */
588 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
589 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
590 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
591 		llq->val = READ_ONCE(cmdq->q.llq.val);
592 		return 0;
593 	}
594 
595 	queue_poll_init(smmu, &qp);
596 	do {
597 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
598 		if (!queue_full(llq))
599 			break;
600 
601 		ret = queue_poll(&qp);
602 	} while (!ret);
603 
604 	return ret;
605 }
606 
607 /*
608  * Wait until the SMMU signals a CMD_SYNC completion MSI.
609  * Must be called with the cmdq lock held in some capacity.
610  */
611 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
612 					  struct arm_smmu_ll_queue *llq)
613 {
614 	int ret = 0;
615 	struct arm_smmu_queue_poll qp;
616 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
617 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
618 
619 	queue_poll_init(smmu, &qp);
620 
621 	/*
622 	 * The MSI won't generate an event, since it's being written back
623 	 * into the command queue.
624 	 */
625 	qp.wfe = false;
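	/* Wait for the zero MSI payload to land on the first CMD_SYNC word. */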
626 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
627 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
628 	return ret;
629 }
630 
631 /*
632  * Wait until the SMMU cons index passes llq->prod.
633  * Must be called with the cmdq lock held in some capacity.
634  */
635 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
636 					       struct arm_smmu_ll_queue *llq)
637 {
638 	struct arm_smmu_queue_poll qp;
639 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
640 	u32 prod = llq->prod;
641 	int ret = 0;
642 
643 	queue_poll_init(smmu, &qp);
644 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
645 	do {
646 		if (queue_consumed(llq, prod))
647 			break;
648 
649 		ret = queue_poll(&qp);
650 
651 		/*
652 		 * This needs to be a readl() so that our subsequent call
653 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
654 		 *
655 		 * Specifically, we need to ensure that we observe all
656 		 * shared_lock()s by other CMD_SYNCs that share our owner,
657 		 * so that a failing call to tryunlock() means that we're
658 		 * the last one out and therefore we can safely advance
659 		 * cmdq->q.llq.cons. Roughly speaking:
660 		 *
661 		 * CPU 0		CPU1			CPU2 (us)
662 		 *
663 		 * if (sync)
664 		 * 	shared_lock();
665 		 *
666 		 * dma_wmb();
667 		 * set_valid_map();
668 		 *
669 		 * 			if (owner) {
670 		 *				poll_valid_map();
671 		 *				<control dependency>
672 		 *				writel(prod_reg);
673 		 *
674 		 *						readl(cons_reg);
675 		 *						tryunlock();
676 		 *
677 		 * Requires us to see CPU 0's shared_lock() acquisition.
678 		 */
679 		llq->cons = readl(cmdq->q.cons_reg);
680 	} while (!ret);
681 
682 	return ret;
683 }
684 
685 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
686 					 struct arm_smmu_ll_queue *llq)
687 {
688 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
689 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
690 
691 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
692 }
693 
694 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
695 					u32 prod, int n)
696 {
697 	int i;
698 	struct arm_smmu_ll_queue llq = {
699 		.max_n_shift	= cmdq->q.llq.max_n_shift,
700 		.prod		= prod,
701 	};
702 
703 	for (i = 0; i < n; ++i) {
704 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
705 
706 		prod = queue_inc_prod_n(&llq, i);
707 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
708 	}
709 }
710 
711 /*
712  * This is the actual insertion function, and provides the following
713  * ordering guarantees to callers:
714  *
715  * - There is a dma_wmb() before publishing any commands to the queue.
716  *   This can be relied upon to order prior writes to data structures
717  *   in memory (such as a CD or an STE) before the command.
718  *
719  * - On completion of a CMD_SYNC, there is a control dependency.
720  *   This can be relied upon to order subsequent writes to memory (e.g.
721  *   freeing an IOVA) after completion of the CMD_SYNC.
722  *
723  * - Command insertion is totally ordered, so if two CPUs each race to
724  *   insert their own list of commands then all of the commands from one
725  *   CPU will appear before any of the commands from the other CPU.
726  */
727 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
728 				       u64 *cmds, int n, bool sync)
729 {
730 	u64 cmd_sync[CMDQ_ENT_DWORDS];
731 	u32 prod;
732 	unsigned long flags;
733 	bool owner;
734 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
735 	struct arm_smmu_ll_queue llq = {
736 		.max_n_shift = cmdq->q.llq.max_n_shift,
737 	}, head = llq;
738 	int ret = 0;
739 
740 	/* 1. Allocate some space in the queue */
741 	local_irq_save(flags);
742 	llq.val = READ_ONCE(cmdq->q.llq.val);
743 	do {
744 		u64 old;
745 
746 		while (!queue_has_space(&llq, n + sync)) {
747 			local_irq_restore(flags);
748 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
749 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
750 			local_irq_save(flags);
751 		}
752 
753 		head.cons = llq.cons;
754 		head.prod = queue_inc_prod_n(&llq, n + sync) |
755 					     CMDQ_PROD_OWNED_FLAG;
756 
757 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
758 		if (old == llq.val)
759 			break;
760 
761 		llq.val = old;
762 	} while (1);
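	/*
	 * If the OWNED flag was clear in the value we replaced, nobody is
	 * currently gathering work and we become the owner; head.prod re-set
	 * the flag above, so later arrivals see an owned queue until we
	 * clear it again in step (4b) below.
	 */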
763 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
764 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
765 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
766 
767 	/*
768 	 * 2. Write our commands into the queue
769 	 * Dependency ordering from the cmpxchg() loop above.
770 	 */
771 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
772 	if (sync) {
773 		prod = queue_inc_prod_n(&llq, n);
774 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
775 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
776 
777 		/*
778 		 * In order to determine completion of our CMD_SYNC, we must
779 		 * ensure that the queue can't wrap twice without us noticing.
780 		 * We achieve that by taking the cmdq lock as shared before
781 		 * marking our slot as valid.
782 		 */
783 		arm_smmu_cmdq_shared_lock(cmdq);
784 	}
785 
786 	/* 3. Mark our slots as valid, ensuring commands are visible first */
787 	dma_wmb();
788 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
789 
790 	/* 4. If we are the owner, take control of the SMMU hardware */
791 	if (owner) {
792 		/* a. Wait for previous owner to finish */
793 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
794 
795 		/* b. Stop gathering work by clearing the owned flag */
796 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
797 						   &cmdq->q.llq.atomic.prod);
798 		prod &= ~CMDQ_PROD_OWNED_FLAG;
799 
800 		/*
801 		 * c. Wait for any gathered work to be written to the queue.
802 		 * Note that we read our own entries so that we have the control
803 		 * dependency required by (d).
804 		 */
805 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
806 
807 		/*
808 		 * d. Advance the hardware prod pointer
809 		 * Control dependency ordering from the entries becoming valid.
810 		 */
811 		writel_relaxed(prod, cmdq->q.prod_reg);
812 
813 		/*
814 		 * e. Tell the next owner we're done
815 		 * Make sure we've updated the hardware first, so that we don't
816 		 * race to update prod and potentially move it backwards.
817 		 */
818 		atomic_set_release(&cmdq->owner_prod, prod);
819 	}
820 
821 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
822 	if (sync) {
823 		llq.prod = queue_inc_prod_n(&llq, n);
824 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
825 		if (ret) {
826 			dev_err_ratelimited(smmu->dev,
827 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
828 					    llq.prod,
829 					    readl_relaxed(cmdq->q.prod_reg),
830 					    readl_relaxed(cmdq->q.cons_reg));
831 		}
832 
833 		/*
834 		 * Try to unlock the cmdq lock. This will fail if we're the last
835 		 * reader, in which case we can safely update cmdq->q.llq.cons
836 		 */
837 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
838 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
839 			arm_smmu_cmdq_shared_unlock(cmdq);
840 		}
841 	}
842 
843 	local_irq_restore(flags);
844 	return ret;
845 }
846 
847 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
848 				   struct arm_smmu_cmdq_ent *ent)
849 {
850 	u64 cmd[CMDQ_ENT_DWORDS];
851 
852 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
853 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
854 			 ent->opcode);
855 		return -EINVAL;
856 	}
857 
858 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
859 }
860 
861 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
862 {
863 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
864 }
865 
866 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
867 				    struct arm_smmu_cmdq_batch *cmds,
868 				    struct arm_smmu_cmdq_ent *cmd)
869 {
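	/*
	 * A full batch is flushed without a sync; the CMD_SYNC is only
	 * issued by arm_smmu_cmdq_batch_submit().
	 */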
870 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
871 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
872 		cmds->num = 0;
873 	}
874 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
875 	cmds->num++;
876 }
877 
878 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
879 				      struct arm_smmu_cmdq_batch *cmds)
880 {
881 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
882 }
883 
884 static int arm_smmu_page_response(struct device *dev,
885 				  struct iommu_fault_event *unused,
886 				  struct iommu_page_response *resp)
887 {
888 	struct arm_smmu_cmdq_ent cmd = {0};
889 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
890 	int sid = master->streams[0].id;
891 
892 	if (master->stall_enabled) {
893 		cmd.opcode		= CMDQ_OP_RESUME;
894 		cmd.resume.sid		= sid;
895 		cmd.resume.stag		= resp->grpid;
896 		switch (resp->code) {
897 		case IOMMU_PAGE_RESP_INVALID:
898 		case IOMMU_PAGE_RESP_FAILURE:
899 			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
900 			break;
901 		case IOMMU_PAGE_RESP_SUCCESS:
902 			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
903 			break;
904 		default:
905 			return -EINVAL;
906 		}
907 	} else {
908 		return -ENODEV;
909 	}
910 
911 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
912 	/*
913 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
914 	 * RESUME consumption guarantees that the stalled transaction will be
915 	 * terminated... at some point in the future. PRI_RESP is fire and
916 	 * forget.
917 	 */
918 
919 	return 0;
920 }
921 
922 /* Context descriptor manipulation functions */
923 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
924 {
925 	struct arm_smmu_cmdq_ent cmd = {
926 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
927 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
928 		.tlbi.asid = asid,
929 	};
930 
931 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
932 	arm_smmu_cmdq_issue_sync(smmu);
933 }
934 
935 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
936 			     int ssid, bool leaf)
937 {
938 	size_t i;
939 	unsigned long flags;
940 	struct arm_smmu_master *master;
941 	struct arm_smmu_cmdq_batch cmds = {};
942 	struct arm_smmu_device *smmu = smmu_domain->smmu;
943 	struct arm_smmu_cmdq_ent cmd = {
944 		.opcode	= CMDQ_OP_CFGI_CD,
945 		.cfgi	= {
946 			.ssid	= ssid,
947 			.leaf	= leaf,
948 		},
949 	};
950 
951 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
952 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
953 		for (i = 0; i < master->num_streams; i++) {
954 			cmd.cfgi.sid = master->streams[i].id;
955 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
956 		}
957 	}
958 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
959 
960 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
961 }
962 
963 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
964 					struct arm_smmu_l1_ctx_desc *l1_desc)
965 {
966 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
967 
968 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
969 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
970 	if (!l1_desc->l2ptr) {
971 		dev_warn(smmu->dev,
972 			 "failed to allocate context descriptor table\n");
973 		return -ENOMEM;
974 	}
975 	return 0;
976 }
977 
978 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
979 				      struct arm_smmu_l1_ctx_desc *l1_desc)
980 {
981 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
982 		  CTXDESC_L1_DESC_V;
983 
984 	/* See comment in arm_smmu_write_ctx_desc() */
985 	WRITE_ONCE(*dst, cpu_to_le64(val));
986 }
987 
988 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
989 				   u32 ssid)
990 {
991 	__le64 *l1ptr;
992 	unsigned int idx;
993 	struct arm_smmu_l1_ctx_desc *l1_desc;
994 	struct arm_smmu_device *smmu = smmu_domain->smmu;
995 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
996 
997 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
998 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
999 
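	/*
	 * Two-level table: the upper SSID bits select an L1 descriptor and
	 * the low CTXDESC_SPLIT bits index into its leaf table, which is
	 * allocated lazily on first use below.
	 */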
1000 	idx = ssid >> CTXDESC_SPLIT;
1001 	l1_desc = &cdcfg->l1_desc[idx];
1002 	if (!l1_desc->l2ptr) {
1003 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1004 			return NULL;
1005 
1006 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1007 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1008 		/* An invalid L1CD can be cached */
1009 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1010 	}
1011 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1012 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1013 }
1014 
1015 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1016 			    struct arm_smmu_ctx_desc *cd)
1017 {
1018 	/*
1019 	 * This function handles the following cases:
1020 	 *
1021 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1022 	 * (2) Install a secondary CD, for SID+SSID traffic.
1023 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1024 	 *     CD, then invalidate the old entry and mappings.
1025 	 * (4) Quiesce the context without clearing the valid bit. Disable
1026 	 *     translation, and ignore any translation fault.
1027 	 * (5) Remove a secondary CD.
1028 	 */
1029 	u64 val;
1030 	bool cd_live;
1031 	__le64 *cdptr;
1032 
1033 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1034 		return -E2BIG;
1035 
1036 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1037 	if (!cdptr)
1038 		return -ENOMEM;
1039 
1040 	val = le64_to_cpu(cdptr[0]);
1041 	cd_live = !!(val & CTXDESC_CD_0_V);
1042 
1043 	if (!cd) { /* (5) */
1044 		val = 0;
1045 	} else if (cd == &quiet_cd) { /* (4) */
1046 		val |= CTXDESC_CD_0_TCR_EPD0;
1047 	} else if (cd_live) { /* (3) */
1048 		val &= ~CTXDESC_CD_0_ASID;
1049 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1050 		/*
1051 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1052 		 * this substream's traffic
1053 		 */
1054 	} else { /* (1) and (2) */
1055 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1056 		cdptr[2] = 0;
1057 		cdptr[3] = cpu_to_le64(cd->mair);
1058 
1059 		/*
1060 		 * STE is live, and the SMMU might read dwords of this CD in any
1061 		 * order. Ensure that it observes valid values before reading
1062 		 * V=1.
1063 		 */
1064 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1065 
1066 		val = cd->tcr |
1067 #ifdef __BIG_ENDIAN
1068 			CTXDESC_CD_0_ENDI |
1069 #endif
1070 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1071 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1072 			CTXDESC_CD_0_AA64 |
1073 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1074 			CTXDESC_CD_0_V;
1075 
1076 		if (smmu_domain->stall_enabled)
1077 			val |= CTXDESC_CD_0_S;
1078 	}
1079 
1080 	/*
1081 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1082 	 * "Configuration structures and configuration invalidation completion"
1083 	 *
1084 	 *   The size of single-copy atomic reads made by the SMMU is
1085 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1086 	 *   field within an aligned 64-bit span of a structure can be altered
1087 	 *   without first making the structure invalid.
1088 	 */
1089 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1090 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1091 	return 0;
1092 }
1093 
1094 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1095 {
1096 	int ret;
1097 	size_t l1size;
1098 	size_t max_contexts;
1099 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1100 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1101 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1102 
1103 	max_contexts = 1 << cfg->s1cdmax;
1104 
1105 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1106 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1107 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1108 		cdcfg->num_l1_ents = max_contexts;
1109 
1110 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1111 	} else {
1112 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1113 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1114 						  CTXDESC_L2_ENTRIES);
1115 
1116 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1117 					      sizeof(*cdcfg->l1_desc),
1118 					      GFP_KERNEL);
1119 		if (!cdcfg->l1_desc)
1120 			return -ENOMEM;
1121 
1122 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1123 	}
1124 
1125 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1126 					   GFP_KERNEL);
1127 	if (!cdcfg->cdtab) {
1128 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1129 		ret = -ENOMEM;
1130 		goto err_free_l1;
1131 	}
1132 
1133 	return 0;
1134 
1135 err_free_l1:
1136 	if (cdcfg->l1_desc) {
1137 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1138 		cdcfg->l1_desc = NULL;
1139 	}
1140 	return ret;
1141 }
1142 
1143 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1144 {
1145 	int i;
1146 	size_t size, l1size;
1147 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1148 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1149 
1150 	if (cdcfg->l1_desc) {
1151 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1152 
1153 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1154 			if (!cdcfg->l1_desc[i].l2ptr)
1155 				continue;
1156 
1157 			dmam_free_coherent(smmu->dev, size,
1158 					   cdcfg->l1_desc[i].l2ptr,
1159 					   cdcfg->l1_desc[i].l2ptr_dma);
1160 		}
1161 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1162 		cdcfg->l1_desc = NULL;
1163 
1164 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1165 	} else {
1166 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1167 	}
1168 
1169 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1170 	cdcfg->cdtab_dma = 0;
1171 	cdcfg->cdtab = NULL;
1172 }
1173 
1174 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1175 {
1176 	bool free;
1177 	struct arm_smmu_ctx_desc *old_cd;
1178 
1179 	if (!cd->asid)
1180 		return false;
1181 
1182 	free = refcount_dec_and_test(&cd->refs);
1183 	if (free) {
1184 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1185 		WARN_ON(old_cd != cd);
1186 	}
1187 	return free;
1188 }
1189 
1190 /* Stream table manipulation functions */
1191 static void
1192 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1193 {
1194 	u64 val = 0;
1195 
1196 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1197 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1198 
1199 	/* See comment in arm_smmu_write_ctx_desc() */
1200 	WRITE_ONCE(*dst, cpu_to_le64(val));
1201 }
1202 
1203 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1204 {
1205 	struct arm_smmu_cmdq_ent cmd = {
1206 		.opcode	= CMDQ_OP_CFGI_STE,
1207 		.cfgi	= {
1208 			.sid	= sid,
1209 			.leaf	= true,
1210 		},
1211 	};
1212 
1213 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1214 	arm_smmu_cmdq_issue_sync(smmu);
1215 }
1216 
1217 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1218 				      __le64 *dst)
1219 {
1220 	/*
1221 	 * This is hideously complicated, but we only really care about
1222 	 * three cases at the moment:
1223 	 *
1224 	 * 1. Invalid (all zero) -> bypass/fault (init)
1225 	 * 2. Bypass/fault -> translation/bypass (attach)
1226 	 * 3. Translation/bypass -> bypass/fault (detach)
1227 	 *
1228 	 * Given that we can't update the STE atomically and the SMMU
1229 	 * doesn't read the thing in a defined order, that leaves us
1230 	 * with the following maintenance requirements:
1231 	 *
1232 	 * 1. Update Config, return (init time STEs aren't live)
1233 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1234 	 * 3. Update Config, sync
1235 	 */
1236 	u64 val = le64_to_cpu(dst[0]);
1237 	bool ste_live = false;
1238 	struct arm_smmu_device *smmu = NULL;
1239 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1240 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1241 	struct arm_smmu_domain *smmu_domain = NULL;
1242 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1243 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1244 		.prefetch	= {
1245 			.sid	= sid,
1246 		},
1247 	};
1248 
1249 	if (master) {
1250 		smmu_domain = master->domain;
1251 		smmu = master->smmu;
1252 	}
1253 
1254 	if (smmu_domain) {
1255 		switch (smmu_domain->stage) {
1256 		case ARM_SMMU_DOMAIN_S1:
1257 			s1_cfg = &smmu_domain->s1_cfg;
1258 			break;
1259 		case ARM_SMMU_DOMAIN_S2:
1260 		case ARM_SMMU_DOMAIN_NESTED:
1261 			s2_cfg = &smmu_domain->s2_cfg;
1262 			break;
1263 		default:
1264 			break;
1265 		}
1266 	}
1267 
1268 	if (val & STRTAB_STE_0_V) {
1269 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1270 		case STRTAB_STE_0_CFG_BYPASS:
1271 			break;
1272 		case STRTAB_STE_0_CFG_S1_TRANS:
1273 		case STRTAB_STE_0_CFG_S2_TRANS:
1274 			ste_live = true;
1275 			break;
1276 		case STRTAB_STE_0_CFG_ABORT:
1277 			BUG_ON(!disable_bypass);
1278 			break;
1279 		default:
1280 			BUG(); /* STE corruption */
1281 		}
1282 	}
1283 
1284 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1285 	val = STRTAB_STE_0_V;
1286 
1287 	/* Bypass/fault */
1288 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1289 		if (!smmu_domain && disable_bypass)
1290 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1291 		else
1292 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1293 
1294 		dst[0] = cpu_to_le64(val);
1295 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1296 						STRTAB_STE_1_SHCFG_INCOMING));
1297 		dst[2] = 0; /* Nuke the VMID */
1298 		/*
1299 		 * The SMMU can perform negative caching, so we must sync
1300 		 * the STE regardless of whether the old value was live.
1301 		 */
1302 		if (smmu)
1303 			arm_smmu_sync_ste_for_sid(smmu, sid);
1304 		return;
1305 	}
1306 
1307 	if (s1_cfg) {
1308 		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1309 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1310 
1311 		BUG_ON(ste_live);
1312 		dst[1] = cpu_to_le64(
1313 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1314 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1315 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1316 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1317 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1318 
1319 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1320 		    !master->stall_enabled)
1321 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1322 
1323 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1324 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1325 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1326 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1327 	}
1328 
1329 	if (s2_cfg) {
1330 		BUG_ON(ste_live);
1331 		dst[2] = cpu_to_le64(
1332 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1333 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1334 #ifdef __BIG_ENDIAN
1335 			 STRTAB_STE_2_S2ENDI |
1336 #endif
1337 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1338 			 STRTAB_STE_2_S2R);
1339 
1340 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1341 
1342 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1343 	}
1344 
1345 	if (master->ats_enabled)
1346 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1347 						 STRTAB_STE_1_EATS_TRANS));
1348 
1349 	arm_smmu_sync_ste_for_sid(smmu, sid);
1350 	/* See comment in arm_smmu_write_ctx_desc() */
1351 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1352 	arm_smmu_sync_ste_for_sid(smmu, sid);
1353 
1354 	/* It's likely that we'll want to use the new STE soon */
1355 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1356 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1357 }
1358 
1359 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1360 {
1361 	unsigned int i;
1362 
1363 	for (i = 0; i < nent; ++i) {
1364 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1365 		strtab += STRTAB_STE_DWORDS;
1366 	}
1367 }
1368 
1369 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1370 {
1371 	size_t size;
1372 	void *strtab;
1373 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1374 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1375 
1376 	if (desc->l2ptr)
1377 		return 0;
1378 
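	/* One leaf spans (1 << STRTAB_SPLIT) STEs of STRTAB_STE_DWORDS u64s. */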
1379 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1380 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1381 
1382 	desc->span = STRTAB_SPLIT + 1;
1383 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1384 					  GFP_KERNEL);
1385 	if (!desc->l2ptr) {
1386 		dev_err(smmu->dev,
1387 			"failed to allocate l2 stream table for SID %u\n",
1388 			sid);
1389 		return -ENOMEM;
1390 	}
1391 
1392 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1393 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1394 	return 0;
1395 }
1396 
1397 static struct arm_smmu_master *
1398 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1399 {
1400 	struct rb_node *node;
1401 	struct arm_smmu_stream *stream;
1402 
1403 	lockdep_assert_held(&smmu->streams_mutex);
1404 
1405 	node = smmu->streams.rb_node;
1406 	while (node) {
1407 		stream = rb_entry(node, struct arm_smmu_stream, node);
1408 		if (stream->id < sid)
1409 			node = node->rb_right;
1410 		else if (stream->id > sid)
1411 			node = node->rb_left;
1412 		else
1413 			return stream->master;
1414 	}
1415 
1416 	return NULL;
1417 }
1418 
1419 /* IRQ and event handlers */
1420 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1421 {
1422 	int ret;
1423 	u32 reason;
1424 	u32 perm = 0;
1425 	struct arm_smmu_master *master;
1426 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1427 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1428 	struct iommu_fault_event fault_evt = { };
1429 	struct iommu_fault *flt = &fault_evt.fault;
1430 
1431 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1432 	case EVT_ID_TRANSLATION_FAULT:
1433 		reason = IOMMU_FAULT_REASON_PTE_FETCH;
1434 		break;
1435 	case EVT_ID_ADDR_SIZE_FAULT:
1436 		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1437 		break;
1438 	case EVT_ID_ACCESS_FAULT:
1439 		reason = IOMMU_FAULT_REASON_ACCESS;
1440 		break;
1441 	case EVT_ID_PERMISSION_FAULT:
1442 		reason = IOMMU_FAULT_REASON_PERMISSION;
1443 		break;
1444 	default:
1445 		return -EOPNOTSUPP;
1446 	}
1447 
1448 	/* Stage-2 is always pinned at the moment */
1449 	if (evt[1] & EVTQ_1_S2)
1450 		return -EFAULT;
1451 
1452 	if (evt[1] & EVTQ_1_RnW)
1453 		perm |= IOMMU_FAULT_PERM_READ;
1454 	else
1455 		perm |= IOMMU_FAULT_PERM_WRITE;
1456 
1457 	if (evt[1] & EVTQ_1_InD)
1458 		perm |= IOMMU_FAULT_PERM_EXEC;
1459 
1460 	if (evt[1] & EVTQ_1_PnU)
1461 		perm |= IOMMU_FAULT_PERM_PRIV;
1462 
1463 	if (evt[1] & EVTQ_1_STALL) {
1464 		flt->type = IOMMU_FAULT_PAGE_REQ;
1465 		flt->prm = (struct iommu_fault_page_request) {
1466 			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1467 			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1468 			.perm = perm,
1469 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1470 		};
1471 
1472 		if (ssid_valid) {
1473 			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1474 			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1475 		}
1476 	} else {
1477 		flt->type = IOMMU_FAULT_DMA_UNRECOV;
1478 		flt->event = (struct iommu_fault_unrecoverable) {
1479 			.reason = reason,
1480 			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1481 			.perm = perm,
1482 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1483 		};
1484 
1485 		if (ssid_valid) {
1486 			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1487 			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1488 		}
1489 	}
1490 
1491 	mutex_lock(&smmu->streams_mutex);
1492 	master = arm_smmu_find_master(smmu, sid);
1493 	if (!master) {
1494 		ret = -EINVAL;
1495 		goto out_unlock;
1496 	}
1497 
1498 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1499 	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1500 		/* Nobody cared, abort the access */
1501 		struct iommu_page_response resp = {
1502 			.pasid		= flt->prm.pasid,
1503 			.grpid		= flt->prm.grpid,
1504 			.code		= IOMMU_PAGE_RESP_FAILURE,
1505 		};
1506 		arm_smmu_page_response(master->dev, &fault_evt, &resp);
1507 	}
1508 
1509 out_unlock:
1510 	mutex_unlock(&smmu->streams_mutex);
1511 	return ret;
1512 }
1513 
1514 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1515 {
1516 	int i, ret;
1517 	struct arm_smmu_device *smmu = dev;
1518 	struct arm_smmu_queue *q = &smmu->evtq.q;
1519 	struct arm_smmu_ll_queue *llq = &q->llq;
1520 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1521 				      DEFAULT_RATELIMIT_BURST);
1522 	u64 evt[EVTQ_ENT_DWORDS];
1523 
1524 	do {
1525 		while (!queue_remove_raw(q, evt)) {
1526 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1527 
1528 			ret = arm_smmu_handle_evt(smmu, evt);
1529 			if (!ret || !__ratelimit(&rs))
1530 				continue;
1531 
1532 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1533 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1534 				dev_info(smmu->dev, "\t0x%016llx\n",
1535 					 (unsigned long long)evt[i]);
1536 
1537 		}
1538 
1539 		/*
1540 		 * Not much we can do on overflow, so scream and pretend we're
1541 		 * trying harder.
1542 		 */
1543 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1544 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1545 	} while (!queue_empty(llq));
1546 
1547 	/* Sync our overflow flag, as we believe we're up to speed */
1548 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1549 		    Q_IDX(llq, llq->cons);
1550 	return IRQ_HANDLED;
1551 }
1552 
1553 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1554 {
1555 	u32 sid, ssid;
1556 	u16 grpid;
1557 	bool ssv, last;
1558 
1559 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1560 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1561 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1562 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1563 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1564 
1565 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1566 	dev_info(smmu->dev,
1567 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1568 		 sid, ssid, grpid, last ? "L" : "",
1569 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1570 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1571 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1572 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1573 		 evt[1] & PRIQ_1_ADDR_MASK);
1574 
1575 	if (last) {
1576 		struct arm_smmu_cmdq_ent cmd = {
1577 			.opcode			= CMDQ_OP_PRI_RESP,
1578 			.substream_valid	= ssv,
1579 			.pri			= {
1580 				.sid	= sid,
1581 				.ssid	= ssid,
1582 				.grpid	= grpid,
1583 				.resp	= PRI_RESP_DENY,
1584 			},
1585 		};
1586 
1587 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1588 	}
1589 }
1590 
1591 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1592 {
1593 	struct arm_smmu_device *smmu = dev;
1594 	struct arm_smmu_queue *q = &smmu->priq.q;
1595 	struct arm_smmu_ll_queue *llq = &q->llq;
1596 	u64 evt[PRIQ_ENT_DWORDS];
1597 
1598 	do {
1599 		while (!queue_remove_raw(q, evt))
1600 			arm_smmu_handle_ppr(smmu, evt);
1601 
1602 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1603 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1604 	} while (!queue_empty(llq));
1605 
1606 	/* Sync our overflow flag, as we believe we're up to speed */
1607 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1608 		      Q_IDX(llq, llq->cons);
1609 	queue_sync_cons_out(q);
1610 	return IRQ_HANDLED;
1611 }
1612 
1613 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1614 
1615 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1616 {
1617 	u32 gerror, gerrorn, active;
1618 	struct arm_smmu_device *smmu = dev;
1619 
1620 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1621 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1622 
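	/* An error is active while its GERROR and GERRORN bits disagree. */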
1623 	active = gerror ^ gerrorn;
1624 	if (!(active & GERROR_ERR_MASK))
1625 		return IRQ_NONE; /* No errors pending */
1626 
1627 	dev_warn(smmu->dev,
1628 		 "unexpected global error reported (0x%08x), this could be serious\n",
1629 		 active);
1630 
1631 	if (active & GERROR_SFM_ERR) {
1632 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1633 		arm_smmu_device_disable(smmu);
1634 	}
1635 
1636 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1637 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1638 
1639 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1640 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1641 
1642 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1643 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1644 
1645 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1646 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1647 
1648 	if (active & GERROR_PRIQ_ABT_ERR)
1649 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1650 
1651 	if (active & GERROR_EVTQ_ABT_ERR)
1652 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1653 
1654 	if (active & GERROR_CMDQ_ERR)
1655 		arm_smmu_cmdq_skip_err(smmu);
1656 
1657 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1658 	return IRQ_HANDLED;
1659 }
1660 
1661 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1662 {
1663 	struct arm_smmu_device *smmu = dev;
1664 
1665 	arm_smmu_evtq_thread(irq, dev);
1666 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1667 		arm_smmu_priq_thread(irq, dev);
1668 
1669 	return IRQ_HANDLED;
1670 }
1671 
1672 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1673 {
1674 	arm_smmu_gerror_handler(irq, dev);
1675 	return IRQ_WAKE_THREAD;
1676 }
1677 
1678 static void
1679 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1680 			struct arm_smmu_cmdq_ent *cmd)
1681 {
1682 	size_t log2_span;
1683 	size_t span_mask;
1684 	/* ATC invalidates are always on 4096-bytes pages */
1685 	size_t inval_grain_shift = 12;
1686 	unsigned long page_start, page_end;
1687 
1688 	/*
1689 	 * ATS and PASID:
1690 	 *
1691 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1692 	 * prefix. In that case all ATC entries within the address range are
1693 	 * invalidated, including those that were requested with a PASID! There
1694 	 * is no way to invalidate only entries without PASID.
1695 	 *
1696 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1697 	 * traffic), translation requests without PASID create ATC entries
1698 	 * without PASID, which must be invalidated with substream_valid clear.
1699 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1700 	 * ATC entries within the address range.
1701 	 */
1702 	*cmd = (struct arm_smmu_cmdq_ent) {
1703 		.opcode			= CMDQ_OP_ATC_INV,
1704 		.substream_valid	= !!ssid,
1705 		.atc.ssid		= ssid,
1706 	};
1707 
1708 	if (!size) {
1709 		cmd->atc.size = ATC_INV_SIZE_ALL;
1710 		return;
1711 	}
1712 
1713 	page_start	= iova >> inval_grain_shift;
1714 	page_end	= (iova + size - 1) >> inval_grain_shift;
1715 
1716 	/*
1717 	 * In an ATS Invalidate Request, the address must be aligned on the
1718 	 * range size, which must be a power of two number of page sizes. We
1719 	 * thus have to choose between grossly over-invalidating the region, or
1720 	 * splitting the invalidation into multiple commands. For simplicity
1721 	 * we'll go with the first solution, but should refine it in the future
1722 	 * if multiple commands are shown to be more efficient.
1723 	 *
1724 	 * Find the smallest power of two that covers the range. The most
1725 	 * significant differing bit between the start and end addresses,
1726 	 * fls(start ^ end), indicates the required span. For example:
1727 	 *
1728 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1729 	 *		x = 0b1000 ^ 0b1011 = 0b11
1730 	 *		span = 1 << fls(x) = 4
1731 	 *
1732 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1733 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1734 	 *		span = 1 << fls(x) = 16
1735 	 */
1736 	log2_span	= fls_long(page_start ^ page_end);
1737 	span_mask	= (1ULL << log2_span) - 1;
1738 
1739 	page_start	&= ~span_mask;
1740 
1741 	cmd->atc.addr	= page_start << inval_grain_shift;
1742 	cmd->atc.size	= log2_span;
1743 }
1744 
1745 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1746 {
1747 	int i;
1748 	struct arm_smmu_cmdq_ent cmd;
1749 
1750 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1751 
1752 	for (i = 0; i < master->num_streams; i++) {
1753 		cmd.atc.sid = master->streams[i].id;
1754 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1755 	}
1756 
1757 	return arm_smmu_cmdq_issue_sync(master->smmu);
1758 }
1759 
1760 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1761 			    unsigned long iova, size_t size)
1762 {
1763 	int i;
1764 	unsigned long flags;
1765 	struct arm_smmu_cmdq_ent cmd;
1766 	struct arm_smmu_master *master;
1767 	struct arm_smmu_cmdq_batch cmds = {};
1768 
1769 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1770 		return 0;
1771 
1772 	/*
1773 	 * Ensure that we've completed prior invalidation of the main TLBs
1774 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1775 	 * arm_smmu_enable_ats():
1776 	 *
1777 	 *	// unmap()			// arm_smmu_enable_ats()
1778 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1779 	 *	smp_mb();			[...]
1780 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1781 	 *
1782 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1783 	 * ATS was enabled at the PCI device before completion of the TLBI.
1784 	 */
1785 	smp_mb();
1786 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1787 		return 0;
1788 
1789 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1790 
1791 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1792 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1793 		if (!master->ats_enabled)
1794 			continue;
1795 
1796 		for (i = 0; i < master->num_streams; i++) {
1797 			cmd.atc.sid = master->streams[i].id;
1798 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1799 		}
1800 	}
1801 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1802 
1803 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1804 }
1805 
1806 /* IO_PGTABLE API */
1807 static void arm_smmu_tlb_inv_context(void *cookie)
1808 {
1809 	struct arm_smmu_domain *smmu_domain = cookie;
1810 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1811 	struct arm_smmu_cmdq_ent cmd;
1812 
1813 	/*
1814 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1815 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1816 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1817 	 * insertion to guarantee those are observed before the TLBI. Do be
1818 	 * careful, 007.
1819 	 */
1820 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1821 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1822 	} else {
1823 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1824 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1825 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1826 		arm_smmu_cmdq_issue_sync(smmu);
1827 	}
1828 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1829 }
1830 
1831 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1832 				     unsigned long iova, size_t size,
1833 				     size_t granule,
1834 				     struct arm_smmu_domain *smmu_domain)
1835 {
1836 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1837 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1838 	size_t inv_range = granule;
1839 	struct arm_smmu_cmdq_batch cmds = {};
1840 
1841 	if (!size)
1842 		return;
1843 
1844 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1845 		/* Get the leaf page size */
1846 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1847 
1848 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1849 		cmd->tlbi.tg = (tg - 10) / 2;
1850 
1851 		/* Determine what level the granule is at */
1852 		cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
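		/*
		 * For example (hypothetical values): with 4K pages (tg = 12)
		 * and a 2MiB granule, ttl = 4 - ((21 - 3) / 9) = 2, i.e. a
		 * level-2 block.
		 */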
1853 
1854 		num_pages = size >> tg;
1855 	}
1856 
1857 	while (iova < end) {
1858 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1859 			/*
1860 			 * On each iteration of the loop, the range is 5 bits'
1861 			 * worth of the aligned size remaining.
1862 			 * The range in pages is:
1863 			 *
1864 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1865 			 */
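			/*
			 * For example (hypothetical), num_pages = 0x21 with
			 * 4K pages is emitted as two commands: one covering a
			 * single page (scale = 0) followed by one covering
			 * 32 pages (scale = 5).
			 */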
1866 			unsigned long scale, num;
1867 
1868 			/* Determine the power of 2 multiple number of pages */
1869 			scale = __ffs(num_pages);
1870 			cmd->tlbi.scale = scale;
1871 
1872 			/* Determine how many chunks of 2^scale size we have */
1873 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1874 			cmd->tlbi.num = num - 1;
1875 
1876 			/* range is num * 2^scale * pgsize */
1877 			inv_range = num << (scale + tg);
1878 
1879 			/* Clear out the lower order bits for the next iteration */
1880 			num_pages -= num << scale;
1881 		}
1882 
1883 		cmd->tlbi.addr = iova;
1884 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1885 		iova += inv_range;
1886 	}
1887 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1888 }
1889 
1890 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1891 					  size_t granule, bool leaf,
1892 					  struct arm_smmu_domain *smmu_domain)
1893 {
1894 	struct arm_smmu_cmdq_ent cmd = {
1895 		.tlbi = {
1896 			.leaf	= leaf,
1897 		},
1898 	};
1899 
1900 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1901 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1902 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1903 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1904 	} else {
1905 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1906 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1907 	}
1908 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1909 
1910 	/*
1911 	 * Unfortunately, this can't be leaf-only since we may have
1912 	 * zapped an entire table.
1913 	 */
1914 	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1915 }
1916 
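/*
 * Per-ASID range invalidation. Unlike arm_smmu_tlb_inv_range_domain(), this
 * does not touch the ATC; callers (such as the SVA code) are expected to
 * issue any ATC invalidation themselves.
 */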
1917 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1918 				 size_t granule, bool leaf,
1919 				 struct arm_smmu_domain *smmu_domain)
1920 {
1921 	struct arm_smmu_cmdq_ent cmd = {
1922 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1923 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1924 		.tlbi = {
1925 			.asid	= asid,
1926 			.leaf	= leaf,
1927 		},
1928 	};
1929 
1930 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1931 }
1932 
1933 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1934 					 unsigned long iova, size_t granule,
1935 					 void *cookie)
1936 {
1937 	struct arm_smmu_domain *smmu_domain = cookie;
1938 	struct iommu_domain *domain = &smmu_domain->domain;
1939 
1940 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1941 }
1942 
1943 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1944 				  size_t granule, void *cookie)
1945 {
1946 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1947 }
1948 
1949 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1950 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1951 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1952 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1953 };
1954 
1955 /* IOMMU API */
1956 static bool arm_smmu_capable(enum iommu_cap cap)
1957 {
1958 	switch (cap) {
1959 	case IOMMU_CAP_CACHE_COHERENCY:
1960 		return true;
1961 	case IOMMU_CAP_NOEXEC:
1962 		return true;
1963 	default:
1964 		return false;
1965 	}
1966 }
1967 
1968 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1969 {
1970 	struct arm_smmu_domain *smmu_domain;
1971 
1972 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1973 	    type != IOMMU_DOMAIN_DMA &&
1974 	    type != IOMMU_DOMAIN_IDENTITY)
1975 		return NULL;
1976 
1977 	/*
1978 	 * Allocate the domain and initialise some of its data structures.
1979 	 * We can't really do anything meaningful until we've added a
1980 	 * master.
1981 	 */
1982 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1983 	if (!smmu_domain)
1984 		return NULL;
1985 
1986 	if (type == IOMMU_DOMAIN_DMA &&
1987 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1988 		kfree(smmu_domain);
1989 		return NULL;
1990 	}
1991 
1992 	mutex_init(&smmu_domain->init_mutex);
1993 	INIT_LIST_HEAD(&smmu_domain->devices);
1994 	spin_lock_init(&smmu_domain->devices_lock);
1995 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
1996 
1997 	return &smmu_domain->domain;
1998 }
1999 
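/*
 * Allocate a free index from a bitmap of 2^span bits. test_and_set_bit()
 * makes the allocation safe against concurrent callers without a lock;
 * this is used below for stage-2 VMID allocation.
 */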
2000 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2001 {
2002 	int idx, size = 1 << span;
2003 
2004 	do {
2005 		idx = find_first_zero_bit(map, size);
2006 		if (idx == size)
2007 			return -ENOSPC;
2008 	} while (test_and_set_bit(idx, map));
2009 
2010 	return idx;
2011 }
2012 
2013 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2014 {
2015 	clear_bit(idx, map);
2016 }
2017 
2018 static void arm_smmu_domain_free(struct iommu_domain *domain)
2019 {
2020 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2021 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2022 
2023 	iommu_put_dma_cookie(domain);
2024 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2025 
2026 	/* Free the CD and ASID, if we allocated them */
2027 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2028 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2029 
2030 		/* Prevent SVA from touching the CD while we're freeing it */
2031 		mutex_lock(&arm_smmu_asid_lock);
2032 		if (cfg->cdcfg.cdtab)
2033 			arm_smmu_free_cd_tables(smmu_domain);
2034 		arm_smmu_free_asid(&cfg->cd);
2035 		mutex_unlock(&arm_smmu_asid_lock);
2036 	} else {
2037 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2038 		if (cfg->vmid)
2039 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2040 	}
2041 
2042 	kfree(smmu_domain);
2043 }
2044 
2045 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2046 				       struct arm_smmu_master *master,
2047 				       struct io_pgtable_cfg *pgtbl_cfg)
2048 {
2049 	int ret;
2050 	u32 asid;
2051 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2052 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2053 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2054 
2055 	refcount_set(&cfg->cd.refs, 1);
2056 
2057 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2058 	mutex_lock(&arm_smmu_asid_lock);
2059 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2060 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2061 	if (ret)
2062 		goto out_unlock;
2063 
2064 	cfg->s1cdmax = master->ssid_bits;
2065 
2066 	smmu_domain->stall_enabled = master->stall_enabled;
2067 
2068 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2069 	if (ret)
2070 		goto out_free_asid;
2071 
2072 	cfg->cd.asid	= (u16)asid;
2073 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2074 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2075 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2076 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2077 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2078 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2079 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2080 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2081 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2082 
2083 	/*
2084 	 * Note that this will end up calling arm_smmu_sync_cd() before
2085 	 * the master has been added to the devices list for this domain.
2086 	 * This isn't an issue because the STE hasn't been installed yet.
2087 	 */
2088 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2089 	if (ret)
2090 		goto out_free_cd_tables;
2091 
2092 	mutex_unlock(&arm_smmu_asid_lock);
2093 	return 0;
2094 
2095 out_free_cd_tables:
2096 	arm_smmu_free_cd_tables(smmu_domain);
2097 out_free_asid:
2098 	arm_smmu_free_asid(&cfg->cd);
2099 out_unlock:
2100 	mutex_unlock(&arm_smmu_asid_lock);
2101 	return ret;
2102 }
2103 
2104 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2105 				       struct arm_smmu_master *master,
2106 				       struct io_pgtable_cfg *pgtbl_cfg)
2107 {
2108 	int vmid;
2109 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2110 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2111 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2112 
2113 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2114 	if (vmid < 0)
2115 		return vmid;
2116 
2117 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2118 	cfg->vmid	= (u16)vmid;
2119 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2120 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2121 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2122 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2123 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2124 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2125 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2126 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2127 	return 0;
2128 }
2129 
2130 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2131 				    struct arm_smmu_master *master)
2132 {
2133 	int ret;
2134 	unsigned long ias, oas;
2135 	enum io_pgtable_fmt fmt;
2136 	struct io_pgtable_cfg pgtbl_cfg;
2137 	struct io_pgtable_ops *pgtbl_ops;
2138 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2139 				 struct arm_smmu_master *,
2140 				 struct io_pgtable_cfg *);
2141 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2142 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2143 
2144 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2145 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2146 		return 0;
2147 	}
2148 
2149 	/* Restrict the stage to what we can actually support */
2150 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2151 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2152 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2153 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2154 
2155 	switch (smmu_domain->stage) {
2156 	case ARM_SMMU_DOMAIN_S1:
2157 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2158 		ias = min_t(unsigned long, ias, VA_BITS);
2159 		oas = smmu->ias;
2160 		fmt = ARM_64_LPAE_S1;
2161 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2162 		break;
2163 	case ARM_SMMU_DOMAIN_NESTED:
2164 	case ARM_SMMU_DOMAIN_S2:
2165 		ias = smmu->ias;
2166 		oas = smmu->oas;
2167 		fmt = ARM_64_LPAE_S2;
2168 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2169 		break;
2170 	default:
2171 		return -EINVAL;
2172 	}
2173 
2174 	pgtbl_cfg = (struct io_pgtable_cfg) {
2175 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2176 		.ias		= ias,
2177 		.oas		= oas,
2178 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2179 		.tlb		= &arm_smmu_flush_ops,
2180 		.iommu_dev	= smmu->dev,
2181 	};
2182 
2183 	if (!iommu_get_dma_strict(domain))
2184 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2185 
2186 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2187 	if (!pgtbl_ops)
2188 		return -ENOMEM;
2189 
2190 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2191 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2192 	domain->geometry.force_aperture = true;
2193 
2194 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2195 	if (ret < 0) {
2196 		free_io_pgtable_ops(pgtbl_ops);
2197 		return ret;
2198 	}
2199 
2200 	smmu_domain->pgtbl_ops = pgtbl_ops;
2201 	return 0;
2202 }
2203 
2204 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2205 {
2206 	__le64 *step;
2207 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2208 
2209 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2210 		struct arm_smmu_strtab_l1_desc *l1_desc;
2211 		int idx;
2212 
2213 		/* Two-level walk */
2214 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2215 		l1_desc = &cfg->l1_desc[idx];
2216 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2217 		step = &l1_desc->l2ptr[idx];
2218 	} else {
2219 		/* Simple linear lookup */
2220 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2221 	}
2222 
2223 	return step;
2224 }
2225 
2226 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2227 {
2228 	int i, j;
2229 	struct arm_smmu_device *smmu = master->smmu;
2230 
2231 	for (i = 0; i < master->num_streams; ++i) {
2232 		u32 sid = master->streams[i].id;
2233 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2234 
2235 		/* Bridged PCI devices may end up with duplicated IDs */
2236 		for (j = 0; j < i; j++)
2237 			if (master->streams[j].id == sid)
2238 				break;
2239 		if (j < i)
2240 			continue;
2241 
2242 		arm_smmu_write_strtab_ent(master, sid, step);
2243 	}
2244 }
2245 
2246 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2247 {
2248 	struct device *dev = master->dev;
2249 	struct arm_smmu_device *smmu = master->smmu;
2250 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2251 
2252 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2253 		return false;
2254 
2255 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2256 		return false;
2257 
2258 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2259 }
2260 
2261 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2262 {
2263 	size_t stu;
2264 	struct pci_dev *pdev;
2265 	struct arm_smmu_device *smmu = master->smmu;
2266 	struct arm_smmu_domain *smmu_domain = master->domain;
2267 
2268 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2269 	if (!master->ats_enabled)
2270 		return;
2271 
2272 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2273 	stu = __ffs(smmu->pgsize_bitmap);
2274 	pdev = to_pci_dev(master->dev);
2275 
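	/*
	 * Publish the counter and clean the ATC before ATS can be enabled at
	 * the endpoint; see the ordering comment in arm_smmu_atc_inv_domain().
	 */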
2276 	atomic_inc(&smmu_domain->nr_ats_masters);
2277 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2278 	if (pci_enable_ats(pdev, stu))
2279 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2280 }
2281 
2282 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2283 {
2284 	struct arm_smmu_domain *smmu_domain = master->domain;
2285 
2286 	if (!master->ats_enabled)
2287 		return;
2288 
2289 	pci_disable_ats(to_pci_dev(master->dev));
2290 	/*
2291 	 * Ensure ATS is disabled at the endpoint before we issue the
2292 	 * ATC invalidation via the SMMU.
2293 	 */
2294 	wmb();
2295 	arm_smmu_atc_inv_master(master);
2296 	atomic_dec(&smmu_domain->nr_ats_masters);
2297 }
2298 
2299 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2300 {
2301 	int ret;
2302 	int features;
2303 	int num_pasids;
2304 	struct pci_dev *pdev;
2305 
2306 	if (!dev_is_pci(master->dev))
2307 		return -ENODEV;
2308 
2309 	pdev = to_pci_dev(master->dev);
2310 
2311 	features = pci_pasid_features(pdev);
2312 	if (features < 0)
2313 		return features;
2314 
2315 	num_pasids = pci_max_pasids(pdev);
2316 	if (num_pasids <= 0)
2317 		return num_pasids;
2318 
2319 	ret = pci_enable_pasid(pdev, features);
2320 	if (ret) {
2321 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2322 		return ret;
2323 	}
2324 
2325 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2326 				  master->smmu->ssid_bits);
2327 	return 0;
2328 }
2329 
2330 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2331 {
2332 	struct pci_dev *pdev;
2333 
2334 	if (!dev_is_pci(master->dev))
2335 		return;
2336 
2337 	pdev = to_pci_dev(master->dev);
2338 
2339 	if (!pdev->pasid_enabled)
2340 		return;
2341 
2342 	master->ssid_bits = 0;
2343 	pci_disable_pasid(pdev);
2344 }
2345 
2346 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2347 {
2348 	unsigned long flags;
2349 	struct arm_smmu_domain *smmu_domain = master->domain;
2350 
2351 	if (!smmu_domain)
2352 		return;
2353 
2354 	arm_smmu_disable_ats(master);
2355 
2356 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2357 	list_del(&master->domain_head);
2358 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2359 
2360 	master->domain = NULL;
2361 	master->ats_enabled = false;
2362 	arm_smmu_install_ste_for_dev(master);
2363 }
2364 
2365 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2366 {
2367 	int ret = 0;
2368 	unsigned long flags;
2369 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2370 	struct arm_smmu_device *smmu;
2371 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2372 	struct arm_smmu_master *master;
2373 
2374 	if (!fwspec)
2375 		return -ENOENT;
2376 
2377 	master = dev_iommu_priv_get(dev);
2378 	smmu = master->smmu;
2379 
2380 	/*
2381 	 * Checking that SVA is disabled ensures that this device isn't bound to
2382 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2383 	 * be removed concurrently since we're holding the group mutex.
2384 	 */
2385 	if (arm_smmu_master_sva_enabled(master)) {
2386 		dev_err(dev, "cannot attach - SVA enabled\n");
2387 		return -EBUSY;
2388 	}
2389 
2390 	arm_smmu_detach_dev(master);
2391 
2392 	mutex_lock(&smmu_domain->init_mutex);
2393 
2394 	if (!smmu_domain->smmu) {
2395 		smmu_domain->smmu = smmu;
2396 		ret = arm_smmu_domain_finalise(domain, master);
2397 		if (ret) {
2398 			smmu_domain->smmu = NULL;
2399 			goto out_unlock;
2400 		}
2401 	} else if (smmu_domain->smmu != smmu) {
2402 		dev_err(dev,
2403 			"cannot attach to SMMU %s (upstream of %s)\n",
2404 			dev_name(smmu_domain->smmu->dev),
2405 			dev_name(smmu->dev));
2406 		ret = -ENXIO;
2407 		goto out_unlock;
2408 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2409 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2410 		dev_err(dev,
2411 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2412 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2413 		ret = -EINVAL;
2414 		goto out_unlock;
2415 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2416 		   smmu_domain->stall_enabled != master->stall_enabled) {
2417 		dev_err(dev, "cannot attach to stall-%s domain\n",
2418 			smmu_domain->stall_enabled ? "enabled" : "disabled");
2419 		ret = -EINVAL;
2420 		goto out_unlock;
2421 	}
2422 
2423 	master->domain = smmu_domain;
2424 
2425 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2426 		master->ats_enabled = arm_smmu_ats_supported(master);
2427 
2428 	arm_smmu_install_ste_for_dev(master);
2429 
2430 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2431 	list_add(&master->domain_head, &smmu_domain->devices);
2432 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2433 
2434 	arm_smmu_enable_ats(master);
2435 
2436 out_unlock:
2437 	mutex_unlock(&smmu_domain->init_mutex);
2438 	return ret;
2439 }
2440 
2441 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2442 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2443 {
2444 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2445 
2446 	if (!ops)
2447 		return -ENODEV;
2448 
2449 	return ops->map(ops, iova, paddr, size, prot, gfp);
2450 }
2451 
2452 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2453 			     size_t size, struct iommu_iotlb_gather *gather)
2454 {
2455 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2456 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2457 
2458 	if (!ops)
2459 		return 0;
2460 
2461 	return ops->unmap(ops, iova, size, gather);
2462 }
2463 
2464 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2465 {
2466 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2467 
2468 	if (smmu_domain->smmu)
2469 		arm_smmu_tlb_inv_context(smmu_domain);
2470 }
2471 
2472 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2473 				struct iommu_iotlb_gather *gather)
2474 {
2475 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2476 
2477 	if (!gather->pgsize)
2478 		return;
2479 
2480 	arm_smmu_tlb_inv_range_domain(gather->start,
2481 				      gather->end - gather->start + 1,
2482 				      gather->pgsize, true, smmu_domain);
2483 }
2484 
2485 static phys_addr_t
2486 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2487 {
2488 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2489 
2490 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2491 		return iova;
2492 
2493 	if (!ops)
2494 		return 0;
2495 
2496 	return ops->iova_to_phys(ops, iova);
2497 }
2498 
2499 static struct platform_driver arm_smmu_driver;
2500 
2501 static
2502 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2503 {
2504 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2505 							  fwnode);
2506 	put_device(dev);
2507 	return dev ? dev_get_drvdata(dev) : NULL;
2508 }
2509 
2510 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2511 {
2512 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2513 
2514 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2515 		limit *= 1UL << STRTAB_SPLIT;
2516 
2517 	return sid < limit;
2518 }
2519 
2520 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2521 				  struct arm_smmu_master *master)
2522 {
2523 	int i;
2524 	int ret = 0;
2525 	struct arm_smmu_stream *new_stream, *cur_stream;
2526 	struct rb_node **new_node, *parent_node = NULL;
2527 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2528 
2529 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2530 				  GFP_KERNEL);
2531 	if (!master->streams)
2532 		return -ENOMEM;
2533 	master->num_streams = fwspec->num_ids;
2534 
2535 	mutex_lock(&smmu->streams_mutex);
2536 	for (i = 0; i < fwspec->num_ids; i++) {
2537 		u32 sid = fwspec->ids[i];
2538 
2539 		new_stream = &master->streams[i];
2540 		new_stream->id = sid;
2541 		new_stream->master = master;
2542 
2543 		/*
2544 		 * Check the SIDs are in range of the SMMU and our stream table
2545 		 */
2546 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2547 			ret = -ERANGE;
2548 			break;
2549 		}
2550 
2551 		/* Ensure l2 strtab is initialised */
2552 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2553 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2554 			if (ret)
2555 				break;
2556 		}
2557 
2558 		/* Insert into SID tree */
2559 		new_node = &(smmu->streams.rb_node);
2560 		while (*new_node) {
2561 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2562 					      node);
2563 			parent_node = *new_node;
2564 			if (cur_stream->id > new_stream->id) {
2565 				new_node = &((*new_node)->rb_left);
2566 			} else if (cur_stream->id < new_stream->id) {
2567 				new_node = &((*new_node)->rb_right);
2568 			} else {
2569 				dev_warn(master->dev,
2570 					 "stream %u already in tree\n",
2571 					 cur_stream->id);
2572 				ret = -EINVAL;
2573 				break;
2574 			}
2575 		}
2576 		if (ret)
2577 			break;
2578 
2579 		rb_link_node(&new_stream->node, parent_node, new_node);
2580 		rb_insert_color(&new_stream->node, &smmu->streams);
2581 	}
2582 
2583 	if (ret) {
2584 		for (i--; i >= 0; i--)
2585 			rb_erase(&master->streams[i].node, &smmu->streams);
2586 		kfree(master->streams);
2587 	}
2588 	mutex_unlock(&smmu->streams_mutex);
2589 
2590 	return ret;
2591 }
2592 
2593 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2594 {
2595 	int i;
2596 	struct arm_smmu_device *smmu = master->smmu;
2597 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2598 
2599 	if (!smmu || !master->streams)
2600 		return;
2601 
2602 	mutex_lock(&smmu->streams_mutex);
2603 	for (i = 0; i < fwspec->num_ids; i++)
2604 		rb_erase(&master->streams[i].node, &smmu->streams);
2605 	mutex_unlock(&smmu->streams_mutex);
2606 
2607 	kfree(master->streams);
2608 }
2609 
2610 static struct iommu_ops arm_smmu_ops;
2611 
2612 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2613 {
2614 	int ret;
2615 	struct arm_smmu_device *smmu;
2616 	struct arm_smmu_master *master;
2617 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2618 
2619 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2620 		return ERR_PTR(-ENODEV);
2621 
2622 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2623 		return ERR_PTR(-EBUSY);
2624 
2625 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2626 	if (!smmu)
2627 		return ERR_PTR(-ENODEV);
2628 
2629 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2630 	if (!master)
2631 		return ERR_PTR(-ENOMEM);
2632 
2633 	master->dev = dev;
2634 	master->smmu = smmu;
2635 	INIT_LIST_HEAD(&master->bonds);
2636 	dev_iommu_priv_set(dev, master);
2637 
2638 	ret = arm_smmu_insert_master(smmu, master);
2639 	if (ret)
2640 		goto err_free_master;
2641 
2642 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2643 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2644 
2645 	/*
2646 	 * Note that PASID must be enabled before, and disabled after ATS:
2647 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2648 	 *
2649 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2650 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2651 	 *   are changed.
2652 	 */
2653 	arm_smmu_enable_pasid(master);
2654 
2655 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2656 		master->ssid_bits = min_t(u8, master->ssid_bits,
2657 					  CTXDESC_LINEAR_CDMAX);
2658 
2659 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2660 	     device_property_read_bool(dev, "dma-can-stall")) ||
2661 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2662 		master->stall_enabled = true;
2663 
2664 	return &smmu->iommu;
2665 
2666 err_free_master:
2667 	kfree(master);
2668 	dev_iommu_priv_set(dev, NULL);
2669 	return ERR_PTR(ret);
2670 }
2671 
2672 static void arm_smmu_release_device(struct device *dev)
2673 {
2674 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2675 	struct arm_smmu_master *master;
2676 
2677 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2678 		return;
2679 
2680 	master = dev_iommu_priv_get(dev);
2681 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2682 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2683 	arm_smmu_detach_dev(master);
2684 	arm_smmu_disable_pasid(master);
2685 	arm_smmu_remove_master(master);
2686 	kfree(master);
2687 	iommu_fwspec_free(dev);
2688 }
2689 
2690 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2691 {
2692 	struct iommu_group *group;
2693 
2694 	/*
2695 	 * We don't support devices sharing stream IDs other than PCI RID
2696 	 * aliases, since the necessary ID-to-device lookup becomes rather
2697 	 * impractical given a potential sparse 32-bit stream ID space.
2698 	 */
2699 	if (dev_is_pci(dev))
2700 		group = pci_device_group(dev);
2701 	else
2702 		group = generic_device_group(dev);
2703 
2704 	return group;
2705 }
2706 
2707 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2708 {
2709 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2710 	int ret = 0;
2711 
2712 	mutex_lock(&smmu_domain->init_mutex);
2713 	if (smmu_domain->smmu)
2714 		ret = -EPERM;
2715 	else
2716 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2717 	mutex_unlock(&smmu_domain->init_mutex);
2718 
2719 	return ret;
2720 }
2721 
2722 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2723 {
2724 	return iommu_fwspec_add_ids(dev, args->args, 1);
2725 }
2726 
2727 static void arm_smmu_get_resv_regions(struct device *dev,
2728 				      struct list_head *head)
2729 {
2730 	struct iommu_resv_region *region;
2731 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2732 
2733 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2734 					 prot, IOMMU_RESV_SW_MSI);
2735 	if (!region)
2736 		return;
2737 
2738 	list_add_tail(&region->list, head);
2739 
2740 	iommu_dma_get_resv_regions(dev, head);
2741 }
2742 
2743 static bool arm_smmu_dev_has_feature(struct device *dev,
2744 				     enum iommu_dev_features feat)
2745 {
2746 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2747 
2748 	if (!master)
2749 		return false;
2750 
2751 	switch (feat) {
2752 	case IOMMU_DEV_FEAT_IOPF:
2753 		return arm_smmu_master_iopf_supported(master);
2754 	case IOMMU_DEV_FEAT_SVA:
2755 		return arm_smmu_master_sva_supported(master);
2756 	default:
2757 		return false;
2758 	}
2759 }
2760 
2761 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2762 					 enum iommu_dev_features feat)
2763 {
2764 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2765 
2766 	if (!master)
2767 		return false;
2768 
2769 	switch (feat) {
2770 	case IOMMU_DEV_FEAT_IOPF:
2771 		return master->iopf_enabled;
2772 	case IOMMU_DEV_FEAT_SVA:
2773 		return arm_smmu_master_sva_enabled(master);
2774 	default:
2775 		return false;
2776 	}
2777 }
2778 
2779 static int arm_smmu_dev_enable_feature(struct device *dev,
2780 				       enum iommu_dev_features feat)
2781 {
2782 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2783 
2784 	if (!arm_smmu_dev_has_feature(dev, feat))
2785 		return -ENODEV;
2786 
2787 	if (arm_smmu_dev_feature_enabled(dev, feat))
2788 		return -EBUSY;
2789 
2790 	switch (feat) {
2791 	case IOMMU_DEV_FEAT_IOPF:
2792 		master->iopf_enabled = true;
2793 		return 0;
2794 	case IOMMU_DEV_FEAT_SVA:
2795 		return arm_smmu_master_enable_sva(master);
2796 	default:
2797 		return -EINVAL;
2798 	}
2799 }
2800 
2801 static int arm_smmu_dev_disable_feature(struct device *dev,
2802 					enum iommu_dev_features feat)
2803 {
2804 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2805 
2806 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2807 		return -EINVAL;
2808 
2809 	switch (feat) {
2810 	case IOMMU_DEV_FEAT_IOPF:
2811 		if (master->sva_enabled)
2812 			return -EBUSY;
2813 		master->iopf_enabled = false;
2814 		return 0;
2815 	case IOMMU_DEV_FEAT_SVA:
2816 		return arm_smmu_master_disable_sva(master);
2817 	default:
2818 		return -EINVAL;
2819 	}
2820 }
2821 
2822 static struct iommu_ops arm_smmu_ops = {
2823 	.capable		= arm_smmu_capable,
2824 	.domain_alloc		= arm_smmu_domain_alloc,
2825 	.domain_free		= arm_smmu_domain_free,
2826 	.attach_dev		= arm_smmu_attach_dev,
2827 	.map			= arm_smmu_map,
2828 	.unmap			= arm_smmu_unmap,
2829 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2830 	.iotlb_sync		= arm_smmu_iotlb_sync,
2831 	.iova_to_phys		= arm_smmu_iova_to_phys,
2832 	.probe_device		= arm_smmu_probe_device,
2833 	.release_device		= arm_smmu_release_device,
2834 	.device_group		= arm_smmu_device_group,
2835 	.enable_nesting		= arm_smmu_enable_nesting,
2836 	.of_xlate		= arm_smmu_of_xlate,
2837 	.get_resv_regions	= arm_smmu_get_resv_regions,
2838 	.put_resv_regions	= generic_iommu_put_resv_regions,
2839 	.dev_has_feat		= arm_smmu_dev_has_feature,
2840 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2841 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2842 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2843 	.sva_bind		= arm_smmu_sva_bind,
2844 	.sva_unbind		= arm_smmu_sva_unbind,
2845 	.sva_get_pasid		= arm_smmu_sva_get_pasid,
2846 	.page_response		= arm_smmu_page_response,
2847 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2848 	.owner			= THIS_MODULE,
2849 };
2850 
2851 /* Probing and initialisation functions */
2852 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2853 				   struct arm_smmu_queue *q,
2854 				   void __iomem *page,
2855 				   unsigned long prod_off,
2856 				   unsigned long cons_off,
2857 				   size_t dwords, const char *name)
2858 {
2859 	size_t qsz;
2860 
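	/*
	 * Start from the configured maximum size and keep halving the queue
	 * until the DMA allocation succeeds or the queue is already smaller
	 * than a page.
	 */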
2861 	do {
2862 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2863 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2864 					      GFP_KERNEL);
2865 		if (q->base || qsz < PAGE_SIZE)
2866 			break;
2867 
2868 		q->llq.max_n_shift--;
2869 	} while (1);
2870 
2871 	if (!q->base) {
2872 		dev_err(smmu->dev,
2873 			"failed to allocate queue (0x%zx bytes) for %s\n",
2874 			qsz, name);
2875 		return -ENOMEM;
2876 	}
2877 
2878 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2879 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2880 			 1 << q->llq.max_n_shift, name);
2881 	}
2882 
2883 	q->prod_reg	= page + prod_off;
2884 	q->cons_reg	= page + cons_off;
2885 	q->ent_dwords	= dwords;
2886 
2887 	q->q_base  = Q_BASE_RWA;
2888 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2889 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2890 
2891 	q->llq.prod = q->llq.cons = 0;
2892 	return 0;
2893 }
2894 
2895 static void arm_smmu_cmdq_free_bitmap(void *data)
2896 {
2897 	unsigned long *bitmap = data;
2898 	bitmap_free(bitmap);
2899 }
2900 
2901 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2902 {
2903 	int ret = 0;
2904 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2905 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2906 	atomic_long_t *bitmap;
2907 
2908 	atomic_set(&cmdq->owner_prod, 0);
2909 	atomic_set(&cmdq->lock, 0);
2910 
2911 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2912 	if (!bitmap) {
2913 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2914 		ret = -ENOMEM;
2915 	} else {
2916 		cmdq->valid_map = bitmap;
2917 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2918 	}
2919 
2920 	return ret;
2921 }
2922 
2923 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2924 {
2925 	int ret;
2926 
2927 	/* cmdq */
2928 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2929 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2930 				      CMDQ_ENT_DWORDS, "cmdq");
2931 	if (ret)
2932 		return ret;
2933 
2934 	ret = arm_smmu_cmdq_init(smmu);
2935 	if (ret)
2936 		return ret;
2937 
2938 	/* evtq */
2939 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2940 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2941 				      EVTQ_ENT_DWORDS, "evtq");
2942 	if (ret)
2943 		return ret;
2944 
2945 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2946 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2947 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2948 		if (!smmu->evtq.iopf)
2949 			return -ENOMEM;
2950 	}
2951 
2952 	/* priq */
2953 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2954 		return 0;
2955 
2956 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2957 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2958 				       PRIQ_ENT_DWORDS, "priq");
2959 }
2960 
2961 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2962 {
2963 	unsigned int i;
2964 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2965 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2966 	void *strtab = smmu->strtab_cfg.strtab;
2967 
2968 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2969 	if (!cfg->l1_desc)
2970 		return -ENOMEM;
2971 
2972 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2973 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2974 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2975 	}
2976 
2977 	return 0;
2978 }
2979 
2980 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2981 {
2982 	void *strtab;
2983 	u64 reg;
2984 	u32 size, l1size;
2985 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2986 
2987 	/* Calculate the L1 size, capped to the SIDSIZE. */
2988 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2989 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2990 	cfg->num_l1_ents = 1 << size;
2991 
2992 	size += STRTAB_SPLIT;
2993 	if (size < smmu->sid_bits)
2994 		dev_warn(smmu->dev,
2995 			 "2-level strtab only covers %u/%u bits of SID\n",
2996 			 size, smmu->sid_bits);
2997 
2998 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2999 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3000 				     GFP_KERNEL);
3001 	if (!strtab) {
3002 		dev_err(smmu->dev,
3003 			"failed to allocate l1 stream table (%u bytes)\n",
3004 			l1size);
3005 		return -ENOMEM;
3006 	}
3007 	cfg->strtab = strtab;
3008 
3009 	/* Configure strtab_base_cfg for 2 levels */
3010 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3011 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3012 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3013 	cfg->strtab_base_cfg = reg;
3014 
3015 	return arm_smmu_init_l1_strtab(smmu);
3016 }
3017 
3018 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3019 {
3020 	void *strtab;
3021 	u64 reg;
3022 	u32 size;
3023 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3024 
3025 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3026 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3027 				     GFP_KERNEL);
3028 	if (!strtab) {
3029 		dev_err(smmu->dev,
3030 			"failed to allocate linear stream table (%u bytes)\n",
3031 			size);
3032 		return -ENOMEM;
3033 	}
3034 	cfg->strtab = strtab;
3035 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3036 
3037 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3038 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3039 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3040 	cfg->strtab_base_cfg = reg;
3041 
3042 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3043 	return 0;
3044 }
3045 
3046 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3047 {
3048 	u64 reg;
3049 	int ret;
3050 
3051 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3052 		ret = arm_smmu_init_strtab_2lvl(smmu);
3053 	else
3054 		ret = arm_smmu_init_strtab_linear(smmu);
3055 
3056 	if (ret)
3057 		return ret;
3058 
3059 	/* Set the strtab base address */
3060 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3061 	reg |= STRTAB_BASE_RA;
3062 	smmu->strtab_cfg.strtab_base = reg;
3063 
3064 	/* Allocate the first VMID for stage-2 bypass STEs */
3065 	set_bit(0, smmu->vmid_map);
3066 	return 0;
3067 }
3068 
3069 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3070 {
3071 	int ret;
3072 
3073 	mutex_init(&smmu->streams_mutex);
3074 	smmu->streams = RB_ROOT;
3075 
3076 	ret = arm_smmu_init_queues(smmu);
3077 	if (ret)
3078 		return ret;
3079 
3080 	return arm_smmu_init_strtab(smmu);
3081 }
3082 
3083 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3084 				   unsigned int reg_off, unsigned int ack_off)
3085 {
3086 	u32 reg;
3087 
3088 	writel_relaxed(val, smmu->base + reg_off);
3089 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3090 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3091 }
3092 
3093 /* GBPA is "special" */
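/*
 * Updates are acknowledged via the Update bit: poll for GBPA.Update to be
 * clear, write the new fields with Update set, then poll again until the
 * SMMU clears Update.
 */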
3094 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3095 {
3096 	int ret;
3097 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3098 
3099 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3100 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3101 	if (ret)
3102 		return ret;
3103 
3104 	reg &= ~clr;
3105 	reg |= set;
3106 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3107 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3108 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3109 
3110 	if (ret)
3111 		dev_err(smmu->dev, "GBPA not responding to update\n");
3112 	return ret;
3113 }
3114 
3115 static void arm_smmu_free_msis(void *data)
3116 {
3117 	struct device *dev = data;
3118 	platform_msi_domain_free_irqs(dev);
3119 }
3120 
3121 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3122 {
3123 	phys_addr_t doorbell;
3124 	struct device *dev = msi_desc_to_dev(desc);
3125 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3126 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3127 
3128 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3129 	doorbell &= MSI_CFG0_ADDR_MASK;
3130 
3131 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3132 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3133 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3134 }
3135 
3136 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3137 {
3138 	struct msi_desc *desc;
3139 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3140 	struct device *dev = smmu->dev;
3141 
3142 	/* Clear the MSI address regs */
3143 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3144 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3145 
3146 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3147 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3148 	else
3149 		nvec--;
3150 
3151 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3152 		return;
3153 
3154 	if (!dev->msi_domain) {
3155 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3156 		return;
3157 	}
3158 
3159 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3160 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3161 	if (ret) {
3162 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3163 		return;
3164 	}
3165 
3166 	for_each_msi_entry(desc, dev) {
3167 		switch (desc->platform.msi_index) {
3168 		case EVTQ_MSI_INDEX:
3169 			smmu->evtq.q.irq = desc->irq;
3170 			break;
3171 		case GERROR_MSI_INDEX:
3172 			smmu->gerr_irq = desc->irq;
3173 			break;
3174 		case PRIQ_MSI_INDEX:
3175 			smmu->priq.q.irq = desc->irq;
3176 			break;
3177 		default:	/* Unknown */
3178 			continue;
3179 		}
3180 	}
3181 
3182 	/* Add callback to free MSIs on teardown */
3183 	devm_add_action(dev, arm_smmu_free_msis, dev);
3184 }
3185 
3186 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3187 {
3188 	int irq, ret;
3189 
3190 	arm_smmu_setup_msis(smmu);
3191 
3192 	/* Request interrupt lines */
3193 	irq = smmu->evtq.q.irq;
3194 	if (irq) {
3195 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3196 						arm_smmu_evtq_thread,
3197 						IRQF_ONESHOT,
3198 						"arm-smmu-v3-evtq", smmu);
3199 		if (ret < 0)
3200 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3201 	} else {
3202 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3203 	}
3204 
3205 	irq = smmu->gerr_irq;
3206 	if (irq) {
3207 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3208 				       0, "arm-smmu-v3-gerror", smmu);
3209 		if (ret < 0)
3210 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3211 	} else {
3212 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3213 	}
3214 
3215 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3216 		irq = smmu->priq.q.irq;
3217 		if (irq) {
3218 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3219 							arm_smmu_priq_thread,
3220 							IRQF_ONESHOT,
3221 							"arm-smmu-v3-priq",
3222 							smmu);
3223 			if (ret < 0)
3224 				dev_warn(smmu->dev,
3225 					 "failed to enable priq irq\n");
3226 		} else {
3227 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3228 		}
3229 	}
3230 }
3231 
3232 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3233 {
3234 	int ret, irq;
3235 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3236 
3237 	/* Disable IRQs first */
3238 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3239 				      ARM_SMMU_IRQ_CTRLACK);
3240 	if (ret) {
3241 		dev_err(smmu->dev, "failed to disable irqs\n");
3242 		return ret;
3243 	}
3244 
3245 	irq = smmu->combined_irq;
3246 	if (irq) {
3247 		/*
3248 		 * Cavium ThunderX2 implementation doesn't support unique irq
3249 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3250 		 */
3251 		ret = devm_request_threaded_irq(smmu->dev, irq,
3252 					arm_smmu_combined_irq_handler,
3253 					arm_smmu_combined_irq_thread,
3254 					IRQF_ONESHOT,
3255 					"arm-smmu-v3-combined-irq", smmu);
3256 		if (ret < 0)
3257 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3258 	} else
3259 		arm_smmu_setup_unique_irqs(smmu);
3260 
3261 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3262 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3263 
3264 	/* Enable interrupt generation on the SMMU */
3265 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3266 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3267 	if (ret)
3268 		dev_warn(smmu->dev, "failed to enable irqs\n");
3269 
3270 	return 0;
3271 }
3272 
3273 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3274 {
3275 	int ret;
3276 
3277 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3278 	if (ret)
3279 		dev_err(smmu->dev, "failed to clear cr0\n");
3280 
3281 	return ret;
3282 }
3283 
3284 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3285 {
3286 	int ret;
3287 	u32 reg, enables;
3288 	struct arm_smmu_cmdq_ent cmd;
3289 
3290 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3291 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3292 	if (reg & CR0_SMMUEN) {
3293 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3294 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3295 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3296 	}
3297 
3298 	ret = arm_smmu_device_disable(smmu);
3299 	if (ret)
3300 		return ret;
3301 
3302 	/* CR1 (table and queue memory attributes) */
3303 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3304 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3305 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3306 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3307 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3308 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3309 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3310 
3311 	/* CR2 (random crap) */
3312 	reg = CR2_PTM | CR2_RECINVSID;
3313 
3314 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3315 		reg |= CR2_E2H;
3316 
3317 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3318 
3319 	/* Stream table */
3320 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3321 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3322 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3323 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3324 
3325 	/* Command queue */
3326 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3327 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3328 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3329 
3330 	enables = CR0_CMDQEN;
3331 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3332 				      ARM_SMMU_CR0ACK);
3333 	if (ret) {
3334 		dev_err(smmu->dev, "failed to enable command queue\n");
3335 		return ret;
3336 	}
3337 
3338 	/* Invalidate any cached configuration */
3339 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3340 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3341 	arm_smmu_cmdq_issue_sync(smmu);
3342 
3343 	/* Invalidate any stale TLB entries */
3344 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3345 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3346 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3347 	}
3348 
3349 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3350 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3351 	arm_smmu_cmdq_issue_sync(smmu);
3352 
3353 	/* Event queue */
3354 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3355 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3356 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3357 
3358 	enables |= CR0_EVTQEN;
3359 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3360 				      ARM_SMMU_CR0ACK);
3361 	if (ret) {
3362 		dev_err(smmu->dev, "failed to enable event queue\n");
3363 		return ret;
3364 	}
3365 
3366 	/* PRI queue */
3367 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3368 		writeq_relaxed(smmu->priq.q.q_base,
3369 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3370 		writel_relaxed(smmu->priq.q.llq.prod,
3371 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3372 		writel_relaxed(smmu->priq.q.llq.cons,
3373 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3374 
3375 		enables |= CR0_PRIQEN;
3376 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3377 					      ARM_SMMU_CR0ACK);
3378 		if (ret) {
3379 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3380 			return ret;
3381 		}
3382 	}
3383 
3384 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3385 		enables |= CR0_ATSCHK;
3386 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3387 					      ARM_SMMU_CR0ACK);
3388 		if (ret) {
3389 			dev_err(smmu->dev, "failed to enable ATS check\n");
3390 			return ret;
3391 		}
3392 	}
3393 
3394 	ret = arm_smmu_setup_irqs(smmu);
3395 	if (ret) {
3396 		dev_err(smmu->dev, "failed to setup irqs\n");
3397 		return ret;
3398 	}
3399 
3400 	if (is_kdump_kernel())
3401 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3402 
3403 	/* Enable the SMMU interface, or ensure bypass */
3404 	if (!bypass || disable_bypass) {
3405 		enables |= CR0_SMMUEN;
3406 	} else {
3407 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3408 		if (ret)
3409 			return ret;
3410 	}
3411 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3412 				      ARM_SMMU_CR0ACK);
3413 	if (ret) {
3414 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3415 		return ret;
3416 	}
3417 
3418 	return 0;
3419 }
3420 
3421 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3422 {
3423 	u32 reg;
3424 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3425 
3426 	/* IDR0 */
3427 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3428 
3429 	/* 2-level structures */
3430 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3431 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3432 
3433 	if (reg & IDR0_CD2L)
3434 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3435 
3436 	/*
3437 	 * Translation table endianness.
3438 	 * We currently require the same endianness as the CPU, but this
3439 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3440 	 */
3441 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3442 	case IDR0_TTENDIAN_MIXED:
3443 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3444 		break;
3445 #ifdef __BIG_ENDIAN
3446 	case IDR0_TTENDIAN_BE:
3447 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3448 		break;
3449 #else
3450 	case IDR0_TTENDIAN_LE:
3451 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3452 		break;
3453 #endif
3454 	default:
3455 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3456 		return -ENXIO;
3457 	}
3458 
3459 	/* Boolean feature flags */
3460 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3461 		smmu->features |= ARM_SMMU_FEAT_PRI;
3462 
3463 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3464 		smmu->features |= ARM_SMMU_FEAT_ATS;
3465 
3466 	if (reg & IDR0_SEV)
3467 		smmu->features |= ARM_SMMU_FEAT_SEV;
3468 
3469 	if (reg & IDR0_MSI) {
3470 		smmu->features |= ARM_SMMU_FEAT_MSI;
3471 		if (coherent && !disable_msipolling)
3472 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3473 	}
3474 
3475 	if (reg & IDR0_HYP) {
3476 		smmu->features |= ARM_SMMU_FEAT_HYP;
3477 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3478 			smmu->features |= ARM_SMMU_FEAT_E2H;
3479 	}
3480 
3481 	/*
3482 	 * The coherency feature as set by FW is used in preference to the ID
3483 	 * register, but warn on mismatch.
3484 	 */
3485 	if (!!(reg & IDR0_COHACC) != coherent)
3486 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3487 			 coherent ? "true" : "false");
3488 
3489 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3490 	case IDR0_STALL_MODEL_FORCE:
3491 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3492 		fallthrough;
3493 	case IDR0_STALL_MODEL_STALL:
3494 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3495 	}
3496 
3497 	if (reg & IDR0_S1P)
3498 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3499 
3500 	if (reg & IDR0_S2P)
3501 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3502 
3503 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3504 		dev_err(smmu->dev, "no translation support!\n");
3505 		return -ENXIO;
3506 	}
3507 
3508 	/* We only support the AArch64 table format at present */
3509 	switch (FIELD_GET(IDR0_TTF, reg)) {
3510 	case IDR0_TTF_AARCH32_64:
3511 		smmu->ias = 40;
3512 		fallthrough;
3513 	case IDR0_TTF_AARCH64:
3514 		break;
3515 	default:
3516 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3517 		return -ENXIO;
3518 	}
3519 
3520 	/* ASID/VMID sizes */
3521 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3522 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3523 
3524 	/* IDR1 */
3525 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3526 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3527 		dev_err(smmu->dev, "embedded implementation not supported\n");
3528 		return -ENXIO;
3529 	}
3530 
3531 	/* Queue sizes, capped to ensure natural alignment */
3532 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3533 					     FIELD_GET(IDR1_CMDQS, reg));
3534 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3535 		/*
3536 		 * We don't support splitting up batches, so one batch of
3537 		 * commands plus an extra sync needs to fit inside the command
3538 		 * queue. There's also no way we can handle the weird alignment
3539 		 * restrictions on the base pointer for a unit-length queue.
3540 		 */
3541 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3542 			CMDQ_BATCH_ENTRIES);
3543 		return -ENXIO;
3544 	}
3545 
3546 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3547 					     FIELD_GET(IDR1_EVTQS, reg));
3548 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3549 					     FIELD_GET(IDR1_PRIQS, reg));
3550 
3551 	/* SID/SSID sizes */
3552 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3553 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3554 
3555 	/*
3556 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3557 	 * table, use a linear table instead.
3558 	 */
3559 	if (smmu->sid_bits <= STRTAB_SPLIT)
3560 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3561 
3562 	/* IDR3 */
3563 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3564 	if (FIELD_GET(IDR3_RIL, reg))
3565 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3566 
3567 	/* IDR5 */
3568 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3569 
3570 	/* Maximum number of outstanding stalls */
3571 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3572 
3573 	/* Page sizes */
3574 	if (reg & IDR5_GRAN64K)
3575 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3576 	if (reg & IDR5_GRAN16K)
3577 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3578 	if (reg & IDR5_GRAN4K)
3579 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3580 
3581 	/* Input address size */
3582 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3583 		smmu->features |= ARM_SMMU_FEAT_VAX;
3584 
3585 	/* Output address size */
3586 	switch (FIELD_GET(IDR5_OAS, reg)) {
3587 	case IDR5_OAS_32_BIT:
3588 		smmu->oas = 32;
3589 		break;
3590 	case IDR5_OAS_36_BIT:
3591 		smmu->oas = 36;
3592 		break;
3593 	case IDR5_OAS_40_BIT:
3594 		smmu->oas = 40;
3595 		break;
3596 	case IDR5_OAS_42_BIT:
3597 		smmu->oas = 42;
3598 		break;
3599 	case IDR5_OAS_44_BIT:
3600 		smmu->oas = 44;
3601 		break;
3602 	case IDR5_OAS_52_BIT:
3603 		smmu->oas = 52;
3604 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3605 		break;
3606 	default:
3607 		dev_info(smmu->dev,
3608 			"unknown output address size. Truncating to 48-bit\n");
3609 		fallthrough;
3610 	case IDR5_OAS_48_BIT:
3611 		smmu->oas = 48;
3612 	}
3613 
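	/*
	 * Fold this SMMU's page sizes into the shared iommu_ops: the first
	 * instance probed replaces the -1UL placeholder outright, and any
	 * further instances OR their sizes in.
	 */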
3614 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3615 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3616 	else
3617 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3618 
3619 	/* Set the DMA mask for our table walker */
3620 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3621 		dev_warn(smmu->dev,
3622 			 "failed to set DMA mask for table walker\n");
3623 
3624 	smmu->ias = max(smmu->ias, smmu->oas);
3625 
3626 	if (arm_smmu_sva_supported(smmu))
3627 		smmu->features |= ARM_SMMU_FEAT_SVA;
3628 
3629 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3630 		 smmu->ias, smmu->oas, smmu->features);
3631 	return 0;
3632 }
3633 
3634 #ifdef CONFIG_ACPI
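/*
 * These IORT model IDs enable the same erratum workarounds that device-tree
 * based systems request via vendor-specific properties.
 */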
3635 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3636 {
3637 	switch (model) {
3638 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3639 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3640 		break;
3641 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3642 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3643 		break;
3644 	}
3645 
3646 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3647 }
3648 
3649 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3650 				      struct arm_smmu_device *smmu)
3651 {
3652 	struct acpi_iort_smmu_v3 *iort_smmu;
3653 	struct device *dev = smmu->dev;
3654 	struct acpi_iort_node *node;
3655 
3656 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3657 
3658 	/* Retrieve SMMUv3 specific data */
3659 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3660 
3661 	acpi_smmu_get_options(iort_smmu->model, smmu);
3662 
3663 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3664 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3665 
3666 	return 0;
3667 }
3668 #else
3669 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3670 					     struct arm_smmu_device *smmu)
3671 {
3672 	return -ENODEV;
3673 }
3674 #endif
3675 
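/*
 * Illustrative device-tree node (addresses made up) matching what this probe
 * path expects: an "arm,smmu-v3" compatible with a register window covering
 * both 64K pages and #iommu-cells = <1>:
 *
 *	smmu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		#iommu-cells = <1>;
 *		dma-coherent;
 *	};
 */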
3676 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3677 				    struct arm_smmu_device *smmu)
3678 {
3679 	struct device *dev = &pdev->dev;
3680 	u32 cells;
3681 	int ret = -EINVAL;
3682 
3683 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3684 		dev_err(dev, "missing #iommu-cells property\n");
3685 	else if (cells != 1)
		dev_err(dev, "invalid #iommu-cells value (%u)\n", cells);
3687 	else
3688 		ret = 0;
3689 
3690 	parse_driver_options(smmu);
3691 
3692 	if (of_dma_is_coherent(dev->of_node))
3693 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3694 
3695 	return ret;
3696 }
3697 
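/*
 * The architected register map spans two 64K pages; implementations with the
 * broken page 1 erratum expose everything through page 0 only.
 */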
3698 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3699 {
3700 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3701 		return SZ_64K;
3702 	else
3703 		return SZ_128K;
3704 }
3705 
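/*
 * Install (ops == &arm_smmu_ops) or tear down (ops == NULL) the IOMMU ops on
 * every bus type we can master, unwinding the buses done so far on failure.
 */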
3706 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3707 {
3708 	int err;
3709 
3710 #ifdef CONFIG_PCI
3711 	if (pci_bus_type.iommu_ops != ops) {
3712 		err = bus_set_iommu(&pci_bus_type, ops);
3713 		if (err)
3714 			return err;
3715 	}
3716 #endif
3717 #ifdef CONFIG_ARM_AMBA
3718 	if (amba_bustype.iommu_ops != ops) {
3719 		err = bus_set_iommu(&amba_bustype, ops);
3720 		if (err)
3721 			goto err_reset_pci_ops;
3722 	}
3723 #endif
3724 	if (platform_bus_type.iommu_ops != ops) {
3725 		err = bus_set_iommu(&platform_bus_type, ops);
3726 		if (err)
3727 			goto err_reset_amba_ops;
3728 	}
3729 
3730 	return 0;
3731 
3732 err_reset_amba_ops:
3733 #ifdef CONFIG_ARM_AMBA
3734 	bus_set_iommu(&amba_bustype, NULL);
3735 #endif
3736 err_reset_pci_ops: __maybe_unused;
3737 #ifdef CONFIG_PCI
3738 	bus_set_iommu(&pci_bus_type, NULL);
3739 #endif
3740 	return err;
3741 }
3742 
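/*
 * Map a sub-region of the SMMU's MMIO space. Going through
 * devm_ioremap_resource() also requests the memory region, so an overlapping
 * claim (e.g. by the PMCG driver) is caught rather than silently shared.
 */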
3743 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3744 				      resource_size_t size)
3745 {
3746 	struct resource res = DEFINE_RES_MEM(start, size);
3747 
3748 	return devm_ioremap_resource(dev, &res);
3749 }
3750 
3751 static int arm_smmu_device_probe(struct platform_device *pdev)
3752 {
3753 	int irq, ret;
3754 	struct resource *res;
3755 	resource_size_t ioaddr;
3756 	struct arm_smmu_device *smmu;
3757 	struct device *dev = &pdev->dev;
3758 	bool bypass;
3759 
3760 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3761 	if (!smmu)
3762 		return -ENOMEM;
3763 	smmu->dev = dev;
3764 
3765 	if (dev->of_node) {
3766 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3767 	} else {
3768 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3769 		if (ret == -ENODEV)
3770 			return ret;
3771 	}
3772 
3773 	/* Set bypass mode according to firmware probing result */
3774 	bypass = !!ret;
3775 
3776 	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3779 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3780 		return -EINVAL;
3781 	}
3782 	ioaddr = res->start;
3783 
3784 	/*
3785 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3786 	 * the PMCG registers which are reserved by the PMU driver.
3787 	 */
3788 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3789 	if (IS_ERR(smmu->base))
3790 		return PTR_ERR(smmu->base);
3791 
3792 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3793 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3794 					       ARM_SMMU_REG_SZ);
3795 		if (IS_ERR(smmu->page1))
3796 			return PTR_ERR(smmu->page1);
3797 	} else {
3798 		smmu->page1 = smmu->base;
3799 	}
3800 
3801 	/* Interrupt lines */
3802 
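	/*
	 * A single "combined" interrupt, if the firmware provides one, is used
	 * in place of the separate event queue, PRI queue and global error
	 * lines.
	 */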
	irq = platform_get_irq_byname_optional(pdev, "combined");
	if (irq > 0) {
		smmu->combined_irq = irq;
	} else {
3807 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3808 		if (irq > 0)
3809 			smmu->evtq.q.irq = irq;
3810 
3811 		irq = platform_get_irq_byname_optional(pdev, "priq");
3812 		if (irq > 0)
3813 			smmu->priq.q.irq = irq;
3814 
3815 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3816 		if (irq > 0)
3817 			smmu->gerr_irq = irq;
3818 	}
3819 	/* Probe the h/w */
3820 	ret = arm_smmu_device_hw_probe(smmu);
3821 	if (ret)
3822 		return ret;
3823 
3824 	/* Initialise in-memory data structures */
3825 	ret = arm_smmu_init_structures(smmu);
3826 	if (ret)
3827 		return ret;
3828 
3829 	/* Record our private device structure */
3830 	platform_set_drvdata(pdev, smmu);
3831 
3832 	/* Reset the device */
3833 	ret = arm_smmu_device_reset(smmu, bypass);
3834 	if (ret)
3835 		return ret;
3836 
3837 	/* And we're up. Go go go! */
3838 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3839 				     "smmu3.%pa", &ioaddr);
3840 	if (ret)
3841 		return ret;
3842 
3843 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3844 	if (ret) {
3845 		dev_err(dev, "Failed to register iommu\n");
3846 		goto err_sysfs_remove;
3847 	}
3848 
3849 	ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3850 	if (ret)
3851 		goto err_unregister_device;
3852 
3853 	return 0;
3854 
3855 err_unregister_device:
3856 	iommu_device_unregister(&smmu->iommu);
3857 err_sysfs_remove:
3858 	iommu_device_sysfs_remove(&smmu->iommu);
3859 	return ret;
3860 }
3861 
3862 static int arm_smmu_device_remove(struct platform_device *pdev)
3863 {
3864 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3865 
3866 	arm_smmu_set_bus_ops(NULL);
3867 	iommu_device_unregister(&smmu->iommu);
3868 	iommu_device_sysfs_remove(&smmu->iommu);
3869 	arm_smmu_device_disable(smmu);
3870 	iopf_queue_free(smmu->evtq.iopf);
3871 
3872 	return 0;
3873 }
3874 
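/*
 * On shutdown (e.g. ahead of kexec or reboot), tear the SMMU down so that it
 * is quiesced before whatever runs next takes over.
 */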
3875 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3876 {
3877 	arm_smmu_device_remove(pdev);
3878 }
3879 
3880 static const struct of_device_id arm_smmu_of_match[] = {
3881 	{ .compatible = "arm,smmu-v3", },
3882 	{ },
3883 };
3884 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3885 
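/*
 * Make sure any SVA mmu_notifier callbacks still in flight have completed
 * before the module text can go away.
 */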
3886 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3887 {
3888 	arm_smmu_sva_notifier_synchronize();
3889 	platform_driver_unregister(drv);
3890 }
3891 
3892 static struct platform_driver arm_smmu_driver = {
3893 	.driver	= {
3894 		.name			= "arm-smmu-v3",
3895 		.of_match_table		= arm_smmu_of_match,
3896 		.suppress_bind_attrs	= true,
3897 	},
3898 	.probe	= arm_smmu_device_probe,
3899 	.remove	= arm_smmu_device_remove,
3900 	.shutdown = arm_smmu_device_shutdown,
3901 };
3902 module_driver(arm_smmu_driver, platform_driver_register,
3903 	      arm_smmu_driver_unregister);
3904 
3905 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3906 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3907 MODULE_ALIAS("platform:arm-smmu-v3");
3908 MODULE_LICENSE("GPL v2");
3909