xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision 92481c7d14b8030418f00c4b4ec65556565d892d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_platform.h>
27 #include <linux/pci.h>
28 #include <linux/pci-ats.h>
29 #include <linux/platform_device.h>
30 
31 #include <linux/amba/bus.h>
32 
33 #include "arm-smmu-v3.h"
34 #include "../../iommu-sva-lib.h"
35 
/*
 * "disable_bypass" module parameter: a bool that defaults to true and is
 * exposed with mode 0444. It is only declared here; the code that consults
 * it lies outside this part of the file.
 */
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40 
/*
 * "disable_msipolling" module parameter: a bool with no initialiser (so it
 * starts out false), exposed with mode 0444.
 * NOTE(review): presumably this gates ARM_SMMU_OPT_MSIPOLL, which the
 * CMD_SYNC code below tests; the place where it is read is not in this
 * section -- confirm.
 */
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 	"Disable MSI-based polling for CMD_SYNC completion.");
45 
/*
 * Row indices of arm_smmu_msi_cfg[]. ARM_SMMU_MAX_MSIS is not an index
 * itself: it is the number of rows.
 */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX = 0,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
/*
 * Pairs a driver option bit with the device-tree property that turns it on;
 * see arm_smmu_options[] and parse_driver_options().
 */
71 struct arm_smmu_option_prop {
72 	u32 opt;		/* bit(s) OR-ed into smmu->options when @prop is present */
73 	const char *prop;	/* boolean property looked up on the device's of_node */
74 };
75 
/*
 * File-scope ASID bookkeeping objects. Neither definition is static, so both
 * symbols have external linkage.
 * NOTE(review): presumably arm_smmu_asid_lock serialises updates to
 * arm_smmu_asid_xa; none of their users are in this section -- confirm.
 */
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
 *
 * The object is zero-initialised and has external linkage (it is not
 * static).
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
84 
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88 	{ 0, NULL},
89 };
90 
91 static void parse_driver_options(struct arm_smmu_device *smmu)
92 {
93 	int i = 0;
94 
95 	do {
96 		if (of_property_read_bool(smmu->dev->of_node,
97 						arm_smmu_options[i].prop)) {
98 			smmu->options |= arm_smmu_options[i].opt;
99 			dev_notice(smmu->dev, "option %s\n",
100 				arm_smmu_options[i].prop);
101 		}
102 	} while (arm_smmu_options[++i].opt);
103 }
104 
105 /* Low-level queue manipulation functions */
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 {
108 	u32 space, prod, cons;
109 
110 	prod = Q_IDX(q, q->prod);
111 	cons = Q_IDX(q, q->cons);
112 
113 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114 		space = (1 << q->max_n_shift) - (prod - cons);
115 	else
116 		space = cons - prod;
117 
118 	return space >= n;
119 }
120 
121 static bool queue_full(struct arm_smmu_ll_queue *q)
122 {
123 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 }
126 
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 {
129 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 }
132 
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
134 {
135 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
139 }
140 
/*
 * Publish the shadow consumer value (q->llq.cons) to the queue's cons
 * register. The barrier must come before the register write; do not reorder
 * the two statements.
 */
141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
142 {
143 	/*
144 	 * Ensure that all CPU accesses (reads and writes) to the queue
145 	 * are complete before we update the cons pointer.
146 	 */
147 	__iomb();
148 	writel_relaxed(q->llq.cons, q->cons_reg);
149 }
150 
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 {
153 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 }
156 
157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
158 {
159 	u32 prod;
160 	int ret = 0;
161 
162 	/*
163 	 * We can't use the _relaxed() variant here, as we must prevent
164 	 * speculative reads of the queue before we have determined that
165 	 * prod has indeed moved.
166 	 */
167 	prod = readl(q->prod_reg);
168 
169 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
170 		ret = -EOVERFLOW;
171 
172 	q->llq.prod = prod;
173 	return ret;
174 }
175 
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
177 {
178 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
180 }
181 
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183 			    struct arm_smmu_queue_poll *qp)
184 {
185 	qp->delay = 1;
186 	qp->spin_cnt = 0;
187 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
189 }
190 
191 static int queue_poll(struct arm_smmu_queue_poll *qp)
192 {
193 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
194 		return -ETIMEDOUT;
195 
196 	if (qp->wfe) {
197 		wfe();
198 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
199 		cpu_relax();
200 	} else {
201 		udelay(qp->delay);
202 		qp->delay *= 2;
203 		qp->spin_cnt = 0;
204 	}
205 
206 	return 0;
207 }
208 
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
210 {
211 	int i;
212 
213 	for (i = 0; i < n_dwords; ++i)
214 		*dst++ = cpu_to_le64(*src++);
215 }
216 
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
218 {
219 	int i;
220 
221 	for (i = 0; i < n_dwords; ++i)
222 		*dst++ = le64_to_cpu(*src++);
223 }
224 
225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
226 {
227 	if (queue_empty(&q->llq))
228 		return -EAGAIN;
229 
230 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
231 	queue_inc_cons(&q->llq);
232 	queue_sync_cons_out(q);
233 	return 0;
234 }
235 
236 /* High-level queue accessors */
237 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
238 {
239 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
240 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
241 
242 	switch (ent->opcode) {
243 	case CMDQ_OP_TLBI_EL2_ALL:
244 	case CMDQ_OP_TLBI_NSNH_ALL:
245 		break;
246 	case CMDQ_OP_PREFETCH_CFG:
247 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
248 		break;
249 	case CMDQ_OP_CFGI_CD:
250 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
251 		fallthrough;
252 	case CMDQ_OP_CFGI_STE:
253 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
254 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
255 		break;
256 	case CMDQ_OP_CFGI_CD_ALL:
257 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
258 		break;
259 	case CMDQ_OP_CFGI_ALL:
260 		/* Cover the entire SID range */
261 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
262 		break;
263 	case CMDQ_OP_TLBI_NH_VA:
264 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
265 		fallthrough;
266 	case CMDQ_OP_TLBI_EL2_VA:
267 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
268 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
269 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
270 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
271 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
272 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
273 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
274 		break;
275 	case CMDQ_OP_TLBI_S2_IPA:
276 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
277 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
279 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
283 		break;
284 	case CMDQ_OP_TLBI_NH_ASID:
285 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
286 		fallthrough;
287 	case CMDQ_OP_TLBI_S12_VMALL:
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 		break;
290 	case CMDQ_OP_TLBI_EL2_ASID:
291 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
292 		break;
293 	case CMDQ_OP_ATC_INV:
294 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
295 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
296 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
297 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
298 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
299 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
300 		break;
301 	case CMDQ_OP_PRI_RESP:
302 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
303 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
304 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
305 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
306 		switch (ent->pri.resp) {
307 		case PRI_RESP_DENY:
308 		case PRI_RESP_FAIL:
309 		case PRI_RESP_SUCC:
310 			break;
311 		default:
312 			return -EINVAL;
313 		}
314 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
315 		break;
316 	case CMDQ_OP_RESUME:
317 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
318 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
319 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
320 		break;
321 	case CMDQ_OP_CMD_SYNC:
322 		if (ent->sync.msiaddr) {
323 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
324 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
325 		} else {
326 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
327 		}
328 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
329 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
330 		break;
331 	default:
332 		return -ENOENT;
333 	}
334 
335 	return 0;
336 }
337 
338 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
339 {
340 	return &smmu->cmdq;
341 }
342 
343 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
344 					 struct arm_smmu_queue *q, u32 prod)
345 {
346 	struct arm_smmu_cmdq_ent ent = {
347 		.opcode = CMDQ_OP_CMD_SYNC,
348 	};
349 
350 	/*
351 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
352 	 * payload, so the write will zero the entire command on that platform.
353 	 */
354 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
355 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
356 				   q->ent_dwords * 8;
357 	}
358 
359 	arm_smmu_cmdq_build_cmd(cmd, &ent);
360 }
361 
362 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
363 				     struct arm_smmu_queue *q)
364 {
365 	static const char * const cerror_str[] = {
366 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
367 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
368 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
369 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
370 	};
371 
372 	int i;
373 	u64 cmd[CMDQ_ENT_DWORDS];
374 	u32 cons = readl_relaxed(q->cons_reg);
375 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
376 	struct arm_smmu_cmdq_ent cmd_sync = {
377 		.opcode = CMDQ_OP_CMD_SYNC,
378 	};
379 
380 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
381 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
382 
383 	switch (idx) {
384 	case CMDQ_ERR_CERROR_ABT_IDX:
385 		dev_err(smmu->dev, "retrying command fetch\n");
386 		return;
387 	case CMDQ_ERR_CERROR_NONE_IDX:
388 		return;
389 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
390 		/*
391 		 * ATC Invalidation Completion timeout. CONS is still pointing
392 		 * at the CMD_SYNC. Attempt to complete other pending commands
393 		 * by repeating the CMD_SYNC, though we might well end up back
394 		 * here since the ATC invalidation may still be pending.
395 		 */
396 		return;
397 	case CMDQ_ERR_CERROR_ILL_IDX:
398 	default:
399 		break;
400 	}
401 
402 	/*
403 	 * We may have concurrent producers, so we need to be careful
404 	 * not to touch any of the shadow cmdq state.
405 	 */
406 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
407 	dev_err(smmu->dev, "skipping command in error state:\n");
408 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
409 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
410 
411 	/* Convert the erroneous command into a CMD_SYNC */
412 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
413 
414 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
415 }
416 
417 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
418 {
419 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
420 }
421 
422 /*
423  * Command queue locking.
424  * This is a form of bastardised rwlock with the following major changes:
425  *
426  * - The only LOCK routines are exclusive_trylock() and shared_lock().
427  *   Neither have barrier semantics, and instead provide only a control
428  *   dependency.
429  *
430  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
431  *   fails if the caller appears to be the last lock holder (yes, this is
432  *   racy). All successful UNLOCK routines have RELEASE semantics.
433  */
/*
 * Take the cmdq lock in shared mode.
 *
 * Fast path: increment the counter; if the value before the increment was
 * non-negative, no exclusive holder was present and the lock is ours.
 * Slow path: wait until the counter is observed non-negative, then try to
 * install val + 1 with cmpxchg, repeating until the exchange succeeds.
 * All atomics here are the _relaxed variants.
 */
434 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
435 {
436 	int val;
437 
438 	/*
439 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
440 	 * lock counter. When held in exclusive state, the lock counter is set
441 	 * to INT_MIN so these increments won't hurt as the value will remain
442 	 * negative.
443 	 */
444 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
445 		return;
446 
	/* Exclusive holder present: wait for it to leave, then retry. */
447 	do {
448 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
449 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
450 }
451 
/*
 * Drop one shared reference on the cmdq lock. The decrement has release
 * semantics; its return value is deliberately discarded.
 */
452 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
453 {
454 	(void)atomic_dec_return_release(&cmdq->lock);
455 }
456 
457 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
458 {
459 	if (atomic_read(&cmdq->lock) == 1)
460 		return false;
461 
462 	arm_smmu_cmdq_shared_unlock(cmdq);
463 	return true;
464 }
465 
/*
 * Try to take the cmdq lock exclusively, with local interrupts disabled.
 *
 * Interrupts are saved/disabled into @flags first, then the lock counter is
 * swapped from 0 to INT_MIN with a relaxed cmpxchg. Evaluates to true on
 * success, leaving interrupts disabled for the caller to restore via
 * arm_smmu_cmdq_exclusive_unlock_irqrestore(). On failure the interrupt
 * state is restored and the expression evaluates to false.
 *
 * @cmdq is expanded without parentheses, so pass a plain pointer expression.
 */
466 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
467 ({									\
468 	bool __ret;							\
469 	local_irq_save(flags);						\
470 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
471 	if (!__ret)							\
472 		local_irq_restore(flags);				\
473 	__ret;								\
474 })
475 
/*
 * Release the exclusive cmdq lock: reset the counter to 0 with release
 * semantics, then restore the interrupt state saved in @flags.
 *
 * @cmdq is expanded without parentheses, so pass a plain pointer expression.
 */
476 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
477 ({									\
478 	atomic_set_release(&cmdq->lock, 0);				\
479 	local_irq_restore(flags);					\
480 })
481 
482 
483 /*
484  * Command queue insertion.
485  * This is made fiddly by our attempts to achieve some sort of scalability
486  * since there is one queue shared amongst all of the CPUs in the system.  If
487  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
488  * then you'll *love* this monstrosity.
489  *
490  * The basic idea is to split the queue up into ranges of commands that are
491  * owned by a given CPU; the owner may not have written all of the commands
492  * itself, but is responsible for advancing the hardware prod pointer when
493  * the time comes. The algorithm is roughly:
494  *
495  * 	1. Allocate some space in the queue. At this point we also discover
496  *	   whether the head of the queue is currently owned by another CPU,
497  *	   or whether we are the owner.
498  *
499  *	2. Write our commands into our allocated slots in the queue.
500  *
501  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
502  *
503  *	4. If we are an owner:
504  *		a. Wait for the previous owner to finish.
505  *		b. Mark the queue head as unowned, which tells us the range
506  *		   that we are responsible for publishing.
507  *		c. Wait for all commands in our owned range to become valid.
508  *		d. Advance the hardware prod pointer.
509  *		e. Tell the next owner we've finished.
510  *
511  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
512  *	   owner), then we need to stick around until it has completed:
513  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
514  *		   to clear the first 4 bytes.
515  *		b. Otherwise, we spin waiting for the hardware cons pointer to
516  *		   advance past our command.
517  *
518  * The devil is in the details, particularly the use of locking for handling
519  * SYNC completion and freeing up space in the queue before we think that it is
520  * full.
521  */
/*
 * Walk cmdq->valid_map over the entries [sprod, eprod), one bitmap word
 * (BITS_PER_LONG entries) per iteration.
 *
 * @set == true:  toggle the bits of the range (atomic XOR).
 * @set == false: spin until every bit of the range holds the value expected
 *                for the current wrap state.
 *
 * A private arm_smmu_ll_queue copy is used as the cursor; the shared queue
 * state is not modified.
 */
522 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
523 					       u32 sprod, u32 eprod, bool set)
524 {
525 	u32 swidx, sbidx, ewidx, ebidx;
526 	struct arm_smmu_ll_queue llq = {
527 		.max_n_shift	= cmdq->q.llq.max_n_shift,
528 		.prod		= sprod,
529 	};
530 
	/* Bitmap word and bit-within-word of the (exclusive) end position */
531 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
532 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
533 
534 	while (llq.prod != eprod) {
535 		unsigned long mask;
536 		atomic_long_t *ptr;
537 		u32 limit = BITS_PER_LONG;
538 
		/* Bitmap word and bit-within-word of the cursor */
539 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
540 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
541 
542 		ptr = &cmdq->valid_map[swidx];
543 
		/*
		 * The range ends inside this word, after the cursor: stop at
		 * the end bit instead of the word boundary.
		 */
544 		if ((swidx == ewidx) && (sbidx < ebidx))
545 			limit = ebidx;
546 
547 		mask = GENMASK(limit - 1, sbidx);
548 
549 		/*
550 		 * The valid bit is the inverse of the wrap bit. This means
551 		 * that a zero-initialised queue is invalid and, after marking
552 		 * all entries as valid, they become invalid again when we
553 		 * wrap.
554 		 */
555 		if (set) {
556 			atomic_long_xor(mask, ptr);
557 		} else { /* Poll */
558 			unsigned long valid;
559 
			/*
			 * ULONG_MAX + 1 wraps to 0: expect all ones under the
			 * mask when the wrap bit is clear, all zeroes when it
			 * is set.
			 */
560 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
561 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
562 		}
563 
		/* Step the cursor past the bits handled in this word */
564 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
565 	}
566 }
567 
568 /* Mark all entries in the range [sprod, eprod) as valid */
569 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
570 					u32 sprod, u32 eprod)
571 {
572 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
573 }
574 
575 /* Wait for all entries in the range [sprod, eprod) to become valid */
576 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
577 					 u32 sprod, u32 eprod)
578 {
579 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
580 }
581 
582 /* Wait for the command queue to become non-full */
583 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
584 					     struct arm_smmu_ll_queue *llq)
585 {
586 	unsigned long flags;
587 	struct arm_smmu_queue_poll qp;
588 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
589 	int ret = 0;
590 
591 	/*
592 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
593 	 * that fails, spin until somebody else updates it for us.
594 	 */
595 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
596 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
597 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
598 		llq->val = READ_ONCE(cmdq->q.llq.val);
599 		return 0;
600 	}
601 
602 	queue_poll_init(smmu, &qp);
603 	do {
604 		llq->val = READ_ONCE(cmdq->q.llq.val);
605 		if (!queue_full(llq))
606 			break;
607 
608 		ret = queue_poll(&qp);
609 	} while (!ret);
610 
611 	return ret;
612 }
613 
614 /*
615  * Wait until the SMMU signals a CMD_SYNC completion MSI.
616  * Must be called with the cmdq lock held in some capacity.
 *
 * The first 32-bit word of the queue entry at llq->prod is polled until it
 * reads as zero or queue_poll() reports an error.
 *
 * On success llq->cons is set to llq->prod advanced by one entry and 0 is
 * returned. On failure llq->cons is set to llq->prod and the queue_poll()
 * error is returned.
617  */
618 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
619 					  struct arm_smmu_ll_queue *llq)
620 {
621 	int ret = 0;
622 	struct arm_smmu_queue_poll qp;
623 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
624 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
625 
626 	queue_poll_init(smmu, &qp);
627 
628 	/*
629 	 * The MSI won't generate an event, since it's being written back
630 	 * into the command queue.
631 	 */
632 	qp.wfe = false;
	/* The loop ends when *cmd == 0, or when queue_poll() fails (ret != 0) */
633 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
634 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
635 	return ret;
636 }
637 
638 /*
639  * Wait until the SMMU cons index passes llq->prod.
640  * Must be called with the cmdq lock held in some capacity.
 *
 * @llq is first overwritten with a snapshot of the shared queue state; the
 * original llq->prod is kept in a local as the position to wait for. Every
 * iteration that does not find the entry consumed re-reads the cons register
 * into llq->cons.
 *
 * Returns 0 once the entry has been consumed, or the queue_poll() error on
 * timeout.
641  */
642 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
643 					       struct arm_smmu_ll_queue *llq)
644 {
645 	struct arm_smmu_queue_poll qp;
646 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
647 	u32 prod = llq->prod;
648 	int ret = 0;
649 
650 	queue_poll_init(smmu, &qp);
651 	llq->val = READ_ONCE(cmdq->q.llq.val);
652 	do {
653 		if (queue_consumed(llq, prod))
654 			break;
655 
656 		ret = queue_poll(&qp);
657 
658 		/*
659 		 * This needs to be a readl() so that our subsequent call
660 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
661 		 *
662 		 * Specifically, we need to ensure that we observe all
663 		 * shared_lock()s by other CMD_SYNCs that share our owner,
664 		 * so that a failing call to tryunlock() means that we're
665 		 * the last one out and therefore we can safely advance
666 		 * cmdq->q.llq.cons. Roughly speaking:
667 		 *
668 		 * CPU 0		CPU1			CPU2 (us)
669 		 *
670 		 * if (sync)
671 		 * 	shared_lock();
672 		 *
673 		 * dma_wmb();
674 		 * set_valid_map();
675 		 *
676 		 * 			if (owner) {
677 		 *				poll_valid_map();
678 		 *				<control dependency>
679 		 *				writel(prod_reg);
680 		 *
681 		 *						readl(cons_reg);
682 		 *						tryunlock();
683 		 *
684 		 * Requires us to see CPU 0's shared_lock() acquisition.
685 		 */
686 		llq->cons = readl(cmdq->q.cons_reg);
687 	} while (!ret);
688 
689 	return ret;
690 }
691 
692 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
693 					 struct arm_smmu_ll_queue *llq)
694 {
695 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
696 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
697 
698 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
699 }
700 
701 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
702 					u32 prod, int n)
703 {
704 	int i;
705 	struct arm_smmu_ll_queue llq = {
706 		.max_n_shift	= cmdq->q.llq.max_n_shift,
707 		.prod		= prod,
708 	};
709 
710 	for (i = 0; i < n; ++i) {
711 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
712 
713 		prod = queue_inc_prod_n(&llq, i);
714 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
715 	}
716 }
717 
718 /*
719  * This is the actual insertion function, and provides the following
720  * ordering guarantees to callers:
721  *
722  * - There is a dma_wmb() before publishing any commands to the queue.
723  *   This can be relied upon to order prior writes to data structures
724  *   in memory (such as a CD or an STE) before the command.
725  *
726  * - On completion of a CMD_SYNC, there is a control dependency.
727  *   This can be relied upon to order subsequent writes to memory (e.g.
728  *   freeing an IOVA) after completion of the CMD_SYNC.
729  *
730  * - Command insertion is totally ordered, so if two CPUs each race to
731  *   insert their own list of commands then all of the commands from one
732  *   CPU will appear before any of the commands from the other CPU.
 *
 * @smmu: device whose command queue (arm_smmu_get_cmdq()) is used.
 * @cmds: @n pre-built commands, CMDQ_ENT_DWORDS 64-bit words each.
 * @n:    number of commands in @cmds.
 * @sync: when true, one extra slot is reserved for a CMD_SYNC placed after
 *        the @n commands, and the call waits for that CMD_SYNC to complete.
 *
 * Local interrupts are disabled for the duration of the insertion (they are
 * briefly re-enabled while waiting for queue space).
 *
 * Returns 0, or the error from arm_smmu_cmdq_poll_until_sync() if waiting
 * for the CMD_SYNC fails. A full queue is never reported to the caller: the
 * wait for space is simply retried, logging "CMDQ timeout" each time the
 * poll gives up.
733  */
734 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
735 				       u64 *cmds, int n, bool sync)
736 {
737 	u64 cmd_sync[CMDQ_ENT_DWORDS];
738 	u32 prod;
739 	unsigned long flags;
740 	bool owner;
741 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
742 	struct arm_smmu_ll_queue llq, head;
743 	int ret = 0;
744 
745 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
746 
747 	/* 1. Allocate some space in the queue */
748 	local_irq_save(flags);
749 	llq.val = READ_ONCE(cmdq->q.llq.val);
750 	do {
751 		u64 old;
752 
		/* @sync (0 or 1) accounts for the slot of the CMD_SYNC */
753 		while (!queue_has_space(&llq, n + sync)) {
754 			local_irq_restore(flags);
755 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
756 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
757 			local_irq_save(flags);
758 		}
759 
		/* Proposed new head: same cons, prod advanced and flagged owned */
760 		head.cons = llq.cons;
761 		head.prod = queue_inc_prod_n(&llq, n + sync) |
762 					     CMDQ_PROD_OWNED_FLAG;
763 
764 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
765 		if (old == llq.val)
766 			break;
767 
		/* Lost the race: retry from the value another CPU installed */
768 		llq.val = old;
769 	} while (1);
	/* We own the range if the flag was clear in the value we replaced */
770 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
771 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
772 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
773 
774 	/*
775 	 * 2. Write our commands into the queue
776 	 * Dependency ordering from the cmpxchg() loop above.
777 	 */
778 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
779 	if (sync) {
780 		prod = queue_inc_prod_n(&llq, n);
781 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
782 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
783 
784 		/*
785 		 * In order to determine completion of our CMD_SYNC, we must
786 		 * ensure that the queue can't wrap twice without us noticing.
787 		 * We achieve that by taking the cmdq lock as shared before
788 		 * marking our slot as valid.
789 		 */
790 		arm_smmu_cmdq_shared_lock(cmdq);
791 	}
792 
793 	/* 3. Mark our slots as valid, ensuring commands are visible first */
794 	dma_wmb();
795 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
796 
797 	/* 4. If we are the owner, take control of the SMMU hardware */
798 	if (owner) {
799 		/* a. Wait for previous owner to finish */
800 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
801 
802 		/* b. Stop gathering work by clearing the owned flag */
803 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
804 						   &cmdq->q.llq.atomic.prod);
805 		prod &= ~CMDQ_PROD_OWNED_FLAG;
806 
807 		/*
808 		 * c. Wait for any gathered work to be written to the queue.
809 		 * Note that we read our own entries so that we have the control
810 		 * dependency required by (d).
811 		 */
812 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
813 
814 		/*
815 		 * d. Advance the hardware prod pointer
816 		 * Control dependency ordering from the entries becoming valid.
817 		 */
818 		writel_relaxed(prod, cmdq->q.prod_reg);
819 
820 		/*
821 		 * e. Tell the next owner we're done
822 		 * Make sure we've updated the hardware first, so that we don't
823 		 * race to update prod and potentially move it backwards.
824 		 */
825 		atomic_set_release(&cmdq->owner_prod, prod);
826 	}
827 
828 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
829 	if (sync) {
		/* Point llq.prod at the slot of our CMD_SYNC */
830 		llq.prod = queue_inc_prod_n(&llq, n);
831 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
832 		if (ret) {
833 			dev_err_ratelimited(smmu->dev,
834 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
835 					    llq.prod,
836 					    readl_relaxed(cmdq->q.prod_reg),
837 					    readl_relaxed(cmdq->q.cons_reg));
838 		}
839 
840 		/*
841 		 * Try to unlock the cmdq lock. This will fail if we're the last
842 		 * reader, in which case we can safely update cmdq->q.llq.cons
843 		 */
844 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
845 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
846 			arm_smmu_cmdq_shared_unlock(cmdq);
847 		}
848 	}
849 
850 	local_irq_restore(flags);
851 	return ret;
852 }
853 
854 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
855 				     struct arm_smmu_cmdq_ent *ent,
856 				     bool sync)
857 {
858 	u64 cmd[CMDQ_ENT_DWORDS];
859 
860 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
861 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
862 			 ent->opcode);
863 		return -EINVAL;
864 	}
865 
866 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
867 }
868 
869 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
870 				   struct arm_smmu_cmdq_ent *ent)
871 {
872 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
873 }
874 
875 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
876 					     struct arm_smmu_cmdq_ent *ent)
877 {
878 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
879 }
880 
881 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
882 				    struct arm_smmu_cmdq_batch *cmds,
883 				    struct arm_smmu_cmdq_ent *cmd)
884 {
885 	int index;
886 
887 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
888 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
889 		cmds->num = 0;
890 	}
891 
892 	index = cmds->num * CMDQ_ENT_DWORDS;
893 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
894 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
895 			 cmd->opcode);
896 		return;
897 	}
898 
899 	cmds->num++;
900 }
901 
902 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
903 				      struct arm_smmu_cmdq_batch *cmds)
904 {
905 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
906 }
907 
908 static int arm_smmu_page_response(struct device *dev,
909 				  struct iommu_fault_event *unused,
910 				  struct iommu_page_response *resp)
911 {
912 	struct arm_smmu_cmdq_ent cmd = {0};
913 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
914 	int sid = master->streams[0].id;
915 
916 	if (master->stall_enabled) {
917 		cmd.opcode		= CMDQ_OP_RESUME;
918 		cmd.resume.sid		= sid;
919 		cmd.resume.stag		= resp->grpid;
920 		switch (resp->code) {
921 		case IOMMU_PAGE_RESP_INVALID:
922 		case IOMMU_PAGE_RESP_FAILURE:
923 			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
924 			break;
925 		case IOMMU_PAGE_RESP_SUCCESS:
926 			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
927 			break;
928 		default:
929 			return -EINVAL;
930 		}
931 	} else {
932 		return -ENODEV;
933 	}
934 
935 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
936 	/*
937 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
938 	 * RESUME consumption guarantees that the stalled transaction will be
939 	 * terminated... at some point in the future. PRI_RESP is fire and
940 	 * forget.
941 	 */
942 
943 	return 0;
944 }
945 
946 /* Context descriptor manipulation functions */
947 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
948 {
949 	struct arm_smmu_cmdq_ent cmd = {
950 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
951 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
952 		.tlbi.asid = asid,
953 	};
954 
955 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
956 }
957 
958 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
959 			     int ssid, bool leaf)
960 {
961 	size_t i;
962 	unsigned long flags;
963 	struct arm_smmu_master *master;
964 	struct arm_smmu_cmdq_batch cmds;
965 	struct arm_smmu_device *smmu = smmu_domain->smmu;
966 	struct arm_smmu_cmdq_ent cmd = {
967 		.opcode	= CMDQ_OP_CFGI_CD,
968 		.cfgi	= {
969 			.ssid	= ssid,
970 			.leaf	= leaf,
971 		},
972 	};
973 
974 	cmds.num = 0;
975 
976 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
977 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
978 		for (i = 0; i < master->num_streams; i++) {
979 			cmd.cfgi.sid = master->streams[i].id;
980 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
981 		}
982 	}
983 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
984 
985 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
986 }
987 
988 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
989 					struct arm_smmu_l1_ctx_desc *l1_desc)
990 {
991 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
992 
993 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
994 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
995 	if (!l1_desc->l2ptr) {
996 		dev_warn(smmu->dev,
997 			 "failed to allocate context descriptor table\n");
998 		return -ENOMEM;
999 	}
1000 	return 0;
1001 }
1002 
1003 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1004 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1005 {
1006 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1007 		  CTXDESC_L1_DESC_V;
1008 
1009 	/* See comment in arm_smmu_write_ctx_desc() */
1010 	WRITE_ONCE(*dst, cpu_to_le64(val));
1011 }
1012 
1013 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1014 				   u32 ssid)
1015 {
1016 	__le64 *l1ptr;
1017 	unsigned int idx;
1018 	struct arm_smmu_l1_ctx_desc *l1_desc;
1019 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1020 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1021 
1022 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1023 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1024 
1025 	idx = ssid >> CTXDESC_SPLIT;
1026 	l1_desc = &cdcfg->l1_desc[idx];
1027 	if (!l1_desc->l2ptr) {
1028 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1029 			return NULL;
1030 
1031 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1032 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1033 		/* An invalid L1CD can be cached */
1034 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1035 	}
1036 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1037 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1038 }
1039 
1040 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1041 			    struct arm_smmu_ctx_desc *cd)
1042 {
1043 	/*
1044 	 * This function handles the following cases:
1045 	 *
1046 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1047 	 * (2) Install a secondary CD, for SID+SSID traffic.
1048 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1049 	 *     CD, then invalidate the old entry and mappings.
1050 	 * (4) Quiesce the context without clearing the valid bit. Disable
1051 	 *     translation, and ignore any translation fault.
1052 	 * (5) Remove a secondary CD.
1053 	 */
1054 	u64 val;
1055 	bool cd_live;
1056 	__le64 *cdptr;
1057 
1058 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1059 		return -E2BIG;
1060 
1061 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1062 	if (!cdptr)
1063 		return -ENOMEM;
1064 
1065 	val = le64_to_cpu(cdptr[0]);
1066 	cd_live = !!(val & CTXDESC_CD_0_V);
1067 
1068 	if (!cd) { /* (5) */
1069 		val = 0;
1070 	} else if (cd == &quiet_cd) { /* (4) */
1071 		val |= CTXDESC_CD_0_TCR_EPD0;
1072 	} else if (cd_live) { /* (3) */
1073 		val &= ~CTXDESC_CD_0_ASID;
1074 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1075 		/*
1076 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1077 		 * this substream's traffic
1078 		 */
1079 	} else { /* (1) and (2) */
1080 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1081 		cdptr[2] = 0;
1082 		cdptr[3] = cpu_to_le64(cd->mair);
1083 
1084 		/*
1085 		 * STE is live, and the SMMU might read dwords of this CD in any
1086 		 * order. Ensure that it observes valid values before reading
1087 		 * V=1.
1088 		 */
1089 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1090 
1091 		val = cd->tcr |
1092 #ifdef __BIG_ENDIAN
1093 			CTXDESC_CD_0_ENDI |
1094 #endif
1095 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1096 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1097 			CTXDESC_CD_0_AA64 |
1098 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1099 			CTXDESC_CD_0_V;
1100 
1101 		if (smmu_domain->stall_enabled)
1102 			val |= CTXDESC_CD_0_S;
1103 	}
1104 
1105 	/*
1106 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1107 	 * "Configuration structures and configuration invalidation completion"
1108 	 *
1109 	 *   The size of single-copy atomic reads made by the SMMU is
1110 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1111 	 *   field within an aligned 64-bit span of a structure can be altered
1112 	 *   without first making the structure invalid.
1113 	 */
1114 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1115 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1116 	return 0;
1117 }
1118 
1119 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1120 {
1121 	int ret;
1122 	size_t l1size;
1123 	size_t max_contexts;
1124 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1125 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1126 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1127 
1128 	max_contexts = 1 << cfg->s1cdmax;
1129 
1130 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1131 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1132 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1133 		cdcfg->num_l1_ents = max_contexts;
1134 
1135 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1136 	} else {
1137 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1138 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1139 						  CTXDESC_L2_ENTRIES);
1140 
1141 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1142 					      sizeof(*cdcfg->l1_desc),
1143 					      GFP_KERNEL);
1144 		if (!cdcfg->l1_desc)
1145 			return -ENOMEM;
1146 
1147 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1148 	}
1149 
1150 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1151 					   GFP_KERNEL);
1152 	if (!cdcfg->cdtab) {
1153 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1154 		ret = -ENOMEM;
1155 		goto err_free_l1;
1156 	}
1157 
1158 	return 0;
1159 
1160 err_free_l1:
1161 	if (cdcfg->l1_desc) {
1162 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1163 		cdcfg->l1_desc = NULL;
1164 	}
1165 	return ret;
1166 }
1167 
1168 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1169 {
1170 	int i;
1171 	size_t size, l1size;
1172 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1173 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1174 
1175 	if (cdcfg->l1_desc) {
1176 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1177 
1178 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1179 			if (!cdcfg->l1_desc[i].l2ptr)
1180 				continue;
1181 
1182 			dmam_free_coherent(smmu->dev, size,
1183 					   cdcfg->l1_desc[i].l2ptr,
1184 					   cdcfg->l1_desc[i].l2ptr_dma);
1185 		}
1186 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1187 		cdcfg->l1_desc = NULL;
1188 
1189 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1190 	} else {
1191 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1192 	}
1193 
1194 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1195 	cdcfg->cdtab_dma = 0;
1196 	cdcfg->cdtab = NULL;
1197 }
1198 
1199 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1200 {
1201 	bool free;
1202 	struct arm_smmu_ctx_desc *old_cd;
1203 
1204 	if (!cd->asid)
1205 		return false;
1206 
1207 	free = refcount_dec_and_test(&cd->refs);
1208 	if (free) {
1209 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1210 		WARN_ON(old_cd != cd);
1211 	}
1212 	return free;
1213 }
1214 
1215 /* Stream table manipulation functions */
1216 static void
1217 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1218 {
1219 	u64 val = 0;
1220 
1221 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1222 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1223 
1224 	/* See comment in arm_smmu_write_ctx_desc() */
1225 	WRITE_ONCE(*dst, cpu_to_le64(val));
1226 }
1227 
1228 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1229 {
1230 	struct arm_smmu_cmdq_ent cmd = {
1231 		.opcode	= CMDQ_OP_CFGI_STE,
1232 		.cfgi	= {
1233 			.sid	= sid,
1234 			.leaf	= true,
1235 		},
1236 	};
1237 
1238 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1239 }
1240 
1241 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1242 				      __le64 *dst)
1243 {
1244 	/*
1245 	 * This is hideously complicated, but we only really care about
1246 	 * three cases at the moment:
1247 	 *
1248 	 * 1. Invalid (all zero) -> bypass/fault (init)
1249 	 * 2. Bypass/fault -> translation/bypass (attach)
1250 	 * 3. Translation/bypass -> bypass/fault (detach)
1251 	 *
1252 	 * Given that we can't update the STE atomically and the SMMU
1253 	 * doesn't read the thing in a defined order, that leaves us
1254 	 * with the following maintenance requirements:
1255 	 *
1256 	 * 1. Update Config, return (init time STEs aren't live)
1257 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1258 	 * 3. Update Config, sync
1259 	 */
1260 	u64 val = le64_to_cpu(dst[0]);
1261 	bool ste_live = false;
1262 	struct arm_smmu_device *smmu = NULL;
1263 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1264 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1265 	struct arm_smmu_domain *smmu_domain = NULL;
1266 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1267 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1268 		.prefetch	= {
1269 			.sid	= sid,
1270 		},
1271 	};
1272 
1273 	if (master) {
1274 		smmu_domain = master->domain;
1275 		smmu = master->smmu;
1276 	}
1277 
1278 	if (smmu_domain) {
1279 		switch (smmu_domain->stage) {
1280 		case ARM_SMMU_DOMAIN_S1:
1281 			s1_cfg = &smmu_domain->s1_cfg;
1282 			break;
1283 		case ARM_SMMU_DOMAIN_S2:
1284 		case ARM_SMMU_DOMAIN_NESTED:
1285 			s2_cfg = &smmu_domain->s2_cfg;
1286 			break;
1287 		default:
1288 			break;
1289 		}
1290 	}
1291 
1292 	if (val & STRTAB_STE_0_V) {
1293 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1294 		case STRTAB_STE_0_CFG_BYPASS:
1295 			break;
1296 		case STRTAB_STE_0_CFG_S1_TRANS:
1297 		case STRTAB_STE_0_CFG_S2_TRANS:
1298 			ste_live = true;
1299 			break;
1300 		case STRTAB_STE_0_CFG_ABORT:
1301 			BUG_ON(!disable_bypass);
1302 			break;
1303 		default:
1304 			BUG(); /* STE corruption */
1305 		}
1306 	}
1307 
1308 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1309 	val = STRTAB_STE_0_V;
1310 
1311 	/* Bypass/fault */
1312 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1313 		if (!smmu_domain && disable_bypass)
1314 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1315 		else
1316 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1317 
1318 		dst[0] = cpu_to_le64(val);
1319 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1320 						STRTAB_STE_1_SHCFG_INCOMING));
1321 		dst[2] = 0; /* Nuke the VMID */
1322 		/*
1323 		 * The SMMU can perform negative caching, so we must sync
1324 		 * the STE regardless of whether the old value was live.
1325 		 */
1326 		if (smmu)
1327 			arm_smmu_sync_ste_for_sid(smmu, sid);
1328 		return;
1329 	}
1330 
1331 	if (s1_cfg) {
1332 		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1333 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1334 
1335 		BUG_ON(ste_live);
1336 		dst[1] = cpu_to_le64(
1337 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1338 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1339 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1340 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1341 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1342 
1343 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1344 		    !master->stall_enabled)
1345 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1346 
1347 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1348 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1349 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1350 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1351 	}
1352 
1353 	if (s2_cfg) {
1354 		BUG_ON(ste_live);
1355 		dst[2] = cpu_to_le64(
1356 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1357 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1358 #ifdef __BIG_ENDIAN
1359 			 STRTAB_STE_2_S2ENDI |
1360 #endif
1361 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1362 			 STRTAB_STE_2_S2R);
1363 
1364 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1365 
1366 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1367 	}
1368 
1369 	if (master->ats_enabled)
1370 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1371 						 STRTAB_STE_1_EATS_TRANS));
1372 
1373 	arm_smmu_sync_ste_for_sid(smmu, sid);
1374 	/* See comment in arm_smmu_write_ctx_desc() */
1375 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1376 	arm_smmu_sync_ste_for_sid(smmu, sid);
1377 
1378 	/* It's likely that we'll want to use the new STE soon */
1379 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1380 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1381 }
1382 
1383 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
1384 {
1385 	unsigned int i;
1386 	u64 val = STRTAB_STE_0_V;
1387 
1388 	if (disable_bypass && !force)
1389 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1390 	else
1391 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1392 
1393 	for (i = 0; i < nent; ++i) {
1394 		strtab[0] = cpu_to_le64(val);
1395 		strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1396 						   STRTAB_STE_1_SHCFG_INCOMING));
1397 		strtab[2] = 0;
1398 		strtab += STRTAB_STE_DWORDS;
1399 	}
1400 }
1401 
1402 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1403 {
1404 	size_t size;
1405 	void *strtab;
1406 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1407 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1408 
1409 	if (desc->l2ptr)
1410 		return 0;
1411 
1412 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1413 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1414 
1415 	desc->span = STRTAB_SPLIT + 1;
1416 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1417 					  GFP_KERNEL);
1418 	if (!desc->l2ptr) {
1419 		dev_err(smmu->dev,
1420 			"failed to allocate l2 stream table for SID %u\n",
1421 			sid);
1422 		return -ENOMEM;
1423 	}
1424 
1425 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1426 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1427 	return 0;
1428 }
1429 
1430 static struct arm_smmu_master *
1431 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1432 {
1433 	struct rb_node *node;
1434 	struct arm_smmu_stream *stream;
1435 
1436 	lockdep_assert_held(&smmu->streams_mutex);
1437 
1438 	node = smmu->streams.rb_node;
1439 	while (node) {
1440 		stream = rb_entry(node, struct arm_smmu_stream, node);
1441 		if (stream->id < sid)
1442 			node = node->rb_right;
1443 		else if (stream->id > sid)
1444 			node = node->rb_left;
1445 		else
1446 			return stream->master;
1447 	}
1448 
1449 	return NULL;
1450 }
1451 
1452 /* IRQ and event handlers */
1453 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1454 {
1455 	int ret;
1456 	u32 reason;
1457 	u32 perm = 0;
1458 	struct arm_smmu_master *master;
1459 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1460 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1461 	struct iommu_fault_event fault_evt = { };
1462 	struct iommu_fault *flt = &fault_evt.fault;
1463 
1464 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1465 	case EVT_ID_TRANSLATION_FAULT:
1466 		reason = IOMMU_FAULT_REASON_PTE_FETCH;
1467 		break;
1468 	case EVT_ID_ADDR_SIZE_FAULT:
1469 		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1470 		break;
1471 	case EVT_ID_ACCESS_FAULT:
1472 		reason = IOMMU_FAULT_REASON_ACCESS;
1473 		break;
1474 	case EVT_ID_PERMISSION_FAULT:
1475 		reason = IOMMU_FAULT_REASON_PERMISSION;
1476 		break;
1477 	default:
1478 		return -EOPNOTSUPP;
1479 	}
1480 
1481 	/* Stage-2 is always pinned at the moment */
1482 	if (evt[1] & EVTQ_1_S2)
1483 		return -EFAULT;
1484 
1485 	if (evt[1] & EVTQ_1_RnW)
1486 		perm |= IOMMU_FAULT_PERM_READ;
1487 	else
1488 		perm |= IOMMU_FAULT_PERM_WRITE;
1489 
1490 	if (evt[1] & EVTQ_1_InD)
1491 		perm |= IOMMU_FAULT_PERM_EXEC;
1492 
1493 	if (evt[1] & EVTQ_1_PnU)
1494 		perm |= IOMMU_FAULT_PERM_PRIV;
1495 
1496 	if (evt[1] & EVTQ_1_STALL) {
1497 		flt->type = IOMMU_FAULT_PAGE_REQ;
1498 		flt->prm = (struct iommu_fault_page_request) {
1499 			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1500 			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1501 			.perm = perm,
1502 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1503 		};
1504 
1505 		if (ssid_valid) {
1506 			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1507 			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1508 		}
1509 	} else {
1510 		flt->type = IOMMU_FAULT_DMA_UNRECOV;
1511 		flt->event = (struct iommu_fault_unrecoverable) {
1512 			.reason = reason,
1513 			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1514 			.perm = perm,
1515 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1516 		};
1517 
1518 		if (ssid_valid) {
1519 			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1520 			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1521 		}
1522 	}
1523 
1524 	mutex_lock(&smmu->streams_mutex);
1525 	master = arm_smmu_find_master(smmu, sid);
1526 	if (!master) {
1527 		ret = -EINVAL;
1528 		goto out_unlock;
1529 	}
1530 
1531 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1532 	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1533 		/* Nobody cared, abort the access */
1534 		struct iommu_page_response resp = {
1535 			.pasid		= flt->prm.pasid,
1536 			.grpid		= flt->prm.grpid,
1537 			.code		= IOMMU_PAGE_RESP_FAILURE,
1538 		};
1539 		arm_smmu_page_response(master->dev, &fault_evt, &resp);
1540 	}
1541 
1542 out_unlock:
1543 	mutex_unlock(&smmu->streams_mutex);
1544 	return ret;
1545 }
1546 
1547 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1548 {
1549 	int i, ret;
1550 	struct arm_smmu_device *smmu = dev;
1551 	struct arm_smmu_queue *q = &smmu->evtq.q;
1552 	struct arm_smmu_ll_queue *llq = &q->llq;
1553 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1554 				      DEFAULT_RATELIMIT_BURST);
1555 	u64 evt[EVTQ_ENT_DWORDS];
1556 
1557 	do {
1558 		while (!queue_remove_raw(q, evt)) {
1559 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1560 
1561 			ret = arm_smmu_handle_evt(smmu, evt);
1562 			if (!ret || !__ratelimit(&rs))
1563 				continue;
1564 
1565 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1566 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1567 				dev_info(smmu->dev, "\t0x%016llx\n",
1568 					 (unsigned long long)evt[i]);
1569 
1570 			cond_resched();
1571 		}
1572 
1573 		/*
1574 		 * Not much we can do on overflow, so scream and pretend we're
1575 		 * trying harder.
1576 		 */
1577 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1578 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1579 	} while (!queue_empty(llq));
1580 
1581 	/* Sync our overflow flag, as we believe we're up to speed */
1582 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1583 		    Q_IDX(llq, llq->cons);
1584 	return IRQ_HANDLED;
1585 }
1586 
1587 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1588 {
1589 	u32 sid, ssid;
1590 	u16 grpid;
1591 	bool ssv, last;
1592 
1593 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1594 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1595 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1596 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1597 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1598 
1599 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1600 	dev_info(smmu->dev,
1601 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1602 		 sid, ssid, grpid, last ? "L" : "",
1603 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1604 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1605 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1606 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1607 		 evt[1] & PRIQ_1_ADDR_MASK);
1608 
1609 	if (last) {
1610 		struct arm_smmu_cmdq_ent cmd = {
1611 			.opcode			= CMDQ_OP_PRI_RESP,
1612 			.substream_valid	= ssv,
1613 			.pri			= {
1614 				.sid	= sid,
1615 				.ssid	= ssid,
1616 				.grpid	= grpid,
1617 				.resp	= PRI_RESP_DENY,
1618 			},
1619 		};
1620 
1621 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1622 	}
1623 }
1624 
1625 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1626 {
1627 	struct arm_smmu_device *smmu = dev;
1628 	struct arm_smmu_queue *q = &smmu->priq.q;
1629 	struct arm_smmu_ll_queue *llq = &q->llq;
1630 	u64 evt[PRIQ_ENT_DWORDS];
1631 
1632 	do {
1633 		while (!queue_remove_raw(q, evt))
1634 			arm_smmu_handle_ppr(smmu, evt);
1635 
1636 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1637 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1638 	} while (!queue_empty(llq));
1639 
1640 	/* Sync our overflow flag, as we believe we're up to speed */
1641 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1642 		      Q_IDX(llq, llq->cons);
1643 	queue_sync_cons_out(q);
1644 	return IRQ_HANDLED;
1645 }
1646 
1647 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1648 
1649 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1650 {
1651 	u32 gerror, gerrorn, active;
1652 	struct arm_smmu_device *smmu = dev;
1653 
1654 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1655 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1656 
1657 	active = gerror ^ gerrorn;
1658 	if (!(active & GERROR_ERR_MASK))
1659 		return IRQ_NONE; /* No errors pending */
1660 
1661 	dev_warn(smmu->dev,
1662 		 "unexpected global error reported (0x%08x), this could be serious\n",
1663 		 active);
1664 
1665 	if (active & GERROR_SFM_ERR) {
1666 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1667 		arm_smmu_device_disable(smmu);
1668 	}
1669 
1670 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1671 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1672 
1673 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1674 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1675 
1676 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1677 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1678 
1679 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1680 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1681 
1682 	if (active & GERROR_PRIQ_ABT_ERR)
1683 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1684 
1685 	if (active & GERROR_EVTQ_ABT_ERR)
1686 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1687 
1688 	if (active & GERROR_CMDQ_ERR)
1689 		arm_smmu_cmdq_skip_err(smmu);
1690 
1691 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1692 	return IRQ_HANDLED;
1693 }
1694 
1695 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1696 {
1697 	struct arm_smmu_device *smmu = dev;
1698 
1699 	arm_smmu_evtq_thread(irq, dev);
1700 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1701 		arm_smmu_priq_thread(irq, dev);
1702 
1703 	return IRQ_HANDLED;
1704 }
1705 
1706 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1707 {
1708 	arm_smmu_gerror_handler(irq, dev);
1709 	return IRQ_WAKE_THREAD;
1710 }
1711 
1712 static void
1713 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1714 			struct arm_smmu_cmdq_ent *cmd)
1715 {
1716 	size_t log2_span;
1717 	size_t span_mask;
1718 	/* ATC invalidates are always on 4096-bytes pages */
1719 	size_t inval_grain_shift = 12;
1720 	unsigned long page_start, page_end;
1721 
1722 	/*
1723 	 * ATS and PASID:
1724 	 *
1725 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1726 	 * prefix. In that case all ATC entries within the address range are
1727 	 * invalidated, including those that were requested with a PASID! There
1728 	 * is no way to invalidate only entries without PASID.
1729 	 *
1730 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1731 	 * traffic), translation requests without PASID create ATC entries
1732 	 * without PASID, which must be invalidated with substream_valid clear.
1733 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1734 	 * ATC entries within the address range.
1735 	 */
1736 	*cmd = (struct arm_smmu_cmdq_ent) {
1737 		.opcode			= CMDQ_OP_ATC_INV,
1738 		.substream_valid	= !!ssid,
1739 		.atc.ssid		= ssid,
1740 	};
1741 
1742 	if (!size) {
1743 		cmd->atc.size = ATC_INV_SIZE_ALL;
1744 		return;
1745 	}
1746 
1747 	page_start	= iova >> inval_grain_shift;
1748 	page_end	= (iova + size - 1) >> inval_grain_shift;
1749 
1750 	/*
1751 	 * In an ATS Invalidate Request, the address must be aligned on the
1752 	 * range size, which must be a power of two number of page sizes. We
1753 	 * thus have to choose between grossly over-invalidating the region, or
1754 	 * splitting the invalidation into multiple commands. For simplicity
1755 	 * we'll go with the first solution, but should refine it in the future
1756 	 * if multiple commands are shown to be more efficient.
1757 	 *
1758 	 * Find the smallest power of two that covers the range. The most
1759 	 * significant differing bit between the start and end addresses,
1760 	 * fls(start ^ end), indicates the required span. For example:
1761 	 *
1762 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1763 	 *		x = 0b1000 ^ 0b1011 = 0b11
1764 	 *		span = 1 << fls(x) = 4
1765 	 *
1766 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1767 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1768 	 *		span = 1 << fls(x) = 16
1769 	 */
1770 	log2_span	= fls_long(page_start ^ page_end);
1771 	span_mask	= (1ULL << log2_span) - 1;
1772 
1773 	page_start	&= ~span_mask;
1774 
1775 	cmd->atc.addr	= page_start << inval_grain_shift;
1776 	cmd->atc.size	= log2_span;
1777 }
1778 
1779 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1780 {
1781 	int i;
1782 	struct arm_smmu_cmdq_ent cmd;
1783 	struct arm_smmu_cmdq_batch cmds;
1784 
1785 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1786 
1787 	cmds.num = 0;
1788 	for (i = 0; i < master->num_streams; i++) {
1789 		cmd.atc.sid = master->streams[i].id;
1790 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1791 	}
1792 
1793 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1794 }
1795 
1796 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1797 			    unsigned long iova, size_t size)
1798 {
1799 	int i;
1800 	unsigned long flags;
1801 	struct arm_smmu_cmdq_ent cmd;
1802 	struct arm_smmu_master *master;
1803 	struct arm_smmu_cmdq_batch cmds;
1804 
1805 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1806 		return 0;
1807 
1808 	/*
1809 	 * Ensure that we've completed prior invalidation of the main TLBs
1810 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1811 	 * arm_smmu_enable_ats():
1812 	 *
1813 	 *	// unmap()			// arm_smmu_enable_ats()
1814 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1815 	 *	smp_mb();			[...]
1816 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1817 	 *
1818 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1819 	 * ATS was enabled at the PCI device before completion of the TLBI.
1820 	 */
1821 	smp_mb();
1822 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1823 		return 0;
1824 
1825 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1826 
1827 	cmds.num = 0;
1828 
1829 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1830 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1831 		if (!master->ats_enabled)
1832 			continue;
1833 
1834 		for (i = 0; i < master->num_streams; i++) {
1835 			cmd.atc.sid = master->streams[i].id;
1836 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1837 		}
1838 	}
1839 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1840 
1841 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1842 }
1843 
1844 /* IO_PGTABLE API */
1845 static void arm_smmu_tlb_inv_context(void *cookie)
1846 {
1847 	struct arm_smmu_domain *smmu_domain = cookie;
1848 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1849 	struct arm_smmu_cmdq_ent cmd;
1850 
1851 	/*
1852 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1853 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1854 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1855 	 * insertion to guarantee those are observed before the TLBI. Do be
1856 	 * careful, 007.
1857 	 */
1858 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1859 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1860 	} else {
1861 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1862 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1863 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1864 	}
1865 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1866 }
1867 
1868 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1869 				     unsigned long iova, size_t size,
1870 				     size_t granule,
1871 				     struct arm_smmu_domain *smmu_domain)
1872 {
1873 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1874 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1875 	size_t inv_range = granule;
1876 	struct arm_smmu_cmdq_batch cmds;
1877 
1878 	if (!size)
1879 		return;
1880 
1881 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1882 		/* Get the leaf page size */
1883 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1884 
1885 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1886 		cmd->tlbi.tg = (tg - 10) / 2;
1887 
1888 		/* Determine what level the granule is at */
1889 		cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1890 
1891 		num_pages = size >> tg;
1892 	}
1893 
1894 	cmds.num = 0;
1895 
1896 	while (iova < end) {
1897 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1898 			/*
1899 			 * On each iteration of the loop, the range is 5 bits
1900 			 * worth of the aligned size remaining.
1901 			 * The range in pages is:
1902 			 *
1903 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1904 			 */
1905 			unsigned long scale, num;
1906 
1907 			/* Determine the power of 2 multiple number of pages */
1908 			scale = __ffs(num_pages);
1909 			cmd->tlbi.scale = scale;
1910 
1911 			/* Determine how many chunks of 2^scale size we have */
1912 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1913 			cmd->tlbi.num = num - 1;
1914 
1915 			/* range is num * 2^scale * pgsize */
1916 			inv_range = num << (scale + tg);
1917 
1918 			/* Clear out the lower order bits for the next iteration */
1919 			num_pages -= num << scale;
1920 		}
1921 
1922 		cmd->tlbi.addr = iova;
1923 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1924 		iova += inv_range;
1925 	}
1926 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1927 }
1928 
1929 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1930 					  size_t granule, bool leaf,
1931 					  struct arm_smmu_domain *smmu_domain)
1932 {
1933 	struct arm_smmu_cmdq_ent cmd = {
1934 		.tlbi = {
1935 			.leaf	= leaf,
1936 		},
1937 	};
1938 
1939 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1940 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1941 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1942 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1943 	} else {
1944 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1945 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1946 	}
1947 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1948 
1949 	/*
1950 	 * Unfortunately, this can't be leaf-only since we may have
1951 	 * zapped an entire table.
1952 	 */
1953 	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1954 }
1955 
1956 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1957 				 size_t granule, bool leaf,
1958 				 struct arm_smmu_domain *smmu_domain)
1959 {
1960 	struct arm_smmu_cmdq_ent cmd = {
1961 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1962 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1963 		.tlbi = {
1964 			.asid	= asid,
1965 			.leaf	= leaf,
1966 		},
1967 	};
1968 
1969 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1970 }
1971 
1972 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1973 					 unsigned long iova, size_t granule,
1974 					 void *cookie)
1975 {
1976 	struct arm_smmu_domain *smmu_domain = cookie;
1977 	struct iommu_domain *domain = &smmu_domain->domain;
1978 
1979 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1980 }
1981 
1982 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1983 				  size_t granule, void *cookie)
1984 {
1985 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1986 }
1987 
1988 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1989 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1990 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1991 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1992 };
1993 
1994 /* IOMMU API */
1995 static bool arm_smmu_capable(enum iommu_cap cap)
1996 {
1997 	switch (cap) {
1998 	case IOMMU_CAP_CACHE_COHERENCY:
1999 		return true;
2000 	case IOMMU_CAP_NOEXEC:
2001 		return true;
2002 	default:
2003 		return false;
2004 	}
2005 }
2006 
2007 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2008 {
2009 	struct arm_smmu_domain *smmu_domain;
2010 
2011 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2012 	    type != IOMMU_DOMAIN_DMA &&
2013 	    type != IOMMU_DOMAIN_DMA_FQ &&
2014 	    type != IOMMU_DOMAIN_IDENTITY)
2015 		return NULL;
2016 
2017 	/*
2018 	 * Allocate the domain and initialise some of its data structures.
2019 	 * We can't really do anything meaningful until we've added a
2020 	 * master.
2021 	 */
2022 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2023 	if (!smmu_domain)
2024 		return NULL;
2025 
2026 	mutex_init(&smmu_domain->init_mutex);
2027 	INIT_LIST_HEAD(&smmu_domain->devices);
2028 	spin_lock_init(&smmu_domain->devices_lock);
2029 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2030 
2031 	return &smmu_domain->domain;
2032 }
2033 
2034 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2035 {
2036 	int idx, size = 1 << span;
2037 
2038 	do {
2039 		idx = find_first_zero_bit(map, size);
2040 		if (idx == size)
2041 			return -ENOSPC;
2042 	} while (test_and_set_bit(idx, map));
2043 
2044 	return idx;
2045 }
2046 
/* Release bit @idx of @map so that arm_smmu_bitmap_alloc() can reuse it */
static void arm_smmu_bitmap_free(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
2051 
2052 static void arm_smmu_domain_free(struct iommu_domain *domain)
2053 {
2054 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2055 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2056 
2057 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2058 
2059 	/* Free the CD and ASID, if we allocated them */
2060 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2061 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2062 
2063 		/* Prevent SVA from touching the CD while we're freeing it */
2064 		mutex_lock(&arm_smmu_asid_lock);
2065 		if (cfg->cdcfg.cdtab)
2066 			arm_smmu_free_cd_tables(smmu_domain);
2067 		arm_smmu_free_asid(&cfg->cd);
2068 		mutex_unlock(&arm_smmu_asid_lock);
2069 	} else {
2070 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2071 		if (cfg->vmid)
2072 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2073 	}
2074 
2075 	kfree(smmu_domain);
2076 }
2077 
2078 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2079 				       struct arm_smmu_master *master,
2080 				       struct io_pgtable_cfg *pgtbl_cfg)
2081 {
2082 	int ret;
2083 	u32 asid;
2084 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2085 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2086 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2087 
2088 	refcount_set(&cfg->cd.refs, 1);
2089 
2090 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2091 	mutex_lock(&arm_smmu_asid_lock);
2092 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2093 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2094 	if (ret)
2095 		goto out_unlock;
2096 
2097 	cfg->s1cdmax = master->ssid_bits;
2098 
2099 	smmu_domain->stall_enabled = master->stall_enabled;
2100 
2101 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2102 	if (ret)
2103 		goto out_free_asid;
2104 
2105 	cfg->cd.asid	= (u16)asid;
2106 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2107 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2108 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2109 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2110 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2111 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2112 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2113 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2114 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2115 
2116 	/*
2117 	 * Note that this will end up calling arm_smmu_sync_cd() before
2118 	 * the master has been added to the devices list for this domain.
2119 	 * This isn't an issue because the STE hasn't been installed yet.
2120 	 */
2121 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2122 	if (ret)
2123 		goto out_free_cd_tables;
2124 
2125 	mutex_unlock(&arm_smmu_asid_lock);
2126 	return 0;
2127 
2128 out_free_cd_tables:
2129 	arm_smmu_free_cd_tables(smmu_domain);
2130 out_free_asid:
2131 	arm_smmu_free_asid(&cfg->cd);
2132 out_unlock:
2133 	mutex_unlock(&arm_smmu_asid_lock);
2134 	return ret;
2135 }
2136 
2137 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2138 				       struct arm_smmu_master *master,
2139 				       struct io_pgtable_cfg *pgtbl_cfg)
2140 {
2141 	int vmid;
2142 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2143 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2144 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2145 
2146 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2147 	if (vmid < 0)
2148 		return vmid;
2149 
2150 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2151 	cfg->vmid	= (u16)vmid;
2152 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2153 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2154 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2155 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2156 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2157 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2158 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2159 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2160 	return 0;
2161 }
2162 
2163 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2164 				    struct arm_smmu_master *master)
2165 {
2166 	int ret;
2167 	unsigned long ias, oas;
2168 	enum io_pgtable_fmt fmt;
2169 	struct io_pgtable_cfg pgtbl_cfg;
2170 	struct io_pgtable_ops *pgtbl_ops;
2171 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2172 				 struct arm_smmu_master *,
2173 				 struct io_pgtable_cfg *);
2174 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2175 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2176 
2177 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2178 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2179 		return 0;
2180 	}
2181 
2182 	/* Restrict the stage to what we can actually support */
2183 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2184 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2185 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2186 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2187 
2188 	switch (smmu_domain->stage) {
2189 	case ARM_SMMU_DOMAIN_S1:
2190 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2191 		ias = min_t(unsigned long, ias, VA_BITS);
2192 		oas = smmu->ias;
2193 		fmt = ARM_64_LPAE_S1;
2194 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2195 		break;
2196 	case ARM_SMMU_DOMAIN_NESTED:
2197 	case ARM_SMMU_DOMAIN_S2:
2198 		ias = smmu->ias;
2199 		oas = smmu->oas;
2200 		fmt = ARM_64_LPAE_S2;
2201 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2202 		break;
2203 	default:
2204 		return -EINVAL;
2205 	}
2206 
2207 	pgtbl_cfg = (struct io_pgtable_cfg) {
2208 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2209 		.ias		= ias,
2210 		.oas		= oas,
2211 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2212 		.tlb		= &arm_smmu_flush_ops,
2213 		.iommu_dev	= smmu->dev,
2214 	};
2215 
2216 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2217 	if (!pgtbl_ops)
2218 		return -ENOMEM;
2219 
2220 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2221 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2222 	domain->geometry.force_aperture = true;
2223 
2224 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2225 	if (ret < 0) {
2226 		free_io_pgtable_ops(pgtbl_ops);
2227 		return ret;
2228 	}
2229 
2230 	smmu_domain->pgtbl_ops = pgtbl_ops;
2231 	return 0;
2232 }
2233 
2234 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2235 {
2236 	__le64 *step;
2237 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2238 
2239 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2240 		struct arm_smmu_strtab_l1_desc *l1_desc;
2241 		int idx;
2242 
2243 		/* Two-level walk */
2244 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2245 		l1_desc = &cfg->l1_desc[idx];
2246 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2247 		step = &l1_desc->l2ptr[idx];
2248 	} else {
2249 		/* Simple linear lookup */
2250 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2251 	}
2252 
2253 	return step;
2254 }
2255 
2256 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2257 {
2258 	int i, j;
2259 	struct arm_smmu_device *smmu = master->smmu;
2260 
2261 	for (i = 0; i < master->num_streams; ++i) {
2262 		u32 sid = master->streams[i].id;
2263 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2264 
2265 		/* Bridged PCI devices may end up with duplicated IDs */
2266 		for (j = 0; j < i; j++)
2267 			if (master->streams[j].id == sid)
2268 				break;
2269 		if (j < i)
2270 			continue;
2271 
2272 		arm_smmu_write_strtab_ent(master, sid, step);
2273 	}
2274 }
2275 
2276 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2277 {
2278 	struct device *dev = master->dev;
2279 	struct arm_smmu_device *smmu = master->smmu;
2280 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2281 
2282 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2283 		return false;
2284 
2285 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2286 		return false;
2287 
2288 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2289 }
2290 
2291 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2292 {
2293 	size_t stu;
2294 	struct pci_dev *pdev;
2295 	struct arm_smmu_device *smmu = master->smmu;
2296 	struct arm_smmu_domain *smmu_domain = master->domain;
2297 
2298 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2299 	if (!master->ats_enabled)
2300 		return;
2301 
2302 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2303 	stu = __ffs(smmu->pgsize_bitmap);
2304 	pdev = to_pci_dev(master->dev);
2305 
2306 	atomic_inc(&smmu_domain->nr_ats_masters);
2307 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2308 	if (pci_enable_ats(pdev, stu))
2309 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2310 }
2311 
2312 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2313 {
2314 	struct arm_smmu_domain *smmu_domain = master->domain;
2315 
2316 	if (!master->ats_enabled)
2317 		return;
2318 
2319 	pci_disable_ats(to_pci_dev(master->dev));
2320 	/*
2321 	 * Ensure ATS is disabled at the endpoint before we issue the
2322 	 * ATC invalidation via the SMMU.
2323 	 */
2324 	wmb();
2325 	arm_smmu_atc_inv_master(master);
2326 	atomic_dec(&smmu_domain->nr_ats_masters);
2327 }
2328 
2329 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2330 {
2331 	int ret;
2332 	int features;
2333 	int num_pasids;
2334 	struct pci_dev *pdev;
2335 
2336 	if (!dev_is_pci(master->dev))
2337 		return -ENODEV;
2338 
2339 	pdev = to_pci_dev(master->dev);
2340 
2341 	features = pci_pasid_features(pdev);
2342 	if (features < 0)
2343 		return features;
2344 
2345 	num_pasids = pci_max_pasids(pdev);
2346 	if (num_pasids <= 0)
2347 		return num_pasids;
2348 
2349 	ret = pci_enable_pasid(pdev, features);
2350 	if (ret) {
2351 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2352 		return ret;
2353 	}
2354 
2355 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2356 				  master->smmu->ssid_bits);
2357 	return 0;
2358 }
2359 
2360 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2361 {
2362 	struct pci_dev *pdev;
2363 
2364 	if (!dev_is_pci(master->dev))
2365 		return;
2366 
2367 	pdev = to_pci_dev(master->dev);
2368 
2369 	if (!pdev->pasid_enabled)
2370 		return;
2371 
2372 	master->ssid_bits = 0;
2373 	pci_disable_pasid(pdev);
2374 }
2375 
2376 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2377 {
2378 	unsigned long flags;
2379 	struct arm_smmu_domain *smmu_domain = master->domain;
2380 
2381 	if (!smmu_domain)
2382 		return;
2383 
2384 	arm_smmu_disable_ats(master);
2385 
2386 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2387 	list_del(&master->domain_head);
2388 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2389 
2390 	master->domain = NULL;
2391 	master->ats_enabled = false;
2392 	arm_smmu_install_ste_for_dev(master);
2393 }
2394 
2395 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2396 {
2397 	int ret = 0;
2398 	unsigned long flags;
2399 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2400 	struct arm_smmu_device *smmu;
2401 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2402 	struct arm_smmu_master *master;
2403 
2404 	if (!fwspec)
2405 		return -ENOENT;
2406 
2407 	master = dev_iommu_priv_get(dev);
2408 	smmu = master->smmu;
2409 
2410 	/*
2411 	 * Checking that SVA is disabled ensures that this device isn't bound to
2412 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2413 	 * be removed concurrently since we're holding the group mutex.
2414 	 */
2415 	if (arm_smmu_master_sva_enabled(master)) {
2416 		dev_err(dev, "cannot attach - SVA enabled\n");
2417 		return -EBUSY;
2418 	}
2419 
2420 	arm_smmu_detach_dev(master);
2421 
2422 	mutex_lock(&smmu_domain->init_mutex);
2423 
2424 	if (!smmu_domain->smmu) {
2425 		smmu_domain->smmu = smmu;
2426 		ret = arm_smmu_domain_finalise(domain, master);
2427 		if (ret) {
2428 			smmu_domain->smmu = NULL;
2429 			goto out_unlock;
2430 		}
2431 	} else if (smmu_domain->smmu != smmu) {
2432 		dev_err(dev,
2433 			"cannot attach to SMMU %s (upstream of %s)\n",
2434 			dev_name(smmu_domain->smmu->dev),
2435 			dev_name(smmu->dev));
2436 		ret = -ENXIO;
2437 		goto out_unlock;
2438 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2439 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2440 		dev_err(dev,
2441 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2442 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2443 		ret = -EINVAL;
2444 		goto out_unlock;
2445 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2446 		   smmu_domain->stall_enabled != master->stall_enabled) {
2447 		dev_err(dev, "cannot attach to stall-%s domain\n",
2448 			smmu_domain->stall_enabled ? "enabled" : "disabled");
2449 		ret = -EINVAL;
2450 		goto out_unlock;
2451 	}
2452 
2453 	master->domain = smmu_domain;
2454 
2455 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2456 		master->ats_enabled = arm_smmu_ats_supported(master);
2457 
2458 	arm_smmu_install_ste_for_dev(master);
2459 
2460 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2461 	list_add(&master->domain_head, &smmu_domain->devices);
2462 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2463 
2464 	arm_smmu_enable_ats(master);
2465 
2466 out_unlock:
2467 	mutex_unlock(&smmu_domain->init_mutex);
2468 	return ret;
2469 }
2470 
2471 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2472 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2473 			      int prot, gfp_t gfp, size_t *mapped)
2474 {
2475 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2476 
2477 	if (!ops)
2478 		return -ENODEV;
2479 
2480 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2481 }
2482 
2483 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2484 				   size_t pgsize, size_t pgcount,
2485 				   struct iommu_iotlb_gather *gather)
2486 {
2487 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2488 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2489 
2490 	if (!ops)
2491 		return 0;
2492 
2493 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2494 }
2495 
2496 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2497 {
2498 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2499 
2500 	if (smmu_domain->smmu)
2501 		arm_smmu_tlb_inv_context(smmu_domain);
2502 }
2503 
2504 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2505 				struct iommu_iotlb_gather *gather)
2506 {
2507 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2508 
2509 	if (!gather->pgsize)
2510 		return;
2511 
2512 	arm_smmu_tlb_inv_range_domain(gather->start,
2513 				      gather->end - gather->start + 1,
2514 				      gather->pgsize, true, smmu_domain);
2515 }
2516 
2517 static phys_addr_t
2518 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2519 {
2520 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2521 
2522 	if (!ops)
2523 		return 0;
2524 
2525 	return ops->iova_to_phys(ops, iova);
2526 }
2527 
2528 static struct platform_driver arm_smmu_driver;
2529 
2530 static
2531 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2532 {
2533 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2534 							  fwnode);
2535 	put_device(dev);
2536 	return dev ? dev_get_drvdata(dev) : NULL;
2537 }
2538 
2539 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2540 {
2541 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2542 
2543 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2544 		limit *= 1UL << STRTAB_SPLIT;
2545 
2546 	return sid < limit;
2547 }
2548 
2549 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2550 {
2551 	/* Check the SIDs are in range of the SMMU and our stream table */
2552 	if (!arm_smmu_sid_in_range(smmu, sid))
2553 		return -ERANGE;
2554 
2555 	/* Ensure l2 strtab is initialised */
2556 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2557 		return arm_smmu_init_l2_strtab(smmu, sid);
2558 
2559 	return 0;
2560 }
2561 
2562 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2563 				  struct arm_smmu_master *master)
2564 {
2565 	int i;
2566 	int ret = 0;
2567 	struct arm_smmu_stream *new_stream, *cur_stream;
2568 	struct rb_node **new_node, *parent_node = NULL;
2569 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2570 
2571 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2572 				  GFP_KERNEL);
2573 	if (!master->streams)
2574 		return -ENOMEM;
2575 	master->num_streams = fwspec->num_ids;
2576 
2577 	mutex_lock(&smmu->streams_mutex);
2578 	for (i = 0; i < fwspec->num_ids; i++) {
2579 		u32 sid = fwspec->ids[i];
2580 
2581 		new_stream = &master->streams[i];
2582 		new_stream->id = sid;
2583 		new_stream->master = master;
2584 
2585 		ret = arm_smmu_init_sid_strtab(smmu, sid);
2586 		if (ret)
2587 			break;
2588 
2589 		/* Insert into SID tree */
2590 		new_node = &(smmu->streams.rb_node);
2591 		while (*new_node) {
2592 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2593 					      node);
2594 			parent_node = *new_node;
2595 			if (cur_stream->id > new_stream->id) {
2596 				new_node = &((*new_node)->rb_left);
2597 			} else if (cur_stream->id < new_stream->id) {
2598 				new_node = &((*new_node)->rb_right);
2599 			} else {
2600 				dev_warn(master->dev,
2601 					 "stream %u already in tree\n",
2602 					 cur_stream->id);
2603 				ret = -EINVAL;
2604 				break;
2605 			}
2606 		}
2607 		if (ret)
2608 			break;
2609 
2610 		rb_link_node(&new_stream->node, parent_node, new_node);
2611 		rb_insert_color(&new_stream->node, &smmu->streams);
2612 	}
2613 
2614 	if (ret) {
2615 		for (i--; i >= 0; i--)
2616 			rb_erase(&master->streams[i].node, &smmu->streams);
2617 		kfree(master->streams);
2618 	}
2619 	mutex_unlock(&smmu->streams_mutex);
2620 
2621 	return ret;
2622 }
2623 
2624 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2625 {
2626 	int i;
2627 	struct arm_smmu_device *smmu = master->smmu;
2628 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2629 
2630 	if (!smmu || !master->streams)
2631 		return;
2632 
2633 	mutex_lock(&smmu->streams_mutex);
2634 	for (i = 0; i < fwspec->num_ids; i++)
2635 		rb_erase(&master->streams[i].node, &smmu->streams);
2636 	mutex_unlock(&smmu->streams_mutex);
2637 
2638 	kfree(master->streams);
2639 }
2640 
2641 static struct iommu_ops arm_smmu_ops;
2642 
2643 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2644 {
2645 	int ret;
2646 	struct arm_smmu_device *smmu;
2647 	struct arm_smmu_master *master;
2648 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2649 
2650 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2651 		return ERR_PTR(-ENODEV);
2652 
2653 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2654 		return ERR_PTR(-EBUSY);
2655 
2656 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2657 	if (!smmu)
2658 		return ERR_PTR(-ENODEV);
2659 
2660 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2661 	if (!master)
2662 		return ERR_PTR(-ENOMEM);
2663 
2664 	master->dev = dev;
2665 	master->smmu = smmu;
2666 	INIT_LIST_HEAD(&master->bonds);
2667 	dev_iommu_priv_set(dev, master);
2668 
2669 	ret = arm_smmu_insert_master(smmu, master);
2670 	if (ret)
2671 		goto err_free_master;
2672 
2673 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2674 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2675 
2676 	/*
2677 	 * Note that PASID must be enabled before, and disabled after ATS:
2678 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2679 	 *
2680 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2681 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2682 	 *   are changed.
2683 	 */
2684 	arm_smmu_enable_pasid(master);
2685 
2686 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2687 		master->ssid_bits = min_t(u8, master->ssid_bits,
2688 					  CTXDESC_LINEAR_CDMAX);
2689 
2690 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2691 	     device_property_read_bool(dev, "dma-can-stall")) ||
2692 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2693 		master->stall_enabled = true;
2694 
2695 	return &smmu->iommu;
2696 
2697 err_free_master:
2698 	kfree(master);
2699 	dev_iommu_priv_set(dev, NULL);
2700 	return ERR_PTR(ret);
2701 }
2702 
2703 static void arm_smmu_release_device(struct device *dev)
2704 {
2705 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2706 
2707 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2708 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2709 	arm_smmu_detach_dev(master);
2710 	arm_smmu_disable_pasid(master);
2711 	arm_smmu_remove_master(master);
2712 	kfree(master);
2713 }
2714 
/*
 * Pick the IOMMU group for @dev: the PCI grouping for PCI devices and a
 * generic per-device group for everything else.
 *
 * Devices sharing stream IDs are not supported, other than PCI RID aliases:
 * the ID-to-device lookup that would require is impractical given a
 * potentially sparse 32-bit stream ID space.
 */
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	return dev_is_pci(dev) ? pci_device_group(dev)
			       : generic_device_group(dev);
}
2731 
2732 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2733 {
2734 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2735 	int ret = 0;
2736 
2737 	mutex_lock(&smmu_domain->init_mutex);
2738 	if (smmu_domain->smmu)
2739 		ret = -EPERM;
2740 	else
2741 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2742 	mutex_unlock(&smmu_domain->init_mutex);
2743 
2744 	return ret;
2745 }
2746 
2747 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2748 {
2749 	return iommu_fwspec_add_ids(dev, args->args, 1);
2750 }
2751 
2752 static void arm_smmu_get_resv_regions(struct device *dev,
2753 				      struct list_head *head)
2754 {
2755 	struct iommu_resv_region *region;
2756 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2757 
2758 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2759 					 prot, IOMMU_RESV_SW_MSI);
2760 	if (!region)
2761 		return;
2762 
2763 	list_add_tail(&region->list, head);
2764 
2765 	iommu_dma_get_resv_regions(dev, head);
2766 }
2767 
2768 static int arm_smmu_dev_enable_feature(struct device *dev,
2769 				       enum iommu_dev_features feat)
2770 {
2771 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2772 
2773 	if (!master)
2774 		return -ENODEV;
2775 
2776 	switch (feat) {
2777 	case IOMMU_DEV_FEAT_IOPF:
2778 		if (!arm_smmu_master_iopf_supported(master))
2779 			return -EINVAL;
2780 		if (master->iopf_enabled)
2781 			return -EBUSY;
2782 		master->iopf_enabled = true;
2783 		return 0;
2784 	case IOMMU_DEV_FEAT_SVA:
2785 		if (!arm_smmu_master_sva_supported(master))
2786 			return -EINVAL;
2787 		if (arm_smmu_master_sva_enabled(master))
2788 			return -EBUSY;
2789 		return arm_smmu_master_enable_sva(master);
2790 	default:
2791 		return -EINVAL;
2792 	}
2793 }
2794 
2795 static int arm_smmu_dev_disable_feature(struct device *dev,
2796 					enum iommu_dev_features feat)
2797 {
2798 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2799 
2800 	if (!master)
2801 		return -EINVAL;
2802 
2803 	switch (feat) {
2804 	case IOMMU_DEV_FEAT_IOPF:
2805 		if (!master->iopf_enabled)
2806 			return -EINVAL;
2807 		if (master->sva_enabled)
2808 			return -EBUSY;
2809 		master->iopf_enabled = false;
2810 		return 0;
2811 	case IOMMU_DEV_FEAT_SVA:
2812 		if (!arm_smmu_master_sva_enabled(master))
2813 			return -EINVAL;
2814 		return arm_smmu_master_disable_sva(master);
2815 	default:
2816 		return -EINVAL;
2817 	}
2818 }
2819 
2820 static struct iommu_ops arm_smmu_ops = {
2821 	.capable		= arm_smmu_capable,
2822 	.domain_alloc		= arm_smmu_domain_alloc,
2823 	.probe_device		= arm_smmu_probe_device,
2824 	.release_device		= arm_smmu_release_device,
2825 	.device_group		= arm_smmu_device_group,
2826 	.of_xlate		= arm_smmu_of_xlate,
2827 	.get_resv_regions	= arm_smmu_get_resv_regions,
2828 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2829 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2830 	.sva_bind		= arm_smmu_sva_bind,
2831 	.sva_unbind		= arm_smmu_sva_unbind,
2832 	.sva_get_pasid		= arm_smmu_sva_get_pasid,
2833 	.page_response		= arm_smmu_page_response,
2834 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2835 	.owner			= THIS_MODULE,
2836 	.default_domain_ops = &(const struct iommu_domain_ops) {
2837 		.attach_dev		= arm_smmu_attach_dev,
2838 		.map_pages		= arm_smmu_map_pages,
2839 		.unmap_pages		= arm_smmu_unmap_pages,
2840 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2841 		.iotlb_sync		= arm_smmu_iotlb_sync,
2842 		.iova_to_phys		= arm_smmu_iova_to_phys,
2843 		.enable_nesting		= arm_smmu_enable_nesting,
2844 		.free			= arm_smmu_domain_free,
2845 	}
2846 };
2847 
2848 /* Probing and initialisation functions */
2849 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2850 				   struct arm_smmu_queue *q,
2851 				   void __iomem *page,
2852 				   unsigned long prod_off,
2853 				   unsigned long cons_off,
2854 				   size_t dwords, const char *name)
2855 {
2856 	size_t qsz;
2857 
2858 	do {
2859 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2860 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2861 					      GFP_KERNEL);
2862 		if (q->base || qsz < PAGE_SIZE)
2863 			break;
2864 
2865 		q->llq.max_n_shift--;
2866 	} while (1);
2867 
2868 	if (!q->base) {
2869 		dev_err(smmu->dev,
2870 			"failed to allocate queue (0x%zx bytes) for %s\n",
2871 			qsz, name);
2872 		return -ENOMEM;
2873 	}
2874 
2875 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2876 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2877 			 1 << q->llq.max_n_shift, name);
2878 	}
2879 
2880 	q->prod_reg	= page + prod_off;
2881 	q->cons_reg	= page + cons_off;
2882 	q->ent_dwords	= dwords;
2883 
2884 	q->q_base  = Q_BASE_RWA;
2885 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2886 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2887 
2888 	q->llq.prod = q->llq.cons = 0;
2889 	return 0;
2890 }
2891 
2892 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2893 {
2894 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2895 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2896 
2897 	atomic_set(&cmdq->owner_prod, 0);
2898 	atomic_set(&cmdq->lock, 0);
2899 
2900 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2901 							      GFP_KERNEL);
2902 	if (!cmdq->valid_map)
2903 		return -ENOMEM;
2904 
2905 	return 0;
2906 }
2907 
2908 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2909 {
2910 	int ret;
2911 
2912 	/* cmdq */
2913 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2914 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2915 				      CMDQ_ENT_DWORDS, "cmdq");
2916 	if (ret)
2917 		return ret;
2918 
2919 	ret = arm_smmu_cmdq_init(smmu);
2920 	if (ret)
2921 		return ret;
2922 
2923 	/* evtq */
2924 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2925 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2926 				      EVTQ_ENT_DWORDS, "evtq");
2927 	if (ret)
2928 		return ret;
2929 
2930 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2931 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2932 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2933 		if (!smmu->evtq.iopf)
2934 			return -ENOMEM;
2935 	}
2936 
2937 	/* priq */
2938 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2939 		return 0;
2940 
2941 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2942 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2943 				       PRIQ_ENT_DWORDS, "priq");
2944 }
2945 
2946 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2947 {
2948 	unsigned int i;
2949 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2950 	void *strtab = smmu->strtab_cfg.strtab;
2951 
2952 	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
2953 				    sizeof(*cfg->l1_desc), GFP_KERNEL);
2954 	if (!cfg->l1_desc)
2955 		return -ENOMEM;
2956 
2957 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2958 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2959 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2960 	}
2961 
2962 	return 0;
2963 }
2964 
2965 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2966 {
2967 	void *strtab;
2968 	u64 reg;
2969 	u32 size, l1size;
2970 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2971 
2972 	/* Calculate the L1 size, capped to the SIDSIZE. */
2973 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2974 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2975 	cfg->num_l1_ents = 1 << size;
2976 
2977 	size += STRTAB_SPLIT;
2978 	if (size < smmu->sid_bits)
2979 		dev_warn(smmu->dev,
2980 			 "2-level strtab only covers %u/%u bits of SID\n",
2981 			 size, smmu->sid_bits);
2982 
2983 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2984 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2985 				     GFP_KERNEL);
2986 	if (!strtab) {
2987 		dev_err(smmu->dev,
2988 			"failed to allocate l1 stream table (%u bytes)\n",
2989 			l1size);
2990 		return -ENOMEM;
2991 	}
2992 	cfg->strtab = strtab;
2993 
2994 	/* Configure strtab_base_cfg for 2 levels */
2995 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2996 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2997 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2998 	cfg->strtab_base_cfg = reg;
2999 
3000 	return arm_smmu_init_l1_strtab(smmu);
3001 }
3002 
3003 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3004 {
3005 	void *strtab;
3006 	u64 reg;
3007 	u32 size;
3008 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3009 
3010 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3011 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3012 				     GFP_KERNEL);
3013 	if (!strtab) {
3014 		dev_err(smmu->dev,
3015 			"failed to allocate linear stream table (%u bytes)\n",
3016 			size);
3017 		return -ENOMEM;
3018 	}
3019 	cfg->strtab = strtab;
3020 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3021 
3022 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3023 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3024 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3025 	cfg->strtab_base_cfg = reg;
3026 
3027 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
3028 	return 0;
3029 }
3030 
3031 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3032 {
3033 	u64 reg;
3034 	int ret;
3035 
3036 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3037 		ret = arm_smmu_init_strtab_2lvl(smmu);
3038 	else
3039 		ret = arm_smmu_init_strtab_linear(smmu);
3040 
3041 	if (ret)
3042 		return ret;
3043 
3044 	/* Set the strtab base address */
3045 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3046 	reg |= STRTAB_BASE_RA;
3047 	smmu->strtab_cfg.strtab_base = reg;
3048 
3049 	/* Allocate the first VMID for stage-2 bypass STEs */
3050 	set_bit(0, smmu->vmid_map);
3051 	return 0;
3052 }
3053 
3054 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3055 {
3056 	int ret;
3057 
3058 	mutex_init(&smmu->streams_mutex);
3059 	smmu->streams = RB_ROOT;
3060 
3061 	ret = arm_smmu_init_queues(smmu);
3062 	if (ret)
3063 		return ret;
3064 
3065 	return arm_smmu_init_strtab(smmu);
3066 }
3067 
3068 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3069 				   unsigned int reg_off, unsigned int ack_off)
3070 {
3071 	u32 reg;
3072 
3073 	writel_relaxed(val, smmu->base + reg_off);
3074 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3075 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3076 }
3077 
3078 /* GBPA is "special" */
3079 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3080 {
3081 	int ret;
3082 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3083 
3084 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3085 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3086 	if (ret)
3087 		return ret;
3088 
3089 	reg &= ~clr;
3090 	reg |= set;
3091 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3092 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3093 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3094 
3095 	if (ret)
3096 		dev_err(smmu->dev, "GBPA not responding to update\n");
3097 	return ret;
3098 }
3099 
/*
 * devres action callback: @data is the struct device whose platform MSIs
 * are to be released.
 */
static void arm_smmu_free_msis(void *data)
{
	platform_msi_domain_free_irqs((struct device *)data);
}
3105 
3106 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3107 {
3108 	phys_addr_t doorbell;
3109 	struct device *dev = msi_desc_to_dev(desc);
3110 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3111 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3112 
3113 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3114 	doorbell &= MSI_CFG0_ADDR_MASK;
3115 
3116 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3117 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3118 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3119 }
3120 
3121 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3122 {
3123 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3124 	struct device *dev = smmu->dev;
3125 
3126 	/* Clear the MSI address regs */
3127 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3128 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3129 
3130 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3131 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3132 	else
3133 		nvec--;
3134 
3135 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3136 		return;
3137 
3138 	if (!dev->msi.domain) {
3139 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3140 		return;
3141 	}
3142 
3143 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3144 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3145 	if (ret) {
3146 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3147 		return;
3148 	}
3149 
3150 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3151 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3152 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3153 
3154 	/* Add callback to free MSIs on teardown */
3155 	devm_add_action(dev, arm_smmu_free_msis, dev);
3156 }
3157 
3158 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3159 {
3160 	int irq, ret;
3161 
3162 	arm_smmu_setup_msis(smmu);
3163 
3164 	/* Request interrupt lines */
3165 	irq = smmu->evtq.q.irq;
3166 	if (irq) {
3167 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3168 						arm_smmu_evtq_thread,
3169 						IRQF_ONESHOT,
3170 						"arm-smmu-v3-evtq", smmu);
3171 		if (ret < 0)
3172 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3173 	} else {
3174 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3175 	}
3176 
3177 	irq = smmu->gerr_irq;
3178 	if (irq) {
3179 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3180 				       0, "arm-smmu-v3-gerror", smmu);
3181 		if (ret < 0)
3182 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3183 	} else {
3184 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3185 	}
3186 
3187 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3188 		irq = smmu->priq.q.irq;
3189 		if (irq) {
3190 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3191 							arm_smmu_priq_thread,
3192 							IRQF_ONESHOT,
3193 							"arm-smmu-v3-priq",
3194 							smmu);
3195 			if (ret < 0)
3196 				dev_warn(smmu->dev,
3197 					 "failed to enable priq irq\n");
3198 		} else {
3199 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3200 		}
3201 	}
3202 }
3203 
3204 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3205 {
3206 	int ret, irq;
3207 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3208 
3209 	/* Disable IRQs first */
3210 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3211 				      ARM_SMMU_IRQ_CTRLACK);
3212 	if (ret) {
3213 		dev_err(smmu->dev, "failed to disable irqs\n");
3214 		return ret;
3215 	}
3216 
3217 	irq = smmu->combined_irq;
3218 	if (irq) {
3219 		/*
3220 		 * Cavium ThunderX2 implementation doesn't support unique irq
3221 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3222 		 */
3223 		ret = devm_request_threaded_irq(smmu->dev, irq,
3224 					arm_smmu_combined_irq_handler,
3225 					arm_smmu_combined_irq_thread,
3226 					IRQF_ONESHOT,
3227 					"arm-smmu-v3-combined-irq", smmu);
3228 		if (ret < 0)
3229 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3230 	} else
3231 		arm_smmu_setup_unique_irqs(smmu);
3232 
3233 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3234 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3235 
3236 	/* Enable interrupt generation on the SMMU */
3237 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3238 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3239 	if (ret)
3240 		dev_warn(smmu->dev, "failed to enable irqs\n");
3241 
3242 	return 0;
3243 }
3244 
3245 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3246 {
3247 	int ret;
3248 
3249 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3250 	if (ret)
3251 		dev_err(smmu->dev, "failed to clear cr0\n");
3252 
3253 	return ret;
3254 }
3255 
3256 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3257 {
3258 	int ret;
3259 	u32 reg, enables;
3260 	struct arm_smmu_cmdq_ent cmd;
3261 
3262 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3263 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3264 	if (reg & CR0_SMMUEN) {
3265 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3266 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3267 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3268 	}
3269 
3270 	ret = arm_smmu_device_disable(smmu);
3271 	if (ret)
3272 		return ret;
3273 
3274 	/* CR1 (table and queue memory attributes) */
3275 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3276 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3277 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3278 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3279 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3280 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3281 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3282 
3283 	/* CR2 (random crap) */
3284 	reg = CR2_PTM | CR2_RECINVSID;
3285 
3286 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3287 		reg |= CR2_E2H;
3288 
3289 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3290 
3291 	/* Stream table */
3292 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3293 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3294 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3295 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3296 
3297 	/* Command queue */
3298 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3299 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3300 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3301 
3302 	enables = CR0_CMDQEN;
3303 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3304 				      ARM_SMMU_CR0ACK);
3305 	if (ret) {
3306 		dev_err(smmu->dev, "failed to enable command queue\n");
3307 		return ret;
3308 	}
3309 
3310 	/* Invalidate any cached configuration */
3311 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3312 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3313 
3314 	/* Invalidate any stale TLB entries */
3315 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3316 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3317 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3318 	}
3319 
3320 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3321 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3322 
3323 	/* Event queue */
3324 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3325 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3326 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3327 
3328 	enables |= CR0_EVTQEN;
3329 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3330 				      ARM_SMMU_CR0ACK);
3331 	if (ret) {
3332 		dev_err(smmu->dev, "failed to enable event queue\n");
3333 		return ret;
3334 	}
3335 
3336 	/* PRI queue */
3337 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3338 		writeq_relaxed(smmu->priq.q.q_base,
3339 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3340 		writel_relaxed(smmu->priq.q.llq.prod,
3341 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3342 		writel_relaxed(smmu->priq.q.llq.cons,
3343 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3344 
3345 		enables |= CR0_PRIQEN;
3346 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3347 					      ARM_SMMU_CR0ACK);
3348 		if (ret) {
3349 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3350 			return ret;
3351 		}
3352 	}
3353 
3354 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3355 		enables |= CR0_ATSCHK;
3356 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3357 					      ARM_SMMU_CR0ACK);
3358 		if (ret) {
3359 			dev_err(smmu->dev, "failed to enable ATS check\n");
3360 			return ret;
3361 		}
3362 	}
3363 
3364 	ret = arm_smmu_setup_irqs(smmu);
3365 	if (ret) {
3366 		dev_err(smmu->dev, "failed to setup irqs\n");
3367 		return ret;
3368 	}
3369 
3370 	if (is_kdump_kernel())
3371 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3372 
3373 	/* Enable the SMMU interface, or ensure bypass */
3374 	if (!bypass || disable_bypass) {
3375 		enables |= CR0_SMMUEN;
3376 	} else {
3377 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3378 		if (ret)
3379 			return ret;
3380 	}
3381 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3382 				      ARM_SMMU_CR0ACK);
3383 	if (ret) {
3384 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3385 		return ret;
3386 	}
3387 
3388 	return 0;
3389 }
3390 
3391 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3392 {
3393 	u32 reg;
3394 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3395 
3396 	/* IDR0 */
3397 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3398 
3399 	/* 2-level structures */
3400 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3401 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3402 
3403 	if (reg & IDR0_CD2L)
3404 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3405 
3406 	/*
3407 	 * Translation table endianness.
3408 	 * We currently require the same endianness as the CPU, but this
3409 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3410 	 */
3411 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3412 	case IDR0_TTENDIAN_MIXED:
3413 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3414 		break;
3415 #ifdef __BIG_ENDIAN
3416 	case IDR0_TTENDIAN_BE:
3417 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3418 		break;
3419 #else
3420 	case IDR0_TTENDIAN_LE:
3421 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3422 		break;
3423 #endif
3424 	default:
3425 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3426 		return -ENXIO;
3427 	}
3428 
3429 	/* Boolean feature flags */
3430 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3431 		smmu->features |= ARM_SMMU_FEAT_PRI;
3432 
3433 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3434 		smmu->features |= ARM_SMMU_FEAT_ATS;
3435 
3436 	if (reg & IDR0_SEV)
3437 		smmu->features |= ARM_SMMU_FEAT_SEV;
3438 
3439 	if (reg & IDR0_MSI) {
3440 		smmu->features |= ARM_SMMU_FEAT_MSI;
3441 		if (coherent && !disable_msipolling)
3442 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3443 	}
3444 
3445 	if (reg & IDR0_HYP) {
3446 		smmu->features |= ARM_SMMU_FEAT_HYP;
3447 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3448 			smmu->features |= ARM_SMMU_FEAT_E2H;
3449 	}
3450 
3451 	/*
3452 	 * The coherency feature as set by FW is used in preference to the ID
3453 	 * register, but warn on mismatch.
3454 	 */
3455 	if (!!(reg & IDR0_COHACC) != coherent)
3456 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3457 			 coherent ? "true" : "false");
3458 
3459 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3460 	case IDR0_STALL_MODEL_FORCE:
3461 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3462 		fallthrough;
3463 	case IDR0_STALL_MODEL_STALL:
3464 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3465 	}
3466 
3467 	if (reg & IDR0_S1P)
3468 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3469 
3470 	if (reg & IDR0_S2P)
3471 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3472 
3473 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3474 		dev_err(smmu->dev, "no translation support!\n");
3475 		return -ENXIO;
3476 	}
3477 
3478 	/* We only support the AArch64 table format at present */
3479 	switch (FIELD_GET(IDR0_TTF, reg)) {
3480 	case IDR0_TTF_AARCH32_64:
3481 		smmu->ias = 40;
3482 		fallthrough;
3483 	case IDR0_TTF_AARCH64:
3484 		break;
3485 	default:
3486 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3487 		return -ENXIO;
3488 	}
3489 
3490 	/* ASID/VMID sizes */
3491 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3492 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3493 
3494 	/* IDR1 */
3495 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3496 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3497 		dev_err(smmu->dev, "embedded implementation not supported\n");
3498 		return -ENXIO;
3499 	}
3500 
3501 	/* Queue sizes, capped to ensure natural alignment */
3502 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3503 					     FIELD_GET(IDR1_CMDQS, reg));
3504 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3505 		/*
3506 		 * We don't support splitting up batches, so one batch of
3507 		 * commands plus an extra sync needs to fit inside the command
3508 		 * queue. There's also no way we can handle the weird alignment
3509 		 * restrictions on the base pointer for a unit-length queue.
3510 		 */
3511 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3512 			CMDQ_BATCH_ENTRIES);
3513 		return -ENXIO;
3514 	}
3515 
3516 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3517 					     FIELD_GET(IDR1_EVTQS, reg));
3518 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3519 					     FIELD_GET(IDR1_PRIQS, reg));
3520 
3521 	/* SID/SSID sizes */
3522 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3523 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3524 
3525 	/*
3526 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3527 	 * table, use a linear table instead.
3528 	 */
3529 	if (smmu->sid_bits <= STRTAB_SPLIT)
3530 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3531 
3532 	/* IDR3 */
3533 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3534 	if (FIELD_GET(IDR3_RIL, reg))
3535 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3536 
3537 	/* IDR5 */
3538 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3539 
3540 	/* Maximum number of outstanding stalls */
3541 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3542 
3543 	/* Page sizes */
3544 	if (reg & IDR5_GRAN64K)
3545 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3546 	if (reg & IDR5_GRAN16K)
3547 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3548 	if (reg & IDR5_GRAN4K)
3549 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3550 
3551 	/* Input address size */
3552 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3553 		smmu->features |= ARM_SMMU_FEAT_VAX;
3554 
3555 	/* Output address size */
3556 	switch (FIELD_GET(IDR5_OAS, reg)) {
3557 	case IDR5_OAS_32_BIT:
3558 		smmu->oas = 32;
3559 		break;
3560 	case IDR5_OAS_36_BIT:
3561 		smmu->oas = 36;
3562 		break;
3563 	case IDR5_OAS_40_BIT:
3564 		smmu->oas = 40;
3565 		break;
3566 	case IDR5_OAS_42_BIT:
3567 		smmu->oas = 42;
3568 		break;
3569 	case IDR5_OAS_44_BIT:
3570 		smmu->oas = 44;
3571 		break;
3572 	case IDR5_OAS_52_BIT:
3573 		smmu->oas = 52;
3574 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3575 		break;
3576 	default:
3577 		dev_info(smmu->dev,
3578 			"unknown output address size. Truncating to 48-bit\n");
3579 		fallthrough;
3580 	case IDR5_OAS_48_BIT:
3581 		smmu->oas = 48;
3582 	}
3583 
3584 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3585 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3586 	else
3587 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3588 
3589 	/* Set the DMA mask for our table walker */
3590 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3591 		dev_warn(smmu->dev,
3592 			 "failed to set DMA mask for table walker\n");
3593 
3594 	smmu->ias = max(smmu->ias, smmu->oas);
3595 
3596 	if (arm_smmu_sva_supported(smmu))
3597 		smmu->features |= ARM_SMMU_FEAT_SVA;
3598 
3599 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3600 		 smmu->ias, smmu->oas, smmu->features);
3601 	return 0;
3602 }
3603 
3604 #ifdef CONFIG_ACPI
3605 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3606 {
3607 	switch (model) {
3608 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3609 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3610 		break;
3611 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3612 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3613 		break;
3614 	}
3615 
3616 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3617 }
3618 
3619 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3620 				      struct arm_smmu_device *smmu)
3621 {
3622 	struct acpi_iort_smmu_v3 *iort_smmu;
3623 	struct device *dev = smmu->dev;
3624 	struct acpi_iort_node *node;
3625 
3626 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3627 
3628 	/* Retrieve SMMUv3 specific data */
3629 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3630 
3631 	acpi_smmu_get_options(iort_smmu->model, smmu);
3632 
3633 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3634 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3635 
3636 	return 0;
3637 }
3638 #else
3639 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3640 					     struct arm_smmu_device *smmu)
3641 {
3642 	return -ENODEV;
3643 }
3644 #endif
3645 
3646 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3647 				    struct arm_smmu_device *smmu)
3648 {
3649 	struct device *dev = &pdev->dev;
3650 	u32 cells;
3651 	int ret = -EINVAL;
3652 
3653 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3654 		dev_err(dev, "missing #iommu-cells property\n");
3655 	else if (cells != 1)
3656 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3657 	else
3658 		ret = 0;
3659 
3660 	parse_driver_options(smmu);
3661 
3662 	if (of_dma_is_coherent(dev->of_node))
3663 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3664 
3665 	return ret;
3666 }
3667 
3668 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3669 {
3670 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3671 		return SZ_64K;
3672 	else
3673 		return SZ_128K;
3674 }
3675 
3676 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3677 {
3678 	int err;
3679 
3680 #ifdef CONFIG_PCI
3681 	if (pci_bus_type.iommu_ops != ops) {
3682 		err = bus_set_iommu(&pci_bus_type, ops);
3683 		if (err)
3684 			return err;
3685 	}
3686 #endif
3687 #ifdef CONFIG_ARM_AMBA
3688 	if (amba_bustype.iommu_ops != ops) {
3689 		err = bus_set_iommu(&amba_bustype, ops);
3690 		if (err)
3691 			goto err_reset_pci_ops;
3692 	}
3693 #endif
3694 	if (platform_bus_type.iommu_ops != ops) {
3695 		err = bus_set_iommu(&platform_bus_type, ops);
3696 		if (err)
3697 			goto err_reset_amba_ops;
3698 	}
3699 
3700 	return 0;
3701 
3702 err_reset_amba_ops:
3703 #ifdef CONFIG_ARM_AMBA
3704 	bus_set_iommu(&amba_bustype, NULL);
3705 #endif
3706 err_reset_pci_ops: __maybe_unused;
3707 #ifdef CONFIG_PCI
3708 	bus_set_iommu(&pci_bus_type, NULL);
3709 #endif
3710 	return err;
3711 }
3712 
3713 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3714 				      resource_size_t size)
3715 {
3716 	struct resource res = DEFINE_RES_MEM(start, size);
3717 
3718 	return devm_ioremap_resource(dev, &res);
3719 }
3720 
3721 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3722 {
3723 	struct list_head rmr_list;
3724 	struct iommu_resv_region *e;
3725 
3726 	INIT_LIST_HEAD(&rmr_list);
3727 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3728 
3729 	list_for_each_entry(e, &rmr_list, list) {
3730 		__le64 *step;
3731 		struct iommu_iort_rmr_data *rmr;
3732 		int ret, i;
3733 
3734 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3735 		for (i = 0; i < rmr->num_sids; i++) {
3736 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3737 			if (ret) {
3738 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3739 					rmr->sids[i]);
3740 				continue;
3741 			}
3742 
3743 			step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3744 			arm_smmu_init_bypass_stes(step, 1, true);
3745 		}
3746 	}
3747 
3748 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3749 }
3750 
3751 static int arm_smmu_device_probe(struct platform_device *pdev)
3752 {
3753 	int irq, ret;
3754 	struct resource *res;
3755 	resource_size_t ioaddr;
3756 	struct arm_smmu_device *smmu;
3757 	struct device *dev = &pdev->dev;
3758 	bool bypass;
3759 
3760 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3761 	if (!smmu)
3762 		return -ENOMEM;
3763 	smmu->dev = dev;
3764 
3765 	if (dev->of_node) {
3766 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3767 	} else {
3768 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3769 		if (ret == -ENODEV)
3770 			return ret;
3771 	}
3772 
3773 	/* Set bypass mode according to firmware probing result */
3774 	bypass = !!ret;
3775 
3776 	/* Base address */
3777 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3778 	if (!res)
3779 		return -EINVAL;
3780 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3781 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3782 		return -EINVAL;
3783 	}
3784 	ioaddr = res->start;
3785 
3786 	/*
3787 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3788 	 * the PMCG registers which are reserved by the PMU driver.
3789 	 */
3790 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3791 	if (IS_ERR(smmu->base))
3792 		return PTR_ERR(smmu->base);
3793 
3794 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3795 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3796 					       ARM_SMMU_REG_SZ);
3797 		if (IS_ERR(smmu->page1))
3798 			return PTR_ERR(smmu->page1);
3799 	} else {
3800 		smmu->page1 = smmu->base;
3801 	}
3802 
3803 	/* Interrupt lines */
3804 
3805 	irq = platform_get_irq_byname_optional(pdev, "combined");
3806 	if (irq > 0)
3807 		smmu->combined_irq = irq;
3808 	else {
3809 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3810 		if (irq > 0)
3811 			smmu->evtq.q.irq = irq;
3812 
3813 		irq = platform_get_irq_byname_optional(pdev, "priq");
3814 		if (irq > 0)
3815 			smmu->priq.q.irq = irq;
3816 
3817 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3818 		if (irq > 0)
3819 			smmu->gerr_irq = irq;
3820 	}
3821 	/* Probe the h/w */
3822 	ret = arm_smmu_device_hw_probe(smmu);
3823 	if (ret)
3824 		return ret;
3825 
3826 	/* Initialise in-memory data structures */
3827 	ret = arm_smmu_init_structures(smmu);
3828 	if (ret)
3829 		return ret;
3830 
3831 	/* Record our private device structure */
3832 	platform_set_drvdata(pdev, smmu);
3833 
3834 	/* Check for RMRs and install bypass STEs if any */
3835 	arm_smmu_rmr_install_bypass_ste(smmu);
3836 
3837 	/* Reset the device */
3838 	ret = arm_smmu_device_reset(smmu, bypass);
3839 	if (ret)
3840 		return ret;
3841 
3842 	/* And we're up. Go go go! */
3843 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3844 				     "smmu3.%pa", &ioaddr);
3845 	if (ret)
3846 		return ret;
3847 
3848 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3849 	if (ret) {
3850 		dev_err(dev, "Failed to register iommu\n");
3851 		goto err_sysfs_remove;
3852 	}
3853 
3854 	ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3855 	if (ret)
3856 		goto err_unregister_device;
3857 
3858 	return 0;
3859 
3860 err_unregister_device:
3861 	iommu_device_unregister(&smmu->iommu);
3862 err_sysfs_remove:
3863 	iommu_device_sysfs_remove(&smmu->iommu);
3864 	return ret;
3865 }
3866 
3867 static int arm_smmu_device_remove(struct platform_device *pdev)
3868 {
3869 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3870 
3871 	arm_smmu_set_bus_ops(NULL);
3872 	iommu_device_unregister(&smmu->iommu);
3873 	iommu_device_sysfs_remove(&smmu->iommu);
3874 	arm_smmu_device_disable(smmu);
3875 	iopf_queue_free(smmu->evtq.iopf);
3876 
3877 	return 0;
3878 }
3879 
/*
 * Platform driver shutdown: only quiesce the SMMU hardware.
 *
 * Unlike remove, this must not unregister the IOMMU or clear the bus ops.
 * Shutdown callbacks are optional for client drivers, so devices behind the
 * SMMU may still be bound and using the DMA API while the system goes down;
 * tearing the IOMMU registration out from under them is unsafe and serves no
 * purpose at this point.
 */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_device_disable(smmu);
}
3884 
3885 static const struct of_device_id arm_smmu_of_match[] = {
3886 	{ .compatible = "arm,smmu-v3", },
3887 	{ },
3888 };
3889 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3890 
/*
 * Module exit helper used by module_driver(): run the SVA notifier
 * synchronisation hook, then unregister the platform driver.
 */
static void arm_smmu_driver_unregister(struct platform_driver *drv)
{
	/* NOTE(review): presumably waits for pending SVA notifier work - confirm */
	arm_smmu_sva_notifier_synchronize();

	platform_driver_unregister(drv);
}
3896 
3897 static struct platform_driver arm_smmu_driver = {
3898 	.driver	= {
3899 		.name			= "arm-smmu-v3",
3900 		.of_match_table		= arm_smmu_of_match,
3901 		.suppress_bind_attrs	= true,
3902 	},
3903 	.probe	= arm_smmu_device_probe,
3904 	.remove	= arm_smmu_device_remove,
3905 	.shutdown = arm_smmu_device_shutdown,
3906 };
3907 module_driver(arm_smmu_driver, platform_driver_register,
3908 	      arm_smmu_driver_unregister);
3909 
3910 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3911 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3912 MODULE_ALIAS("platform:arm-smmu-v3");
3913 MODULE_LICENSE("GPL v2");
3914