xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision 0c7c237b1c35011ef0b8d30c1d5c20bc6ae7b69b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 
30 #include "arm-smmu-v3.h"
31 #include "../../dma-iommu.h"
32 #include "../../iommu-sva.h"
33 
34 static bool disable_bypass = true;
35 module_param(disable_bypass, bool, 0444);
36 MODULE_PARM_DESC(disable_bypass,
37 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
38 
39 static bool disable_msipolling;
40 module_param(disable_msipolling, bool, 0444);
41 MODULE_PARM_DESC(disable_msipolling,
42 	"Disable MSI-based polling for CMD_SYNC completion.");
43 
44 enum arm_smmu_msi_index {
45 	EVTQ_MSI_INDEX,
46 	GERROR_MSI_INDEX,
47 	PRIQ_MSI_INDEX,
48 	ARM_SMMU_MAX_MSIS,
49 };
50 
51 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
52 	[EVTQ_MSI_INDEX] = {
53 		ARM_SMMU_EVTQ_IRQ_CFG0,
54 		ARM_SMMU_EVTQ_IRQ_CFG1,
55 		ARM_SMMU_EVTQ_IRQ_CFG2,
56 	},
57 	[GERROR_MSI_INDEX] = {
58 		ARM_SMMU_GERROR_IRQ_CFG0,
59 		ARM_SMMU_GERROR_IRQ_CFG1,
60 		ARM_SMMU_GERROR_IRQ_CFG2,
61 	},
62 	[PRIQ_MSI_INDEX] = {
63 		ARM_SMMU_PRIQ_IRQ_CFG0,
64 		ARM_SMMU_PRIQ_IRQ_CFG1,
65 		ARM_SMMU_PRIQ_IRQ_CFG2,
66 	},
67 };
68 
69 struct arm_smmu_option_prop {
70 	u32 opt;
71 	const char *prop;
72 };
73 
74 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
75 DEFINE_MUTEX(arm_smmu_asid_lock);
76 
77 /*
78  * Special value used by SVA when a process dies, to quiesce a CD without
79  * disabling it.
80  */
81 struct arm_smmu_ctx_desc quiet_cd = { 0 };
82 
83 static struct arm_smmu_option_prop arm_smmu_options[] = {
84 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
85 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
86 	{ 0, NULL},
87 };
88 
89 static void parse_driver_options(struct arm_smmu_device *smmu)
90 {
91 	int i = 0;
92 
93 	do {
94 		if (of_property_read_bool(smmu->dev->of_node,
95 						arm_smmu_options[i].prop)) {
96 			smmu->options |= arm_smmu_options[i].opt;
97 			dev_notice(smmu->dev, "option %s\n",
98 				arm_smmu_options[i].prop);
99 		}
100 	} while (arm_smmu_options[++i].opt);
101 }
102 
103 /* Low-level queue manipulation functions */
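/*
 * Aside (illustrative, not part of the original code): the prod/cons values
 * handled below pack three fields, per the Q_IDX()/Q_WRP()/Q_OVF() helpers in
 * arm-smmu-v3.h: the low max_n_shift bits are the entry index, the next bit
 * up is the wrap flag, and a high bit (bit 31 in the current header) is the
 * overflow flag. For example, with a hypothetical max_n_shift of 3 (an
 * 8-entry queue), prod = 0b1010 decodes to wrap = 1, idx = 2 (10 entries
 * produced in total) and cons = 0b0110 to wrap = 0, idx = 6 (6 consumed):
 * the wrap bits differ, so queue_has_space() computes space = cons - prod =
 * 4 free slots.
 */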
104 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
105 {
106 	u32 space, prod, cons;
107 
108 	prod = Q_IDX(q, q->prod);
109 	cons = Q_IDX(q, q->cons);
110 
111 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
112 		space = (1 << q->max_n_shift) - (prod - cons);
113 	else
114 		space = cons - prod;
115 
116 	return space >= n;
117 }
118 
119 static bool queue_full(struct arm_smmu_ll_queue *q)
120 {
121 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
122 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
123 }
124 
125 static bool queue_empty(struct arm_smmu_ll_queue *q)
126 {
127 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
128 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
129 }
130 
131 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
132 {
133 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
134 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
135 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
137 }
138 
139 static void queue_sync_cons_out(struct arm_smmu_queue *q)
140 {
141 	/*
142 	 * Ensure that all CPU accesses (reads and writes) to the queue
143 	 * are complete before we update the cons pointer.
144 	 */
145 	__iomb();
146 	writel_relaxed(q->llq.cons, q->cons_reg);
147 }
148 
149 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
150 {
151 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
152 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
153 }
154 
155 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
156 {
157 	struct arm_smmu_ll_queue *llq = &q->llq;
158 
159 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
160 		return;
161 
162 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
163 		      Q_IDX(llq, llq->cons);
164 	queue_sync_cons_out(q);
165 }
166 
167 static int queue_sync_prod_in(struct arm_smmu_queue *q)
168 {
169 	u32 prod;
170 	int ret = 0;
171 
172 	/*
173 	 * We can't use the _relaxed() variant here, as we must prevent
174 	 * speculative reads of the queue before we have determined that
175 	 * prod has indeed moved.
176 	 */
177 	prod = readl(q->prod_reg);
178 
179 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
180 		ret = -EOVERFLOW;
181 
182 	q->llq.prod = prod;
183 	return ret;
184 }
185 
186 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
187 {
188 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
189 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
190 }
191 
192 static void queue_poll_init(struct arm_smmu_device *smmu,
193 			    struct arm_smmu_queue_poll *qp)
194 {
195 	qp->delay = 1;
196 	qp->spin_cnt = 0;
197 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
198 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
199 }
200 
201 static int queue_poll(struct arm_smmu_queue_poll *qp)
202 {
203 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
204 		return -ETIMEDOUT;
205 
206 	if (qp->wfe) {
207 		wfe();
208 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
209 		cpu_relax();
210 	} else {
211 		udelay(qp->delay);
212 		qp->delay *= 2;
213 		qp->spin_cnt = 0;
214 	}
215 
216 	return 0;
217 }
218 
219 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
220 {
221 	int i;
222 
223 	for (i = 0; i < n_dwords; ++i)
224 		*dst++ = cpu_to_le64(*src++);
225 }
226 
227 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
228 {
229 	int i;
230 
231 	for (i = 0; i < n_dwords; ++i)
232 		*dst++ = le64_to_cpu(*src++);
233 }
234 
235 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
236 {
237 	if (queue_empty(&q->llq))
238 		return -EAGAIN;
239 
240 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
241 	queue_inc_cons(&q->llq);
242 	queue_sync_cons_out(q);
243 	return 0;
244 }
245 
246 /* High-level queue accessors */
247 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
248 {
249 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
250 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
251 
252 	switch (ent->opcode) {
253 	case CMDQ_OP_TLBI_EL2_ALL:
254 	case CMDQ_OP_TLBI_NSNH_ALL:
255 		break;
256 	case CMDQ_OP_PREFETCH_CFG:
257 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
258 		break;
259 	case CMDQ_OP_CFGI_CD:
260 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
261 		fallthrough;
262 	case CMDQ_OP_CFGI_STE:
263 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
264 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
265 		break;
266 	case CMDQ_OP_CFGI_CD_ALL:
267 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
268 		break;
269 	case CMDQ_OP_CFGI_ALL:
270 		/* Cover the entire SID range */
271 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
272 		break;
273 	case CMDQ_OP_TLBI_NH_VA:
274 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
275 		fallthrough;
276 	case CMDQ_OP_TLBI_EL2_VA:
277 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
279 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
280 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
282 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
283 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
284 		break;
285 	case CMDQ_OP_TLBI_S2_IPA:
286 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
287 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
290 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
291 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
292 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
293 		break;
294 	case CMDQ_OP_TLBI_NH_ASID:
295 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
296 		fallthrough;
297 	case CMDQ_OP_TLBI_S12_VMALL:
298 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
299 		break;
300 	case CMDQ_OP_TLBI_EL2_ASID:
301 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
302 		break;
303 	case CMDQ_OP_ATC_INV:
304 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
305 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
306 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
307 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
308 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
309 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
310 		break;
311 	case CMDQ_OP_PRI_RESP:
312 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
313 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
314 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
315 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
316 		switch (ent->pri.resp) {
317 		case PRI_RESP_DENY:
318 		case PRI_RESP_FAIL:
319 		case PRI_RESP_SUCC:
320 			break;
321 		default:
322 			return -EINVAL;
323 		}
324 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
325 		break;
326 	case CMDQ_OP_RESUME:
327 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
328 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
329 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
330 		break;
331 	case CMDQ_OP_CMD_SYNC:
332 		if (ent->sync.msiaddr) {
333 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
334 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
335 		} else {
336 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
337 		}
338 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
339 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
340 		break;
341 	default:
342 		return -ENOENT;
343 	}
344 
345 	return 0;
346 }
347 
348 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
349 {
350 	return &smmu->cmdq;
351 }
352 
353 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
354 					 struct arm_smmu_queue *q, u32 prod)
355 {
356 	struct arm_smmu_cmdq_ent ent = {
357 		.opcode = CMDQ_OP_CMD_SYNC,
358 	};
359 
360 	/*
361 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
362 	 * payload, so the write will zero the entire command on that platform.
363 	 */
364 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
365 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
366 				   q->ent_dwords * 8;
367 	}
368 
369 	arm_smmu_cmdq_build_cmd(cmd, &ent);
370 }
371 
372 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
373 				     struct arm_smmu_queue *q)
374 {
375 	static const char * const cerror_str[] = {
376 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
377 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
378 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
379 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
380 	};
381 
382 	int i;
383 	u64 cmd[CMDQ_ENT_DWORDS];
384 	u32 cons = readl_relaxed(q->cons_reg);
385 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
386 	struct arm_smmu_cmdq_ent cmd_sync = {
387 		.opcode = CMDQ_OP_CMD_SYNC,
388 	};
389 
390 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
391 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
392 
393 	switch (idx) {
394 	case CMDQ_ERR_CERROR_ABT_IDX:
395 		dev_err(smmu->dev, "retrying command fetch\n");
396 		return;
397 	case CMDQ_ERR_CERROR_NONE_IDX:
398 		return;
399 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
400 		/*
401 		 * ATC Invalidation Completion timeout. CONS is still pointing
402 		 * at the CMD_SYNC. Attempt to complete other pending commands
403 		 * by repeating the CMD_SYNC, though we might well end up back
404 		 * here since the ATC invalidation may still be pending.
405 		 */
406 		return;
407 	case CMDQ_ERR_CERROR_ILL_IDX:
408 	default:
409 		break;
410 	}
411 
412 	/*
413 	 * We may have concurrent producers, so we need to be careful
414 	 * not to touch any of the shadow cmdq state.
415 	 */
416 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
417 	dev_err(smmu->dev, "skipping command in error state:\n");
418 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
419 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
420 
421 	/* Convert the erroneous command into a CMD_SYNC */
422 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
423 
424 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
425 }
426 
427 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
428 {
429 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
430 }
431 
432 /*
433  * Command queue locking.
434  * This is a form of bastardised rwlock with the following major changes:
435  *
436  * - The only LOCK routines are exclusive_trylock() and shared_lock().
437  *   Neither have barrier semantics, and instead provide only a control
438  *   dependency.
439  *
440  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
441  *   fails if the caller appears to be the last lock holder (yes, this is
442  *   racy). All successful UNLOCK routines have RELEASE semantics.
443  */
444 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
445 {
446 	int val;
447 
448 	/*
449 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
450 	 * lock counter. When held in exclusive state, the lock counter is set
451 	 * to INT_MIN so these increments won't hurt as the value will remain
452 	 * negative.
453 	 */
454 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
455 		return;
456 
457 	do {
458 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
459 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
460 }
461 
462 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
463 {
464 	(void)atomic_dec_return_release(&cmdq->lock);
465 }
466 
467 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
468 {
469 	if (atomic_read(&cmdq->lock) == 1)
470 		return false;
471 
472 	arm_smmu_cmdq_shared_unlock(cmdq);
473 	return true;
474 }
475 
476 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
477 ({									\
478 	bool __ret;							\
479 	local_irq_save(flags);						\
480 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
481 	if (!__ret)							\
482 		local_irq_restore(flags);				\
483 	__ret;								\
484 })
485 
486 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
487 ({									\
488 	atomic_set_release(&cmdq->lock, 0);				\
489 	local_irq_restore(flags);					\
490 })
491 
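/*
 * Roughly how these primitives are used below (a reading aid, not extra
 * functionality): every CMD_SYNC issuer takes the lock as shared before
 * marking its slots valid, so the queue cannot wrap twice unnoticed while a
 * waiter still cares about its entry; a CPU that wants to refresh the cached
 * cons pointer from hardware takes it exclusively via
 * arm_smmu_cmdq_exclusive_trylock_irqsave() (see
 * arm_smmu_cmdq_poll_until_not_full()); and a waiter whose shared_tryunlock()
 * fails knows it appears to be the last holder and may write
 * cmdq->q.llq.cons back itself before dropping the lock.
 */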
492 
493 /*
494  * Command queue insertion.
495  * This is made fiddly by our attempts to achieve some sort of scalability
496  * since there is one queue shared amongst all of the CPUs in the system.  If
497  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
498  * then you'll *love* this monstrosity.
499  *
500  * The basic idea is to split the queue up into ranges of commands that are
501  * owned by a given CPU; the owner may not have written all of the commands
502  * itself, but is responsible for advancing the hardware prod pointer when
503  * the time comes. The algorithm is roughly:
504  *
505  * 	1. Allocate some space in the queue. At this point we also discover
506  *	   whether the head of the queue is currently owned by another CPU,
507  *	   or whether we are the owner.
508  *
509  *	2. Write our commands into our allocated slots in the queue.
510  *
511  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
512  *
513  *	4. If we are an owner:
514  *		a. Wait for the previous owner to finish.
515  *		b. Mark the queue head as unowned, which tells us the range
516  *		   that we are responsible for publishing.
517  *		c. Wait for all commands in our owned range to become valid.
518  *		d. Advance the hardware prod pointer.
519  *		e. Tell the next owner we've finished.
520  *
521  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
522  *	   owner), then we need to stick around until it has completed:
523  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
524  *		   to clear the first 4 bytes.
525  *		b. Otherwise, we spin waiting for the hardware cons pointer to
526  *		   advance past our command.
527  *
528  * The devil is in the details, particularly the use of locking for handling
529  * SYNC completion and freeing up space in the queue before we think that it is
530  * full.
531  */
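/*
 * A hypothetical two-CPU trace of step 1, purely for orientation: CPU0's
 * cmpxchg() on cmdq->q.llq.val observes a prod value with
 * CMDQ_PROD_OWNED_FLAG clear, so CPU0 is the owner and republishes prod with
 * the flag set. CPU1 allocates its slots while the flag is still set, so it
 * is only a gatherer and skips step 4 entirely. When CPU0 later clears the
 * flag with atomic_fetch_andnot_relaxed(), the old prod it gets back (minus
 * the flag) marks the end of the gathered batch: that is the range CPU0 must
 * poll for validity and then publish to the hardware prod register.
 */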
532 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
533 					       u32 sprod, u32 eprod, bool set)
534 {
535 	u32 swidx, sbidx, ewidx, ebidx;
536 	struct arm_smmu_ll_queue llq = {
537 		.max_n_shift	= cmdq->q.llq.max_n_shift,
538 		.prod		= sprod,
539 	};
540 
541 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
542 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
543 
544 	while (llq.prod != eprod) {
545 		unsigned long mask;
546 		atomic_long_t *ptr;
547 		u32 limit = BITS_PER_LONG;
548 
549 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
550 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
551 
552 		ptr = &cmdq->valid_map[swidx];
553 
554 		if ((swidx == ewidx) && (sbidx < ebidx))
555 			limit = ebidx;
556 
557 		mask = GENMASK(limit - 1, sbidx);
558 
559 		/*
560 		 * The valid bit is the inverse of the wrap bit. This means
561 		 * that a zero-initialised queue is invalid and, after marking
562 		 * all entries as valid, they become invalid again when we
563 		 * wrap.
564 		 */
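		/*
		 * Worked example (queue size chosen purely for illustration):
		 * with max_n_shift = 7 (128 entries) and BITS_PER_LONG = 64,
		 * covering Q_IDX 60 up to (but not including) 70 takes two
		 * passes: first swidx = 0, sbidx = 60, limit = 64 and
		 * mask = GENMASK(63, 60) handles 4 entries; then swidx = 1,
		 * sbidx = 0, ebidx = 6, limit = 6 and mask = GENMASK(5, 0)
		 * handles the remaining 6.
		 */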
565 		if (set) {
566 			atomic_long_xor(mask, ptr);
567 		} else { /* Poll */
568 			unsigned long valid;
569 
570 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
571 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
572 		}
573 
574 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
575 	}
576 }
577 
578 /* Mark all entries in the range [sprod, eprod) as valid */
579 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
580 					u32 sprod, u32 eprod)
581 {
582 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
583 }
584 
585 /* Wait for all entries in the range [sprod, eprod) to become valid */
586 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
587 					 u32 sprod, u32 eprod)
588 {
589 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
590 }
591 
592 /* Wait for the command queue to become non-full */
593 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
594 					     struct arm_smmu_ll_queue *llq)
595 {
596 	unsigned long flags;
597 	struct arm_smmu_queue_poll qp;
598 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
599 	int ret = 0;
600 
601 	/*
602 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
603 	 * that fails, spin until somebody else updates it for us.
604 	 */
605 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
606 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
607 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
608 		llq->val = READ_ONCE(cmdq->q.llq.val);
609 		return 0;
610 	}
611 
612 	queue_poll_init(smmu, &qp);
613 	do {
614 		llq->val = READ_ONCE(cmdq->q.llq.val);
615 		if (!queue_full(llq))
616 			break;
617 
618 		ret = queue_poll(&qp);
619 	} while (!ret);
620 
621 	return ret;
622 }
623 
624 /*
625  * Wait until the SMMU signals a CMD_SYNC completion MSI.
626  * Must be called with the cmdq lock held in some capacity.
627  */
628 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
629 					  struct arm_smmu_ll_queue *llq)
630 {
631 	int ret = 0;
632 	struct arm_smmu_queue_poll qp;
633 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
634 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
635 
636 	queue_poll_init(smmu, &qp);
637 
638 	/*
639 	 * The MSI won't generate an event, since it's being written back
640 	 * into the command queue.
641 	 */
642 	qp.wfe = false;
643 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
644 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
645 	return ret;
646 }
647 
648 /*
649  * Wait until the SMMU cons index passes llq->prod.
650  * Must be called with the cmdq lock held in some capacity.
651  */
652 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
653 					       struct arm_smmu_ll_queue *llq)
654 {
655 	struct arm_smmu_queue_poll qp;
656 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
657 	u32 prod = llq->prod;
658 	int ret = 0;
659 
660 	queue_poll_init(smmu, &qp);
661 	llq->val = READ_ONCE(cmdq->q.llq.val);
662 	do {
663 		if (queue_consumed(llq, prod))
664 			break;
665 
666 		ret = queue_poll(&qp);
667 
668 		/*
669 		 * This needs to be a readl() so that our subsequent call
670 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
671 		 *
672 		 * Specifically, we need to ensure that we observe all
673 		 * shared_lock()s by other CMD_SYNCs that share our owner,
674 		 * so that a failing call to tryunlock() means that we're
675 		 * the last one out and therefore we can safely advance
676 		 * cmdq->q.llq.cons. Roughly speaking:
677 		 *
678 		 * CPU 0		CPU1			CPU2 (us)
679 		 *
680 		 * if (sync)
681 		 * 	shared_lock();
682 		 *
683 		 * dma_wmb();
684 		 * set_valid_map();
685 		 *
686 		 * 			if (owner) {
687 		 *				poll_valid_map();
688 		 *				<control dependency>
689 		 *				writel(prod_reg);
690 		 *
691 		 *						readl(cons_reg);
692 		 *						tryunlock();
693 		 *
694 		 * Requires us to see CPU 0's shared_lock() acquisition.
695 		 */
696 		llq->cons = readl(cmdq->q.cons_reg);
697 	} while (!ret);
698 
699 	return ret;
700 }
701 
702 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
703 					 struct arm_smmu_ll_queue *llq)
704 {
705 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
706 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
707 
708 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
709 }
710 
711 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
712 					u32 prod, int n)
713 {
714 	int i;
715 	struct arm_smmu_ll_queue llq = {
716 		.max_n_shift	= cmdq->q.llq.max_n_shift,
717 		.prod		= prod,
718 	};
719 
720 	for (i = 0; i < n; ++i) {
721 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
722 
723 		prod = queue_inc_prod_n(&llq, i);
724 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
725 	}
726 }
727 
728 /*
729  * This is the actual insertion function, and provides the following
730  * ordering guarantees to callers:
731  *
732  * - There is a dma_wmb() before publishing any commands to the queue.
733  *   This can be relied upon to order prior writes to data structures
734  *   in memory (such as a CD or an STE) before the command.
735  *
736  * - On completion of a CMD_SYNC, there is a control dependency.
737  *   This can be relied upon to order subsequent writes to memory (e.g.
738  *   freeing an IOVA) after completion of the CMD_SYNC.
739  *
740  * - Command insertion is totally ordered, so if two CPUs each race to
741  *   insert their own list of commands then all of the commands from one
742  *   CPU will appear before any of the commands from the other CPU.
743  */
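/*
 * Illustrative caller-side sketch (simplified; cfgi/tlbi stand for whatever
 * command entries the caller builds) of how these guarantees are used by the
 * real callers further down this file:
 *
 *	write the CD/STE dwords in memory;
 *	arm_smmu_cmdq_issue_cmd(smmu, &cfgi);		// dma_wmb() orders the
 *							// writes before the command
 *	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &tlbi);
 *	free the IOVA;					// safe only once the
 *							// CMD_SYNC has completed
 */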
744 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
745 				       u64 *cmds, int n, bool sync)
746 {
747 	u64 cmd_sync[CMDQ_ENT_DWORDS];
748 	u32 prod;
749 	unsigned long flags;
750 	bool owner;
751 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
752 	struct arm_smmu_ll_queue llq, head;
753 	int ret = 0;
754 
755 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
756 
757 	/* 1. Allocate some space in the queue */
758 	local_irq_save(flags);
759 	llq.val = READ_ONCE(cmdq->q.llq.val);
760 	do {
761 		u64 old;
762 
763 		while (!queue_has_space(&llq, n + sync)) {
764 			local_irq_restore(flags);
765 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
766 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
767 			local_irq_save(flags);
768 		}
769 
770 		head.cons = llq.cons;
771 		head.prod = queue_inc_prod_n(&llq, n + sync) |
772 					     CMDQ_PROD_OWNED_FLAG;
773 
774 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
775 		if (old == llq.val)
776 			break;
777 
778 		llq.val = old;
779 	} while (1);
780 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
781 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
782 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
783 
784 	/*
785 	 * 2. Write our commands into the queue
786 	 * Dependency ordering from the cmpxchg() loop above.
787 	 */
788 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
789 	if (sync) {
790 		prod = queue_inc_prod_n(&llq, n);
791 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
792 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
793 
794 		/*
795 		 * In order to determine completion of our CMD_SYNC, we must
796 		 * ensure that the queue can't wrap twice without us noticing.
797 		 * We achieve that by taking the cmdq lock as shared before
798 		 * marking our slot as valid.
799 		 */
800 		arm_smmu_cmdq_shared_lock(cmdq);
801 	}
802 
803 	/* 3. Mark our slots as valid, ensuring commands are visible first */
804 	dma_wmb();
805 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
806 
807 	/* 4. If we are the owner, take control of the SMMU hardware */
808 	if (owner) {
809 		/* a. Wait for previous owner to finish */
810 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
811 
812 		/* b. Stop gathering work by clearing the owned flag */
813 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
814 						   &cmdq->q.llq.atomic.prod);
815 		prod &= ~CMDQ_PROD_OWNED_FLAG;
816 
817 		/*
818 		 * c. Wait for any gathered work to be written to the queue.
819 		 * Note that we read our own entries so that we have the control
820 		 * dependency required by (d).
821 		 */
822 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
823 
824 		/*
825 		 * d. Advance the hardware prod pointer
826 		 * Control dependency ordering from the entries becoming valid.
827 		 */
828 		writel_relaxed(prod, cmdq->q.prod_reg);
829 
830 		/*
831 		 * e. Tell the next owner we're done
832 		 * Make sure we've updated the hardware first, so that we don't
833 		 * race to update prod and potentially move it backwards.
834 		 */
835 		atomic_set_release(&cmdq->owner_prod, prod);
836 	}
837 
838 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
839 	if (sync) {
840 		llq.prod = queue_inc_prod_n(&llq, n);
841 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
842 		if (ret) {
843 			dev_err_ratelimited(smmu->dev,
844 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
845 					    llq.prod,
846 					    readl_relaxed(cmdq->q.prod_reg),
847 					    readl_relaxed(cmdq->q.cons_reg));
848 		}
849 
850 		/*
851 		 * Try to unlock the cmdq lock. This will fail if we're the last
852 		 * reader, in which case we can safely update cmdq->q.llq.cons
853 		 */
854 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
855 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
856 			arm_smmu_cmdq_shared_unlock(cmdq);
857 		}
858 	}
859 
860 	local_irq_restore(flags);
861 	return ret;
862 }
863 
864 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
865 				     struct arm_smmu_cmdq_ent *ent,
866 				     bool sync)
867 {
868 	u64 cmd[CMDQ_ENT_DWORDS];
869 
870 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
871 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
872 			 ent->opcode);
873 		return -EINVAL;
874 	}
875 
876 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
877 }
878 
879 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
880 				   struct arm_smmu_cmdq_ent *ent)
881 {
882 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
883 }
884 
885 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
886 					     struct arm_smmu_cmdq_ent *ent)
887 {
888 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
889 }
890 
891 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
892 				    struct arm_smmu_cmdq_batch *cmds,
893 				    struct arm_smmu_cmdq_ent *cmd)
894 {
895 	int index;
896 
897 	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
898 	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
899 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
900 		cmds->num = 0;
901 	}
902 
903 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
904 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
905 		cmds->num = 0;
906 	}
907 
908 	index = cmds->num * CMDQ_ENT_DWORDS;
909 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
910 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
911 			 cmd->opcode);
912 		return;
913 	}
914 
915 	cmds->num++;
916 }
917 
918 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
919 				      struct arm_smmu_cmdq_batch *cmds)
920 {
921 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
922 }
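
/*
 * The batch helpers above are used in a simple pattern throughout this file
 * (e.g. arm_smmu_sync_cd() and the ATC invalidation paths); roughly:
 *
 *	struct arm_smmu_cmdq_batch cmds;
 *
 *	cmds.num = 0;
 *	for each stream/command:
 *		build cmd;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);	// flushes with a CMD_SYNC
 */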
923 
924 static int arm_smmu_page_response(struct device *dev,
925 				  struct iommu_fault_event *unused,
926 				  struct iommu_page_response *resp)
927 {
928 	struct arm_smmu_cmdq_ent cmd = {0};
929 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
930 	int sid = master->streams[0].id;
931 
932 	if (master->stall_enabled) {
933 		cmd.opcode		= CMDQ_OP_RESUME;
934 		cmd.resume.sid		= sid;
935 		cmd.resume.stag		= resp->grpid;
936 		switch (resp->code) {
937 		case IOMMU_PAGE_RESP_INVALID:
938 		case IOMMU_PAGE_RESP_FAILURE:
939 			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
940 			break;
941 		case IOMMU_PAGE_RESP_SUCCESS:
942 			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
943 			break;
944 		default:
945 			return -EINVAL;
946 		}
947 	} else {
948 		return -ENODEV;
949 	}
950 
951 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
952 	/*
953 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
954 	 * RESUME consumption guarantees that the stalled transaction will be
955 	 * terminated... at some point in the future. PRI_RESP is fire and
956 	 * forget.
957 	 */
958 
959 	return 0;
960 }
961 
962 /* Context descriptor manipulation functions */
963 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
964 {
965 	struct arm_smmu_cmdq_ent cmd = {
966 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
967 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
968 		.tlbi.asid = asid,
969 	};
970 
971 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
972 }
973 
974 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
975 			     int ssid, bool leaf)
976 {
977 	size_t i;
978 	unsigned long flags;
979 	struct arm_smmu_master *master;
980 	struct arm_smmu_cmdq_batch cmds;
981 	struct arm_smmu_device *smmu = smmu_domain->smmu;
982 	struct arm_smmu_cmdq_ent cmd = {
983 		.opcode	= CMDQ_OP_CFGI_CD,
984 		.cfgi	= {
985 			.ssid	= ssid,
986 			.leaf	= leaf,
987 		},
988 	};
989 
990 	cmds.num = 0;
991 
992 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
993 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
994 		for (i = 0; i < master->num_streams; i++) {
995 			cmd.cfgi.sid = master->streams[i].id;
996 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
997 		}
998 	}
999 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1000 
1001 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1002 }
1003 
1004 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1005 					struct arm_smmu_l1_ctx_desc *l1_desc)
1006 {
1007 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1008 
1009 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1010 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1011 	if (!l1_desc->l2ptr) {
1012 		dev_warn(smmu->dev,
1013 			 "failed to allocate context descriptor table\n");
1014 		return -ENOMEM;
1015 	}
1016 	return 0;
1017 }
1018 
1019 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1020 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1021 {
1022 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1023 		  CTXDESC_L1_DESC_V;
1024 
1025 	/* See comment in arm_smmu_write_ctx_desc() */
1026 	WRITE_ONCE(*dst, cpu_to_le64(val));
1027 }
1028 
1029 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1030 				   u32 ssid)
1031 {
1032 	__le64 *l1ptr;
1033 	unsigned int idx;
1034 	struct arm_smmu_l1_ctx_desc *l1_desc;
1035 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1036 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1037 
1038 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1039 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1040 
1041 	idx = ssid >> CTXDESC_SPLIT;
1042 	l1_desc = &cdcfg->l1_desc[idx];
1043 	if (!l1_desc->l2ptr) {
1044 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1045 			return NULL;
1046 
1047 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1048 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1049 		/* An invalid L1CD can be cached */
1050 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1051 	}
1052 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1053 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1054 }
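
/*
 * Two-level lookup example for arm_smmu_get_cd_ptr() (numbers illustrative,
 * assuming the 10-bit CTXDESC_SPLIT in the current header, i.e.
 * CTXDESC_L2_ENTRIES == 1024): ssid 0x1234 selects L1 descriptor
 * 0x1234 >> 10 = 4 and entry 0x1234 & 0x3ff = 0x234 within that leaf table.
 * The leaf table is only allocated, and its L1 descriptor written and synced,
 * the first time an ssid in its range is used.
 */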
1055 
1056 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1057 			    struct arm_smmu_ctx_desc *cd)
1058 {
1059 	/*
1060 	 * This function handles the following cases:
1061 	 *
1062 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1063 	 * (2) Install a secondary CD, for SID+SSID traffic.
1064 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1065 	 *     CD, then invalidate the old entry and mappings.
1066 	 * (4) Quiesce the context without clearing the valid bit. Disable
1067 	 *     translation, and ignore any translation fault.
1068 	 * (5) Remove a secondary CD.
1069 	 */
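	/*
	 * For example (call shapes only, not taken verbatim from callers):
	 * arm_smmu_write_ctx_desc(smmu_domain, 0, &some_cd) is case (1),
	 * a non-zero ssid with a fresh CD is case (2), passing &quiet_cd is
	 * case (4) and passing a NULL cd is case (5).
	 */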
1070 	u64 val;
1071 	bool cd_live;
1072 	__le64 *cdptr;
1073 
1074 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1075 		return -E2BIG;
1076 
1077 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1078 	if (!cdptr)
1079 		return -ENOMEM;
1080 
1081 	val = le64_to_cpu(cdptr[0]);
1082 	cd_live = !!(val & CTXDESC_CD_0_V);
1083 
1084 	if (!cd) { /* (5) */
1085 		val = 0;
1086 	} else if (cd == &quiet_cd) { /* (4) */
1087 		val |= CTXDESC_CD_0_TCR_EPD0;
1088 	} else if (cd_live) { /* (3) */
1089 		val &= ~CTXDESC_CD_0_ASID;
1090 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1091 		/*
1092 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1093 		 * this substream's traffic
1094 		 */
1095 	} else { /* (1) and (2) */
1096 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1097 		cdptr[2] = 0;
1098 		cdptr[3] = cpu_to_le64(cd->mair);
1099 
1100 		/*
1101 		 * STE is live, and the SMMU might read dwords of this CD in any
1102 		 * order. Ensure that it observes valid values before reading
1103 		 * V=1.
1104 		 */
1105 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1106 
1107 		val = cd->tcr |
1108 #ifdef __BIG_ENDIAN
1109 			CTXDESC_CD_0_ENDI |
1110 #endif
1111 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1112 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1113 			CTXDESC_CD_0_AA64 |
1114 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1115 			CTXDESC_CD_0_V;
1116 
1117 		if (smmu_domain->stall_enabled)
1118 			val |= CTXDESC_CD_0_S;
1119 	}
1120 
1121 	/*
1122 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1123 	 * "Configuration structures and configuration invalidation completion"
1124 	 *
1125 	 *   The size of single-copy atomic reads made by the SMMU is
1126 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1127 	 *   field within an aligned 64-bit span of a structure can be altered
1128 	 *   without first making the structure invalid.
1129 	 */
1130 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1131 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1132 	return 0;
1133 }
1134 
1135 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1136 {
1137 	int ret;
1138 	size_t l1size;
1139 	size_t max_contexts;
1140 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1141 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1142 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1143 
1144 	max_contexts = 1 << cfg->s1cdmax;
1145 
1146 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1147 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1148 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1149 		cdcfg->num_l1_ents = max_contexts;
1150 
1151 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1152 	} else {
1153 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1154 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1155 						  CTXDESC_L2_ENTRIES);
1156 
1157 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1158 					      sizeof(*cdcfg->l1_desc),
1159 					      GFP_KERNEL);
1160 		if (!cdcfg->l1_desc)
1161 			return -ENOMEM;
1162 
1163 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1164 	}
1165 
1166 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1167 					   GFP_KERNEL);
1168 	if (!cdcfg->cdtab) {
1169 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1170 		ret = -ENOMEM;
1171 		goto err_free_l1;
1172 	}
1173 
1174 	return 0;
1175 
1176 err_free_l1:
1177 	if (cdcfg->l1_desc) {
1178 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1179 		cdcfg->l1_desc = NULL;
1180 	}
1181 	return ret;
1182 }
1183 
1184 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1185 {
1186 	int i;
1187 	size_t size, l1size;
1188 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1189 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1190 
1191 	if (cdcfg->l1_desc) {
1192 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1193 
1194 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1195 			if (!cdcfg->l1_desc[i].l2ptr)
1196 				continue;
1197 
1198 			dmam_free_coherent(smmu->dev, size,
1199 					   cdcfg->l1_desc[i].l2ptr,
1200 					   cdcfg->l1_desc[i].l2ptr_dma);
1201 		}
1202 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1203 		cdcfg->l1_desc = NULL;
1204 
1205 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1206 	} else {
1207 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1208 	}
1209 
1210 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1211 	cdcfg->cdtab_dma = 0;
1212 	cdcfg->cdtab = NULL;
1213 }
1214 
1215 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1216 {
1217 	bool free;
1218 	struct arm_smmu_ctx_desc *old_cd;
1219 
1220 	if (!cd->asid)
1221 		return false;
1222 
1223 	free = refcount_dec_and_test(&cd->refs);
1224 	if (free) {
1225 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1226 		WARN_ON(old_cd != cd);
1227 	}
1228 	return free;
1229 }
1230 
1231 /* Stream table manipulation functions */
1232 static void
1233 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1234 {
1235 	u64 val = 0;
1236 
1237 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1238 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1239 
1240 	/* See comment in arm_smmu_write_ctx_desc() */
1241 	WRITE_ONCE(*dst, cpu_to_le64(val));
1242 }
1243 
1244 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1245 {
1246 	struct arm_smmu_cmdq_ent cmd = {
1247 		.opcode	= CMDQ_OP_CFGI_STE,
1248 		.cfgi	= {
1249 			.sid	= sid,
1250 			.leaf	= true,
1251 		},
1252 	};
1253 
1254 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1255 }
1256 
1257 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1258 				      __le64 *dst)
1259 {
1260 	/*
1261 	 * This is hideously complicated, but we only really care about
1262 	 * three cases at the moment:
1263 	 *
1264 	 * 1. Invalid (all zero) -> bypass/fault (init)
1265 	 * 2. Bypass/fault -> translation/bypass (attach)
1266 	 * 3. Translation/bypass -> bypass/fault (detach)
1267 	 *
1268 	 * Given that we can't update the STE atomically and the SMMU
1269 	 * doesn't read the thing in a defined order, that leaves us
1270 	 * with the following maintenance requirements:
1271 	 *
1272 	 * 1. Update Config, return (init time STEs aren't live)
1273 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1274 	 * 3. Update Config, sync
1275 	 */
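	/*
	 * In terms of the code below, requirement 2 is what the attach path
	 * ends up doing: dwords 1-3 are written first, then
	 * arm_smmu_sync_ste_for_sid(), then dword 0 gains its new CFG and V
	 * bits via WRITE_ONCE(), followed by a final sync (a reading aid for
	 * the sequence at the end of this function).
	 */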
1276 	u64 val = le64_to_cpu(dst[0]);
1277 	bool ste_live = false;
1278 	struct arm_smmu_device *smmu = NULL;
1279 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1280 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1281 	struct arm_smmu_domain *smmu_domain = NULL;
1282 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1283 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1284 		.prefetch	= {
1285 			.sid	= sid,
1286 		},
1287 	};
1288 
1289 	if (master) {
1290 		smmu_domain = master->domain;
1291 		smmu = master->smmu;
1292 	}
1293 
1294 	if (smmu_domain) {
1295 		switch (smmu_domain->stage) {
1296 		case ARM_SMMU_DOMAIN_S1:
1297 			s1_cfg = &smmu_domain->s1_cfg;
1298 			break;
1299 		case ARM_SMMU_DOMAIN_S2:
1300 		case ARM_SMMU_DOMAIN_NESTED:
1301 			s2_cfg = &smmu_domain->s2_cfg;
1302 			break;
1303 		default:
1304 			break;
1305 		}
1306 	}
1307 
1308 	if (val & STRTAB_STE_0_V) {
1309 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1310 		case STRTAB_STE_0_CFG_BYPASS:
1311 			break;
1312 		case STRTAB_STE_0_CFG_S1_TRANS:
1313 		case STRTAB_STE_0_CFG_S2_TRANS:
1314 			ste_live = true;
1315 			break;
1316 		case STRTAB_STE_0_CFG_ABORT:
1317 			BUG_ON(!disable_bypass);
1318 			break;
1319 		default:
1320 			BUG(); /* STE corruption */
1321 		}
1322 	}
1323 
1324 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1325 	val = STRTAB_STE_0_V;
1326 
1327 	/* Bypass/fault */
1328 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1329 		if (!smmu_domain && disable_bypass)
1330 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1331 		else
1332 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1333 
1334 		dst[0] = cpu_to_le64(val);
1335 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1336 						STRTAB_STE_1_SHCFG_INCOMING));
1337 		dst[2] = 0; /* Nuke the VMID */
1338 		/*
1339 		 * The SMMU can perform negative caching, so we must sync
1340 		 * the STE regardless of whether the old value was live.
1341 		 */
1342 		if (smmu)
1343 			arm_smmu_sync_ste_for_sid(smmu, sid);
1344 		return;
1345 	}
1346 
1347 	if (s1_cfg) {
1348 		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1349 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1350 
1351 		BUG_ON(ste_live);
1352 		dst[1] = cpu_to_le64(
1353 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1354 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1355 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1356 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1357 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1358 
1359 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1360 		    !master->stall_enabled)
1361 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1362 
1363 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1364 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1365 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1366 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1367 	}
1368 
1369 	if (s2_cfg) {
1370 		BUG_ON(ste_live);
1371 		dst[2] = cpu_to_le64(
1372 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1373 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1374 #ifdef __BIG_ENDIAN
1375 			 STRTAB_STE_2_S2ENDI |
1376 #endif
1377 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1378 			 STRTAB_STE_2_S2R);
1379 
1380 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1381 
1382 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1383 	}
1384 
1385 	if (master->ats_enabled)
1386 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1387 						 STRTAB_STE_1_EATS_TRANS));
1388 
1389 	arm_smmu_sync_ste_for_sid(smmu, sid);
1390 	/* See comment in arm_smmu_write_ctx_desc() */
1391 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1392 	arm_smmu_sync_ste_for_sid(smmu, sid);
1393 
1394 	/* It's likely that we'll want to use the new STE soon */
1395 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1396 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1397 }
1398 
1399 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
1400 {
1401 	unsigned int i;
1402 	u64 val = STRTAB_STE_0_V;
1403 
1404 	if (disable_bypass && !force)
1405 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1406 	else
1407 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1408 
1409 	for (i = 0; i < nent; ++i) {
1410 		strtab[0] = cpu_to_le64(val);
1411 		strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1412 						   STRTAB_STE_1_SHCFG_INCOMING));
1413 		strtab[2] = 0;
1414 		strtab += STRTAB_STE_DWORDS;
1415 	}
1416 }
1417 
1418 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1419 {
1420 	size_t size;
1421 	void *strtab;
1422 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1423 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1424 
1425 	if (desc->l2ptr)
1426 		return 0;
1427 
1428 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1429 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1430 
1431 	desc->span = STRTAB_SPLIT + 1;
1432 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1433 					  GFP_KERNEL);
1434 	if (!desc->l2ptr) {
1435 		dev_err(smmu->dev,
1436 			"failed to allocate l2 stream table for SID %u\n",
1437 			sid);
1438 		return -ENOMEM;
1439 	}
1440 
1441 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1442 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1443 	return 0;
1444 }
1445 
1446 static struct arm_smmu_master *
1447 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1448 {
1449 	struct rb_node *node;
1450 	struct arm_smmu_stream *stream;
1451 
1452 	lockdep_assert_held(&smmu->streams_mutex);
1453 
1454 	node = smmu->streams.rb_node;
1455 	while (node) {
1456 		stream = rb_entry(node, struct arm_smmu_stream, node);
1457 		if (stream->id < sid)
1458 			node = node->rb_right;
1459 		else if (stream->id > sid)
1460 			node = node->rb_left;
1461 		else
1462 			return stream->master;
1463 	}
1464 
1465 	return NULL;
1466 }
1467 
1468 /* IRQ and event handlers */
1469 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1470 {
1471 	int ret;
1472 	u32 reason;
1473 	u32 perm = 0;
1474 	struct arm_smmu_master *master;
1475 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1476 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1477 	struct iommu_fault_event fault_evt = { };
1478 	struct iommu_fault *flt = &fault_evt.fault;
1479 
1480 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1481 	case EVT_ID_TRANSLATION_FAULT:
1482 		reason = IOMMU_FAULT_REASON_PTE_FETCH;
1483 		break;
1484 	case EVT_ID_ADDR_SIZE_FAULT:
1485 		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1486 		break;
1487 	case EVT_ID_ACCESS_FAULT:
1488 		reason = IOMMU_FAULT_REASON_ACCESS;
1489 		break;
1490 	case EVT_ID_PERMISSION_FAULT:
1491 		reason = IOMMU_FAULT_REASON_PERMISSION;
1492 		break;
1493 	default:
1494 		return -EOPNOTSUPP;
1495 	}
1496 
1497 	/* Stage-2 is always pinned at the moment */
1498 	if (evt[1] & EVTQ_1_S2)
1499 		return -EFAULT;
1500 
1501 	if (evt[1] & EVTQ_1_RnW)
1502 		perm |= IOMMU_FAULT_PERM_READ;
1503 	else
1504 		perm |= IOMMU_FAULT_PERM_WRITE;
1505 
1506 	if (evt[1] & EVTQ_1_InD)
1507 		perm |= IOMMU_FAULT_PERM_EXEC;
1508 
1509 	if (evt[1] & EVTQ_1_PnU)
1510 		perm |= IOMMU_FAULT_PERM_PRIV;
1511 
1512 	if (evt[1] & EVTQ_1_STALL) {
1513 		flt->type = IOMMU_FAULT_PAGE_REQ;
1514 		flt->prm = (struct iommu_fault_page_request) {
1515 			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1516 			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1517 			.perm = perm,
1518 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1519 		};
1520 
1521 		if (ssid_valid) {
1522 			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1523 			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1524 		}
1525 	} else {
1526 		flt->type = IOMMU_FAULT_DMA_UNRECOV;
1527 		flt->event = (struct iommu_fault_unrecoverable) {
1528 			.reason = reason,
1529 			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1530 			.perm = perm,
1531 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1532 		};
1533 
1534 		if (ssid_valid) {
1535 			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1536 			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1537 		}
1538 	}
1539 
1540 	mutex_lock(&smmu->streams_mutex);
1541 	master = arm_smmu_find_master(smmu, sid);
1542 	if (!master) {
1543 		ret = -EINVAL;
1544 		goto out_unlock;
1545 	}
1546 
1547 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1548 	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1549 		/* Nobody cared, abort the access */
1550 		struct iommu_page_response resp = {
1551 			.pasid		= flt->prm.pasid,
1552 			.grpid		= flt->prm.grpid,
1553 			.code		= IOMMU_PAGE_RESP_FAILURE,
1554 		};
1555 		arm_smmu_page_response(master->dev, &fault_evt, &resp);
1556 	}
1557 
1558 out_unlock:
1559 	mutex_unlock(&smmu->streams_mutex);
1560 	return ret;
1561 }
1562 
1563 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1564 {
1565 	int i, ret;
1566 	struct arm_smmu_device *smmu = dev;
1567 	struct arm_smmu_queue *q = &smmu->evtq.q;
1568 	struct arm_smmu_ll_queue *llq = &q->llq;
1569 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1570 				      DEFAULT_RATELIMIT_BURST);
1571 	u64 evt[EVTQ_ENT_DWORDS];
1572 
1573 	do {
1574 		while (!queue_remove_raw(q, evt)) {
1575 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1576 
1577 			ret = arm_smmu_handle_evt(smmu, evt);
1578 			if (!ret || !__ratelimit(&rs))
1579 				continue;
1580 
1581 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1582 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1583 				dev_info(smmu->dev, "\t0x%016llx\n",
1584 					 (unsigned long long)evt[i]);
1585 
1586 			cond_resched();
1587 		}
1588 
1589 		/*
1590 		 * Not much we can do on overflow, so scream and pretend we're
1591 		 * trying harder.
1592 		 */
1593 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1594 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1595 	} while (!queue_empty(llq));
1596 
1597 	/* Sync our overflow flag, as we believe we're up to speed */
1598 	queue_sync_cons_ovf(q);
1599 	return IRQ_HANDLED;
1600 }
1601 
1602 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1603 {
1604 	u32 sid, ssid;
1605 	u16 grpid;
1606 	bool ssv, last;
1607 
1608 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1609 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1610 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1611 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1612 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1613 
1614 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1615 	dev_info(smmu->dev,
1616 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1617 		 sid, ssid, grpid, last ? "L" : "",
1618 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1619 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1620 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1621 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1622 		 evt[1] & PRIQ_1_ADDR_MASK);
1623 
1624 	if (last) {
1625 		struct arm_smmu_cmdq_ent cmd = {
1626 			.opcode			= CMDQ_OP_PRI_RESP,
1627 			.substream_valid	= ssv,
1628 			.pri			= {
1629 				.sid	= sid,
1630 				.ssid	= ssid,
1631 				.grpid	= grpid,
1632 				.resp	= PRI_RESP_DENY,
1633 			},
1634 		};
1635 
1636 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1637 	}
1638 }
1639 
1640 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1641 {
1642 	struct arm_smmu_device *smmu = dev;
1643 	struct arm_smmu_queue *q = &smmu->priq.q;
1644 	struct arm_smmu_ll_queue *llq = &q->llq;
1645 	u64 evt[PRIQ_ENT_DWORDS];
1646 
1647 	do {
1648 		while (!queue_remove_raw(q, evt))
1649 			arm_smmu_handle_ppr(smmu, evt);
1650 
1651 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1652 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1653 	} while (!queue_empty(llq));
1654 
1655 	/* Sync our overflow flag, as we believe we're up to speed */
1656 	queue_sync_cons_ovf(q);
1657 	return IRQ_HANDLED;
1658 }
1659 
1660 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1661 
1662 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1663 {
1664 	u32 gerror, gerrorn, active;
1665 	struct arm_smmu_device *smmu = dev;
1666 
1667 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1668 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1669 
1670 	active = gerror ^ gerrorn;
1671 	if (!(active & GERROR_ERR_MASK))
1672 		return IRQ_NONE; /* No errors pending */
1673 
1674 	dev_warn(smmu->dev,
1675 		 "unexpected global error reported (0x%08x), this could be serious\n",
1676 		 active);
1677 
1678 	if (active & GERROR_SFM_ERR) {
1679 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1680 		arm_smmu_device_disable(smmu);
1681 	}
1682 
1683 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1684 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1685 
1686 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1687 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1688 
1689 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1690 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1691 
1692 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1693 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1694 
1695 	if (active & GERROR_PRIQ_ABT_ERR)
1696 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1697 
1698 	if (active & GERROR_EVTQ_ABT_ERR)
1699 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1700 
1701 	if (active & GERROR_CMDQ_ERR)
1702 		arm_smmu_cmdq_skip_err(smmu);
1703 
1704 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1705 	return IRQ_HANDLED;
1706 }
1707 
1708 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1709 {
1710 	struct arm_smmu_device *smmu = dev;
1711 
1712 	arm_smmu_evtq_thread(irq, dev);
1713 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1714 		arm_smmu_priq_thread(irq, dev);
1715 
1716 	return IRQ_HANDLED;
1717 }
1718 
1719 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1720 {
1721 	arm_smmu_gerror_handler(irq, dev);
1722 	return IRQ_WAKE_THREAD;
1723 }
1724 
1725 static void
1726 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1727 			struct arm_smmu_cmdq_ent *cmd)
1728 {
1729 	size_t log2_span;
1730 	size_t span_mask;
1731 	/* ATC invalidates are always on 4096-byte pages */
1732 	size_t inval_grain_shift = 12;
1733 	unsigned long page_start, page_end;
1734 
1735 	/*
1736 	 * ATS and PASID:
1737 	 *
1738 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1739 	 * prefix. In that case all ATC entries within the address range are
1740 	 * invalidated, including those that were requested with a PASID! There
1741 	 * is no way to invalidate only entries without PASID.
1742 	 *
1743 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1744 	 * traffic), translation requests without PASID create ATC entries
1745 	 * without PASID, which must be invalidated with substream_valid clear.
1746 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1747 	 * ATC entries within the address range.
1748 	 */
1749 	*cmd = (struct arm_smmu_cmdq_ent) {
1750 		.opcode			= CMDQ_OP_ATC_INV,
1751 		.substream_valid	= !!ssid,
1752 		.atc.ssid		= ssid,
1753 	};
1754 
1755 	if (!size) {
1756 		cmd->atc.size = ATC_INV_SIZE_ALL;
1757 		return;
1758 	}
1759 
1760 	page_start	= iova >> inval_grain_shift;
1761 	page_end	= (iova + size - 1) >> inval_grain_shift;
1762 
1763 	/*
1764 	 * In an ATS Invalidate Request, the address must be aligned on the
1765 	 * range size, which must be a power of two number of page sizes. We
1766 	 * thus have to choose between grossly over-invalidating the region, or
1767 	 * splitting the invalidation into multiple commands. For simplicity
1768 	 * we'll go with the first solution, but should refine it in the future
1769 	 * if multiple commands are shown to be more efficient.
1770 	 *
1771 	 * Find the smallest power of two that covers the range. The most
1772 	 * significant differing bit between the start and end addresses,
1773 	 * fls(start ^ end), indicates the required span. For example:
1774 	 *
1775 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1776 	 *		x = 0b1000 ^ 0b1011 = 0b11
1777 	 *		span = 1 << fls(x) = 4
1778 	 *
1779 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1780 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1781 	 *		span = 1 << fls(x) = 16
1782 	 */
1783 	log2_span	= fls_long(page_start ^ page_end);
1784 	span_mask	= (1ULL << log2_span) - 1;
1785 
1786 	page_start	&= ~span_mask;
1787 
1788 	cmd->atc.addr	= page_start << inval_grain_shift;
1789 	cmd->atc.size	= log2_span;
1790 }
1791 
1792 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1793 {
1794 	int i;
1795 	struct arm_smmu_cmdq_ent cmd;
1796 	struct arm_smmu_cmdq_batch cmds;
1797 
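	/*
	 * ssid 0 and size 0 build a CMD_ATC_INV with SSV clear and
	 * ATC_INV_SIZE_ALL, i.e. drop every ATC entry for each of the
	 * master's StreamIDs.
	 */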
1798 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1799 
1800 	cmds.num = 0;
1801 	for (i = 0; i < master->num_streams; i++) {
1802 		cmd.atc.sid = master->streams[i].id;
1803 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1804 	}
1805 
1806 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1807 }
1808 
1809 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1810 			    unsigned long iova, size_t size)
1811 {
1812 	int i;
1813 	unsigned long flags;
1814 	struct arm_smmu_cmdq_ent cmd;
1815 	struct arm_smmu_master *master;
1816 	struct arm_smmu_cmdq_batch cmds;
1817 
1818 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1819 		return 0;
1820 
1821 	/*
1822 	 * Ensure that we've completed prior invalidation of the main TLBs
1823 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1824 	 * arm_smmu_enable_ats():
1825 	 *
1826 	 *	// unmap()			// arm_smmu_enable_ats()
1827 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1828 	 *	smp_mb();			[...]
1829 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1830 	 *
1831 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1832 	 * ATS was enabled at the PCI device before completion of the TLBI.
1833 	 */
1834 	smp_mb();
1835 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1836 		return 0;
1837 
1838 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1839 
1840 	cmds.num = 0;
1841 
1842 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1843 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1844 		if (!master->ats_enabled)
1845 			continue;
1846 
1847 		for (i = 0; i < master->num_streams; i++) {
1848 			cmd.atc.sid = master->streams[i].id;
1849 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1850 		}
1851 	}
1852 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1853 
1854 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1855 }
1856 
1857 /* IO_PGTABLE API */
1858 static void arm_smmu_tlb_inv_context(void *cookie)
1859 {
1860 	struct arm_smmu_domain *smmu_domain = cookie;
1861 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1862 	struct arm_smmu_cmdq_ent cmd;
1863 
1864 	/*
1865 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1866 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1867 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1868 	 * insertion to guarantee those are observed before the TLBI. Do be
1869 	 * careful, 007.
1870 	 */
1871 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1872 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1873 	} else {
1874 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1875 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1876 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1877 	}
1878 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1879 }
1880 
1881 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1882 				     unsigned long iova, size_t size,
1883 				     size_t granule,
1884 				     struct arm_smmu_domain *smmu_domain)
1885 {
1886 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1887 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1888 	size_t inv_range = granule;
1889 	struct arm_smmu_cmdq_batch cmds;
1890 
1891 	if (!size)
1892 		return;
1893 
1894 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1895 		/* Get the leaf page size */
1896 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1897 
1898 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1899 		cmd->tlbi.tg = (tg - 10) / 2;
1900 
1901 		/*
1902 		 * Determine what level the granule is at. For non-leaf, io-pgtable
1903 		 * assumes .tlb_flush_walk can invalidate multiple levels at once,
1904 		 * so ignore the nominal last-level granule and leave TTL=0.
1905 		 */
1906 		if (cmd->tlbi.leaf)
1907 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
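		/*
		 * Worked example with 4KiB leaf pages (tg = 12): a 2MiB block
		 * granule gives 4 - ((21 - 3) / (12 - 3)) = 2, i.e. a hint of
		 * level-2 entries, while a 4KiB granule gives
		 * 4 - ((12 - 3) / (12 - 3)) = 3, the last level.
		 */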
1908 
1909 		num_pages = size >> tg;
1910 	}
1911 
1912 	cmds.num = 0;
1913 
1914 	while (iova < end) {
1915 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1916 			/*
1917 			 * On each iteration of the loop, the range is 5 bits
1918 			 * worth of the aligned size remaining.
1919 			 * The range in pages is:
1920 			 *
1921 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1922 			 */
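			/*
			 * For example, with 4KiB pages (tg = 12) and
			 * num_pages = 48 (0b110000): scale = 4, num = 3, so a
			 * single command covers 3 << (4 + 12) bytes, i.e. all
			 * 48 pages, and num_pages drops to 0.
			 */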
1923 			unsigned long scale, num;
1924 
1925 			/* Determine the power of 2 multiple number of pages */
1926 			scale = __ffs(num_pages);
1927 			cmd->tlbi.scale = scale;
1928 
1929 			/* Determine how many chunks of 2^scale size we have */
1930 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1931 			cmd->tlbi.num = num - 1;
1932 
1933 			/* range is num * 2^scale * pgsize */
1934 			inv_range = num << (scale + tg);
1935 
1936 			/* Clear out the lower order bits for the next iteration */
1937 			num_pages -= num << scale;
1938 		}
1939 
1940 		cmd->tlbi.addr = iova;
1941 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1942 		iova += inv_range;
1943 	}
1944 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1945 }
1946 
1947 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1948 					  size_t granule, bool leaf,
1949 					  struct arm_smmu_domain *smmu_domain)
1950 {
1951 	struct arm_smmu_cmdq_ent cmd = {
1952 		.tlbi = {
1953 			.leaf	= leaf,
1954 		},
1955 	};
1956 
1957 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1958 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1959 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1960 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1961 	} else {
1962 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1963 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1964 	}
1965 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1966 
1967 	/*
1968 	 * Unfortunately, this can't be leaf-only since we may have
1969 	 * zapped an entire table.
1970 	 */
1971 	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1972 }
1973 
1974 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1975 				 size_t granule, bool leaf,
1976 				 struct arm_smmu_domain *smmu_domain)
1977 {
1978 	struct arm_smmu_cmdq_ent cmd = {
1979 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1980 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1981 		.tlbi = {
1982 			.asid	= asid,
1983 			.leaf	= leaf,
1984 		},
1985 	};
1986 
1987 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1988 }
1989 
1990 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1991 					 unsigned long iova, size_t granule,
1992 					 void *cookie)
1993 {
1994 	struct arm_smmu_domain *smmu_domain = cookie;
1995 	struct iommu_domain *domain = &smmu_domain->domain;
1996 
1997 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1998 }
1999 
2000 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2001 				  size_t granule, void *cookie)
2002 {
2003 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2004 }
2005 
2006 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2007 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2008 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2009 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2010 };
2011 
2012 /* IOMMU API */
2013 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2014 {
2015 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2016 
2017 	switch (cap) {
2018 	case IOMMU_CAP_CACHE_COHERENCY:
2019 		/* Assume that a coherent TCU implies coherent TBUs */
2020 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2021 	case IOMMU_CAP_NOEXEC:
2022 	case IOMMU_CAP_DEFERRED_FLUSH:
2023 		return true;
2024 	default:
2025 		return false;
2026 	}
2027 }
2028 
2029 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2030 {
2031 	struct arm_smmu_domain *smmu_domain;
2032 
2033 	if (type == IOMMU_DOMAIN_SVA)
2034 		return arm_smmu_sva_domain_alloc();
2035 
2036 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2037 	    type != IOMMU_DOMAIN_DMA &&
2038 	    type != IOMMU_DOMAIN_IDENTITY)
2039 		return NULL;
2040 
2041 	/*
2042 	 * Allocate the domain and initialise some of its data structures.
2043 	 * We can't really do anything meaningful until we've added a
2044 	 * master.
2045 	 */
2046 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2047 	if (!smmu_domain)
2048 		return NULL;
2049 
2050 	mutex_init(&smmu_domain->init_mutex);
2051 	INIT_LIST_HEAD(&smmu_domain->devices);
2052 	spin_lock_init(&smmu_domain->devices_lock);
2053 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2054 
2055 	return &smmu_domain->domain;
2056 }
2057 
2058 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2059 {
2060 	int idx, size = 1 << span;
2061 
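	/*
	 * find_first_zero_bit() is not atomic with respect to other
	 * allocators, so retry until test_and_set_bit() actually claims a
	 * free bit for us.
	 */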
2062 	do {
2063 		idx = find_first_zero_bit(map, size);
2064 		if (idx == size)
2065 			return -ENOSPC;
2066 	} while (test_and_set_bit(idx, map));
2067 
2068 	return idx;
2069 }
2070 
2071 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2072 {
2073 	clear_bit(idx, map);
2074 }
2075 
2076 static void arm_smmu_domain_free(struct iommu_domain *domain)
2077 {
2078 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2079 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2080 
2081 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2082 
2083 	/* Free the CD and ASID, if we allocated them */
2084 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2085 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2086 
2087 		/* Prevent SVA from touching the CD while we're freeing it */
2088 		mutex_lock(&arm_smmu_asid_lock);
2089 		if (cfg->cdcfg.cdtab)
2090 			arm_smmu_free_cd_tables(smmu_domain);
2091 		arm_smmu_free_asid(&cfg->cd);
2092 		mutex_unlock(&arm_smmu_asid_lock);
2093 	} else {
2094 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2095 		if (cfg->vmid)
2096 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2097 	}
2098 
2099 	kfree(smmu_domain);
2100 }
2101 
2102 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2103 				       struct arm_smmu_master *master,
2104 				       struct io_pgtable_cfg *pgtbl_cfg)
2105 {
2106 	int ret;
2107 	u32 asid;
2108 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2109 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2110 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2111 
2112 	refcount_set(&cfg->cd.refs, 1);
2113 
2114 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2115 	mutex_lock(&arm_smmu_asid_lock);
2116 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2117 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2118 	if (ret)
2119 		goto out_unlock;
2120 
2121 	cfg->s1cdmax = master->ssid_bits;
2122 
2123 	smmu_domain->stall_enabled = master->stall_enabled;
2124 
2125 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2126 	if (ret)
2127 		goto out_free_asid;
2128 
2129 	cfg->cd.asid	= (u16)asid;
2130 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
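	/*
	 * Mirror the TCR computed by io-pgtable into the CD, with TTBR1
	 * walks disabled (EPD1) and the AArch64 descriptor format selected
	 * (AA64).
	 */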
2131 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2132 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2133 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2134 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2135 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2136 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2137 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2138 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2139 
2140 	/*
2141 	 * Note that this will end up calling arm_smmu_sync_cd() before
2142 	 * the master has been added to the devices list for this domain.
2143 	 * This isn't an issue because the STE hasn't been installed yet.
2144 	 */
2145 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2146 	if (ret)
2147 		goto out_free_cd_tables;
2148 
2149 	mutex_unlock(&arm_smmu_asid_lock);
2150 	return 0;
2151 
2152 out_free_cd_tables:
2153 	arm_smmu_free_cd_tables(smmu_domain);
2154 out_free_asid:
2155 	arm_smmu_free_asid(&cfg->cd);
2156 out_unlock:
2157 	mutex_unlock(&arm_smmu_asid_lock);
2158 	return ret;
2159 }
2160 
2161 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2162 				       struct arm_smmu_master *master,
2163 				       struct io_pgtable_cfg *pgtbl_cfg)
2164 {
2165 	int vmid;
2166 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2167 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2168 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2169 
2170 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2171 	if (vmid < 0)
2172 		return vmid;
2173 
2174 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2175 	cfg->vmid	= (u16)vmid;
2176 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
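	/* Repack the VTCR produced by io-pgtable into the STE layout */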
2177 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2178 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2179 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2180 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2181 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2182 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2183 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2184 	return 0;
2185 }
2186 
2187 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2188 				    struct arm_smmu_master *master)
2189 {
2190 	int ret;
2191 	unsigned long ias, oas;
2192 	enum io_pgtable_fmt fmt;
2193 	struct io_pgtable_cfg pgtbl_cfg;
2194 	struct io_pgtable_ops *pgtbl_ops;
2195 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2196 				 struct arm_smmu_master *,
2197 				 struct io_pgtable_cfg *);
2198 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2199 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2200 
2201 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2202 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2203 		return 0;
2204 	}
2205 
2206 	/* Restrict the stage to what we can actually support */
2207 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2208 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2209 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2210 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2211 
2212 	switch (smmu_domain->stage) {
2213 	case ARM_SMMU_DOMAIN_S1:
2214 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2215 		ias = min_t(unsigned long, ias, VA_BITS);
2216 		oas = smmu->ias;
2217 		fmt = ARM_64_LPAE_S1;
2218 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2219 		break;
2220 	case ARM_SMMU_DOMAIN_NESTED:
2221 	case ARM_SMMU_DOMAIN_S2:
2222 		ias = smmu->ias;
2223 		oas = smmu->oas;
2224 		fmt = ARM_64_LPAE_S2;
2225 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2226 		break;
2227 	default:
2228 		return -EINVAL;
2229 	}
2230 
2231 	pgtbl_cfg = (struct io_pgtable_cfg) {
2232 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2233 		.ias		= ias,
2234 		.oas		= oas,
2235 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2236 		.tlb		= &arm_smmu_flush_ops,
2237 		.iommu_dev	= smmu->dev,
2238 	};
2239 
2240 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2241 	if (!pgtbl_ops)
2242 		return -ENOMEM;
2243 
2244 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2245 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2246 	domain->geometry.force_aperture = true;
2247 
2248 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2249 	if (ret < 0) {
2250 		free_io_pgtable_ops(pgtbl_ops);
2251 		return ret;
2252 	}
2253 
2254 	smmu_domain->pgtbl_ops = pgtbl_ops;
2255 	return 0;
2256 }
2257 
2258 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2259 {
2260 	__le64 *step;
2261 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2262 
2263 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2264 		struct arm_smmu_strtab_l1_desc *l1_desc;
2265 		int idx;
2266 
2267 		/* Two-level walk */
2268 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2269 		l1_desc = &cfg->l1_desc[idx];
2270 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2271 		step = &l1_desc->l2ptr[idx];
2272 	} else {
2273 		/* Simple linear lookup */
2274 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2275 	}
2276 
2277 	return step;
2278 }
2279 
2280 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2281 {
2282 	int i, j;
2283 	struct arm_smmu_device *smmu = master->smmu;
2284 
2285 	for (i = 0; i < master->num_streams; ++i) {
2286 		u32 sid = master->streams[i].id;
2287 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2288 
2289 		/* Bridged PCI devices may end up with duplicated IDs */
2290 		for (j = 0; j < i; j++)
2291 			if (master->streams[j].id == sid)
2292 				break;
2293 		if (j < i)
2294 			continue;
2295 
2296 		arm_smmu_write_strtab_ent(master, sid, step);
2297 	}
2298 }
2299 
2300 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2301 {
2302 	struct device *dev = master->dev;
2303 	struct arm_smmu_device *smmu = master->smmu;
2304 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2305 
2306 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2307 		return false;
2308 
2309 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2310 		return false;
2311 
2312 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2313 }
2314 
2315 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2316 {
2317 	size_t stu;
2318 	struct pci_dev *pdev;
2319 	struct arm_smmu_device *smmu = master->smmu;
2320 	struct arm_smmu_domain *smmu_domain = master->domain;
2321 
2322 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2323 	if (!master->ats_enabled)
2324 		return;
2325 
2326 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2327 	stu = __ffs(smmu->pgsize_bitmap);
2328 	pdev = to_pci_dev(master->dev);
2329 
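	/*
	 * Bump nr_ats_masters and clean the ATC before ATS is enabled at the
	 * endpoint; together with the smp_mb() in arm_smmu_atc_inv_domain()
	 * this is intended to stop a concurrent unmap from missing this
	 * master's ATC.
	 */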
2330 	atomic_inc(&smmu_domain->nr_ats_masters);
2331 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2332 	if (pci_enable_ats(pdev, stu))
2333 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2334 }
2335 
2336 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2337 {
2338 	struct arm_smmu_domain *smmu_domain = master->domain;
2339 
2340 	if (!master->ats_enabled)
2341 		return;
2342 
2343 	pci_disable_ats(to_pci_dev(master->dev));
2344 	/*
2345 	 * Ensure ATS is disabled at the endpoint before we issue the
2346 	 * ATC invalidation via the SMMU.
2347 	 */
2348 	wmb();
2349 	arm_smmu_atc_inv_master(master);
2350 	atomic_dec(&smmu_domain->nr_ats_masters);
2351 }
2352 
2353 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2354 {
2355 	int ret;
2356 	int features;
2357 	int num_pasids;
2358 	struct pci_dev *pdev;
2359 
2360 	if (!dev_is_pci(master->dev))
2361 		return -ENODEV;
2362 
2363 	pdev = to_pci_dev(master->dev);
2364 
2365 	features = pci_pasid_features(pdev);
2366 	if (features < 0)
2367 		return features;
2368 
2369 	num_pasids = pci_max_pasids(pdev);
2370 	if (num_pasids <= 0)
2371 		return num_pasids;
2372 
2373 	ret = pci_enable_pasid(pdev, features);
2374 	if (ret) {
2375 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2376 		return ret;
2377 	}
2378 
2379 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2380 				  master->smmu->ssid_bits);
2381 	return 0;
2382 }
2383 
2384 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2385 {
2386 	struct pci_dev *pdev;
2387 
2388 	if (!dev_is_pci(master->dev))
2389 		return;
2390 
2391 	pdev = to_pci_dev(master->dev);
2392 
2393 	if (!pdev->pasid_enabled)
2394 		return;
2395 
2396 	master->ssid_bits = 0;
2397 	pci_disable_pasid(pdev);
2398 }
2399 
2400 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2401 {
2402 	unsigned long flags;
2403 	struct arm_smmu_domain *smmu_domain = master->domain;
2404 
2405 	if (!smmu_domain)
2406 		return;
2407 
2408 	arm_smmu_disable_ats(master);
2409 
2410 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2411 	list_del(&master->domain_head);
2412 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2413 
2414 	master->domain = NULL;
2415 	master->ats_enabled = false;
2416 	arm_smmu_install_ste_for_dev(master);
2417 }
2418 
2419 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2420 {
2421 	int ret = 0;
2422 	unsigned long flags;
2423 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2424 	struct arm_smmu_device *smmu;
2425 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2426 	struct arm_smmu_master *master;
2427 
2428 	if (!fwspec)
2429 		return -ENOENT;
2430 
2431 	master = dev_iommu_priv_get(dev);
2432 	smmu = master->smmu;
2433 
2434 	/*
2435 	 * Checking that SVA is disabled ensures that this device isn't bound to
2436 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2437 	 * be removed concurrently since we're holding the group mutex.
2438 	 */
2439 	if (arm_smmu_master_sva_enabled(master)) {
2440 		dev_err(dev, "cannot attach - SVA enabled\n");
2441 		return -EBUSY;
2442 	}
2443 
2444 	arm_smmu_detach_dev(master);
2445 
2446 	mutex_lock(&smmu_domain->init_mutex);
2447 
2448 	if (!smmu_domain->smmu) {
2449 		smmu_domain->smmu = smmu;
2450 		ret = arm_smmu_domain_finalise(domain, master);
2451 		if (ret) {
2452 			smmu_domain->smmu = NULL;
2453 			goto out_unlock;
2454 		}
2455 	} else if (smmu_domain->smmu != smmu) {
2456 		ret = -EINVAL;
2457 		goto out_unlock;
2458 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2459 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2460 		ret = -EINVAL;
2461 		goto out_unlock;
2462 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2463 		   smmu_domain->stall_enabled != master->stall_enabled) {
2464 		ret = -EINVAL;
2465 		goto out_unlock;
2466 	}
2467 
2468 	master->domain = smmu_domain;
2469 
2470 	/*
2471 	 * The SMMU does not support enabling ATS with bypass. When the STE is
2472 	 * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
2473 	 * Translated transactions are denied as though ATS is disabled for the
2474 	 * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2475 	 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2476 	 */
2477 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2478 		master->ats_enabled = arm_smmu_ats_supported(master);
2479 
2480 	arm_smmu_install_ste_for_dev(master);
2481 
2482 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2483 	list_add(&master->domain_head, &smmu_domain->devices);
2484 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2485 
2486 	arm_smmu_enable_ats(master);
2487 
2488 out_unlock:
2489 	mutex_unlock(&smmu_domain->init_mutex);
2490 	return ret;
2491 }
2492 
2493 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2494 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2495 			      int prot, gfp_t gfp, size_t *mapped)
2496 {
2497 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2498 
2499 	if (!ops)
2500 		return -ENODEV;
2501 
2502 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2503 }
2504 
2505 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2506 				   size_t pgsize, size_t pgcount,
2507 				   struct iommu_iotlb_gather *gather)
2508 {
2509 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2510 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2511 
2512 	if (!ops)
2513 		return 0;
2514 
2515 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2516 }
2517 
2518 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2519 {
2520 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2521 
2522 	if (smmu_domain->smmu)
2523 		arm_smmu_tlb_inv_context(smmu_domain);
2524 }
2525 
2526 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2527 				struct iommu_iotlb_gather *gather)
2528 {
2529 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2530 
2531 	if (!gather->pgsize)
2532 		return;
2533 
2534 	arm_smmu_tlb_inv_range_domain(gather->start,
2535 				      gather->end - gather->start + 1,
2536 				      gather->pgsize, true, smmu_domain);
2537 }
2538 
2539 static phys_addr_t
2540 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2541 {
2542 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2543 
2544 	if (!ops)
2545 		return 0;
2546 
2547 	return ops->iova_to_phys(ops, iova);
2548 }
2549 
2550 static struct platform_driver arm_smmu_driver;
2551 
2552 static
2553 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2554 {
2555 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2556 							  fwnode);
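	/*
	 * driver_find_device_by_fwnode() takes a reference on the device;
	 * we only need its drvdata here, so drop the reference straight away.
	 */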
2557 	put_device(dev);
2558 	return dev ? dev_get_drvdata(dev) : NULL;
2559 }
2560 
2561 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2562 {
2563 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2564 
2565 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2566 		limit *= 1UL << STRTAB_SPLIT;
2567 
2568 	return sid < limit;
2569 }
2570 
2571 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2572 {
2573 	/* Check the SIDs are in range of the SMMU and our stream table */
2574 	if (!arm_smmu_sid_in_range(smmu, sid))
2575 		return -ERANGE;
2576 
2577 	/* Ensure l2 strtab is initialised */
2578 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2579 		return arm_smmu_init_l2_strtab(smmu, sid);
2580 
2581 	return 0;
2582 }
2583 
2584 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2585 				  struct arm_smmu_master *master)
2586 {
2587 	int i;
2588 	int ret = 0;
2589 	struct arm_smmu_stream *new_stream, *cur_stream;
2590 	struct rb_node **new_node, *parent_node = NULL;
2591 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2592 
2593 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2594 				  GFP_KERNEL);
2595 	if (!master->streams)
2596 		return -ENOMEM;
2597 	master->num_streams = fwspec->num_ids;
2598 
2599 	mutex_lock(&smmu->streams_mutex);
2600 	for (i = 0; i < fwspec->num_ids; i++) {
2601 		u32 sid = fwspec->ids[i];
2602 
2603 		new_stream = &master->streams[i];
2604 		new_stream->id = sid;
2605 		new_stream->master = master;
2606 
2607 		ret = arm_smmu_init_sid_strtab(smmu, sid);
2608 		if (ret)
2609 			break;
2610 
2611 		/* Insert into SID tree */
2612 		new_node = &(smmu->streams.rb_node);
2613 		while (*new_node) {
2614 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2615 					      node);
2616 			parent_node = *new_node;
2617 			if (cur_stream->id > new_stream->id) {
2618 				new_node = &((*new_node)->rb_left);
2619 			} else if (cur_stream->id < new_stream->id) {
2620 				new_node = &((*new_node)->rb_right);
2621 			} else {
2622 				dev_warn(master->dev,
2623 					 "stream %u already in tree\n",
2624 					 cur_stream->id);
2625 				ret = -EINVAL;
2626 				break;
2627 			}
2628 		}
2629 		if (ret)
2630 			break;
2631 
2632 		rb_link_node(&new_stream->node, parent_node, new_node);
2633 		rb_insert_color(&new_stream->node, &smmu->streams);
2634 	}
2635 
2636 	if (ret) {
2637 		for (i--; i >= 0; i--)
2638 			rb_erase(&master->streams[i].node, &smmu->streams);
2639 		kfree(master->streams);
2640 	}
2641 	mutex_unlock(&smmu->streams_mutex);
2642 
2643 	return ret;
2644 }
2645 
2646 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2647 {
2648 	int i;
2649 	struct arm_smmu_device *smmu = master->smmu;
2650 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2651 
2652 	if (!smmu || !master->streams)
2653 		return;
2654 
2655 	mutex_lock(&smmu->streams_mutex);
2656 	for (i = 0; i < fwspec->num_ids; i++)
2657 		rb_erase(&master->streams[i].node, &smmu->streams);
2658 	mutex_unlock(&smmu->streams_mutex);
2659 
2660 	kfree(master->streams);
2661 }
2662 
2663 static struct iommu_ops arm_smmu_ops;
2664 
2665 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2666 {
2667 	int ret;
2668 	struct arm_smmu_device *smmu;
2669 	struct arm_smmu_master *master;
2670 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2671 
2672 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2673 		return ERR_PTR(-ENODEV);
2674 
2675 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2676 		return ERR_PTR(-EBUSY);
2677 
2678 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2679 	if (!smmu)
2680 		return ERR_PTR(-ENODEV);
2681 
2682 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2683 	if (!master)
2684 		return ERR_PTR(-ENOMEM);
2685 
2686 	master->dev = dev;
2687 	master->smmu = smmu;
2688 	INIT_LIST_HEAD(&master->bonds);
2689 	dev_iommu_priv_set(dev, master);
2690 
2691 	ret = arm_smmu_insert_master(smmu, master);
2692 	if (ret)
2693 		goto err_free_master;
2694 
2695 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2696 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2697 
2698 	/*
2699 	 * Note that PASID must be enabled before, and disabled after ATS:
2700 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2701 	 *
2702 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2703 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2704 	 *   are changed.
2705 	 */
2706 	arm_smmu_enable_pasid(master);
2707 
2708 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2709 		master->ssid_bits = min_t(u8, master->ssid_bits,
2710 					  CTXDESC_LINEAR_CDMAX);
2711 
2712 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2713 	     device_property_read_bool(dev, "dma-can-stall")) ||
2714 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2715 		master->stall_enabled = true;
2716 
2717 	return &smmu->iommu;
2718 
2719 err_free_master:
2720 	kfree(master);
2721 	dev_iommu_priv_set(dev, NULL);
2722 	return ERR_PTR(ret);
2723 }
2724 
2725 static void arm_smmu_release_device(struct device *dev)
2726 {
2727 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2728 
2729 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2730 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2731 	arm_smmu_detach_dev(master);
2732 	arm_smmu_disable_pasid(master);
2733 	arm_smmu_remove_master(master);
2734 	kfree(master);
2735 }
2736 
2737 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2738 {
2739 	struct iommu_group *group;
2740 
2741 	/*
2742 	 * We don't support devices sharing stream IDs other than PCI RID
2743 	 * aliases, since the necessary ID-to-device lookup becomes rather
2744 	 * impractical given a potential sparse 32-bit stream ID space.
2745 	 */
2746 	if (dev_is_pci(dev))
2747 		group = pci_device_group(dev);
2748 	else
2749 		group = generic_device_group(dev);
2750 
2751 	return group;
2752 }
2753 
2754 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2755 {
2756 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2757 	int ret = 0;
2758 
2759 	mutex_lock(&smmu_domain->init_mutex);
2760 	if (smmu_domain->smmu)
2761 		ret = -EPERM;
2762 	else
2763 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2764 	mutex_unlock(&smmu_domain->init_mutex);
2765 
2766 	return ret;
2767 }
2768 
2769 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2770 {
2771 	return iommu_fwspec_add_ids(dev, args->args, 1);
2772 }
2773 
2774 static void arm_smmu_get_resv_regions(struct device *dev,
2775 				      struct list_head *head)
2776 {
2777 	struct iommu_resv_region *region;
2778 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2779 
2780 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2781 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2782 	if (!region)
2783 		return;
2784 
2785 	list_add_tail(&region->list, head);
2786 
2787 	iommu_dma_get_resv_regions(dev, head);
2788 }
2789 
2790 static int arm_smmu_dev_enable_feature(struct device *dev,
2791 				       enum iommu_dev_features feat)
2792 {
2793 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2794 
2795 	if (!master)
2796 		return -ENODEV;
2797 
2798 	switch (feat) {
2799 	case IOMMU_DEV_FEAT_IOPF:
2800 		if (!arm_smmu_master_iopf_supported(master))
2801 			return -EINVAL;
2802 		if (master->iopf_enabled)
2803 			return -EBUSY;
2804 		master->iopf_enabled = true;
2805 		return 0;
2806 	case IOMMU_DEV_FEAT_SVA:
2807 		if (!arm_smmu_master_sva_supported(master))
2808 			return -EINVAL;
2809 		if (arm_smmu_master_sva_enabled(master))
2810 			return -EBUSY;
2811 		return arm_smmu_master_enable_sva(master);
2812 	default:
2813 		return -EINVAL;
2814 	}
2815 }
2816 
2817 static int arm_smmu_dev_disable_feature(struct device *dev,
2818 					enum iommu_dev_features feat)
2819 {
2820 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2821 
2822 	if (!master)
2823 		return -EINVAL;
2824 
2825 	switch (feat) {
2826 	case IOMMU_DEV_FEAT_IOPF:
2827 		if (!master->iopf_enabled)
2828 			return -EINVAL;
2829 		if (master->sva_enabled)
2830 			return -EBUSY;
2831 		master->iopf_enabled = false;
2832 		return 0;
2833 	case IOMMU_DEV_FEAT_SVA:
2834 		if (!arm_smmu_master_sva_enabled(master))
2835 			return -EINVAL;
2836 		return arm_smmu_master_disable_sva(master);
2837 	default:
2838 		return -EINVAL;
2839 	}
2840 }
2841 
2842 /*
2843  * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
2844  * PCIe link and save the data to memory by DMA. The hardware is restricted to
2845  * use identity mapping only.
2846  */
2847 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
2848 					 (pdev)->device == 0xa12e)
2849 
2850 static int arm_smmu_def_domain_type(struct device *dev)
2851 {
2852 	if (dev_is_pci(dev)) {
2853 		struct pci_dev *pdev = to_pci_dev(dev);
2854 
2855 		if (IS_HISI_PTT_DEVICE(pdev))
2856 			return IOMMU_DOMAIN_IDENTITY;
2857 	}
2858 
2859 	return 0;
2860 }
2861 
2862 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
2863 {
2864 	struct iommu_domain *domain;
2865 
2866 	domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
2867 	if (WARN_ON(IS_ERR(domain)) || !domain)
2868 		return;
2869 
2870 	arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
2871 }
2872 
2873 static struct iommu_ops arm_smmu_ops = {
2874 	.capable		= arm_smmu_capable,
2875 	.domain_alloc		= arm_smmu_domain_alloc,
2876 	.probe_device		= arm_smmu_probe_device,
2877 	.release_device		= arm_smmu_release_device,
2878 	.device_group		= arm_smmu_device_group,
2879 	.of_xlate		= arm_smmu_of_xlate,
2880 	.get_resv_regions	= arm_smmu_get_resv_regions,
2881 	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
2882 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2883 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2884 	.page_response		= arm_smmu_page_response,
2885 	.def_domain_type	= arm_smmu_def_domain_type,
2886 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2887 	.owner			= THIS_MODULE,
2888 	.default_domain_ops = &(const struct iommu_domain_ops) {
2889 		.attach_dev		= arm_smmu_attach_dev,
2890 		.map_pages		= arm_smmu_map_pages,
2891 		.unmap_pages		= arm_smmu_unmap_pages,
2892 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2893 		.iotlb_sync		= arm_smmu_iotlb_sync,
2894 		.iova_to_phys		= arm_smmu_iova_to_phys,
2895 		.enable_nesting		= arm_smmu_enable_nesting,
2896 		.free			= arm_smmu_domain_free,
2897 	}
2898 };
2899 
2900 /* Probing and initialisation functions */
2901 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2902 				   struct arm_smmu_queue *q,
2903 				   void __iomem *page,
2904 				   unsigned long prod_off,
2905 				   unsigned long cons_off,
2906 				   size_t dwords, const char *name)
2907 {
2908 	size_t qsz;
2909 
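	/*
	 * Keep halving the queue depth on allocation failure; give up once
	 * an attempt smaller than a page has also failed.
	 */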
2910 	do {
2911 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2912 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2913 					      GFP_KERNEL);
2914 		if (q->base || qsz < PAGE_SIZE)
2915 			break;
2916 
2917 		q->llq.max_n_shift--;
2918 	} while (1);
2919 
2920 	if (!q->base) {
2921 		dev_err(smmu->dev,
2922 			"failed to allocate queue (0x%zx bytes) for %s\n",
2923 			qsz, name);
2924 		return -ENOMEM;
2925 	}
2926 
2927 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2928 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2929 			 1 << q->llq.max_n_shift, name);
2930 	}
2931 
2932 	q->prod_reg	= page + prod_off;
2933 	q->cons_reg	= page + cons_off;
2934 	q->ent_dwords	= dwords;
2935 
2936 	q->q_base  = Q_BASE_RWA;
2937 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2938 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2939 
2940 	q->llq.prod = q->llq.cons = 0;
2941 	return 0;
2942 }
2943 
2944 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2945 {
2946 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2947 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2948 
2949 	atomic_set(&cmdq->owner_prod, 0);
2950 	atomic_set(&cmdq->lock, 0);
2951 
2952 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2953 							      GFP_KERNEL);
2954 	if (!cmdq->valid_map)
2955 		return -ENOMEM;
2956 
2957 	return 0;
2958 }
2959 
2960 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2961 {
2962 	int ret;
2963 
2964 	/* cmdq */
2965 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2966 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2967 				      CMDQ_ENT_DWORDS, "cmdq");
2968 	if (ret)
2969 		return ret;
2970 
2971 	ret = arm_smmu_cmdq_init(smmu);
2972 	if (ret)
2973 		return ret;
2974 
2975 	/* evtq */
2976 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2977 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2978 				      EVTQ_ENT_DWORDS, "evtq");
2979 	if (ret)
2980 		return ret;
2981 
2982 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2983 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2984 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2985 		if (!smmu->evtq.iopf)
2986 			return -ENOMEM;
2987 	}
2988 
2989 	/* priq */
2990 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2991 		return 0;
2992 
2993 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2994 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2995 				       PRIQ_ENT_DWORDS, "priq");
2996 }
2997 
2998 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2999 {
3000 	unsigned int i;
3001 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3002 	void *strtab = smmu->strtab_cfg.strtab;
3003 
3004 	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
3005 				    sizeof(*cfg->l1_desc), GFP_KERNEL);
3006 	if (!cfg->l1_desc)
3007 		return -ENOMEM;
3008 
3009 	for (i = 0; i < cfg->num_l1_ents; ++i) {
3010 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3011 		strtab += STRTAB_L1_DESC_DWORDS << 3;
3012 	}
3013 
3014 	return 0;
3015 }
3016 
3017 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3018 {
3019 	void *strtab;
3020 	u64 reg;
3021 	u32 size, l1size;
3022 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3023 
3024 	/* Calculate the L1 size, capped to the SIDSIZE. */
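	/*
	 * 2^size L1 descriptors fit in the largest table we are prepared to
	 * allocate (1 << STRTAB_L1_SZ_SHIFT bytes, with descriptors of
	 * STRTAB_L1_DESC_DWORDS * 8 bytes each), further limited to the SID
	 * bits left once STRTAB_SPLIT bits are resolved by the L2 tables.
	 */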
3025 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3026 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3027 	cfg->num_l1_ents = 1 << size;
3028 
3029 	size += STRTAB_SPLIT;
3030 	if (size < smmu->sid_bits)
3031 		dev_warn(smmu->dev,
3032 			 "2-level strtab only covers %u/%u bits of SID\n",
3033 			 size, smmu->sid_bits);
3034 
3035 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3036 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3037 				     GFP_KERNEL);
3038 	if (!strtab) {
3039 		dev_err(smmu->dev,
3040 			"failed to allocate l1 stream table (%u bytes)\n",
3041 			l1size);
3042 		return -ENOMEM;
3043 	}
3044 	cfg->strtab = strtab;
3045 
3046 	/* Configure strtab_base_cfg for 2 levels */
3047 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3048 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3049 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3050 	cfg->strtab_base_cfg = reg;
3051 
3052 	return arm_smmu_init_l1_strtab(smmu);
3053 }
3054 
3055 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3056 {
3057 	void *strtab;
3058 	u64 reg;
3059 	u32 size;
3060 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3061 
3062 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3063 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3064 				     GFP_KERNEL);
3065 	if (!strtab) {
3066 		dev_err(smmu->dev,
3067 			"failed to allocate linear stream table (%u bytes)\n",
3068 			size);
3069 		return -ENOMEM;
3070 	}
3071 	cfg->strtab = strtab;
3072 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3073 
3074 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3075 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3076 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3077 	cfg->strtab_base_cfg = reg;
3078 
3079 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
3080 	return 0;
3081 }
3082 
3083 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3084 {
3085 	u64 reg;
3086 	int ret;
3087 
3088 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3089 		ret = arm_smmu_init_strtab_2lvl(smmu);
3090 	else
3091 		ret = arm_smmu_init_strtab_linear(smmu);
3092 
3093 	if (ret)
3094 		return ret;
3095 
3096 	/* Set the strtab base address */
3097 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3098 	reg |= STRTAB_BASE_RA;
3099 	smmu->strtab_cfg.strtab_base = reg;
3100 
3101 	/* Allocate the first VMID for stage-2 bypass STEs */
3102 	set_bit(0, smmu->vmid_map);
3103 	return 0;
3104 }
3105 
3106 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3107 {
3108 	int ret;
3109 
3110 	mutex_init(&smmu->streams_mutex);
3111 	smmu->streams = RB_ROOT;
3112 
3113 	ret = arm_smmu_init_queues(smmu);
3114 	if (ret)
3115 		return ret;
3116 
3117 	return arm_smmu_init_strtab(smmu);
3118 }
3119 
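/*
 * Write a control register and wait for the SMMU to mirror the value back in
 * the corresponding ACK register (e.g. CR0/CR0ACK), bounded by
 * ARM_SMMU_POLL_TIMEOUT_US.
 */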
3120 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3121 				   unsigned int reg_off, unsigned int ack_off)
3122 {
3123 	u32 reg;
3124 
3125 	writel_relaxed(val, smmu->base + reg_off);
3126 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3127 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3128 }
3129 
3130 /* GBPA is "special" */
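/*
 * Updates follow the architected handshake: wait for GBPA.Update to read as
 * zero, write the new fields with Update set, then wait for the SMMU to clear
 * Update again to acknowledge the change.
 */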
3131 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3132 {
3133 	int ret;
3134 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3135 
3136 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3137 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3138 	if (ret)
3139 		return ret;
3140 
3141 	reg &= ~clr;
3142 	reg |= set;
3143 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3144 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3145 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3146 
3147 	if (ret)
3148 		dev_err(smmu->dev, "GBPA not responding to update\n");
3149 	return ret;
3150 }
3151 
3152 static void arm_smmu_free_msis(void *data)
3153 {
3154 	struct device *dev = data;
3155 	platform_msi_domain_free_irqs(dev);
3156 }
3157 
3158 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3159 {
3160 	phys_addr_t doorbell;
3161 	struct device *dev = msi_desc_to_dev(desc);
3162 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3163 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3164 
3165 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3166 	doorbell &= MSI_CFG0_ADDR_MASK;
3167 
3168 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3169 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3170 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3171 }
3172 
3173 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3174 {
3175 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3176 	struct device *dev = smmu->dev;
3177 
3178 	/* Clear the MSI address regs */
3179 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3180 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3181 
3182 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3183 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3184 	else
3185 		nvec--;
3186 
3187 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3188 		return;
3189 
3190 	if (!dev->msi.domain) {
3191 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3192 		return;
3193 	}
3194 
3195 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3196 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3197 	if (ret) {
3198 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3199 		return;
3200 	}
3201 
3202 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3203 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3204 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3205 
3206 	/* Add callback to free MSIs on teardown */
3207 	devm_add_action(dev, arm_smmu_free_msis, dev);
3208 }
3209 
3210 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3211 {
3212 	int irq, ret;
3213 
3214 	arm_smmu_setup_msis(smmu);
3215 
3216 	/* Request interrupt lines */
3217 	irq = smmu->evtq.q.irq;
3218 	if (irq) {
3219 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3220 						arm_smmu_evtq_thread,
3221 						IRQF_ONESHOT,
3222 						"arm-smmu-v3-evtq", smmu);
3223 		if (ret < 0)
3224 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3225 	} else {
3226 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3227 	}
3228 
3229 	irq = smmu->gerr_irq;
3230 	if (irq) {
3231 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3232 				       0, "arm-smmu-v3-gerror", smmu);
3233 		if (ret < 0)
3234 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3235 	} else {
3236 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3237 	}
3238 
3239 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3240 		irq = smmu->priq.q.irq;
3241 		if (irq) {
3242 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3243 							arm_smmu_priq_thread,
3244 							IRQF_ONESHOT,
3245 							"arm-smmu-v3-priq",
3246 							smmu);
3247 			if (ret < 0)
3248 				dev_warn(smmu->dev,
3249 					 "failed to enable priq irq\n");
3250 		} else {
3251 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3252 		}
3253 	}
3254 }
3255 
3256 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3257 {
3258 	int ret, irq;
3259 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3260 
3261 	/* Disable IRQs first */
3262 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3263 				      ARM_SMMU_IRQ_CTRLACK);
3264 	if (ret) {
3265 		dev_err(smmu->dev, "failed to disable irqs\n");
3266 		return ret;
3267 	}
3268 
3269 	irq = smmu->combined_irq;
3270 	if (irq) {
3271 		/*
3272 		 * Cavium ThunderX2 implementation doesn't support unique irq
3273 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3274 		 */
3275 		ret = devm_request_threaded_irq(smmu->dev, irq,
3276 					arm_smmu_combined_irq_handler,
3277 					arm_smmu_combined_irq_thread,
3278 					IRQF_ONESHOT,
3279 					"arm-smmu-v3-combined-irq", smmu);
3280 		if (ret < 0)
3281 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3282 	} else
3283 		arm_smmu_setup_unique_irqs(smmu);
3284 
3285 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3286 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3287 
3288 	/* Enable interrupt generation on the SMMU */
3289 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3290 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3291 	if (ret)
3292 		dev_warn(smmu->dev, "failed to enable irqs\n");
3293 
3294 	return 0;
3295 }
3296 
3297 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3298 {
3299 	int ret;
3300 
3301 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3302 	if (ret)
3303 		dev_err(smmu->dev, "failed to clear cr0\n");
3304 
3305 	return ret;
3306 }
3307 
3308 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3309 {
3310 	int ret;
3311 	u32 reg, enables;
3312 	struct arm_smmu_cmdq_ent cmd;
3313 
3314 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3315 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3316 	if (reg & CR0_SMMUEN) {
3317 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3318 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3319 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3320 	}
3321 
3322 	ret = arm_smmu_device_disable(smmu);
3323 	if (ret)
3324 		return ret;
3325 
3326 	/* CR1 (table and queue memory attributes) */
3327 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3328 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3329 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3330 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3331 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3332 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3333 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3334 
3335 	/* CR2 (random crap) */
3336 	reg = CR2_PTM | CR2_RECINVSID;
3337 
3338 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3339 		reg |= CR2_E2H;
3340 
3341 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3342 
3343 	/* Stream table */
3344 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3345 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3346 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3347 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3348 
3349 	/* Command queue */
3350 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3351 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3352 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3353 
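	/* Enable the command queue first; the invalidations below need it */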
3354 	enables = CR0_CMDQEN;
3355 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3356 				      ARM_SMMU_CR0ACK);
3357 	if (ret) {
3358 		dev_err(smmu->dev, "failed to enable command queue\n");
3359 		return ret;
3360 	}
3361 
3362 	/* Invalidate any cached configuration */
3363 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3364 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3365 
3366 	/* Invalidate any stale TLB entries */
3367 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3368 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3369 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3370 	}
3371 
3372 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3373 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3374 
3375 	/* Event queue */
3376 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3377 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3378 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3379 
3380 	enables |= CR0_EVTQEN;
3381 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3382 				      ARM_SMMU_CR0ACK);
3383 	if (ret) {
3384 		dev_err(smmu->dev, "failed to enable event queue\n");
3385 		return ret;
3386 	}
3387 
3388 	/* PRI queue */
3389 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3390 		writeq_relaxed(smmu->priq.q.q_base,
3391 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3392 		writel_relaxed(smmu->priq.q.llq.prod,
3393 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3394 		writel_relaxed(smmu->priq.q.llq.cons,
3395 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3396 
3397 		enables |= CR0_PRIQEN;
3398 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3399 					      ARM_SMMU_CR0ACK);
3400 		if (ret) {
3401 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3402 			return ret;
3403 		}
3404 	}
3405 
3406 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3407 		enables |= CR0_ATSCHK;
3408 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3409 					      ARM_SMMU_CR0ACK);
3410 		if (ret) {
3411 			dev_err(smmu->dev, "failed to enable ATS check\n");
3412 			return ret;
3413 		}
3414 	}
3415 
3416 	ret = arm_smmu_setup_irqs(smmu);
3417 	if (ret) {
3418 		dev_err(smmu->dev, "failed to setup irqs\n");
3419 		return ret;
3420 	}
3421 
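	/*
	 * Leave the event and PRI queues disabled in a kdump kernel,
	 * presumably to avoid servicing a flood of faults from devices the
	 * crashed kernel left running.
	 */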
3422 	if (is_kdump_kernel())
3423 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3424 
3425 	/* Enable the SMMU interface, or ensure bypass */
3426 	if (!bypass || disable_bypass) {
3427 		enables |= CR0_SMMUEN;
3428 	} else {
3429 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3430 		if (ret)
3431 			return ret;
3432 	}
3433 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3434 				      ARM_SMMU_CR0ACK);
3435 	if (ret) {
3436 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3437 		return ret;
3438 	}
3439 
3440 	return 0;
3441 }
3442 
3443 #define IIDR_IMPLEMENTER_ARM		0x43b
3444 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
3445 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
3446 
3447 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3448 {
3449 	u32 reg;
3450 	unsigned int implementer, productid, variant, revision;
3451 
3452 	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3453 	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3454 	productid = FIELD_GET(IIDR_PRODUCTID, reg);
3455 	variant = FIELD_GET(IIDR_VARIANT, reg);
3456 	revision = FIELD_GET(IIDR_REVISION, reg);
3457 
3458 	switch (implementer) {
3459 	case IIDR_IMPLEMENTER_ARM:
3460 		switch (productid) {
3461 		case IIDR_PRODUCTID_ARM_MMU_600:
3462 			/* Arm erratum 1076982 */
3463 			if (variant == 0 && revision <= 2)
3464 				smmu->features &= ~ARM_SMMU_FEAT_SEV;
3465 			/* Arm erratum 1209401 */
3466 			if (variant < 2)
3467 				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3468 			break;
3469 		case IIDR_PRODUCTID_ARM_MMU_700:
3470 			/* Arm erratum 2812531 */
3471 			smmu->features &= ~ARM_SMMU_FEAT_BTM;
3472 			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3473 			/* Arm errata 2268618, 2812531 */
3474 			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3475 			break;
3476 		}
3477 		break;
3478 	}
3479 }
3480 
3481 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3482 {
3483 	u32 reg;
3484 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3485 
3486 	/* IDR0 */
3487 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3488 
3489 	/* 2-level structures */
3490 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3491 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3492 
3493 	if (reg & IDR0_CD2L)
3494 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3495 
3496 	/*
3497 	 * Translation table endianness.
3498 	 * We currently require the same endianness as the CPU, but this
3499 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3500 	 */
3501 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3502 	case IDR0_TTENDIAN_MIXED:
3503 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3504 		break;
3505 #ifdef __BIG_ENDIAN
3506 	case IDR0_TTENDIAN_BE:
3507 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3508 		break;
3509 #else
3510 	case IDR0_TTENDIAN_LE:
3511 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3512 		break;
3513 #endif
3514 	default:
3515 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3516 		return -ENXIO;
3517 	}
3518 
3519 	/* Boolean feature flags */
3520 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3521 		smmu->features |= ARM_SMMU_FEAT_PRI;
3522 
3523 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3524 		smmu->features |= ARM_SMMU_FEAT_ATS;
3525 
3526 	if (reg & IDR0_SEV)
3527 		smmu->features |= ARM_SMMU_FEAT_SEV;
3528 
3529 	if (reg & IDR0_MSI) {
3530 		smmu->features |= ARM_SMMU_FEAT_MSI;
3531 		if (coherent && !disable_msipolling)
3532 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3533 	}
3534 
3535 	if (reg & IDR0_HYP) {
3536 		smmu->features |= ARM_SMMU_FEAT_HYP;
3537 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3538 			smmu->features |= ARM_SMMU_FEAT_E2H;
3539 	}
3540 
3541 	/*
3542 	 * The coherency feature as set by FW is used in preference to the ID
3543 	 * register, but warn on mismatch.
3544 	 */
3545 	if (!!(reg & IDR0_COHACC) != coherent)
3546 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3547 			 coherent ? "true" : "false");
3548 
3549 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3550 	case IDR0_STALL_MODEL_FORCE:
3551 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3552 		fallthrough;
3553 	case IDR0_STALL_MODEL_STALL:
3554 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3555 	}
3556 
3557 	if (reg & IDR0_S1P)
3558 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3559 
3560 	if (reg & IDR0_S2P)
3561 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3562 
3563 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3564 		dev_err(smmu->dev, "no translation support!\n");
3565 		return -ENXIO;
3566 	}
3567 
3568 	/* We only support the AArch64 table format at present */
3569 	switch (FIELD_GET(IDR0_TTF, reg)) {
3570 	case IDR0_TTF_AARCH32_64:
3571 		smmu->ias = 40;
3572 		fallthrough;
3573 	case IDR0_TTF_AARCH64:
3574 		break;
3575 	default:
3576 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3577 		return -ENXIO;
3578 	}
3579 
3580 	/* ASID/VMID sizes */
3581 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3582 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3583 
3584 	/* IDR1 */
3585 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3586 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3587 		dev_err(smmu->dev, "embedded implementation not supported\n");
3588 		return -ENXIO;
3589 	}
3590 
3591 	/* Queue sizes, capped to ensure natural alignment */
3592 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3593 					     FIELD_GET(IDR1_CMDQS, reg));
3594 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3595 		/*
3596 		 * We don't support splitting up batches, so one batch of
3597 		 * commands plus an extra sync needs to fit inside the command
3598 		 * queue. There's also no way we can handle the weird alignment
3599 		 * restrictions on the base pointer for a unit-length queue.
3600 		 */
3601 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3602 			CMDQ_BATCH_ENTRIES);
3603 		return -ENXIO;
3604 	}
3605 
3606 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3607 					     FIELD_GET(IDR1_EVTQS, reg));
3608 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3609 					     FIELD_GET(IDR1_PRIQS, reg));
3610 
3611 	/* SID/SSID sizes */
3612 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3613 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
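	/* One PASID per SubstreamID */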
3614 	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3615 
3616 	/*
3617 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3618 	 * table, use a linear table instead.
3619 	 */
3620 	if (smmu->sid_bits <= STRTAB_SPLIT)
3621 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3622 
3623 	/* IDR3 */
3624 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
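	/* IDR3.RIL: support for range-based invalidation commands */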
3625 	if (FIELD_GET(IDR3_RIL, reg))
3626 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3627 
3628 	/* IDR5 */
3629 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3630 
3631 	/* Maximum number of outstanding stalls */
3632 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3633 
3634 	/* Page sizes (the larger values are the block-mapping sizes for each granule) */
3635 	if (reg & IDR5_GRAN64K)
3636 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3637 	if (reg & IDR5_GRAN16K)
3638 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3639 	if (reg & IDR5_GRAN4K)
3640 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3641 
3642 	/* Input address size */
3643 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3644 		smmu->features |= ARM_SMMU_FEAT_VAX;
3645 
3646 	/* Output address size */
3647 	switch (FIELD_GET(IDR5_OAS, reg)) {
3648 	case IDR5_OAS_32_BIT:
3649 		smmu->oas = 32;
3650 		break;
3651 	case IDR5_OAS_36_BIT:
3652 		smmu->oas = 36;
3653 		break;
3654 	case IDR5_OAS_40_BIT:
3655 		smmu->oas = 40;
3656 		break;
3657 	case IDR5_OAS_42_BIT:
3658 		smmu->oas = 42;
3659 		break;
3660 	case IDR5_OAS_44_BIT:
3661 		smmu->oas = 44;
3662 		break;
3663 	case IDR5_OAS_52_BIT:
3664 		smmu->oas = 52;
3665 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3666 		break;
3667 	default:
3668 		dev_info(smmu->dev,
3669 			"unknown output address size. Truncating to 48-bit\n");
3670 		fallthrough;
3671 	case IDR5_OAS_48_BIT:
3672 		smmu->oas = 48;
3673 	}
3674 
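	/* The first SMMU seeds the global page-size bitmap; later ones extend it */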
3675 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3676 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3677 	else
3678 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3679 
3680 	/* Set the DMA mask for our table walker */
3681 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3682 		dev_warn(smmu->dev,
3683 			 "failed to set DMA mask for table walker\n");
3684 
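	/* Input addresses must at least cover the OAS (stage-2 inputs are stage-1 outputs) */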
3685 	smmu->ias = max(smmu->ias, smmu->oas);
3686 
3687 	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3688 	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3689 		smmu->features |= ARM_SMMU_FEAT_NESTING;
3690 
3691 	arm_smmu_device_iidr_probe(smmu);
3692 
3693 	if (arm_smmu_sva_supported(smmu))
3694 		smmu->features |= ARM_SMMU_FEAT_SVA;
3695 
3696 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3697 		 smmu->ias, smmu->oas, smmu->features);
3698 	return 0;
3699 }
3700 
3701 #ifdef CONFIG_ACPI
3702 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3703 {
3704 	switch (model) {
3705 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3706 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3707 		break;
3708 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3709 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3710 		break;
3711 	}
3712 
3713 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3714 }
3715 
3716 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3717 				      struct arm_smmu_device *smmu)
3718 {
3719 	struct acpi_iort_smmu_v3 *iort_smmu;
3720 	struct device *dev = smmu->dev;
3721 	struct acpi_iort_node *node;
3722 
3723 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3724 
3725 	/* Retrieve SMMUv3 specific data */
3726 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3727 
3728 	acpi_smmu_get_options(iort_smmu->model, smmu);
3729 
3730 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3731 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3732 
3733 	return 0;
3734 }
3735 #else
3736 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3737 					     struct arm_smmu_device *smmu)
3738 {
3739 	return -ENODEV;
3740 }
3741 #endif
3742 
3743 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3744 				    struct arm_smmu_device *smmu)
3745 {
3746 	struct device *dev = &pdev->dev;
3747 	u32 cells;
3748 	int ret = -EINVAL;
3749 
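	/* A non-zero return selects bypass mode in the caller; it does not abort the probe */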
3750 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3751 		dev_err(dev, "missing #iommu-cells property\n");
3752 	else if (cells != 1)
3753 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3754 	else
3755 		ret = 0;
3756 
3757 	parse_driver_options(smmu);
3758 
3759 	if (of_dma_is_coherent(dev->of_node))
3760 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3761 
3762 	return ret;
3763 }
3764 
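/*
 * Implementations with the broken page-1 register space access everything
 * through page 0, so only 64K of MMIO is required.
 */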
3765 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3766 {
3767 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3768 		return SZ_64K;
3769 	else
3770 		return SZ_128K;
3771 }
3772 
3773 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3774 				      resource_size_t size)
3775 {
3776 	struct resource res = DEFINE_RES_MEM(start, size);
3777 
3778 	return devm_ioremap_resource(dev, &res);
3779 }
3780 
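/*
 * IORT RMRs describe streams that firmware has left active (e.g. ongoing
 * DMA); install bypass STEs for their StreamIDs so that enabling the SMMU
 * does not break those transactions.
 */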
3781 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3782 {
3783 	struct list_head rmr_list;
3784 	struct iommu_resv_region *e;
3785 
3786 	INIT_LIST_HEAD(&rmr_list);
3787 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3788 
3789 	list_for_each_entry(e, &rmr_list, list) {
3790 		__le64 *step;
3791 		struct iommu_iort_rmr_data *rmr;
3792 		int ret, i;
3793 
3794 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3795 		for (i = 0; i < rmr->num_sids; i++) {
3796 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3797 			if (ret) {
3798 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3799 					rmr->sids[i]);
3800 				continue;
3801 			}
3802 
3803 			step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3804 			arm_smmu_init_bypass_stes(step, 1, true);
3805 		}
3806 	}
3807 
3808 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3809 }
3810 
3811 static int arm_smmu_device_probe(struct platform_device *pdev)
3812 {
3813 	int irq, ret;
3814 	struct resource *res;
3815 	resource_size_t ioaddr;
3816 	struct arm_smmu_device *smmu;
3817 	struct device *dev = &pdev->dev;
3818 	bool bypass;
3819 
3820 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3821 	if (!smmu)
3822 		return -ENOMEM;
3823 	smmu->dev = dev;
3824 
3825 	if (dev->of_node) {
3826 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3827 	} else {
3828 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3829 		if (ret == -ENODEV)
3830 			return ret;
3831 	}
3832 
3833 	/* Set bypass mode according to firmware probing result */
3834 	bypass = !!ret;
3835 
3836 	/* Base address */
3837 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3838 	if (!res)
3839 		return -EINVAL;
3840 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3841 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3842 		return -EINVAL;
3843 	}
3844 	ioaddr = res->start;
3845 
3846 	/*
3847 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3848 	 * the PMCG registers which are reserved by the PMU driver.
3849 	 */
3850 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3851 	if (IS_ERR(smmu->base))
3852 		return PTR_ERR(smmu->base);
3853 
3854 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3855 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3856 					       ARM_SMMU_REG_SZ);
3857 		if (IS_ERR(smmu->page1))
3858 			return PTR_ERR(smmu->page1);
3859 	} else {
3860 		smmu->page1 = smmu->base;
3861 	}
3862 
3863 	/* Interrupt lines */
3864 
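	/* A single "combined" IRQ, if present, is used instead of the per-queue lines */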
3865 	irq = platform_get_irq_byname_optional(pdev, "combined");
3866 	if (irq > 0)
3867 		smmu->combined_irq = irq;
3868 	else {
3869 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3870 		if (irq > 0)
3871 			smmu->evtq.q.irq = irq;
3872 
3873 		irq = platform_get_irq_byname_optional(pdev, "priq");
3874 		if (irq > 0)
3875 			smmu->priq.q.irq = irq;
3876 
3877 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3878 		if (irq > 0)
3879 			smmu->gerr_irq = irq;
3880 	}
3881 	/* Probe the h/w */
3882 	ret = arm_smmu_device_hw_probe(smmu);
3883 	if (ret)
3884 		return ret;
3885 
3886 	/* Initialise in-memory data structures */
3887 	ret = arm_smmu_init_structures(smmu);
3888 	if (ret)
3889 		return ret;
3890 
3891 	/* Record our private device structure */
3892 	platform_set_drvdata(pdev, smmu);
3893 
3894 	/* Check for RMRs and install bypass STEs if any */
3895 	arm_smmu_rmr_install_bypass_ste(smmu);
3896 
3897 	/* Reset the device */
3898 	ret = arm_smmu_device_reset(smmu, bypass);
3899 	if (ret)
3900 		return ret;
3901 
3902 	/* And we're up. Go go go! */
3903 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3904 				     "smmu3.%pa", &ioaddr);
3905 	if (ret)
3906 		return ret;
3907 
3908 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3909 	if (ret) {
3910 		dev_err(dev, "Failed to register iommu\n");
3911 		iommu_device_sysfs_remove(&smmu->iommu);
3912 		return ret;
3913 	}
3914 
3915 	return 0;
3916 }
3917 
3918 static void arm_smmu_device_remove(struct platform_device *pdev)
3919 {
3920 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3921 
3922 	iommu_device_unregister(&smmu->iommu);
3923 	iommu_device_sysfs_remove(&smmu->iommu);
3924 	arm_smmu_device_disable(smmu);
3925 	iopf_queue_free(smmu->evtq.iopf);
3926 }
3927 
3928 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3929 {
3930 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3931 
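	/* Leave the SMMU disabled so that e.g. a kexec'd kernel finds it quiesced */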
3932 	arm_smmu_device_disable(smmu);
3933 }
3934 
3935 static const struct of_device_id arm_smmu_of_match[] = {
3936 	{ .compatible = "arm,smmu-v3", },
3937 	{ },
3938 };
3939 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3940 
3941 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3942 {
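	/* Wait for any outstanding SVA notifier callbacks before the module text goes away */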
3943 	arm_smmu_sva_notifier_synchronize();
3944 	platform_driver_unregister(drv);
3945 }
3946 
3947 static struct platform_driver arm_smmu_driver = {
3948 	.driver	= {
3949 		.name			= "arm-smmu-v3",
3950 		.of_match_table		= arm_smmu_of_match,
3951 		.suppress_bind_attrs	= true,
3952 	},
3953 	.probe	= arm_smmu_device_probe,
3954 	.remove_new = arm_smmu_device_remove,
3955 	.shutdown = arm_smmu_device_shutdown,
3956 };
3957 module_driver(arm_smmu_driver, platform_driver_register,
3958 	      arm_smmu_driver_unregister);
3959 
3960 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3961 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3962 MODULE_ALIAS("platform:arm-smmu-v3");
3963 MODULE_LICENSE("GPL v2");
3964