xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision 6a4aee277740d04ac0fd54cfa17cc28261932ddc)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 
30 #include "arm-smmu-v3.h"
31 #include "../../dma-iommu.h"
32 
33 static bool disable_bypass = true;
34 module_param(disable_bypass, bool, 0444);
35 MODULE_PARM_DESC(disable_bypass,
36 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
37 
38 static bool disable_msipolling;
39 module_param(disable_msipolling, bool, 0444);
40 MODULE_PARM_DESC(disable_msipolling,
41 	"Disable MSI-based polling for CMD_SYNC completion.");
42 
43 enum arm_smmu_msi_index {
44 	EVTQ_MSI_INDEX,
45 	GERROR_MSI_INDEX,
46 	PRIQ_MSI_INDEX,
47 	ARM_SMMU_MAX_MSIS,
48 };
49 
50 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu,
51 				      ioasid_t sid);
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
84 
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88 	{ 0, NULL},
89 };
90 
91 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
92 				    struct arm_smmu_device *smmu);
93 
94 static void parse_driver_options(struct arm_smmu_device *smmu)
95 {
96 	int i = 0;
97 
98 	do {
99 		if (of_property_read_bool(smmu->dev->of_node,
100 						arm_smmu_options[i].prop)) {
101 			smmu->options |= arm_smmu_options[i].opt;
102 			dev_notice(smmu->dev, "option %s\n",
103 				arm_smmu_options[i].prop);
104 		}
105 	} while (arm_smmu_options[++i].opt);
106 }
107 
108 /* Low-level queue manipulation functions */
109 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
110 {
111 	u32 space, prod, cons;
112 
113 	prod = Q_IDX(q, q->prod);
114 	cons = Q_IDX(q, q->cons);
115 
116 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
117 		space = (1 << q->max_n_shift) - (prod - cons);
118 	else
119 		space = cons - prod;
120 
121 	return space >= n;
122 }
123 
124 static bool queue_full(struct arm_smmu_ll_queue *q)
125 {
126 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
127 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
128 }
129 
130 static bool queue_empty(struct arm_smmu_ll_queue *q)
131 {
132 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
133 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
134 }
135 
136 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
137 {
138 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
139 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
140 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
141 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
142 }
143 
144 static void queue_sync_cons_out(struct arm_smmu_queue *q)
145 {
146 	/*
147 	 * Ensure that all CPU accesses (reads and writes) to the queue
148 	 * are complete before we update the cons pointer.
149 	 */
150 	__iomb();
151 	writel_relaxed(q->llq.cons, q->cons_reg);
152 }
153 
154 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
155 {
156 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
157 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
158 }
159 
160 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
161 {
162 	struct arm_smmu_ll_queue *llq = &q->llq;
163 
164 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
165 		return;
166 
167 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
168 		      Q_IDX(llq, llq->cons);
169 	queue_sync_cons_out(q);
170 }
171 
172 static int queue_sync_prod_in(struct arm_smmu_queue *q)
173 {
174 	u32 prod;
175 	int ret = 0;
176 
177 	/*
178 	 * We can't use the _relaxed() variant here, as we must prevent
179 	 * speculative reads of the queue before we have determined that
180 	 * prod has indeed moved.
181 	 */
182 	prod = readl(q->prod_reg);
183 
184 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
185 		ret = -EOVERFLOW;
186 
187 	q->llq.prod = prod;
188 	return ret;
189 }
190 
191 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
192 {
193 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
194 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
195 }
196 
197 static void queue_poll_init(struct arm_smmu_device *smmu,
198 			    struct arm_smmu_queue_poll *qp)
199 {
200 	qp->delay = 1;
201 	qp->spin_cnt = 0;
202 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
203 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
204 }
205 
206 static int queue_poll(struct arm_smmu_queue_poll *qp)
207 {
208 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
209 		return -ETIMEDOUT;
210 
211 	if (qp->wfe) {
212 		wfe();
213 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
214 		cpu_relax();
215 	} else {
216 		udelay(qp->delay);
217 		qp->delay *= 2;
218 		qp->spin_cnt = 0;
219 	}
220 
221 	return 0;
222 }
223 
224 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
225 {
226 	int i;
227 
228 	for (i = 0; i < n_dwords; ++i)
229 		*dst++ = cpu_to_le64(*src++);
230 }
231 
232 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
233 {
234 	int i;
235 
236 	for (i = 0; i < n_dwords; ++i)
237 		*dst++ = le64_to_cpu(*src++);
238 }
239 
240 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
241 {
242 	if (queue_empty(&q->llq))
243 		return -EAGAIN;
244 
245 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
246 	queue_inc_cons(&q->llq);
247 	queue_sync_cons_out(q);
248 	return 0;
249 }
250 
251 /* High-level queue accessors */
252 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
253 {
254 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
255 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
256 
257 	switch (ent->opcode) {
258 	case CMDQ_OP_TLBI_EL2_ALL:
259 	case CMDQ_OP_TLBI_NSNH_ALL:
260 		break;
261 	case CMDQ_OP_PREFETCH_CFG:
262 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
263 		break;
264 	case CMDQ_OP_CFGI_CD:
265 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
266 		fallthrough;
267 	case CMDQ_OP_CFGI_STE:
268 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
269 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
270 		break;
271 	case CMDQ_OP_CFGI_CD_ALL:
272 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
273 		break;
274 	case CMDQ_OP_CFGI_ALL:
275 		/* Cover the entire SID range */
276 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
277 		break;
278 	case CMDQ_OP_TLBI_NH_VA:
279 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
280 		fallthrough;
281 	case CMDQ_OP_TLBI_EL2_VA:
282 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
283 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
284 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
285 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
286 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
287 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
288 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
289 		break;
290 	case CMDQ_OP_TLBI_S2_IPA:
291 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
292 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
293 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
294 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
295 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
296 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
297 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
298 		break;
299 	case CMDQ_OP_TLBI_NH_ASID:
300 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
301 		fallthrough;
302 	case CMDQ_OP_TLBI_S12_VMALL:
303 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
304 		break;
305 	case CMDQ_OP_TLBI_EL2_ASID:
306 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
307 		break;
308 	case CMDQ_OP_ATC_INV:
309 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
310 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
311 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
312 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
313 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
314 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
315 		break;
316 	case CMDQ_OP_PRI_RESP:
317 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
318 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
319 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
320 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
321 		switch (ent->pri.resp) {
322 		case PRI_RESP_DENY:
323 		case PRI_RESP_FAIL:
324 		case PRI_RESP_SUCC:
325 			break;
326 		default:
327 			return -EINVAL;
328 		}
329 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
330 		break;
331 	case CMDQ_OP_RESUME:
332 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
333 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
334 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
335 		break;
336 	case CMDQ_OP_CMD_SYNC:
337 		if (ent->sync.msiaddr) {
338 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
339 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
340 		} else {
341 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
342 		}
343 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
344 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
345 		break;
346 	default:
347 		return -ENOENT;
348 	}
349 
350 	return 0;
351 }
352 
353 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
354 {
355 	return &smmu->cmdq;
356 }
357 
358 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
359 					 struct arm_smmu_queue *q, u32 prod)
360 {
361 	struct arm_smmu_cmdq_ent ent = {
362 		.opcode = CMDQ_OP_CMD_SYNC,
363 	};
364 
365 	/*
366 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
367 	 * payload, so the write will zero the entire command on that platform.
368 	 */
369 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
370 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
371 				   q->ent_dwords * 8;
372 	}
373 
374 	arm_smmu_cmdq_build_cmd(cmd, &ent);
375 }
376 
377 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
378 				     struct arm_smmu_queue *q)
379 {
380 	static const char * const cerror_str[] = {
381 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
382 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
383 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
384 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
385 	};
386 
387 	int i;
388 	u64 cmd[CMDQ_ENT_DWORDS];
389 	u32 cons = readl_relaxed(q->cons_reg);
390 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
391 	struct arm_smmu_cmdq_ent cmd_sync = {
392 		.opcode = CMDQ_OP_CMD_SYNC,
393 	};
394 
395 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
396 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
397 
398 	switch (idx) {
399 	case CMDQ_ERR_CERROR_ABT_IDX:
400 		dev_err(smmu->dev, "retrying command fetch\n");
401 		return;
402 	case CMDQ_ERR_CERROR_NONE_IDX:
403 		return;
404 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
405 		/*
406 		 * ATC Invalidation Completion timeout. CONS is still pointing
407 		 * at the CMD_SYNC. Attempt to complete other pending commands
408 		 * by repeating the CMD_SYNC, though we might well end up back
409 		 * here since the ATC invalidation may still be pending.
410 		 */
411 		return;
412 	case CMDQ_ERR_CERROR_ILL_IDX:
413 	default:
414 		break;
415 	}
416 
417 	/*
418 	 * We may have concurrent producers, so we need to be careful
419 	 * not to touch any of the shadow cmdq state.
420 	 */
421 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
422 	dev_err(smmu->dev, "skipping command in error state:\n");
423 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
424 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
425 
426 	/* Convert the erroneous command into a CMD_SYNC */
427 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
428 
429 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
430 }
431 
432 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
433 {
434 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
435 }
436 
437 /*
438  * Command queue locking.
439  * This is a form of bastardised rwlock with the following major changes:
440  *
441  * - The only LOCK routines are exclusive_trylock() and shared_lock().
442  *   Neither have barrier semantics, and instead provide only a control
443  *   dependency.
444  *
445  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
446  *   fails if the caller appears to be the last lock holder (yes, this is
447  *   racy). All successful UNLOCK routines have RELEASE semantics.
448  */
449 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
450 {
451 	int val;
452 
453 	/*
454 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
455 	 * lock counter. When held in exclusive state, the lock counter is set
456 	 * to INT_MIN so these increments won't hurt as the value will remain
457 	 * negative.
458 	 */
459 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
460 		return;
461 
462 	do {
463 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
464 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
465 }
466 
467 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
468 {
469 	(void)atomic_dec_return_release(&cmdq->lock);
470 }
471 
472 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
473 {
474 	if (atomic_read(&cmdq->lock) == 1)
475 		return false;
476 
477 	arm_smmu_cmdq_shared_unlock(cmdq);
478 	return true;
479 }
480 
481 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
482 ({									\
483 	bool __ret;							\
484 	local_irq_save(flags);						\
485 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
486 	if (!__ret)							\
487 		local_irq_restore(flags);				\
488 	__ret;								\
489 })
490 
491 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
492 ({									\
493 	atomic_set_release(&cmdq->lock, 0);				\
494 	local_irq_restore(flags);					\
495 })
496 
497 
498 /*
499  * Command queue insertion.
500  * This is made fiddly by our attempts to achieve some sort of scalability
501  * since there is one queue shared amongst all of the CPUs in the system.  If
502  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
503  * then you'll *love* this monstrosity.
504  *
505  * The basic idea is to split the queue up into ranges of commands that are
506  * owned by a given CPU; the owner may not have written all of the commands
507  * itself, but is responsible for advancing the hardware prod pointer when
508  * the time comes. The algorithm is roughly:
509  *
510  * 	1. Allocate some space in the queue. At this point we also discover
511  *	   whether the head of the queue is currently owned by another CPU,
512  *	   or whether we are the owner.
513  *
514  *	2. Write our commands into our allocated slots in the queue.
515  *
516  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
517  *
518  *	4. If we are an owner:
519  *		a. Wait for the previous owner to finish.
520  *		b. Mark the queue head as unowned, which tells us the range
521  *		   that we are responsible for publishing.
522  *		c. Wait for all commands in our owned range to become valid.
523  *		d. Advance the hardware prod pointer.
524  *		e. Tell the next owner we've finished.
525  *
526  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
527  *	   owner), then we need to stick around until it has completed:
528  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
529  *		   to clear the first 4 bytes.
530  *		b. Otherwise, we spin waiting for the hardware cons pointer to
531  *		   advance past our command.
532  *
533  * The devil is in the details, particularly the use of locking for handling
534  * SYNC completion and freeing up space in the queue before we think that it is
535  * full.
536  */
537 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
538 					       u32 sprod, u32 eprod, bool set)
539 {
540 	u32 swidx, sbidx, ewidx, ebidx;
541 	struct arm_smmu_ll_queue llq = {
542 		.max_n_shift	= cmdq->q.llq.max_n_shift,
543 		.prod		= sprod,
544 	};
545 
546 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
547 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
548 
549 	while (llq.prod != eprod) {
550 		unsigned long mask;
551 		atomic_long_t *ptr;
552 		u32 limit = BITS_PER_LONG;
553 
554 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
555 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
556 
557 		ptr = &cmdq->valid_map[swidx];
558 
559 		if ((swidx == ewidx) && (sbidx < ebidx))
560 			limit = ebidx;
561 
562 		mask = GENMASK(limit - 1, sbidx);
563 
564 		/*
565 		 * The valid bit is the inverse of the wrap bit. This means
566 		 * that a zero-initialised queue is invalid and, after marking
567 		 * all entries as valid, they become invalid again when we
568 		 * wrap.
569 		 */
570 		if (set) {
571 			atomic_long_xor(mask, ptr);
572 		} else { /* Poll */
573 			unsigned long valid;
574 
575 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
576 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
577 		}
578 
579 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
580 	}
581 }
582 
583 /* Mark all entries in the range [sprod, eprod) as valid */
584 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
585 					u32 sprod, u32 eprod)
586 {
587 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
588 }
589 
590 /* Wait for all entries in the range [sprod, eprod) to become valid */
591 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
592 					 u32 sprod, u32 eprod)
593 {
594 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
595 }
596 
597 /* Wait for the command queue to become non-full */
598 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
599 					     struct arm_smmu_ll_queue *llq)
600 {
601 	unsigned long flags;
602 	struct arm_smmu_queue_poll qp;
603 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
604 	int ret = 0;
605 
606 	/*
607 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
608 	 * that fails, spin until somebody else updates it for us.
609 	 */
610 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
611 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
612 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
613 		llq->val = READ_ONCE(cmdq->q.llq.val);
614 		return 0;
615 	}
616 
617 	queue_poll_init(smmu, &qp);
618 	do {
619 		llq->val = READ_ONCE(cmdq->q.llq.val);
620 		if (!queue_full(llq))
621 			break;
622 
623 		ret = queue_poll(&qp);
624 	} while (!ret);
625 
626 	return ret;
627 }
628 
629 /*
630  * Wait until the SMMU signals a CMD_SYNC completion MSI.
631  * Must be called with the cmdq lock held in some capacity.
632  */
633 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
634 					  struct arm_smmu_ll_queue *llq)
635 {
636 	int ret = 0;
637 	struct arm_smmu_queue_poll qp;
638 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
639 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
640 
641 	queue_poll_init(smmu, &qp);
642 
643 	/*
644 	 * The MSI won't generate an event, since it's being written back
645 	 * into the command queue.
646 	 */
647 	qp.wfe = false;
648 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
649 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
650 	return ret;
651 }
652 
653 /*
654  * Wait until the SMMU cons index passes llq->prod.
655  * Must be called with the cmdq lock held in some capacity.
656  */
657 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
658 					       struct arm_smmu_ll_queue *llq)
659 {
660 	struct arm_smmu_queue_poll qp;
661 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
662 	u32 prod = llq->prod;
663 	int ret = 0;
664 
665 	queue_poll_init(smmu, &qp);
666 	llq->val = READ_ONCE(cmdq->q.llq.val);
667 	do {
668 		if (queue_consumed(llq, prod))
669 			break;
670 
671 		ret = queue_poll(&qp);
672 
673 		/*
674 		 * This needs to be a readl() so that our subsequent call
675 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
676 		 *
677 		 * Specifically, we need to ensure that we observe all
678 		 * shared_lock()s by other CMD_SYNCs that share our owner,
679 		 * so that a failing call to tryunlock() means that we're
680 		 * the last one out and therefore we can safely advance
681 		 * cmdq->q.llq.cons. Roughly speaking:
682 		 *
683 		 * CPU 0		CPU1			CPU2 (us)
684 		 *
685 		 * if (sync)
686 		 * 	shared_lock();
687 		 *
688 		 * dma_wmb();
689 		 * set_valid_map();
690 		 *
691 		 * 			if (owner) {
692 		 *				poll_valid_map();
693 		 *				<control dependency>
694 		 *				writel(prod_reg);
695 		 *
696 		 *						readl(cons_reg);
697 		 *						tryunlock();
698 		 *
699 		 * Requires us to see CPU 0's shared_lock() acquisition.
700 		 */
701 		llq->cons = readl(cmdq->q.cons_reg);
702 	} while (!ret);
703 
704 	return ret;
705 }
706 
707 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
708 					 struct arm_smmu_ll_queue *llq)
709 {
710 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
711 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
712 
713 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
714 }
715 
716 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
717 					u32 prod, int n)
718 {
719 	int i;
720 	struct arm_smmu_ll_queue llq = {
721 		.max_n_shift	= cmdq->q.llq.max_n_shift,
722 		.prod		= prod,
723 	};
724 
725 	for (i = 0; i < n; ++i) {
726 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
727 
728 		prod = queue_inc_prod_n(&llq, i);
729 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
730 	}
731 }
732 
733 /*
734  * This is the actual insertion function, and provides the following
735  * ordering guarantees to callers:
736  *
737  * - There is a dma_wmb() before publishing any commands to the queue.
738  *   This can be relied upon to order prior writes to data structures
739  *   in memory (such as a CD or an STE) before the command.
740  *
741  * - On completion of a CMD_SYNC, there is a control dependency.
742  *   This can be relied upon to order subsequent writes to memory (e.g.
743  *   freeing an IOVA) after completion of the CMD_SYNC.
744  *
745  * - Command insertion is totally ordered, so if two CPUs each race to
746  *   insert their own list of commands then all of the commands from one
747  *   CPU will appear before any of the commands from the other CPU.
748  */
749 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
750 				       u64 *cmds, int n, bool sync)
751 {
752 	u64 cmd_sync[CMDQ_ENT_DWORDS];
753 	u32 prod;
754 	unsigned long flags;
755 	bool owner;
756 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
757 	struct arm_smmu_ll_queue llq, head;
758 	int ret = 0;
759 
760 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
761 
762 	/* 1. Allocate some space in the queue */
763 	local_irq_save(flags);
764 	llq.val = READ_ONCE(cmdq->q.llq.val);
765 	do {
766 		u64 old;
767 
768 		while (!queue_has_space(&llq, n + sync)) {
769 			local_irq_restore(flags);
770 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
771 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
772 			local_irq_save(flags);
773 		}
774 
775 		head.cons = llq.cons;
776 		head.prod = queue_inc_prod_n(&llq, n + sync) |
777 					     CMDQ_PROD_OWNED_FLAG;
778 
779 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
780 		if (old == llq.val)
781 			break;
782 
783 		llq.val = old;
784 	} while (1);
785 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
786 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
787 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
788 
789 	/*
790 	 * 2. Write our commands into the queue
791 	 * Dependency ordering from the cmpxchg() loop above.
792 	 */
793 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
794 	if (sync) {
795 		prod = queue_inc_prod_n(&llq, n);
796 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
797 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
798 
799 		/*
800 		 * In order to determine completion of our CMD_SYNC, we must
801 		 * ensure that the queue can't wrap twice without us noticing.
802 		 * We achieve that by taking the cmdq lock as shared before
803 		 * marking our slot as valid.
804 		 */
805 		arm_smmu_cmdq_shared_lock(cmdq);
806 	}
807 
808 	/* 3. Mark our slots as valid, ensuring commands are visible first */
809 	dma_wmb();
810 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
811 
812 	/* 4. If we are the owner, take control of the SMMU hardware */
813 	if (owner) {
814 		/* a. Wait for previous owner to finish */
815 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
816 
817 		/* b. Stop gathering work by clearing the owned flag */
818 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
819 						   &cmdq->q.llq.atomic.prod);
820 		prod &= ~CMDQ_PROD_OWNED_FLAG;
821 
822 		/*
823 		 * c. Wait for any gathered work to be written to the queue.
824 		 * Note that we read our own entries so that we have the control
825 		 * dependency required by (d).
826 		 */
827 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
828 
829 		/*
830 		 * d. Advance the hardware prod pointer
831 		 * Control dependency ordering from the entries becoming valid.
832 		 */
833 		writel_relaxed(prod, cmdq->q.prod_reg);
834 
835 		/*
836 		 * e. Tell the next owner we're done
837 		 * Make sure we've updated the hardware first, so that we don't
838 		 * race to update prod and potentially move it backwards.
839 		 */
840 		atomic_set_release(&cmdq->owner_prod, prod);
841 	}
842 
843 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
844 	if (sync) {
845 		llq.prod = queue_inc_prod_n(&llq, n);
846 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
847 		if (ret) {
848 			dev_err_ratelimited(smmu->dev,
849 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
850 					    llq.prod,
851 					    readl_relaxed(cmdq->q.prod_reg),
852 					    readl_relaxed(cmdq->q.cons_reg));
853 		}
854 
855 		/*
856 		 * Try to unlock the cmdq lock. This will fail if we're the last
857 		 * reader, in which case we can safely update cmdq->q.llq.cons
858 		 */
859 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
860 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
861 			arm_smmu_cmdq_shared_unlock(cmdq);
862 		}
863 	}
864 
865 	local_irq_restore(flags);
866 	return ret;
867 }
868 
869 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
870 				     struct arm_smmu_cmdq_ent *ent,
871 				     bool sync)
872 {
873 	u64 cmd[CMDQ_ENT_DWORDS];
874 
875 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
876 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
877 			 ent->opcode);
878 		return -EINVAL;
879 	}
880 
881 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
882 }
883 
884 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
885 				   struct arm_smmu_cmdq_ent *ent)
886 {
887 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
888 }
889 
890 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
891 					     struct arm_smmu_cmdq_ent *ent)
892 {
893 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
894 }
895 
896 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
897 				    struct arm_smmu_cmdq_batch *cmds,
898 				    struct arm_smmu_cmdq_ent *cmd)
899 {
900 	int index;
901 
902 	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
903 	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
904 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
905 		cmds->num = 0;
906 	}
907 
908 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
909 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
910 		cmds->num = 0;
911 	}
912 
913 	index = cmds->num * CMDQ_ENT_DWORDS;
914 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
915 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
916 			 cmd->opcode);
917 		return;
918 	}
919 
920 	cmds->num++;
921 }
922 
923 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
924 				      struct arm_smmu_cmdq_batch *cmds)
925 {
926 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
927 }
928 
929 static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
930 				   struct iommu_page_response *resp)
931 {
932 	struct arm_smmu_cmdq_ent cmd = {0};
933 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
934 	int sid = master->streams[0].id;
935 
936 	if (WARN_ON(!master->stall_enabled))
937 		return;
938 
939 	cmd.opcode		= CMDQ_OP_RESUME;
940 	cmd.resume.sid		= sid;
941 	cmd.resume.stag		= resp->grpid;
942 	switch (resp->code) {
943 	case IOMMU_PAGE_RESP_INVALID:
944 	case IOMMU_PAGE_RESP_FAILURE:
945 		cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
946 		break;
947 	case IOMMU_PAGE_RESP_SUCCESS:
948 		cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
949 		break;
950 	default:
951 		break;
952 	}
953 
954 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
955 	/*
956 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
957 	 * RESUME consumption guarantees that the stalled transaction will be
958 	 * terminated... at some point in the future. PRI_RESP is fire and
959 	 * forget.
960 	 */
961 }
962 
963 /* Context descriptor manipulation functions */
964 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
965 {
966 	struct arm_smmu_cmdq_ent cmd = {
967 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
968 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
969 		.tlbi.asid = asid,
970 	};
971 
972 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
973 }
974 
975 /*
976  * Based on the value of ent report which bits of the STE the HW will access. It
977  * would be nice if this was complete according to the spec, but minimally it
978  * has to capture the bits this driver uses.
979  */
980 static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent,
981 				  struct arm_smmu_ste *used_bits)
982 {
983 	unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent->data[0]));
984 
985 	used_bits->data[0] = cpu_to_le64(STRTAB_STE_0_V);
986 	if (!(ent->data[0] & cpu_to_le64(STRTAB_STE_0_V)))
987 		return;
988 
989 	used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
990 
991 	/* S1 translates */
992 	if (cfg & BIT(0)) {
993 		used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
994 						  STRTAB_STE_0_S1CTXPTR_MASK |
995 						  STRTAB_STE_0_S1CDMAX);
996 		used_bits->data[1] |=
997 			cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
998 				    STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
999 				    STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
1000 				    STRTAB_STE_1_EATS);
1001 		used_bits->data[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
1002 	}
1003 
1004 	/* S2 translates */
1005 	if (cfg & BIT(1)) {
1006 		used_bits->data[1] |=
1007 			cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
1008 		used_bits->data[2] |=
1009 			cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
1010 				    STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
1011 				    STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R);
1012 		used_bits->data[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
1013 	}
1014 
1015 	if (cfg == STRTAB_STE_0_CFG_BYPASS)
1016 		used_bits->data[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1017 }
1018 
1019 /*
1020  * Figure out if we can do a hitless update of entry to become target. Returns a
1021  * bit mask where 1 indicates that qword needs to be set disruptively.
1022  * unused_update is an intermediate value of entry that has unused bits set to
1023  * their new values.
1024  */
1025 static u8 arm_smmu_entry_qword_diff(const struct arm_smmu_ste *entry,
1026 				    const struct arm_smmu_ste *target,
1027 				    struct arm_smmu_ste *unused_update)
1028 {
1029 	struct arm_smmu_ste target_used = {};
1030 	struct arm_smmu_ste cur_used = {};
1031 	u8 used_qword_diff = 0;
1032 	unsigned int i;
1033 
1034 	arm_smmu_get_ste_used(entry, &cur_used);
1035 	arm_smmu_get_ste_used(target, &target_used);
1036 
1037 	for (i = 0; i != ARRAY_SIZE(target_used.data); i++) {
1038 		/*
1039 		 * Check that masks are up to date, the make functions are not
1040 		 * allowed to set a bit to 1 if the used function doesn't say it
1041 		 * is used.
1042 		 */
1043 		WARN_ON_ONCE(target->data[i] & ~target_used.data[i]);
1044 
1045 		/* Bits can change because they are not currently being used */
1046 		unused_update->data[i] = (entry->data[i] & cur_used.data[i]) |
1047 					 (target->data[i] & ~cur_used.data[i]);
1048 		/*
1049 		 * Each bit indicates that a used bit in a qword needs to be
1050 		 * changed after unused_update is applied.
1051 		 */
1052 		if ((unused_update->data[i] & target_used.data[i]) !=
1053 		    target->data[i])
1054 			used_qword_diff |= 1 << i;
1055 	}
1056 	return used_qword_diff;
1057 }
1058 
1059 static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid,
1060 		      struct arm_smmu_ste *entry,
1061 		      const struct arm_smmu_ste *target, unsigned int start,
1062 		      unsigned int len)
1063 {
1064 	bool changed = false;
1065 	unsigned int i;
1066 
1067 	for (i = start; len != 0; len--, i++) {
1068 		if (entry->data[i] != target->data[i]) {
1069 			WRITE_ONCE(entry->data[i], target->data[i]);
1070 			changed = true;
1071 		}
1072 	}
1073 
1074 	if (changed)
1075 		arm_smmu_sync_ste_for_sid(smmu, sid);
1076 	return changed;
1077 }
1078 
1079 /*
1080  * Update the STE/CD to the target configuration. The transition from the
1081  * current entry to the target entry takes place over multiple steps that
1082  * attempts to make the transition hitless if possible. This function takes care
1083  * not to create a situation where the HW can perceive a corrupted entry. HW is
1084  * only required to have a 64 bit atomicity with stores from the CPU, while
1085  * entries are many 64 bit values big.
1086  *
1087  * The difference between the current value and the target value is analyzed to
1088  * determine which of three updates are required - disruptive, hitless or no
1089  * change.
1090  *
1091  * In the most general disruptive case we can make any update in three steps:
1092  *  - Disrupting the entry (V=0)
1093  *  - Fill now unused qwords, execpt qword 0 which contains V
1094  *  - Make qword 0 have the final value and valid (V=1) with a single 64
1095  *    bit store
1096  *
1097  * However this disrupts the HW while it is happening. There are several
1098  * interesting cases where a STE/CD can be updated without disturbing the HW
1099  * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
1100  * because the used bits don't intersect. We can detect this by calculating how
1101  * many 64 bit values need update after adjusting the unused bits and skip the
1102  * V=0 process. This relies on the IGNORED behavior described in the
1103  * specification.
1104  */
1105 static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
1106 			       struct arm_smmu_ste *entry,
1107 			       const struct arm_smmu_ste *target)
1108 {
1109 	unsigned int num_entry_qwords = ARRAY_SIZE(target->data);
1110 	struct arm_smmu_device *smmu = master->smmu;
1111 	struct arm_smmu_ste unused_update;
1112 	u8 used_qword_diff;
1113 
1114 	used_qword_diff =
1115 		arm_smmu_entry_qword_diff(entry, target, &unused_update);
1116 	if (hweight8(used_qword_diff) == 1) {
1117 		/*
1118 		 * Only one qword needs its used bits to be changed. This is a
1119 		 * hitless update, update all bits the current STE is ignoring
1120 		 * to their new values, then update a single "critical qword" to
1121 		 * change the STE and finally 0 out any bits that are now unused
1122 		 * in the target configuration.
1123 		 */
1124 		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
1125 
1126 		/*
1127 		 * Skip writing unused bits in the critical qword since we'll be
1128 		 * writing it in the next step anyways. This can save a sync
1129 		 * when the only change is in that qword.
1130 		 */
1131 		unused_update.data[critical_qword_index] =
1132 			entry->data[critical_qword_index];
1133 		entry_set(smmu, sid, entry, &unused_update, 0, num_entry_qwords);
1134 		entry_set(smmu, sid, entry, target, critical_qword_index, 1);
1135 		entry_set(smmu, sid, entry, target, 0, num_entry_qwords);
1136 	} else if (used_qword_diff) {
1137 		/*
1138 		 * At least two qwords need their inuse bits to be changed. This
1139 		 * requires a breaking update, zero the V bit, write all qwords
1140 		 * but 0, then set qword 0
1141 		 */
1142 		unused_update.data[0] = entry->data[0] & (~STRTAB_STE_0_V);
1143 		entry_set(smmu, sid, entry, &unused_update, 0, 1);
1144 		entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1);
1145 		entry_set(smmu, sid, entry, target, 0, 1);
1146 	} else {
1147 		/*
1148 		 * No inuse bit changed. Sanity check that all unused bits are 0
1149 		 * in the entry. The target was already sanity checked by
1150 		 * compute_qword_diff().
1151 		 */
1152 		WARN_ON_ONCE(
1153 			entry_set(smmu, sid, entry, target, 0, num_entry_qwords));
1154 	}
1155 
1156 	/* It's likely that we'll want to use the new STE soon */
1157 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
1158 		struct arm_smmu_cmdq_ent
1159 			prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
1160 					 .prefetch = {
1161 						 .sid = sid,
1162 					 } };
1163 
1164 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1165 	}
1166 }
1167 
1168 static void arm_smmu_sync_cd(struct arm_smmu_master *master,
1169 			     int ssid, bool leaf)
1170 {
1171 	size_t i;
1172 	struct arm_smmu_cmdq_batch cmds;
1173 	struct arm_smmu_device *smmu = master->smmu;
1174 	struct arm_smmu_cmdq_ent cmd = {
1175 		.opcode	= CMDQ_OP_CFGI_CD,
1176 		.cfgi	= {
1177 			.ssid	= ssid,
1178 			.leaf	= leaf,
1179 		},
1180 	};
1181 
1182 	cmds.num = 0;
1183 	for (i = 0; i < master->num_streams; i++) {
1184 		cmd.cfgi.sid = master->streams[i].id;
1185 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1186 	}
1187 
1188 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1189 }
1190 
1191 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1192 					struct arm_smmu_l1_ctx_desc *l1_desc)
1193 {
1194 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1195 
1196 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1197 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1198 	if (!l1_desc->l2ptr) {
1199 		dev_warn(smmu->dev,
1200 			 "failed to allocate context descriptor table\n");
1201 		return -ENOMEM;
1202 	}
1203 	return 0;
1204 }
1205 
1206 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1207 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1208 {
1209 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1210 		  CTXDESC_L1_DESC_V;
1211 
1212 	/* See comment in arm_smmu_write_ctx_desc() */
1213 	WRITE_ONCE(*dst, cpu_to_le64(val));
1214 }
1215 
1216 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
1217 {
1218 	__le64 *l1ptr;
1219 	unsigned int idx;
1220 	struct arm_smmu_l1_ctx_desc *l1_desc;
1221 	struct arm_smmu_device *smmu = master->smmu;
1222 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1223 
1224 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1225 		return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
1226 
1227 	idx = ssid >> CTXDESC_SPLIT;
1228 	l1_desc = &cd_table->l1_desc[idx];
1229 	if (!l1_desc->l2ptr) {
1230 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1231 			return NULL;
1232 
1233 		l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1234 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1235 		/* An invalid L1CD can be cached */
1236 		arm_smmu_sync_cd(master, ssid, false);
1237 	}
1238 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1239 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1240 }
1241 
1242 int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
1243 			    struct arm_smmu_ctx_desc *cd)
1244 {
1245 	/*
1246 	 * This function handles the following cases:
1247 	 *
1248 	 * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
1249 	 * (2) Install a secondary CD, for SID+SSID traffic.
1250 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1251 	 *     CD, then invalidate the old entry and mappings.
1252 	 * (4) Quiesce the context without clearing the valid bit. Disable
1253 	 *     translation, and ignore any translation fault.
1254 	 * (5) Remove a secondary CD.
1255 	 */
1256 	u64 val;
1257 	bool cd_live;
1258 	__le64 *cdptr;
1259 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1260 	struct arm_smmu_device *smmu = master->smmu;
1261 
1262 	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
1263 		return -E2BIG;
1264 
1265 	cdptr = arm_smmu_get_cd_ptr(master, ssid);
1266 	if (!cdptr)
1267 		return -ENOMEM;
1268 
1269 	val = le64_to_cpu(cdptr[0]);
1270 	cd_live = !!(val & CTXDESC_CD_0_V);
1271 
1272 	if (!cd) { /* (5) */
1273 		val = 0;
1274 	} else if (cd == &quiet_cd) { /* (4) */
1275 		if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1276 			val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R);
1277 		val |= CTXDESC_CD_0_TCR_EPD0;
1278 	} else if (cd_live) { /* (3) */
1279 		val &= ~CTXDESC_CD_0_ASID;
1280 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1281 		/*
1282 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1283 		 * this substream's traffic
1284 		 */
1285 	} else { /* (1) and (2) */
1286 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1287 		cdptr[2] = 0;
1288 		cdptr[3] = cpu_to_le64(cd->mair);
1289 
1290 		/*
1291 		 * STE may be live, and the SMMU might read dwords of this CD in any
1292 		 * order. Ensure that it observes valid values before reading
1293 		 * V=1.
1294 		 */
1295 		arm_smmu_sync_cd(master, ssid, true);
1296 
1297 		val = cd->tcr |
1298 #ifdef __BIG_ENDIAN
1299 			CTXDESC_CD_0_ENDI |
1300 #endif
1301 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1302 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1303 			CTXDESC_CD_0_AA64 |
1304 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1305 			CTXDESC_CD_0_V;
1306 
1307 		if (cd_table->stall_enabled)
1308 			val |= CTXDESC_CD_0_S;
1309 	}
1310 
1311 	/*
1312 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1313 	 * "Configuration structures and configuration invalidation completion"
1314 	 *
1315 	 *   The size of single-copy atomic reads made by the SMMU is
1316 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1317 	 *   field within an aligned 64-bit span of a structure can be altered
1318 	 *   without first making the structure invalid.
1319 	 */
1320 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1321 	arm_smmu_sync_cd(master, ssid, true);
1322 	return 0;
1323 }
1324 
1325 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1326 {
1327 	int ret;
1328 	size_t l1size;
1329 	size_t max_contexts;
1330 	struct arm_smmu_device *smmu = master->smmu;
1331 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1332 
1333 	cd_table->stall_enabled = master->stall_enabled;
1334 	cd_table->s1cdmax = master->ssid_bits;
1335 	max_contexts = 1 << cd_table->s1cdmax;
1336 
1337 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1338 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1339 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1340 		cd_table->num_l1_ents = max_contexts;
1341 
1342 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1343 	} else {
1344 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1345 		cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
1346 						  CTXDESC_L2_ENTRIES);
1347 
1348 		cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
1349 					      sizeof(*cd_table->l1_desc),
1350 					      GFP_KERNEL);
1351 		if (!cd_table->l1_desc)
1352 			return -ENOMEM;
1353 
1354 		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1355 	}
1356 
1357 	cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
1358 					   GFP_KERNEL);
1359 	if (!cd_table->cdtab) {
1360 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1361 		ret = -ENOMEM;
1362 		goto err_free_l1;
1363 	}
1364 
1365 	return 0;
1366 
1367 err_free_l1:
1368 	if (cd_table->l1_desc) {
1369 		devm_kfree(smmu->dev, cd_table->l1_desc);
1370 		cd_table->l1_desc = NULL;
1371 	}
1372 	return ret;
1373 }
1374 
1375 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1376 {
1377 	int i;
1378 	size_t size, l1size;
1379 	struct arm_smmu_device *smmu = master->smmu;
1380 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1381 
1382 	if (cd_table->l1_desc) {
1383 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1384 
1385 		for (i = 0; i < cd_table->num_l1_ents; i++) {
1386 			if (!cd_table->l1_desc[i].l2ptr)
1387 				continue;
1388 
1389 			dmam_free_coherent(smmu->dev, size,
1390 					   cd_table->l1_desc[i].l2ptr,
1391 					   cd_table->l1_desc[i].l2ptr_dma);
1392 		}
1393 		devm_kfree(smmu->dev, cd_table->l1_desc);
1394 		cd_table->l1_desc = NULL;
1395 
1396 		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1397 	} else {
1398 		l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1399 	}
1400 
1401 	dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
1402 	cd_table->cdtab_dma = 0;
1403 	cd_table->cdtab = NULL;
1404 }
1405 
1406 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1407 {
1408 	bool free;
1409 	struct arm_smmu_ctx_desc *old_cd;
1410 
1411 	if (!cd->asid)
1412 		return false;
1413 
1414 	free = refcount_dec_and_test(&cd->refs);
1415 	if (free) {
1416 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1417 		WARN_ON(old_cd != cd);
1418 	}
1419 	return free;
1420 }
1421 
1422 /* Stream table manipulation functions */
1423 static void
1424 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1425 {
1426 	u64 val = 0;
1427 
1428 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1429 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1430 
1431 	/* See comment in arm_smmu_write_ctx_desc() */
1432 	WRITE_ONCE(*dst, cpu_to_le64(val));
1433 }
1434 
1435 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1436 {
1437 	struct arm_smmu_cmdq_ent cmd = {
1438 		.opcode	= CMDQ_OP_CFGI_STE,
1439 		.cfgi	= {
1440 			.sid	= sid,
1441 			.leaf	= true,
1442 		},
1443 	};
1444 
1445 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1446 }
1447 
1448 static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
1449 {
1450 	memset(target, 0, sizeof(*target));
1451 	target->data[0] = cpu_to_le64(
1452 		STRTAB_STE_0_V |
1453 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
1454 }
1455 
1456 static void arm_smmu_make_bypass_ste(struct arm_smmu_ste *target)
1457 {
1458 	memset(target, 0, sizeof(*target));
1459 	target->data[0] = cpu_to_le64(
1460 		STRTAB_STE_0_V |
1461 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
1462 	target->data[1] = cpu_to_le64(
1463 		FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
1464 }
1465 
1466 static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
1467 				      struct arm_smmu_master *master)
1468 {
1469 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1470 	struct arm_smmu_device *smmu = master->smmu;
1471 
1472 	memset(target, 0, sizeof(*target));
1473 	target->data[0] = cpu_to_le64(
1474 		STRTAB_STE_0_V |
1475 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1476 		FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
1477 		(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1478 		FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
1479 
1480 	target->data[1] = cpu_to_le64(
1481 		FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1482 		FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1483 		FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1484 		FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1485 		((smmu->features & ARM_SMMU_FEAT_STALLS &&
1486 		  !master->stall_enabled) ?
1487 			 STRTAB_STE_1_S1STALLD :
1488 			 0) |
1489 		FIELD_PREP(STRTAB_STE_1_EATS,
1490 			   master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1491 
1492 	if (smmu->features & ARM_SMMU_FEAT_E2H) {
1493 		/*
1494 		 * To support BTM the streamworld needs to match the
1495 		 * configuration of the CPU so that the ASID broadcasts are
1496 		 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
1497 		 * or NS-EL1 (guest). Since an SVA domain can be installed in a
1498 		 * PASID this should always use a BTM compatible configuration
1499 		 * if the HW supports it.
1500 		 */
1501 		target->data[1] |= cpu_to_le64(
1502 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
1503 	} else {
1504 		target->data[1] |= cpu_to_le64(
1505 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1506 
1507 		/*
1508 		 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
1509 		 * arm_smmu_domain_alloc_id()
1510 		 */
1511 		target->data[2] =
1512 			cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
1513 	}
1514 }
1515 
1516 static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
1517 					struct arm_smmu_master *master,
1518 					struct arm_smmu_domain *smmu_domain)
1519 {
1520 	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
1521 	const struct io_pgtable_cfg *pgtbl_cfg =
1522 		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1523 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
1524 		&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1525 	u64 vtcr_val;
1526 
1527 	memset(target, 0, sizeof(*target));
1528 	target->data[0] = cpu_to_le64(
1529 		STRTAB_STE_0_V |
1530 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
1531 
1532 	target->data[1] = cpu_to_le64(
1533 		FIELD_PREP(STRTAB_STE_1_EATS,
1534 			   master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0) |
1535 		FIELD_PREP(STRTAB_STE_1_SHCFG,
1536 			   STRTAB_STE_1_SHCFG_INCOMING));
1537 
1538 	vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1539 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1540 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1541 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1542 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1543 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1544 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1545 	target->data[2] = cpu_to_le64(
1546 		FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1547 		FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
1548 		STRTAB_STE_2_S2AA64 |
1549 #ifdef __BIG_ENDIAN
1550 		STRTAB_STE_2_S2ENDI |
1551 #endif
1552 		STRTAB_STE_2_S2PTW |
1553 		STRTAB_STE_2_S2R);
1554 
1555 	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
1556 				      STRTAB_STE_3_S2TTB_MASK);
1557 }
1558 
1559 /*
1560  * This can safely directly manipulate the STE memory without a sync sequence
1561  * because the STE table has not been installed in the SMMU yet.
1562  */
1563 static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
1564 				       unsigned int nent)
1565 {
1566 	unsigned int i;
1567 
1568 	for (i = 0; i < nent; ++i) {
1569 		if (disable_bypass)
1570 			arm_smmu_make_abort_ste(strtab);
1571 		else
1572 			arm_smmu_make_bypass_ste(strtab);
1573 		strtab++;
1574 	}
1575 }
1576 
1577 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1578 {
1579 	size_t size;
1580 	void *strtab;
1581 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1582 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1583 
1584 	if (desc->l2ptr)
1585 		return 0;
1586 
1587 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1588 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1589 
1590 	desc->span = STRTAB_SPLIT + 1;
1591 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1592 					  GFP_KERNEL);
1593 	if (!desc->l2ptr) {
1594 		dev_err(smmu->dev,
1595 			"failed to allocate l2 stream table for SID %u\n",
1596 			sid);
1597 		return -ENOMEM;
1598 	}
1599 
1600 	arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1601 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1602 	return 0;
1603 }
1604 
1605 static struct arm_smmu_master *
1606 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1607 {
1608 	struct rb_node *node;
1609 	struct arm_smmu_stream *stream;
1610 
1611 	lockdep_assert_held(&smmu->streams_mutex);
1612 
1613 	node = smmu->streams.rb_node;
1614 	while (node) {
1615 		stream = rb_entry(node, struct arm_smmu_stream, node);
1616 		if (stream->id < sid)
1617 			node = node->rb_right;
1618 		else if (stream->id > sid)
1619 			node = node->rb_left;
1620 		else
1621 			return stream->master;
1622 	}
1623 
1624 	return NULL;
1625 }
1626 
1627 /* IRQ and event handlers */
1628 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1629 {
1630 	int ret = 0;
1631 	u32 perm = 0;
1632 	struct arm_smmu_master *master;
1633 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1634 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1635 	struct iopf_fault fault_evt = { };
1636 	struct iommu_fault *flt = &fault_evt.fault;
1637 
1638 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1639 	case EVT_ID_TRANSLATION_FAULT:
1640 	case EVT_ID_ADDR_SIZE_FAULT:
1641 	case EVT_ID_ACCESS_FAULT:
1642 	case EVT_ID_PERMISSION_FAULT:
1643 		break;
1644 	default:
1645 		return -EOPNOTSUPP;
1646 	}
1647 
1648 	/* Stage-2 is always pinned at the moment */
1649 	if (evt[1] & EVTQ_1_S2)
1650 		return -EFAULT;
1651 
1652 	if (!(evt[1] & EVTQ_1_STALL))
1653 		return -EOPNOTSUPP;
1654 
1655 	if (evt[1] & EVTQ_1_RnW)
1656 		perm |= IOMMU_FAULT_PERM_READ;
1657 	else
1658 		perm |= IOMMU_FAULT_PERM_WRITE;
1659 
1660 	if (evt[1] & EVTQ_1_InD)
1661 		perm |= IOMMU_FAULT_PERM_EXEC;
1662 
1663 	if (evt[1] & EVTQ_1_PnU)
1664 		perm |= IOMMU_FAULT_PERM_PRIV;
1665 
1666 	flt->type = IOMMU_FAULT_PAGE_REQ;
1667 	flt->prm = (struct iommu_fault_page_request) {
1668 		.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1669 		.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1670 		.perm = perm,
1671 		.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1672 	};
1673 
1674 	if (ssid_valid) {
1675 		flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1676 		flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1677 	}
1678 
1679 	mutex_lock(&smmu->streams_mutex);
1680 	master = arm_smmu_find_master(smmu, sid);
1681 	if (!master) {
1682 		ret = -EINVAL;
1683 		goto out_unlock;
1684 	}
1685 
1686 	iommu_report_device_fault(master->dev, &fault_evt);
1687 out_unlock:
1688 	mutex_unlock(&smmu->streams_mutex);
1689 	return ret;
1690 }
1691 
1692 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1693 {
1694 	int i, ret;
1695 	struct arm_smmu_device *smmu = dev;
1696 	struct arm_smmu_queue *q = &smmu->evtq.q;
1697 	struct arm_smmu_ll_queue *llq = &q->llq;
1698 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1699 				      DEFAULT_RATELIMIT_BURST);
1700 	u64 evt[EVTQ_ENT_DWORDS];
1701 
1702 	do {
1703 		while (!queue_remove_raw(q, evt)) {
1704 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1705 
1706 			ret = arm_smmu_handle_evt(smmu, evt);
1707 			if (!ret || !__ratelimit(&rs))
1708 				continue;
1709 
1710 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1711 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1712 				dev_info(smmu->dev, "\t0x%016llx\n",
1713 					 (unsigned long long)evt[i]);
1714 
1715 			cond_resched();
1716 		}
1717 
1718 		/*
1719 		 * Not much we can do on overflow, so scream and pretend we're
1720 		 * trying harder.
1721 		 */
1722 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1723 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1724 	} while (!queue_empty(llq));
1725 
1726 	/* Sync our overflow flag, as we believe we're up to speed */
1727 	queue_sync_cons_ovf(q);
1728 	return IRQ_HANDLED;
1729 }
1730 
1731 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1732 {
1733 	u32 sid, ssid;
1734 	u16 grpid;
1735 	bool ssv, last;
1736 
1737 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1738 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1739 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1740 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1741 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1742 
1743 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1744 	dev_info(smmu->dev,
1745 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1746 		 sid, ssid, grpid, last ? "L" : "",
1747 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1748 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1749 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1750 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1751 		 evt[1] & PRIQ_1_ADDR_MASK);
1752 
1753 	if (last) {
1754 		struct arm_smmu_cmdq_ent cmd = {
1755 			.opcode			= CMDQ_OP_PRI_RESP,
1756 			.substream_valid	= ssv,
1757 			.pri			= {
1758 				.sid	= sid,
1759 				.ssid	= ssid,
1760 				.grpid	= grpid,
1761 				.resp	= PRI_RESP_DENY,
1762 			},
1763 		};
1764 
1765 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1766 	}
1767 }
1768 
1769 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1770 {
1771 	struct arm_smmu_device *smmu = dev;
1772 	struct arm_smmu_queue *q = &smmu->priq.q;
1773 	struct arm_smmu_ll_queue *llq = &q->llq;
1774 	u64 evt[PRIQ_ENT_DWORDS];
1775 
1776 	do {
1777 		while (!queue_remove_raw(q, evt))
1778 			arm_smmu_handle_ppr(smmu, evt);
1779 
1780 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1781 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1782 	} while (!queue_empty(llq));
1783 
1784 	/* Sync our overflow flag, as we believe we're up to speed */
1785 	queue_sync_cons_ovf(q);
1786 	return IRQ_HANDLED;
1787 }
1788 
1789 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1790 
1791 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1792 {
1793 	u32 gerror, gerrorn, active;
1794 	struct arm_smmu_device *smmu = dev;
1795 
1796 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1797 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1798 
1799 	active = gerror ^ gerrorn;
1800 	if (!(active & GERROR_ERR_MASK))
1801 		return IRQ_NONE; /* No errors pending */
1802 
1803 	dev_warn(smmu->dev,
1804 		 "unexpected global error reported (0x%08x), this could be serious\n",
1805 		 active);
1806 
1807 	if (active & GERROR_SFM_ERR) {
1808 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1809 		arm_smmu_device_disable(smmu);
1810 	}
1811 
1812 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1813 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1814 
1815 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1816 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1817 
1818 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1819 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1820 
1821 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1822 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1823 
1824 	if (active & GERROR_PRIQ_ABT_ERR)
1825 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1826 
1827 	if (active & GERROR_EVTQ_ABT_ERR)
1828 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1829 
1830 	if (active & GERROR_CMDQ_ERR)
1831 		arm_smmu_cmdq_skip_err(smmu);
1832 
1833 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1834 	return IRQ_HANDLED;
1835 }
1836 
1837 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1838 {
1839 	struct arm_smmu_device *smmu = dev;
1840 
1841 	arm_smmu_evtq_thread(irq, dev);
1842 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1843 		arm_smmu_priq_thread(irq, dev);
1844 
1845 	return IRQ_HANDLED;
1846 }
1847 
1848 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1849 {
1850 	arm_smmu_gerror_handler(irq, dev);
1851 	return IRQ_WAKE_THREAD;
1852 }
1853 
1854 static void
1855 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1856 			struct arm_smmu_cmdq_ent *cmd)
1857 {
1858 	size_t log2_span;
1859 	size_t span_mask;
1860 	/* ATC invalidates are always on 4096-bytes pages */
1861 	size_t inval_grain_shift = 12;
1862 	unsigned long page_start, page_end;
1863 
1864 	/*
1865 	 * ATS and PASID:
1866 	 *
1867 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1868 	 * prefix. In that case all ATC entries within the address range are
1869 	 * invalidated, including those that were requested with a PASID! There
1870 	 * is no way to invalidate only entries without PASID.
1871 	 *
1872 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1873 	 * traffic), translation requests without PASID create ATC entries
1874 	 * without PASID, which must be invalidated with substream_valid clear.
1875 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1876 	 * ATC entries within the address range.
1877 	 */
1878 	*cmd = (struct arm_smmu_cmdq_ent) {
1879 		.opcode			= CMDQ_OP_ATC_INV,
1880 		.substream_valid	= (ssid != IOMMU_NO_PASID),
1881 		.atc.ssid		= ssid,
1882 	};
1883 
1884 	if (!size) {
1885 		cmd->atc.size = ATC_INV_SIZE_ALL;
1886 		return;
1887 	}
1888 
1889 	page_start	= iova >> inval_grain_shift;
1890 	page_end	= (iova + size - 1) >> inval_grain_shift;
1891 
1892 	/*
1893 	 * In an ATS Invalidate Request, the address must be aligned on the
1894 	 * range size, which must be a power of two number of page sizes. We
1895 	 * thus have to choose between grossly over-invalidating the region, or
1896 	 * splitting the invalidation into multiple commands. For simplicity
1897 	 * we'll go with the first solution, but should refine it in the future
1898 	 * if multiple commands are shown to be more efficient.
1899 	 *
1900 	 * Find the smallest power of two that covers the range. The most
1901 	 * significant differing bit between the start and end addresses,
1902 	 * fls(start ^ end), indicates the required span. For example:
1903 	 *
1904 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1905 	 *		x = 0b1000 ^ 0b1011 = 0b11
1906 	 *		span = 1 << fls(x) = 4
1907 	 *
1908 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1909 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1910 	 *		span = 1 << fls(x) = 16
1911 	 */
1912 	log2_span	= fls_long(page_start ^ page_end);
1913 	span_mask	= (1ULL << log2_span) - 1;
1914 
1915 	page_start	&= ~span_mask;
1916 
1917 	cmd->atc.addr	= page_start << inval_grain_shift;
1918 	cmd->atc.size	= log2_span;
1919 }
1920 
1921 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1922 {
1923 	int i;
1924 	struct arm_smmu_cmdq_ent cmd;
1925 	struct arm_smmu_cmdq_batch cmds;
1926 
1927 	arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
1928 
1929 	cmds.num = 0;
1930 	for (i = 0; i < master->num_streams; i++) {
1931 		cmd.atc.sid = master->streams[i].id;
1932 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1933 	}
1934 
1935 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1936 }
1937 
1938 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1939 			    unsigned long iova, size_t size)
1940 {
1941 	int i;
1942 	unsigned long flags;
1943 	struct arm_smmu_cmdq_ent cmd;
1944 	struct arm_smmu_master *master;
1945 	struct arm_smmu_cmdq_batch cmds;
1946 
1947 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1948 		return 0;
1949 
1950 	/*
1951 	 * Ensure that we've completed prior invalidation of the main TLBs
1952 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1953 	 * arm_smmu_enable_ats():
1954 	 *
1955 	 *	// unmap()			// arm_smmu_enable_ats()
1956 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1957 	 *	smp_mb();			[...]
1958 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1959 	 *
1960 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1961 	 * ATS was enabled at the PCI device before completion of the TLBI.
1962 	 */
1963 	smp_mb();
1964 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1965 		return 0;
1966 
1967 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1968 
1969 	cmds.num = 0;
1970 
1971 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1972 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1973 		if (!master->ats_enabled)
1974 			continue;
1975 
1976 		for (i = 0; i < master->num_streams; i++) {
1977 			cmd.atc.sid = master->streams[i].id;
1978 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1979 		}
1980 	}
1981 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1982 
1983 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1984 }
1985 
1986 /* IO_PGTABLE API */
1987 static void arm_smmu_tlb_inv_context(void *cookie)
1988 {
1989 	struct arm_smmu_domain *smmu_domain = cookie;
1990 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1991 	struct arm_smmu_cmdq_ent cmd;
1992 
1993 	/*
1994 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1995 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1996 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1997 	 * insertion to guarantee those are observed before the TLBI. Do be
1998 	 * careful, 007.
1999 	 */
2000 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2001 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
2002 	} else {
2003 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
2004 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2005 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
2006 	}
2007 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2008 }
2009 
2010 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
2011 				     unsigned long iova, size_t size,
2012 				     size_t granule,
2013 				     struct arm_smmu_domain *smmu_domain)
2014 {
2015 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2016 	unsigned long end = iova + size, num_pages = 0, tg = 0;
2017 	size_t inv_range = granule;
2018 	struct arm_smmu_cmdq_batch cmds;
2019 
2020 	if (!size)
2021 		return;
2022 
2023 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2024 		/* Get the leaf page size */
2025 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2026 
2027 		num_pages = size >> tg;
2028 
2029 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
2030 		cmd->tlbi.tg = (tg - 10) / 2;
2031 
2032 		/*
2033 		 * Determine what level the granule is at. For non-leaf, both
2034 		 * io-pgtable and SVA pass a nominal last-level granule because
2035 		 * they don't know what level(s) actually apply, so ignore that
2036 		 * and leave TTL=0. However for various errata reasons we still
2037 		 * want to use a range command, so avoid the SVA corner case
2038 		 * where both scale and num could be 0 as well.
2039 		 */
2040 		if (cmd->tlbi.leaf)
2041 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
2042 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
2043 			num_pages++;
2044 	}
2045 
2046 	cmds.num = 0;
2047 
2048 	while (iova < end) {
2049 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2050 			/*
2051 			 * On each iteration of the loop, the range is 5 bits
2052 			 * worth of the aligned size remaining.
2053 			 * The range in pages is:
2054 			 *
2055 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
2056 			 */
2057 			unsigned long scale, num;
2058 
2059 			/* Determine the power of 2 multiple number of pages */
2060 			scale = __ffs(num_pages);
2061 			cmd->tlbi.scale = scale;
2062 
2063 			/* Determine how many chunks of 2^scale size we have */
2064 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2065 			cmd->tlbi.num = num - 1;
2066 
2067 			/* range is num * 2^scale * pgsize */
2068 			inv_range = num << (scale + tg);
2069 
2070 			/* Clear out the lower order bits for the next iteration */
2071 			num_pages -= num << scale;
2072 		}
2073 
2074 		cmd->tlbi.addr = iova;
2075 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
2076 		iova += inv_range;
2077 	}
2078 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
2079 }
2080 
2081 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
2082 					  size_t granule, bool leaf,
2083 					  struct arm_smmu_domain *smmu_domain)
2084 {
2085 	struct arm_smmu_cmdq_ent cmd = {
2086 		.tlbi = {
2087 			.leaf	= leaf,
2088 		},
2089 	};
2090 
2091 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2092 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2093 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
2094 		cmd.tlbi.asid	= smmu_domain->cd.asid;
2095 	} else {
2096 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
2097 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2098 	}
2099 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2100 
2101 	/*
2102 	 * Unfortunately, this can't be leaf-only since we may have
2103 	 * zapped an entire table.
2104 	 */
2105 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
2106 }
2107 
2108 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
2109 				 size_t granule, bool leaf,
2110 				 struct arm_smmu_domain *smmu_domain)
2111 {
2112 	struct arm_smmu_cmdq_ent cmd = {
2113 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2114 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
2115 		.tlbi = {
2116 			.asid	= asid,
2117 			.leaf	= leaf,
2118 		},
2119 	};
2120 
2121 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2122 }
2123 
2124 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2125 					 unsigned long iova, size_t granule,
2126 					 void *cookie)
2127 {
2128 	struct arm_smmu_domain *smmu_domain = cookie;
2129 	struct iommu_domain *domain = &smmu_domain->domain;
2130 
2131 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2132 }
2133 
2134 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2135 				  size_t granule, void *cookie)
2136 {
2137 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2138 }
2139 
2140 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2141 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2142 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2143 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2144 };
2145 
2146 /* IOMMU API */
2147 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2148 {
2149 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2150 
2151 	switch (cap) {
2152 	case IOMMU_CAP_CACHE_COHERENCY:
2153 		/* Assume that a coherent TCU implies coherent TBUs */
2154 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2155 	case IOMMU_CAP_NOEXEC:
2156 	case IOMMU_CAP_DEFERRED_FLUSH:
2157 		return true;
2158 	default:
2159 		return false;
2160 	}
2161 }
2162 
2163 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2164 {
2165 
2166 	if (type == IOMMU_DOMAIN_SVA)
2167 		return arm_smmu_sva_domain_alloc();
2168 	return ERR_PTR(-EOPNOTSUPP);
2169 }
2170 
2171 static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
2172 {
2173 	struct arm_smmu_domain *smmu_domain;
2174 
2175 	/*
2176 	 * Allocate the domain and initialise some of its data structures.
2177 	 * We can't really do anything meaningful until we've added a
2178 	 * master.
2179 	 */
2180 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2181 	if (!smmu_domain)
2182 		return ERR_PTR(-ENOMEM);
2183 
2184 	mutex_init(&smmu_domain->init_mutex);
2185 	INIT_LIST_HEAD(&smmu_domain->devices);
2186 	spin_lock_init(&smmu_domain->devices_lock);
2187 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2188 
2189 	if (dev) {
2190 		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2191 		int ret;
2192 
2193 		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
2194 		if (ret) {
2195 			kfree(smmu_domain);
2196 			return ERR_PTR(ret);
2197 		}
2198 	}
2199 	return &smmu_domain->domain;
2200 }
2201 
2202 static void arm_smmu_domain_free(struct iommu_domain *domain)
2203 {
2204 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2205 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2206 
2207 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2208 
2209 	/* Free the ASID or VMID */
2210 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2211 		/* Prevent SVA from touching the CD while we're freeing it */
2212 		mutex_lock(&arm_smmu_asid_lock);
2213 		arm_smmu_free_asid(&smmu_domain->cd);
2214 		mutex_unlock(&arm_smmu_asid_lock);
2215 	} else {
2216 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2217 		if (cfg->vmid)
2218 			ida_free(&smmu->vmid_map, cfg->vmid);
2219 	}
2220 
2221 	kfree(smmu_domain);
2222 }
2223 
2224 static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
2225 				       struct arm_smmu_domain *smmu_domain,
2226 				       struct io_pgtable_cfg *pgtbl_cfg)
2227 {
2228 	int ret;
2229 	u32 asid;
2230 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2231 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2232 
2233 	refcount_set(&cd->refs, 1);
2234 
2235 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2236 	mutex_lock(&arm_smmu_asid_lock);
2237 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
2238 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2239 	if (ret)
2240 		goto out_unlock;
2241 
2242 	cd->asid	= (u16)asid;
2243 	cd->ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2244 	cd->tcr		= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2245 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2246 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2247 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2248 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2249 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2250 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2251 	cd->mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2252 
2253 	mutex_unlock(&arm_smmu_asid_lock);
2254 	return 0;
2255 
2256 out_unlock:
2257 	mutex_unlock(&arm_smmu_asid_lock);
2258 	return ret;
2259 }
2260 
2261 static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
2262 				       struct arm_smmu_domain *smmu_domain,
2263 				       struct io_pgtable_cfg *pgtbl_cfg)
2264 {
2265 	int vmid;
2266 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2267 
2268 	/* Reserve VMID 0 for stage-2 bypass STEs */
2269 	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2270 			       GFP_KERNEL);
2271 	if (vmid < 0)
2272 		return vmid;
2273 
2274 	cfg->vmid	= (u16)vmid;
2275 	return 0;
2276 }
2277 
2278 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
2279 				    struct arm_smmu_device *smmu)
2280 {
2281 	int ret;
2282 	unsigned long ias, oas;
2283 	enum io_pgtable_fmt fmt;
2284 	struct io_pgtable_cfg pgtbl_cfg;
2285 	struct io_pgtable_ops *pgtbl_ops;
2286 	int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
2287 				 struct arm_smmu_domain *smmu_domain,
2288 				 struct io_pgtable_cfg *pgtbl_cfg);
2289 
2290 	/* Restrict the stage to what we can actually support */
2291 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2292 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2293 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2294 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2295 
2296 	switch (smmu_domain->stage) {
2297 	case ARM_SMMU_DOMAIN_S1:
2298 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2299 		ias = min_t(unsigned long, ias, VA_BITS);
2300 		oas = smmu->ias;
2301 		fmt = ARM_64_LPAE_S1;
2302 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2303 		break;
2304 	case ARM_SMMU_DOMAIN_S2:
2305 		ias = smmu->ias;
2306 		oas = smmu->oas;
2307 		fmt = ARM_64_LPAE_S2;
2308 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2309 		break;
2310 	default:
2311 		return -EINVAL;
2312 	}
2313 
2314 	pgtbl_cfg = (struct io_pgtable_cfg) {
2315 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2316 		.ias		= ias,
2317 		.oas		= oas,
2318 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2319 		.tlb		= &arm_smmu_flush_ops,
2320 		.iommu_dev	= smmu->dev,
2321 	};
2322 
2323 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2324 	if (!pgtbl_ops)
2325 		return -ENOMEM;
2326 
2327 	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2328 	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2329 	smmu_domain->domain.geometry.force_aperture = true;
2330 
2331 	ret = finalise_stage_fn(smmu, smmu_domain, &pgtbl_cfg);
2332 	if (ret < 0) {
2333 		free_io_pgtable_ops(pgtbl_ops);
2334 		return ret;
2335 	}
2336 
2337 	smmu_domain->pgtbl_ops = pgtbl_ops;
2338 	smmu_domain->smmu = smmu;
2339 	return 0;
2340 }
2341 
2342 static struct arm_smmu_ste *
2343 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2344 {
2345 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2346 
2347 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2348 		unsigned int idx1, idx2;
2349 
2350 		/* Two-level walk */
2351 		idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2352 		idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
2353 		return &cfg->l1_desc[idx1].l2ptr[idx2];
2354 	} else {
2355 		/* Simple linear lookup */
2356 		return (struct arm_smmu_ste *)&cfg
2357 			       ->strtab[sid * STRTAB_STE_DWORDS];
2358 	}
2359 }
2360 
2361 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
2362 					 const struct arm_smmu_ste *target)
2363 {
2364 	int i, j;
2365 	struct arm_smmu_device *smmu = master->smmu;
2366 
2367 	for (i = 0; i < master->num_streams; ++i) {
2368 		u32 sid = master->streams[i].id;
2369 		struct arm_smmu_ste *step =
2370 			arm_smmu_get_step_for_sid(smmu, sid);
2371 
2372 		/* Bridged PCI devices may end up with duplicated IDs */
2373 		for (j = 0; j < i; j++)
2374 			if (master->streams[j].id == sid)
2375 				break;
2376 		if (j < i)
2377 			continue;
2378 
2379 		arm_smmu_write_ste(master, sid, step, target);
2380 	}
2381 }
2382 
2383 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2384 {
2385 	struct device *dev = master->dev;
2386 	struct arm_smmu_device *smmu = master->smmu;
2387 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2388 
2389 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2390 		return false;
2391 
2392 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2393 		return false;
2394 
2395 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2396 }
2397 
2398 static void arm_smmu_enable_ats(struct arm_smmu_master *master,
2399 				struct arm_smmu_domain *smmu_domain)
2400 {
2401 	size_t stu;
2402 	struct pci_dev *pdev;
2403 	struct arm_smmu_device *smmu = master->smmu;
2404 
2405 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2406 	if (!master->ats_enabled)
2407 		return;
2408 
2409 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2410 	stu = __ffs(smmu->pgsize_bitmap);
2411 	pdev = to_pci_dev(master->dev);
2412 
2413 	atomic_inc(&smmu_domain->nr_ats_masters);
2414 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2415 	if (pci_enable_ats(pdev, stu))
2416 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2417 }
2418 
2419 static void arm_smmu_disable_ats(struct arm_smmu_master *master,
2420 				 struct arm_smmu_domain *smmu_domain)
2421 {
2422 	if (!master->ats_enabled)
2423 		return;
2424 
2425 	pci_disable_ats(to_pci_dev(master->dev));
2426 	/*
2427 	 * Ensure ATS is disabled at the endpoint before we issue the
2428 	 * ATC invalidation via the SMMU.
2429 	 */
2430 	wmb();
2431 	arm_smmu_atc_inv_master(master);
2432 	atomic_dec(&smmu_domain->nr_ats_masters);
2433 }
2434 
2435 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2436 {
2437 	int ret;
2438 	int features;
2439 	int num_pasids;
2440 	struct pci_dev *pdev;
2441 
2442 	if (!dev_is_pci(master->dev))
2443 		return -ENODEV;
2444 
2445 	pdev = to_pci_dev(master->dev);
2446 
2447 	features = pci_pasid_features(pdev);
2448 	if (features < 0)
2449 		return features;
2450 
2451 	num_pasids = pci_max_pasids(pdev);
2452 	if (num_pasids <= 0)
2453 		return num_pasids;
2454 
2455 	ret = pci_enable_pasid(pdev, features);
2456 	if (ret) {
2457 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2458 		return ret;
2459 	}
2460 
2461 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2462 				  master->smmu->ssid_bits);
2463 	return 0;
2464 }
2465 
2466 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2467 {
2468 	struct pci_dev *pdev;
2469 
2470 	if (!dev_is_pci(master->dev))
2471 		return;
2472 
2473 	pdev = to_pci_dev(master->dev);
2474 
2475 	if (!pdev->pasid_enabled)
2476 		return;
2477 
2478 	master->ssid_bits = 0;
2479 	pci_disable_pasid(pdev);
2480 }
2481 
2482 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2483 {
2484 	struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev);
2485 	struct arm_smmu_domain *smmu_domain;
2486 	unsigned long flags;
2487 
2488 	if (!domain || !(domain->type & __IOMMU_DOMAIN_PAGING))
2489 		return;
2490 
2491 	smmu_domain = to_smmu_domain(domain);
2492 	arm_smmu_disable_ats(master, smmu_domain);
2493 
2494 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2495 	list_del_init(&master->domain_head);
2496 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2497 
2498 	master->ats_enabled = false;
2499 }
2500 
2501 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2502 {
2503 	int ret = 0;
2504 	unsigned long flags;
2505 	struct arm_smmu_ste target;
2506 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2507 	struct arm_smmu_device *smmu;
2508 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2509 	struct arm_smmu_master *master;
2510 
2511 	if (!fwspec)
2512 		return -ENOENT;
2513 
2514 	master = dev_iommu_priv_get(dev);
2515 	smmu = master->smmu;
2516 
2517 	/*
2518 	 * Checking that SVA is disabled ensures that this device isn't bound to
2519 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2520 	 * be removed concurrently since we're holding the group mutex.
2521 	 */
2522 	if (arm_smmu_master_sva_enabled(master)) {
2523 		dev_err(dev, "cannot attach - SVA enabled\n");
2524 		return -EBUSY;
2525 	}
2526 
2527 	mutex_lock(&smmu_domain->init_mutex);
2528 
2529 	if (!smmu_domain->smmu) {
2530 		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
2531 	} else if (smmu_domain->smmu != smmu)
2532 		ret = -EINVAL;
2533 
2534 	mutex_unlock(&smmu_domain->init_mutex);
2535 	if (ret)
2536 		return ret;
2537 
2538 	/*
2539 	 * Prevent arm_smmu_share_asid() from trying to change the ASID
2540 	 * of either the old or new domain while we are working on it.
2541 	 * This allows the STE and the smmu_domain->devices list to
2542 	 * be inconsistent during this routine.
2543 	 */
2544 	mutex_lock(&arm_smmu_asid_lock);
2545 
2546 	arm_smmu_detach_dev(master);
2547 
2548 	master->ats_enabled = arm_smmu_ats_supported(master);
2549 
2550 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2551 	list_add(&master->domain_head, &smmu_domain->devices);
2552 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2553 
2554 	switch (smmu_domain->stage) {
2555 	case ARM_SMMU_DOMAIN_S1:
2556 		if (!master->cd_table.cdtab) {
2557 			ret = arm_smmu_alloc_cd_tables(master);
2558 			if (ret)
2559 				goto out_list_del;
2560 		} else {
2561 			/*
2562 			 * arm_smmu_write_ctx_desc() relies on the entry being
2563 			 * invalid to work, clear any existing entry.
2564 			 */
2565 			ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
2566 						      NULL);
2567 			if (ret)
2568 				goto out_list_del;
2569 		}
2570 
2571 		ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
2572 		if (ret)
2573 			goto out_list_del;
2574 
2575 		arm_smmu_make_cdtable_ste(&target, master);
2576 		arm_smmu_install_ste_for_dev(master, &target);
2577 		break;
2578 	case ARM_SMMU_DOMAIN_S2:
2579 		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain);
2580 		arm_smmu_install_ste_for_dev(master, &target);
2581 		if (master->cd_table.cdtab)
2582 			arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
2583 						      NULL);
2584 		break;
2585 	}
2586 
2587 	arm_smmu_enable_ats(master, smmu_domain);
2588 	goto out_unlock;
2589 
2590 out_list_del:
2591 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2592 	list_del_init(&master->domain_head);
2593 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2594 
2595 out_unlock:
2596 	mutex_unlock(&arm_smmu_asid_lock);
2597 	return ret;
2598 }
2599 
2600 static int arm_smmu_attach_dev_ste(struct device *dev,
2601 				   struct arm_smmu_ste *ste)
2602 {
2603 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2604 
2605 	if (arm_smmu_master_sva_enabled(master))
2606 		return -EBUSY;
2607 
2608 	/*
2609 	 * Do not allow any ASID to be changed while are working on the STE,
2610 	 * otherwise we could miss invalidations.
2611 	 */
2612 	mutex_lock(&arm_smmu_asid_lock);
2613 
2614 	/*
2615 	 * The SMMU does not support enabling ATS with bypass/abort. When the
2616 	 * STE is in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests
2617 	 * and Translated transactions are denied as though ATS is disabled for
2618 	 * the stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2619 	 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2620 	 */
2621 	arm_smmu_detach_dev(master);
2622 
2623 	arm_smmu_install_ste_for_dev(master, ste);
2624 	mutex_unlock(&arm_smmu_asid_lock);
2625 
2626 	/*
2627 	 * This has to be done after removing the master from the
2628 	 * arm_smmu_domain->devices to avoid races updating the same context
2629 	 * descriptor from arm_smmu_share_asid().
2630 	 */
2631 	if (master->cd_table.cdtab)
2632 		arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
2633 	return 0;
2634 }
2635 
2636 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
2637 					struct device *dev)
2638 {
2639 	struct arm_smmu_ste ste;
2640 
2641 	arm_smmu_make_bypass_ste(&ste);
2642 	return arm_smmu_attach_dev_ste(dev, &ste);
2643 }
2644 
2645 static const struct iommu_domain_ops arm_smmu_identity_ops = {
2646 	.attach_dev = arm_smmu_attach_dev_identity,
2647 };
2648 
2649 static struct iommu_domain arm_smmu_identity_domain = {
2650 	.type = IOMMU_DOMAIN_IDENTITY,
2651 	.ops = &arm_smmu_identity_ops,
2652 };
2653 
2654 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
2655 					struct device *dev)
2656 {
2657 	struct arm_smmu_ste ste;
2658 
2659 	arm_smmu_make_abort_ste(&ste);
2660 	return arm_smmu_attach_dev_ste(dev, &ste);
2661 }
2662 
2663 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
2664 	.attach_dev = arm_smmu_attach_dev_blocked,
2665 };
2666 
2667 static struct iommu_domain arm_smmu_blocked_domain = {
2668 	.type = IOMMU_DOMAIN_BLOCKED,
2669 	.ops = &arm_smmu_blocked_ops,
2670 };
2671 
2672 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2673 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2674 			      int prot, gfp_t gfp, size_t *mapped)
2675 {
2676 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2677 
2678 	if (!ops)
2679 		return -ENODEV;
2680 
2681 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2682 }
2683 
2684 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2685 				   size_t pgsize, size_t pgcount,
2686 				   struct iommu_iotlb_gather *gather)
2687 {
2688 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2689 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2690 
2691 	if (!ops)
2692 		return 0;
2693 
2694 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2695 }
2696 
2697 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2698 {
2699 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2700 
2701 	if (smmu_domain->smmu)
2702 		arm_smmu_tlb_inv_context(smmu_domain);
2703 }
2704 
2705 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2706 				struct iommu_iotlb_gather *gather)
2707 {
2708 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2709 
2710 	if (!gather->pgsize)
2711 		return;
2712 
2713 	arm_smmu_tlb_inv_range_domain(gather->start,
2714 				      gather->end - gather->start + 1,
2715 				      gather->pgsize, true, smmu_domain);
2716 }
2717 
2718 static phys_addr_t
2719 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2720 {
2721 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2722 
2723 	if (!ops)
2724 		return 0;
2725 
2726 	return ops->iova_to_phys(ops, iova);
2727 }
2728 
2729 static struct platform_driver arm_smmu_driver;
2730 
2731 static
2732 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2733 {
2734 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2735 							  fwnode);
2736 	put_device(dev);
2737 	return dev ? dev_get_drvdata(dev) : NULL;
2738 }
2739 
2740 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2741 {
2742 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2743 
2744 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2745 		limit *= 1UL << STRTAB_SPLIT;
2746 
2747 	return sid < limit;
2748 }
2749 
2750 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2751 {
2752 	/* Check the SIDs are in range of the SMMU and our stream table */
2753 	if (!arm_smmu_sid_in_range(smmu, sid))
2754 		return -ERANGE;
2755 
2756 	/* Ensure l2 strtab is initialised */
2757 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2758 		return arm_smmu_init_l2_strtab(smmu, sid);
2759 
2760 	return 0;
2761 }
2762 
2763 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2764 				  struct arm_smmu_master *master)
2765 {
2766 	int i;
2767 	int ret = 0;
2768 	struct arm_smmu_stream *new_stream, *cur_stream;
2769 	struct rb_node **new_node, *parent_node = NULL;
2770 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2771 
2772 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2773 				  GFP_KERNEL);
2774 	if (!master->streams)
2775 		return -ENOMEM;
2776 	master->num_streams = fwspec->num_ids;
2777 
2778 	mutex_lock(&smmu->streams_mutex);
2779 	for (i = 0; i < fwspec->num_ids; i++) {
2780 		u32 sid = fwspec->ids[i];
2781 
2782 		new_stream = &master->streams[i];
2783 		new_stream->id = sid;
2784 		new_stream->master = master;
2785 
2786 		ret = arm_smmu_init_sid_strtab(smmu, sid);
2787 		if (ret)
2788 			break;
2789 
2790 		/* Insert into SID tree */
2791 		new_node = &(smmu->streams.rb_node);
2792 		while (*new_node) {
2793 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2794 					      node);
2795 			parent_node = *new_node;
2796 			if (cur_stream->id > new_stream->id) {
2797 				new_node = &((*new_node)->rb_left);
2798 			} else if (cur_stream->id < new_stream->id) {
2799 				new_node = &((*new_node)->rb_right);
2800 			} else {
2801 				dev_warn(master->dev,
2802 					 "stream %u already in tree\n",
2803 					 cur_stream->id);
2804 				ret = -EINVAL;
2805 				break;
2806 			}
2807 		}
2808 		if (ret)
2809 			break;
2810 
2811 		rb_link_node(&new_stream->node, parent_node, new_node);
2812 		rb_insert_color(&new_stream->node, &smmu->streams);
2813 	}
2814 
2815 	if (ret) {
2816 		for (i--; i >= 0; i--)
2817 			rb_erase(&master->streams[i].node, &smmu->streams);
2818 		kfree(master->streams);
2819 	}
2820 	mutex_unlock(&smmu->streams_mutex);
2821 
2822 	return ret;
2823 }
2824 
2825 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2826 {
2827 	int i;
2828 	struct arm_smmu_device *smmu = master->smmu;
2829 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2830 
2831 	if (!smmu || !master->streams)
2832 		return;
2833 
2834 	mutex_lock(&smmu->streams_mutex);
2835 	for (i = 0; i < fwspec->num_ids; i++)
2836 		rb_erase(&master->streams[i].node, &smmu->streams);
2837 	mutex_unlock(&smmu->streams_mutex);
2838 
2839 	kfree(master->streams);
2840 }
2841 
2842 static struct iommu_ops arm_smmu_ops;
2843 
2844 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2845 {
2846 	int ret;
2847 	struct arm_smmu_device *smmu;
2848 	struct arm_smmu_master *master;
2849 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2850 
2851 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2852 		return ERR_PTR(-EBUSY);
2853 
2854 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2855 	if (!smmu)
2856 		return ERR_PTR(-ENODEV);
2857 
2858 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2859 	if (!master)
2860 		return ERR_PTR(-ENOMEM);
2861 
2862 	master->dev = dev;
2863 	master->smmu = smmu;
2864 	INIT_LIST_HEAD(&master->bonds);
2865 	INIT_LIST_HEAD(&master->domain_head);
2866 	dev_iommu_priv_set(dev, master);
2867 
2868 	ret = arm_smmu_insert_master(smmu, master);
2869 	if (ret)
2870 		goto err_free_master;
2871 
2872 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2873 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2874 
2875 	/*
2876 	 * Note that PASID must be enabled before, and disabled after ATS:
2877 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2878 	 *
2879 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2880 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2881 	 *   are changed.
2882 	 */
2883 	arm_smmu_enable_pasid(master);
2884 
2885 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2886 		master->ssid_bits = min_t(u8, master->ssid_bits,
2887 					  CTXDESC_LINEAR_CDMAX);
2888 
2889 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2890 	     device_property_read_bool(dev, "dma-can-stall")) ||
2891 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2892 		master->stall_enabled = true;
2893 
2894 	return &smmu->iommu;
2895 
2896 err_free_master:
2897 	kfree(master);
2898 	return ERR_PTR(ret);
2899 }
2900 
2901 static void arm_smmu_release_device(struct device *dev)
2902 {
2903 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2904 
2905 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2906 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2907 
2908 	/* Put the STE back to what arm_smmu_init_strtab() sets */
2909 	if (disable_bypass && !dev->iommu->require_direct)
2910 		arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
2911 	else
2912 		arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
2913 
2914 	arm_smmu_disable_pasid(master);
2915 	arm_smmu_remove_master(master);
2916 	if (master->cd_table.cdtab)
2917 		arm_smmu_free_cd_tables(master);
2918 	kfree(master);
2919 }
2920 
2921 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2922 {
2923 	struct iommu_group *group;
2924 
2925 	/*
2926 	 * We don't support devices sharing stream IDs other than PCI RID
2927 	 * aliases, since the necessary ID-to-device lookup becomes rather
2928 	 * impractical given a potential sparse 32-bit stream ID space.
2929 	 */
2930 	if (dev_is_pci(dev))
2931 		group = pci_device_group(dev);
2932 	else
2933 		group = generic_device_group(dev);
2934 
2935 	return group;
2936 }
2937 
2938 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2939 {
2940 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2941 	int ret = 0;
2942 
2943 	mutex_lock(&smmu_domain->init_mutex);
2944 	if (smmu_domain->smmu)
2945 		ret = -EPERM;
2946 	else
2947 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2948 	mutex_unlock(&smmu_domain->init_mutex);
2949 
2950 	return ret;
2951 }
2952 
2953 static int arm_smmu_of_xlate(struct device *dev,
2954 			     const struct of_phandle_args *args)
2955 {
2956 	return iommu_fwspec_add_ids(dev, args->args, 1);
2957 }
2958 
2959 static void arm_smmu_get_resv_regions(struct device *dev,
2960 				      struct list_head *head)
2961 {
2962 	struct iommu_resv_region *region;
2963 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2964 
2965 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2966 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2967 	if (!region)
2968 		return;
2969 
2970 	list_add_tail(&region->list, head);
2971 
2972 	iommu_dma_get_resv_regions(dev, head);
2973 }
2974 
2975 static int arm_smmu_dev_enable_feature(struct device *dev,
2976 				       enum iommu_dev_features feat)
2977 {
2978 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2979 
2980 	if (!master)
2981 		return -ENODEV;
2982 
2983 	switch (feat) {
2984 	case IOMMU_DEV_FEAT_IOPF:
2985 		if (!arm_smmu_master_iopf_supported(master))
2986 			return -EINVAL;
2987 		if (master->iopf_enabled)
2988 			return -EBUSY;
2989 		master->iopf_enabled = true;
2990 		return 0;
2991 	case IOMMU_DEV_FEAT_SVA:
2992 		if (!arm_smmu_master_sva_supported(master))
2993 			return -EINVAL;
2994 		if (arm_smmu_master_sva_enabled(master))
2995 			return -EBUSY;
2996 		return arm_smmu_master_enable_sva(master);
2997 	default:
2998 		return -EINVAL;
2999 	}
3000 }
3001 
3002 static int arm_smmu_dev_disable_feature(struct device *dev,
3003 					enum iommu_dev_features feat)
3004 {
3005 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3006 
3007 	if (!master)
3008 		return -EINVAL;
3009 
3010 	switch (feat) {
3011 	case IOMMU_DEV_FEAT_IOPF:
3012 		if (!master->iopf_enabled)
3013 			return -EINVAL;
3014 		if (master->sva_enabled)
3015 			return -EBUSY;
3016 		master->iopf_enabled = false;
3017 		return 0;
3018 	case IOMMU_DEV_FEAT_SVA:
3019 		if (!arm_smmu_master_sva_enabled(master))
3020 			return -EINVAL;
3021 		return arm_smmu_master_disable_sva(master);
3022 	default:
3023 		return -EINVAL;
3024 	}
3025 }
3026 
3027 /*
3028  * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
3029  * PCIe link and save the data to memory by DMA. The hardware is restricted to
3030  * use identity mapping only.
3031  */
3032 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
3033 					 (pdev)->device == 0xa12e)
3034 
3035 static int arm_smmu_def_domain_type(struct device *dev)
3036 {
3037 	if (dev_is_pci(dev)) {
3038 		struct pci_dev *pdev = to_pci_dev(dev);
3039 
3040 		if (IS_HISI_PTT_DEVICE(pdev))
3041 			return IOMMU_DOMAIN_IDENTITY;
3042 	}
3043 
3044 	return 0;
3045 }
3046 
3047 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
3048 {
3049 	struct iommu_domain *domain;
3050 
3051 	domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
3052 	if (WARN_ON(IS_ERR(domain)) || !domain)
3053 		return;
3054 
3055 	arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
3056 }
3057 
3058 static struct iommu_ops arm_smmu_ops = {
3059 	.identity_domain	= &arm_smmu_identity_domain,
3060 	.blocked_domain		= &arm_smmu_blocked_domain,
3061 	.capable		= arm_smmu_capable,
3062 	.domain_alloc		= arm_smmu_domain_alloc,
3063 	.domain_alloc_paging    = arm_smmu_domain_alloc_paging,
3064 	.probe_device		= arm_smmu_probe_device,
3065 	.release_device		= arm_smmu_release_device,
3066 	.device_group		= arm_smmu_device_group,
3067 	.of_xlate		= arm_smmu_of_xlate,
3068 	.get_resv_regions	= arm_smmu_get_resv_regions,
3069 	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
3070 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
3071 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
3072 	.page_response		= arm_smmu_page_response,
3073 	.def_domain_type	= arm_smmu_def_domain_type,
3074 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
3075 	.owner			= THIS_MODULE,
3076 	.default_domain_ops = &(const struct iommu_domain_ops) {
3077 		.attach_dev		= arm_smmu_attach_dev,
3078 		.map_pages		= arm_smmu_map_pages,
3079 		.unmap_pages		= arm_smmu_unmap_pages,
3080 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
3081 		.iotlb_sync		= arm_smmu_iotlb_sync,
3082 		.iova_to_phys		= arm_smmu_iova_to_phys,
3083 		.enable_nesting		= arm_smmu_enable_nesting,
3084 		.free			= arm_smmu_domain_free,
3085 	}
3086 };
3087 
3088 /* Probing and initialisation functions */
3089 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3090 				   struct arm_smmu_queue *q,
3091 				   void __iomem *page,
3092 				   unsigned long prod_off,
3093 				   unsigned long cons_off,
3094 				   size_t dwords, const char *name)
3095 {
3096 	size_t qsz;
3097 
3098 	do {
3099 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3100 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3101 					      GFP_KERNEL);
3102 		if (q->base || qsz < PAGE_SIZE)
3103 			break;
3104 
3105 		q->llq.max_n_shift--;
3106 	} while (1);
3107 
3108 	if (!q->base) {
3109 		dev_err(smmu->dev,
3110 			"failed to allocate queue (0x%zx bytes) for %s\n",
3111 			qsz, name);
3112 		return -ENOMEM;
3113 	}
3114 
3115 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
3116 		dev_info(smmu->dev, "allocated %u entries for %s\n",
3117 			 1 << q->llq.max_n_shift, name);
3118 	}
3119 
3120 	q->prod_reg	= page + prod_off;
3121 	q->cons_reg	= page + cons_off;
3122 	q->ent_dwords	= dwords;
3123 
3124 	q->q_base  = Q_BASE_RWA;
3125 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3126 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3127 
3128 	q->llq.prod = q->llq.cons = 0;
3129 	return 0;
3130 }
3131 
3132 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
3133 {
3134 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
3135 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3136 
3137 	atomic_set(&cmdq->owner_prod, 0);
3138 	atomic_set(&cmdq->lock, 0);
3139 
3140 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
3141 							      GFP_KERNEL);
3142 	if (!cmdq->valid_map)
3143 		return -ENOMEM;
3144 
3145 	return 0;
3146 }
3147 
3148 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3149 {
3150 	int ret;
3151 
3152 	/* cmdq */
3153 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
3154 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
3155 				      CMDQ_ENT_DWORDS, "cmdq");
3156 	if (ret)
3157 		return ret;
3158 
3159 	ret = arm_smmu_cmdq_init(smmu);
3160 	if (ret)
3161 		return ret;
3162 
3163 	/* evtq */
3164 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
3165 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
3166 				      EVTQ_ENT_DWORDS, "evtq");
3167 	if (ret)
3168 		return ret;
3169 
3170 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
3171 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
3172 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
3173 		if (!smmu->evtq.iopf)
3174 			return -ENOMEM;
3175 	}
3176 
3177 	/* priq */
3178 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3179 		return 0;
3180 
3181 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
3182 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
3183 				       PRIQ_ENT_DWORDS, "priq");
3184 }
3185 
3186 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
3187 {
3188 	unsigned int i;
3189 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3190 	void *strtab = smmu->strtab_cfg.strtab;
3191 
3192 	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
3193 				    sizeof(*cfg->l1_desc), GFP_KERNEL);
3194 	if (!cfg->l1_desc)
3195 		return -ENOMEM;
3196 
3197 	for (i = 0; i < cfg->num_l1_ents; ++i) {
3198 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3199 		strtab += STRTAB_L1_DESC_DWORDS << 3;
3200 	}
3201 
3202 	return 0;
3203 }
3204 
3205 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3206 {
3207 	void *strtab;
3208 	u64 reg;
3209 	u32 size, l1size;
3210 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3211 
3212 	/* Calculate the L1 size, capped to the SIDSIZE. */
3213 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3214 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3215 	cfg->num_l1_ents = 1 << size;
3216 
3217 	size += STRTAB_SPLIT;
3218 	if (size < smmu->sid_bits)
3219 		dev_warn(smmu->dev,
3220 			 "2-level strtab only covers %u/%u bits of SID\n",
3221 			 size, smmu->sid_bits);
3222 
3223 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3224 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3225 				     GFP_KERNEL);
3226 	if (!strtab) {
3227 		dev_err(smmu->dev,
3228 			"failed to allocate l1 stream table (%u bytes)\n",
3229 			l1size);
3230 		return -ENOMEM;
3231 	}
3232 	cfg->strtab = strtab;
3233 
3234 	/* Configure strtab_base_cfg for 2 levels */
3235 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3236 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3237 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3238 	cfg->strtab_base_cfg = reg;
3239 
3240 	return arm_smmu_init_l1_strtab(smmu);
3241 }
3242 
3243 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3244 {
3245 	void *strtab;
3246 	u64 reg;
3247 	u32 size;
3248 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3249 
3250 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3251 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3252 				     GFP_KERNEL);
3253 	if (!strtab) {
3254 		dev_err(smmu->dev,
3255 			"failed to allocate linear stream table (%u bytes)\n",
3256 			size);
3257 		return -ENOMEM;
3258 	}
3259 	cfg->strtab = strtab;
3260 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3261 
3262 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3263 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3264 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3265 	cfg->strtab_base_cfg = reg;
3266 
3267 	arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents);
3268 	return 0;
3269 }
3270 
3271 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3272 {
3273 	u64 reg;
3274 	int ret;
3275 
3276 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3277 		ret = arm_smmu_init_strtab_2lvl(smmu);
3278 	else
3279 		ret = arm_smmu_init_strtab_linear(smmu);
3280 
3281 	if (ret)
3282 		return ret;
3283 
3284 	/* Set the strtab base address */
3285 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3286 	reg |= STRTAB_BASE_RA;
3287 	smmu->strtab_cfg.strtab_base = reg;
3288 
3289 	ida_init(&smmu->vmid_map);
3290 
3291 	return 0;
3292 }
3293 
3294 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3295 {
3296 	int ret;
3297 
3298 	mutex_init(&smmu->streams_mutex);
3299 	smmu->streams = RB_ROOT;
3300 
3301 	ret = arm_smmu_init_queues(smmu);
3302 	if (ret)
3303 		return ret;
3304 
3305 	return arm_smmu_init_strtab(smmu);
3306 }
3307 
3308 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3309 				   unsigned int reg_off, unsigned int ack_off)
3310 {
3311 	u32 reg;
3312 
3313 	writel_relaxed(val, smmu->base + reg_off);
3314 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3315 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3316 }
3317 
3318 /* GBPA is "special" */
3319 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3320 {
3321 	int ret;
3322 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3323 
3324 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3325 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3326 	if (ret)
3327 		return ret;
3328 
3329 	reg &= ~clr;
3330 	reg |= set;
3331 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3332 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3333 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3334 
3335 	if (ret)
3336 		dev_err(smmu->dev, "GBPA not responding to update\n");
3337 	return ret;
3338 }
3339 
3340 static void arm_smmu_free_msis(void *data)
3341 {
3342 	struct device *dev = data;
3343 
3344 	platform_device_msi_free_irqs_all(dev);
3345 }
3346 
3347 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3348 {
3349 	phys_addr_t doorbell;
3350 	struct device *dev = msi_desc_to_dev(desc);
3351 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3352 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3353 
3354 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3355 	doorbell &= MSI_CFG0_ADDR_MASK;
3356 
3357 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3358 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3359 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3360 }
3361 
3362 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3363 {
3364 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3365 	struct device *dev = smmu->dev;
3366 
3367 	/* Clear the MSI address regs */
3368 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3369 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3370 
3371 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3372 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3373 	else
3374 		nvec--;
3375 
3376 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3377 		return;
3378 
3379 	if (!dev->msi.domain) {
3380 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3381 		return;
3382 	}
3383 
3384 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3385 	ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3386 	if (ret) {
3387 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3388 		return;
3389 	}
3390 
3391 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3392 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3393 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3394 
3395 	/* Add callback to free MSIs on teardown */
3396 	devm_add_action(dev, arm_smmu_free_msis, dev);
3397 }
3398 
3399 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3400 {
3401 	int irq, ret;
3402 
3403 	arm_smmu_setup_msis(smmu);
3404 
3405 	/* Request interrupt lines */
3406 	irq = smmu->evtq.q.irq;
3407 	if (irq) {
3408 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3409 						arm_smmu_evtq_thread,
3410 						IRQF_ONESHOT,
3411 						"arm-smmu-v3-evtq", smmu);
3412 		if (ret < 0)
3413 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3414 	} else {
3415 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3416 	}
3417 
3418 	irq = smmu->gerr_irq;
3419 	if (irq) {
3420 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3421 				       0, "arm-smmu-v3-gerror", smmu);
3422 		if (ret < 0)
3423 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3424 	} else {
3425 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3426 	}
3427 
3428 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3429 		irq = smmu->priq.q.irq;
3430 		if (irq) {
3431 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3432 							arm_smmu_priq_thread,
3433 							IRQF_ONESHOT,
3434 							"arm-smmu-v3-priq",
3435 							smmu);
3436 			if (ret < 0)
3437 				dev_warn(smmu->dev,
3438 					 "failed to enable priq irq\n");
3439 		} else {
3440 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3441 		}
3442 	}
3443 }
3444 
3445 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3446 {
3447 	int ret, irq;
3448 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3449 
3450 	/* Disable IRQs first */
3451 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3452 				      ARM_SMMU_IRQ_CTRLACK);
3453 	if (ret) {
3454 		dev_err(smmu->dev, "failed to disable irqs\n");
3455 		return ret;
3456 	}
3457 
3458 	irq = smmu->combined_irq;
3459 	if (irq) {
3460 		/*
3461 		 * Cavium ThunderX2 implementation doesn't support unique irq
3462 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3463 		 */
3464 		ret = devm_request_threaded_irq(smmu->dev, irq,
3465 					arm_smmu_combined_irq_handler,
3466 					arm_smmu_combined_irq_thread,
3467 					IRQF_ONESHOT,
3468 					"arm-smmu-v3-combined-irq", smmu);
3469 		if (ret < 0)
3470 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3471 	} else
3472 		arm_smmu_setup_unique_irqs(smmu);
3473 
3474 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3475 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3476 
3477 	/* Enable interrupt generation on the SMMU */
3478 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3479 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3480 	if (ret)
3481 		dev_warn(smmu->dev, "failed to enable irqs\n");
3482 
3483 	return 0;
3484 }
3485 
3486 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3487 {
3488 	int ret;
3489 
3490 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3491 	if (ret)
3492 		dev_err(smmu->dev, "failed to clear cr0\n");
3493 
3494 	return ret;
3495 }
3496 
3497 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3498 {
3499 	int ret;
3500 	u32 reg, enables;
3501 	struct arm_smmu_cmdq_ent cmd;
3502 
3503 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3504 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3505 	if (reg & CR0_SMMUEN) {
3506 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3507 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3508 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3509 	}
3510 
3511 	ret = arm_smmu_device_disable(smmu);
3512 	if (ret)
3513 		return ret;
3514 
3515 	/* CR1 (table and queue memory attributes) */
3516 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3517 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3518 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3519 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3520 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3521 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3522 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3523 
3524 	/* CR2 (random crap) */
3525 	reg = CR2_PTM | CR2_RECINVSID;
3526 
3527 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3528 		reg |= CR2_E2H;
3529 
3530 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3531 
3532 	/* Stream table */
3533 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3534 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3535 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3536 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3537 
3538 	/* Command queue */
3539 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3540 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3541 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3542 
3543 	enables = CR0_CMDQEN;
3544 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3545 				      ARM_SMMU_CR0ACK);
3546 	if (ret) {
3547 		dev_err(smmu->dev, "failed to enable command queue\n");
3548 		return ret;
3549 	}
3550 
3551 	/* Invalidate any cached configuration */
3552 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3553 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3554 
3555 	/* Invalidate any stale TLB entries */
3556 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3557 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3558 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3559 	}
3560 
3561 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3562 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3563 
3564 	/* Event queue */
3565 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3566 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3567 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3568 
3569 	enables |= CR0_EVTQEN;
3570 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3571 				      ARM_SMMU_CR0ACK);
3572 	if (ret) {
3573 		dev_err(smmu->dev, "failed to enable event queue\n");
3574 		return ret;
3575 	}
3576 
3577 	/* PRI queue */
3578 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3579 		writeq_relaxed(smmu->priq.q.q_base,
3580 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3581 		writel_relaxed(smmu->priq.q.llq.prod,
3582 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3583 		writel_relaxed(smmu->priq.q.llq.cons,
3584 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3585 
3586 		enables |= CR0_PRIQEN;
3587 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3588 					      ARM_SMMU_CR0ACK);
3589 		if (ret) {
3590 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3591 			return ret;
3592 		}
3593 	}
3594 
3595 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3596 		enables |= CR0_ATSCHK;
3597 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3598 					      ARM_SMMU_CR0ACK);
3599 		if (ret) {
3600 			dev_err(smmu->dev, "failed to enable ATS check\n");
3601 			return ret;
3602 		}
3603 	}
3604 
3605 	ret = arm_smmu_setup_irqs(smmu);
3606 	if (ret) {
3607 		dev_err(smmu->dev, "failed to setup irqs\n");
3608 		return ret;
3609 	}
3610 
3611 	if (is_kdump_kernel())
3612 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3613 
3614 	/* Enable the SMMU interface, or ensure bypass */
3615 	if (!bypass || disable_bypass) {
3616 		enables |= CR0_SMMUEN;
3617 	} else {
3618 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3619 		if (ret)
3620 			return ret;
3621 	}
3622 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3623 				      ARM_SMMU_CR0ACK);
3624 	if (ret) {
3625 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3626 		return ret;
3627 	}
3628 
3629 	return 0;
3630 }
3631 
3632 #define IIDR_IMPLEMENTER_ARM		0x43b
3633 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
3634 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
3635 
3636 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3637 {
3638 	u32 reg;
3639 	unsigned int implementer, productid, variant, revision;
3640 
3641 	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3642 	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3643 	productid = FIELD_GET(IIDR_PRODUCTID, reg);
3644 	variant = FIELD_GET(IIDR_VARIANT, reg);
3645 	revision = FIELD_GET(IIDR_REVISION, reg);
3646 
3647 	switch (implementer) {
3648 	case IIDR_IMPLEMENTER_ARM:
3649 		switch (productid) {
3650 		case IIDR_PRODUCTID_ARM_MMU_600:
3651 			/* Arm erratum 1076982 */
3652 			if (variant == 0 && revision <= 2)
3653 				smmu->features &= ~ARM_SMMU_FEAT_SEV;
3654 			/* Arm erratum 1209401 */
3655 			if (variant < 2)
3656 				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3657 			break;
3658 		case IIDR_PRODUCTID_ARM_MMU_700:
3659 			/* Arm erratum 2812531 */
3660 			smmu->features &= ~ARM_SMMU_FEAT_BTM;
3661 			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3662 			/* Arm errata 2268618, 2812531 */
3663 			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3664 			break;
3665 		}
3666 		break;
3667 	}
3668 }
3669 
3670 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3671 {
3672 	u32 reg;
3673 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3674 
3675 	/* IDR0 */
3676 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3677 
3678 	/* 2-level structures */
3679 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3680 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3681 
3682 	if (reg & IDR0_CD2L)
3683 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3684 
3685 	/*
3686 	 * Translation table endianness.
3687 	 * We currently require the same endianness as the CPU, but this
3688 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3689 	 */
3690 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3691 	case IDR0_TTENDIAN_MIXED:
3692 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3693 		break;
3694 #ifdef __BIG_ENDIAN
3695 	case IDR0_TTENDIAN_BE:
3696 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3697 		break;
3698 #else
3699 	case IDR0_TTENDIAN_LE:
3700 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3701 		break;
3702 #endif
3703 	default:
3704 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3705 		return -ENXIO;
3706 	}
3707 
3708 	/* Boolean feature flags */
3709 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3710 		smmu->features |= ARM_SMMU_FEAT_PRI;
3711 
3712 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3713 		smmu->features |= ARM_SMMU_FEAT_ATS;
3714 
3715 	if (reg & IDR0_SEV)
3716 		smmu->features |= ARM_SMMU_FEAT_SEV;
3717 
3718 	if (reg & IDR0_MSI) {
3719 		smmu->features |= ARM_SMMU_FEAT_MSI;
3720 		if (coherent && !disable_msipolling)
3721 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3722 	}
3723 
3724 	if (reg & IDR0_HYP) {
3725 		smmu->features |= ARM_SMMU_FEAT_HYP;
3726 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3727 			smmu->features |= ARM_SMMU_FEAT_E2H;
3728 	}
3729 
3730 	/*
3731 	 * The coherency feature as set by FW is used in preference to the ID
3732 	 * register, but warn on mismatch.
3733 	 */
3734 	if (!!(reg & IDR0_COHACC) != coherent)
3735 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3736 			 coherent ? "true" : "false");
3737 
3738 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3739 	case IDR0_STALL_MODEL_FORCE:
3740 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3741 		fallthrough;
3742 	case IDR0_STALL_MODEL_STALL:
3743 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3744 	}
3745 
3746 	if (reg & IDR0_S1P)
3747 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3748 
3749 	if (reg & IDR0_S2P)
3750 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3751 
3752 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3753 		dev_err(smmu->dev, "no translation support!\n");
3754 		return -ENXIO;
3755 	}
3756 
3757 	/* We only support the AArch64 table format at present */
3758 	switch (FIELD_GET(IDR0_TTF, reg)) {
3759 	case IDR0_TTF_AARCH32_64:
3760 		smmu->ias = 40;
3761 		fallthrough;
3762 	case IDR0_TTF_AARCH64:
3763 		break;
3764 	default:
3765 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3766 		return -ENXIO;
3767 	}
3768 
3769 	/* ASID/VMID sizes */
3770 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3771 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3772 
3773 	/* IDR1 */
3774 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3775 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3776 		dev_err(smmu->dev, "embedded implementation not supported\n");
3777 		return -ENXIO;
3778 	}
3779 
3780 	/* Queue sizes, capped to ensure natural alignment */
3781 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3782 					     FIELD_GET(IDR1_CMDQS, reg));
3783 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3784 		/*
3785 		 * We don't support splitting up batches, so one batch of
3786 		 * commands plus an extra sync needs to fit inside the command
3787 		 * queue. There's also no way we can handle the weird alignment
3788 		 * restrictions on the base pointer for a unit-length queue.
3789 		 */
3790 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3791 			CMDQ_BATCH_ENTRIES);
3792 		return -ENXIO;
3793 	}
3794 
3795 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3796 					     FIELD_GET(IDR1_EVTQS, reg));
3797 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3798 					     FIELD_GET(IDR1_PRIQS, reg));
3799 
3800 	/* SID/SSID sizes */
3801 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3802 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3803 	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3804 
3805 	/*
3806 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3807 	 * table, use a linear table instead.
3808 	 */
3809 	if (smmu->sid_bits <= STRTAB_SPLIT)
3810 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3811 
3812 	/* IDR3 */
3813 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3814 	if (FIELD_GET(IDR3_RIL, reg))
3815 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3816 
3817 	/* IDR5 */
3818 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3819 
3820 	/* Maximum number of outstanding stalls */
3821 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3822 
3823 	/* Page sizes */
3824 	if (reg & IDR5_GRAN64K)
3825 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3826 	if (reg & IDR5_GRAN16K)
3827 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3828 	if (reg & IDR5_GRAN4K)
3829 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3830 
3831 	/* Input address size */
3832 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3833 		smmu->features |= ARM_SMMU_FEAT_VAX;
3834 
3835 	/* Output address size */
3836 	switch (FIELD_GET(IDR5_OAS, reg)) {
3837 	case IDR5_OAS_32_BIT:
3838 		smmu->oas = 32;
3839 		break;
3840 	case IDR5_OAS_36_BIT:
3841 		smmu->oas = 36;
3842 		break;
3843 	case IDR5_OAS_40_BIT:
3844 		smmu->oas = 40;
3845 		break;
3846 	case IDR5_OAS_42_BIT:
3847 		smmu->oas = 42;
3848 		break;
3849 	case IDR5_OAS_44_BIT:
3850 		smmu->oas = 44;
3851 		break;
3852 	case IDR5_OAS_52_BIT:
3853 		smmu->oas = 52;
3854 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3855 		break;
3856 	default:
3857 		dev_info(smmu->dev,
3858 			"unknown output address size. Truncating to 48-bit\n");
3859 		fallthrough;
3860 	case IDR5_OAS_48_BIT:
3861 		smmu->oas = 48;
3862 	}
3863 
3864 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3865 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3866 	else
3867 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3868 
3869 	/* Set the DMA mask for our table walker */
3870 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3871 		dev_warn(smmu->dev,
3872 			 "failed to set DMA mask for table walker\n");
3873 
3874 	smmu->ias = max(smmu->ias, smmu->oas);
3875 
3876 	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3877 	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3878 		smmu->features |= ARM_SMMU_FEAT_NESTING;
3879 
3880 	arm_smmu_device_iidr_probe(smmu);
3881 
3882 	if (arm_smmu_sva_supported(smmu))
3883 		smmu->features |= ARM_SMMU_FEAT_SVA;
3884 
3885 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3886 		 smmu->ias, smmu->oas, smmu->features);
3887 	return 0;
3888 }
3889 
3890 #ifdef CONFIG_ACPI
3891 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3892 {
3893 	switch (model) {
3894 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3895 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3896 		break;
3897 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3898 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3899 		break;
3900 	}
3901 
3902 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3903 }
3904 
3905 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3906 				      struct arm_smmu_device *smmu)
3907 {
3908 	struct acpi_iort_smmu_v3 *iort_smmu;
3909 	struct device *dev = smmu->dev;
3910 	struct acpi_iort_node *node;
3911 
3912 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3913 
3914 	/* Retrieve SMMUv3 specific data */
3915 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3916 
3917 	acpi_smmu_get_options(iort_smmu->model, smmu);
3918 
3919 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3920 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3921 
3922 	return 0;
3923 }
3924 #else
3925 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3926 					     struct arm_smmu_device *smmu)
3927 {
3928 	return -ENODEV;
3929 }
3930 #endif
3931 
3932 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3933 				    struct arm_smmu_device *smmu)
3934 {
3935 	struct device *dev = &pdev->dev;
3936 	u32 cells;
3937 	int ret = -EINVAL;
3938 
3939 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3940 		dev_err(dev, "missing #iommu-cells property\n");
3941 	else if (cells != 1)
3942 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3943 	else
3944 		ret = 0;
3945 
3946 	parse_driver_options(smmu);
3947 
3948 	if (of_dma_is_coherent(dev->of_node))
3949 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3950 
3951 	return ret;
3952 }
3953 
3954 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3955 {
3956 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3957 		return SZ_64K;
3958 	else
3959 		return SZ_128K;
3960 }
3961 
3962 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3963 				      resource_size_t size)
3964 {
3965 	struct resource res = DEFINE_RES_MEM(start, size);
3966 
3967 	return devm_ioremap_resource(dev, &res);
3968 }
3969 
3970 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3971 {
3972 	struct list_head rmr_list;
3973 	struct iommu_resv_region *e;
3974 
3975 	INIT_LIST_HEAD(&rmr_list);
3976 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3977 
3978 	list_for_each_entry(e, &rmr_list, list) {
3979 		struct iommu_iort_rmr_data *rmr;
3980 		int ret, i;
3981 
3982 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3983 		for (i = 0; i < rmr->num_sids; i++) {
3984 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3985 			if (ret) {
3986 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3987 					rmr->sids[i]);
3988 				continue;
3989 			}
3990 
3991 			/*
3992 			 * STE table is not programmed to HW, see
3993 			 * arm_smmu_initial_bypass_stes()
3994 			 */
3995 			arm_smmu_make_bypass_ste(
3996 				arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
3997 		}
3998 	}
3999 
4000 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4001 }
4002 
4003 static int arm_smmu_device_probe(struct platform_device *pdev)
4004 {
4005 	int irq, ret;
4006 	struct resource *res;
4007 	resource_size_t ioaddr;
4008 	struct arm_smmu_device *smmu;
4009 	struct device *dev = &pdev->dev;
4010 	bool bypass;
4011 
4012 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4013 	if (!smmu)
4014 		return -ENOMEM;
4015 	smmu->dev = dev;
4016 
4017 	if (dev->of_node) {
4018 		ret = arm_smmu_device_dt_probe(pdev, smmu);
4019 	} else {
4020 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
4021 		if (ret == -ENODEV)
4022 			return ret;
4023 	}
4024 
4025 	/* Set bypass mode according to firmware probing result */
4026 	bypass = !!ret;
4027 
4028 	/* Base address */
4029 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
4030 	if (!res)
4031 		return -EINVAL;
4032 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4033 		dev_err(dev, "MMIO region too small (%pr)\n", res);
4034 		return -EINVAL;
4035 	}
4036 	ioaddr = res->start;
4037 
4038 	/*
4039 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4040 	 * the PMCG registers which are reserved by the PMU driver.
4041 	 */
4042 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4043 	if (IS_ERR(smmu->base))
4044 		return PTR_ERR(smmu->base);
4045 
4046 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
4047 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4048 					       ARM_SMMU_REG_SZ);
4049 		if (IS_ERR(smmu->page1))
4050 			return PTR_ERR(smmu->page1);
4051 	} else {
4052 		smmu->page1 = smmu->base;
4053 	}
4054 
4055 	/* Interrupt lines */
4056 
4057 	irq = platform_get_irq_byname_optional(pdev, "combined");
4058 	if (irq > 0)
4059 		smmu->combined_irq = irq;
4060 	else {
4061 		irq = platform_get_irq_byname_optional(pdev, "eventq");
4062 		if (irq > 0)
4063 			smmu->evtq.q.irq = irq;
4064 
4065 		irq = platform_get_irq_byname_optional(pdev, "priq");
4066 		if (irq > 0)
4067 			smmu->priq.q.irq = irq;
4068 
4069 		irq = platform_get_irq_byname_optional(pdev, "gerror");
4070 		if (irq > 0)
4071 			smmu->gerr_irq = irq;
4072 	}
4073 	/* Probe the h/w */
4074 	ret = arm_smmu_device_hw_probe(smmu);
4075 	if (ret)
4076 		return ret;
4077 
4078 	/* Initialise in-memory data structures */
4079 	ret = arm_smmu_init_structures(smmu);
4080 	if (ret)
4081 		return ret;
4082 
4083 	/* Record our private device structure */
4084 	platform_set_drvdata(pdev, smmu);
4085 
4086 	/* Check for RMRs and install bypass STEs if any */
4087 	arm_smmu_rmr_install_bypass_ste(smmu);
4088 
4089 	/* Reset the device */
4090 	ret = arm_smmu_device_reset(smmu, bypass);
4091 	if (ret)
4092 		return ret;
4093 
4094 	/* And we're up. Go go go! */
4095 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4096 				     "smmu3.%pa", &ioaddr);
4097 	if (ret)
4098 		return ret;
4099 
4100 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
4101 	if (ret) {
4102 		dev_err(dev, "Failed to register iommu\n");
4103 		iommu_device_sysfs_remove(&smmu->iommu);
4104 		return ret;
4105 	}
4106 
4107 	return 0;
4108 }
4109 
4110 static void arm_smmu_device_remove(struct platform_device *pdev)
4111 {
4112 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4113 
4114 	iommu_device_unregister(&smmu->iommu);
4115 	iommu_device_sysfs_remove(&smmu->iommu);
4116 	arm_smmu_device_disable(smmu);
4117 	iopf_queue_free(smmu->evtq.iopf);
4118 	ida_destroy(&smmu->vmid_map);
4119 }
4120 
4121 static void arm_smmu_device_shutdown(struct platform_device *pdev)
4122 {
4123 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4124 
4125 	arm_smmu_device_disable(smmu);
4126 }
4127 
4128 static const struct of_device_id arm_smmu_of_match[] = {
4129 	{ .compatible = "arm,smmu-v3", },
4130 	{ },
4131 };
4132 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4133 
4134 static void arm_smmu_driver_unregister(struct platform_driver *drv)
4135 {
4136 	arm_smmu_sva_notifier_synchronize();
4137 	platform_driver_unregister(drv);
4138 }
4139 
4140 static struct platform_driver arm_smmu_driver = {
4141 	.driver	= {
4142 		.name			= "arm-smmu-v3",
4143 		.of_match_table		= arm_smmu_of_match,
4144 		.suppress_bind_attrs	= true,
4145 	},
4146 	.probe	= arm_smmu_device_probe,
4147 	.remove_new = arm_smmu_device_remove,
4148 	.shutdown = arm_smmu_device_shutdown,
4149 };
4150 module_driver(arm_smmu_driver, platform_driver_register,
4151 	      arm_smmu_driver_unregister);
4152 
4153 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4154 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4155 MODULE_ALIAS("platform:arm-smmu-v3");
4156 MODULE_LICENSE("GPL v2");
4157