xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision 7255fcc80d4b525cc10cfaaf7f485830d4ed2000)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 
30 #include "arm-smmu-v3.h"
31 #include "../../dma-iommu.h"
32 
33 static bool disable_bypass = true;
34 module_param(disable_bypass, bool, 0444);
35 MODULE_PARM_DESC(disable_bypass,
36 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
37 
38 static bool disable_msipolling;
39 module_param(disable_msipolling, bool, 0444);
40 MODULE_PARM_DESC(disable_msipolling,
41 	"Disable MSI-based polling for CMD_SYNC completion.");
42 
43 enum arm_smmu_msi_index {
44 	EVTQ_MSI_INDEX,
45 	GERROR_MSI_INDEX,
46 	PRIQ_MSI_INDEX,
47 	ARM_SMMU_MAX_MSIS,
48 };
49 
50 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu,
51 				      ioasid_t sid);
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
84 
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88 	{ 0, NULL},
89 };
90 
91 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
92 				    struct arm_smmu_device *smmu);
93 
94 static void parse_driver_options(struct arm_smmu_device *smmu)
95 {
96 	int i = 0;
97 
98 	do {
99 		if (of_property_read_bool(smmu->dev->of_node,
100 						arm_smmu_options[i].prop)) {
101 			smmu->options |= arm_smmu_options[i].opt;
102 			dev_notice(smmu->dev, "option %s\n",
103 				arm_smmu_options[i].prop);
104 		}
105 	} while (arm_smmu_options[++i].opt);
106 }
107 
108 /* Low-level queue manipulation functions */
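/* Return true if the queue has room for at least n new entries. */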
109 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
110 {
111 	u32 space, prod, cons;
112 
113 	prod = Q_IDX(q, q->prod);
114 	cons = Q_IDX(q, q->cons);
115 
116 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
117 		space = (1 << q->max_n_shift) - (prod - cons);
118 	else
119 		space = cons - prod;
120 
121 	return space >= n;
122 }
123 
124 static bool queue_full(struct arm_smmu_ll_queue *q)
125 {
126 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
127 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
128 }
129 
130 static bool queue_empty(struct arm_smmu_ll_queue *q)
131 {
132 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
133 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
134 }
135 
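/*
 * Return true once the cons pointer has moved past the given prod value,
 * taking the wrap bits into account.
 */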
136 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
137 {
138 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
139 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
140 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
141 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
142 }
143 
144 static void queue_sync_cons_out(struct arm_smmu_queue *q)
145 {
146 	/*
147 	 * Ensure that all CPU accesses (reads and writes) to the queue
148 	 * are complete before we update the cons pointer.
149 	 */
150 	__iomb();
151 	writel_relaxed(q->llq.cons, q->cons_reg);
152 }
153 
154 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
155 {
156 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
157 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
158 }
159 
160 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
161 {
162 	struct arm_smmu_ll_queue *llq = &q->llq;
163 
164 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
165 		return;
166 
167 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
168 		      Q_IDX(llq, llq->cons);
169 	queue_sync_cons_out(q);
170 }
171 
172 static int queue_sync_prod_in(struct arm_smmu_queue *q)
173 {
174 	u32 prod;
175 	int ret = 0;
176 
177 	/*
178 	 * We can't use the _relaxed() variant here, as we must prevent
179 	 * speculative reads of the queue before we have determined that
180 	 * prod has indeed moved.
181 	 */
182 	prod = readl(q->prod_reg);
183 
184 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
185 		ret = -EOVERFLOW;
186 
187 	q->llq.prod = prod;
188 	return ret;
189 }
190 
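/* Return the prod value advanced by n entries, preserving the overflow flag. */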
191 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
192 {
193 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
194 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
195 }
196 
197 static void queue_poll_init(struct arm_smmu_device *smmu,
198 			    struct arm_smmu_queue_poll *qp)
199 {
200 	qp->delay = 1;
201 	qp->spin_cnt = 0;
202 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
203 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
204 }
205 
206 static int queue_poll(struct arm_smmu_queue_poll *qp)
207 {
208 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
209 		return -ETIMEDOUT;
210 
211 	if (qp->wfe) {
212 		wfe();
213 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
214 		cpu_relax();
215 	} else {
216 		udelay(qp->delay);
217 		qp->delay *= 2;
218 		qp->spin_cnt = 0;
219 	}
220 
221 	return 0;
222 }
223 
224 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
225 {
226 	int i;
227 
228 	for (i = 0; i < n_dwords; ++i)
229 		*dst++ = cpu_to_le64(*src++);
230 }
231 
232 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
233 {
234 	int i;
235 
236 	for (i = 0; i < n_dwords; ++i)
237 		*dst++ = le64_to_cpu(*src++);
238 }
239 
240 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
241 {
242 	if (queue_empty(&q->llq))
243 		return -EAGAIN;
244 
245 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
246 	queue_inc_cons(&q->llq);
247 	queue_sync_cons_out(q);
248 	return 0;
249 }
250 
251 /* High-level queue accessors */
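/*
 * Encode a command descriptor into the 16-byte command queue entry format
 * consumed by the hardware. Returns -ENOENT for an unrecognised opcode and
 * -EINVAL for an invalid PRI response.
 */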
252 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
253 {
254 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
255 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
256 
257 	switch (ent->opcode) {
258 	case CMDQ_OP_TLBI_EL2_ALL:
259 	case CMDQ_OP_TLBI_NSNH_ALL:
260 		break;
261 	case CMDQ_OP_PREFETCH_CFG:
262 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
263 		break;
264 	case CMDQ_OP_CFGI_CD:
265 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
266 		fallthrough;
267 	case CMDQ_OP_CFGI_STE:
268 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
269 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
270 		break;
271 	case CMDQ_OP_CFGI_CD_ALL:
272 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
273 		break;
274 	case CMDQ_OP_CFGI_ALL:
275 		/* Cover the entire SID range */
276 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
277 		break;
278 	case CMDQ_OP_TLBI_NH_VA:
279 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
280 		fallthrough;
281 	case CMDQ_OP_TLBI_EL2_VA:
282 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
283 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
284 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
285 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
286 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
287 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
288 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
289 		break;
290 	case CMDQ_OP_TLBI_S2_IPA:
291 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
292 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
293 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
294 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
295 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
296 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
297 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
298 		break;
299 	case CMDQ_OP_TLBI_NH_ASID:
300 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
301 		fallthrough;
302 	case CMDQ_OP_TLBI_S12_VMALL:
303 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
304 		break;
305 	case CMDQ_OP_TLBI_EL2_ASID:
306 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
307 		break;
308 	case CMDQ_OP_ATC_INV:
309 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
310 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
311 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
312 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
313 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
314 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
315 		break;
316 	case CMDQ_OP_PRI_RESP:
317 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
318 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
319 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
320 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
321 		switch (ent->pri.resp) {
322 		case PRI_RESP_DENY:
323 		case PRI_RESP_FAIL:
324 		case PRI_RESP_SUCC:
325 			break;
326 		default:
327 			return -EINVAL;
328 		}
329 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
330 		break;
331 	case CMDQ_OP_RESUME:
332 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
333 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
334 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
335 		break;
336 	case CMDQ_OP_CMD_SYNC:
337 		if (ent->sync.msiaddr) {
338 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
339 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
340 		} else {
341 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
342 		}
343 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
344 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
345 		break;
346 	default:
347 		return -ENOENT;
348 	}
349 
350 	return 0;
351 }
352 
353 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
354 {
355 	return &smmu->cmdq;
356 }
357 
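/*
 * Build a CMD_SYNC. When MSI polling is in use, completion is signalled by
 * the SMMU writing the MSI payload back into the CMD_SYNC's own queue slot.
 */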
358 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
359 					 struct arm_smmu_queue *q, u32 prod)
360 {
361 	struct arm_smmu_cmdq_ent ent = {
362 		.opcode = CMDQ_OP_CMD_SYNC,
363 	};
364 
365 	/*
366 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
367 	 * payload, so the write will zero the entire command on that platform.
368 	 */
369 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
370 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
371 				   q->ent_dwords * 8;
372 	}
373 
374 	arm_smmu_cmdq_build_cmd(cmd, &ent);
375 }
376 
377 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
378 				     struct arm_smmu_queue *q)
379 {
380 	static const char * const cerror_str[] = {
381 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
382 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
383 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
384 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
385 	};
386 
387 	int i;
388 	u64 cmd[CMDQ_ENT_DWORDS];
389 	u32 cons = readl_relaxed(q->cons_reg);
390 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
391 	struct arm_smmu_cmdq_ent cmd_sync = {
392 		.opcode = CMDQ_OP_CMD_SYNC,
393 	};
394 
395 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
396 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
397 
398 	switch (idx) {
399 	case CMDQ_ERR_CERROR_ABT_IDX:
400 		dev_err(smmu->dev, "retrying command fetch\n");
401 		return;
402 	case CMDQ_ERR_CERROR_NONE_IDX:
403 		return;
404 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
405 		/*
406 		 * ATC Invalidation Completion timeout. CONS is still pointing
407 		 * at the CMD_SYNC. Attempt to complete other pending commands
408 		 * by repeating the CMD_SYNC, though we might well end up back
409 		 * here since the ATC invalidation may still be pending.
410 		 */
411 		return;
412 	case CMDQ_ERR_CERROR_ILL_IDX:
413 	default:
414 		break;
415 	}
416 
417 	/*
418 	 * We may have concurrent producers, so we need to be careful
419 	 * not to touch any of the shadow cmdq state.
420 	 */
421 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
422 	dev_err(smmu->dev, "skipping command in error state:\n");
423 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
424 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
425 
426 	/* Convert the erroneous command into a CMD_SYNC */
427 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
428 
429 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
430 }
431 
432 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
433 {
434 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
435 }
436 
437 /*
438  * Command queue locking.
439  * This is a form of bastardised rwlock with the following major changes:
440  *
441  * - The only LOCK routines are exclusive_trylock() and shared_lock().
442  *   Neither have barrier semantics, and instead provide only a control
443  *   dependency.
444  *
445  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
446  *   fails if the caller appears to be the last lock holder (yes, this is
447  *   racy). All successful UNLOCK routines have RELEASE semantics.
448  */
449 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
450 {
451 	int val;
452 
453 	/*
454 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
455 	 * lock counter. When held in exclusive state, the lock counter is set
456 	 * to INT_MIN so these increments won't hurt as the value will remain
457 	 * negative.
458 	 */
459 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
460 		return;
461 
462 	do {
463 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
464 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
465 }
466 
467 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
468 {
469 	(void)atomic_dec_return_release(&cmdq->lock);
470 }
471 
472 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
473 {
474 	if (atomic_read(&cmdq->lock) == 1)
475 		return false;
476 
477 	arm_smmu_cmdq_shared_unlock(cmdq);
478 	return true;
479 }
480 
481 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
482 ({									\
483 	bool __ret;							\
484 	local_irq_save(flags);						\
485 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
486 	if (!__ret)							\
487 		local_irq_restore(flags);				\
488 	__ret;								\
489 })
490 
491 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
492 ({									\
493 	atomic_set_release(&cmdq->lock, 0);				\
494 	local_irq_restore(flags);					\
495 })
496 
497 
498 /*
499  * Command queue insertion.
500  * This is made fiddly by our attempts to achieve some sort of scalability
501  * since there is one queue shared amongst all of the CPUs in the system.  If
502  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
503  * then you'll *love* this monstrosity.
504  *
505  * The basic idea is to split the queue up into ranges of commands that are
506  * owned by a given CPU; the owner may not have written all of the commands
507  * itself, but is responsible for advancing the hardware prod pointer when
508  * the time comes. The algorithm is roughly:
509  *
510  * 	1. Allocate some space in the queue. At this point we also discover
511  *	   whether the head of the queue is currently owned by another CPU,
512  *	   or whether we are the owner.
513  *
514  *	2. Write our commands into our allocated slots in the queue.
515  *
516  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
517  *
518  *	4. If we are an owner:
519  *		a. Wait for the previous owner to finish.
520  *		b. Mark the queue head as unowned, which tells us the range
521  *		   that we are responsible for publishing.
522  *		c. Wait for all commands in our owned range to become valid.
523  *		d. Advance the hardware prod pointer.
524  *		e. Tell the next owner we've finished.
525  *
526  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
527  *	   owner), then we need to stick around until it has completed:
528  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
529  *		   to clear the first 4 bytes.
530  *		b. Otherwise, we spin waiting for the hardware cons pointer to
531  *		   advance past our command.
532  *
533  * The devil is in the details, particularly the use of locking for handling
534  * SYNC completion and freeing up space in the queue before we think that it is
535  * full.
536  */
537 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
538 					       u32 sprod, u32 eprod, bool set)
539 {
540 	u32 swidx, sbidx, ewidx, ebidx;
541 	struct arm_smmu_ll_queue llq = {
542 		.max_n_shift	= cmdq->q.llq.max_n_shift,
543 		.prod		= sprod,
544 	};
545 
546 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
547 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
548 
549 	while (llq.prod != eprod) {
550 		unsigned long mask;
551 		atomic_long_t *ptr;
552 		u32 limit = BITS_PER_LONG;
553 
554 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
555 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
556 
557 		ptr = &cmdq->valid_map[swidx];
558 
559 		if ((swidx == ewidx) && (sbidx < ebidx))
560 			limit = ebidx;
561 
562 		mask = GENMASK(limit - 1, sbidx);
563 
564 		/*
565 		 * The valid bit is the inverse of the wrap bit. This means
566 		 * that a zero-initialised queue is invalid and, after marking
567 		 * all entries as valid, they become invalid again when we
568 		 * wrap.
569 		 */
570 		if (set) {
571 			atomic_long_xor(mask, ptr);
572 		} else { /* Poll */
573 			unsigned long valid;
574 
575 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
576 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
577 		}
578 
579 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
580 	}
581 }
582 
583 /* Mark all entries in the range [sprod, eprod) as valid */
584 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
585 					u32 sprod, u32 eprod)
586 {
587 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
588 }
589 
590 /* Wait for all entries in the range [sprod, eprod) to become valid */
591 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
592 					 u32 sprod, u32 eprod)
593 {
594 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
595 }
596 
597 /* Wait for the command queue to become non-full */
598 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
599 					     struct arm_smmu_ll_queue *llq)
600 {
601 	unsigned long flags;
602 	struct arm_smmu_queue_poll qp;
603 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
604 	int ret = 0;
605 
606 	/*
607 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
608 	 * that fails, spin until somebody else updates it for us.
609 	 */
610 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
611 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
612 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
613 		llq->val = READ_ONCE(cmdq->q.llq.val);
614 		return 0;
615 	}
616 
617 	queue_poll_init(smmu, &qp);
618 	do {
619 		llq->val = READ_ONCE(cmdq->q.llq.val);
620 		if (!queue_full(llq))
621 			break;
622 
623 		ret = queue_poll(&qp);
624 	} while (!ret);
625 
626 	return ret;
627 }
628 
629 /*
630  * Wait until the SMMU signals a CMD_SYNC completion MSI.
631  * Must be called with the cmdq lock held in some capacity.
632  */
633 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
634 					  struct arm_smmu_ll_queue *llq)
635 {
636 	int ret = 0;
637 	struct arm_smmu_queue_poll qp;
638 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
639 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
640 
641 	queue_poll_init(smmu, &qp);
642 
643 	/*
644 	 * The MSI won't generate an event, since it's being written back
645 	 * into the command queue.
646 	 */
647 	qp.wfe = false;
648 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
649 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
650 	return ret;
651 }
652 
653 /*
654  * Wait until the SMMU cons index passes llq->prod.
655  * Must be called with the cmdq lock held in some capacity.
656  */
657 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
658 					       struct arm_smmu_ll_queue *llq)
659 {
660 	struct arm_smmu_queue_poll qp;
661 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
662 	u32 prod = llq->prod;
663 	int ret = 0;
664 
665 	queue_poll_init(smmu, &qp);
666 	llq->val = READ_ONCE(cmdq->q.llq.val);
667 	do {
668 		if (queue_consumed(llq, prod))
669 			break;
670 
671 		ret = queue_poll(&qp);
672 
673 		/*
674 		 * This needs to be a readl() so that our subsequent call
675 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
676 		 *
677 		 * Specifically, we need to ensure that we observe all
678 		 * shared_lock()s by other CMD_SYNCs that share our owner,
679 		 * so that a failing call to tryunlock() means that we're
680 		 * the last one out and therefore we can safely advance
681 		 * cmdq->q.llq.cons. Roughly speaking:
682 		 *
683 		 * CPU 0		CPU1			CPU2 (us)
684 		 *
685 		 * if (sync)
686 		 * 	shared_lock();
687 		 *
688 		 * dma_wmb();
689 		 * set_valid_map();
690 		 *
691 		 * 			if (owner) {
692 		 *				poll_valid_map();
693 		 *				<control dependency>
694 		 *				writel(prod_reg);
695 		 *
696 		 *						readl(cons_reg);
697 		 *						tryunlock();
698 		 *
699 		 * Requires us to see CPU 0's shared_lock() acquisition.
700 		 */
701 		llq->cons = readl(cmdq->q.cons_reg);
702 	} while (!ret);
703 
704 	return ret;
705 }
706 
707 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
708 					 struct arm_smmu_ll_queue *llq)
709 {
710 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
711 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
712 
713 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
714 }
715 
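/* Copy n commands into the queue, starting at prod and wrapping as needed. */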
716 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
717 					u32 prod, int n)
718 {
719 	int i;
720 	struct arm_smmu_ll_queue llq = {
721 		.max_n_shift	= cmdq->q.llq.max_n_shift,
722 		.prod		= prod,
723 	};
724 
725 	for (i = 0; i < n; ++i) {
726 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
727 
728 		prod = queue_inc_prod_n(&llq, i);
729 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
730 	}
731 }
732 
733 /*
734  * This is the actual insertion function, and provides the following
735  * ordering guarantees to callers:
736  *
737  * - There is a dma_wmb() before publishing any commands to the queue.
738  *   This can be relied upon to order prior writes to data structures
739  *   in memory (such as a CD or an STE) before the command.
740  *
741  * - On completion of a CMD_SYNC, there is a control dependency.
742  *   This can be relied upon to order subsequent writes to memory (e.g.
743  *   freeing an IOVA) after completion of the CMD_SYNC.
744  *
745  * - Command insertion is totally ordered, so if two CPUs each race to
746  *   insert their own list of commands then all of the commands from one
747  *   CPU will appear before any of the commands from the other CPU.
748  */
749 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
750 				       u64 *cmds, int n, bool sync)
751 {
752 	u64 cmd_sync[CMDQ_ENT_DWORDS];
753 	u32 prod;
754 	unsigned long flags;
755 	bool owner;
756 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
757 	struct arm_smmu_ll_queue llq, head;
758 	int ret = 0;
759 
760 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
761 
762 	/* 1. Allocate some space in the queue */
763 	local_irq_save(flags);
764 	llq.val = READ_ONCE(cmdq->q.llq.val);
765 	do {
766 		u64 old;
767 
768 		while (!queue_has_space(&llq, n + sync)) {
769 			local_irq_restore(flags);
770 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
771 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
772 			local_irq_save(flags);
773 		}
774 
775 		head.cons = llq.cons;
776 		head.prod = queue_inc_prod_n(&llq, n + sync) |
777 					     CMDQ_PROD_OWNED_FLAG;
778 
779 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
780 		if (old == llq.val)
781 			break;
782 
783 		llq.val = old;
784 	} while (1);
785 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
786 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
787 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
788 
789 	/*
790 	 * 2. Write our commands into the queue
791 	 * Dependency ordering from the cmpxchg() loop above.
792 	 */
793 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
794 	if (sync) {
795 		prod = queue_inc_prod_n(&llq, n);
796 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
797 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
798 
799 		/*
800 		 * In order to determine completion of our CMD_SYNC, we must
801 		 * ensure that the queue can't wrap twice without us noticing.
802 		 * We achieve that by taking the cmdq lock as shared before
803 		 * marking our slot as valid.
804 		 */
805 		arm_smmu_cmdq_shared_lock(cmdq);
806 	}
807 
808 	/* 3. Mark our slots as valid, ensuring commands are visible first */
809 	dma_wmb();
810 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
811 
812 	/* 4. If we are the owner, take control of the SMMU hardware */
813 	if (owner) {
814 		/* a. Wait for previous owner to finish */
815 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
816 
817 		/* b. Stop gathering work by clearing the owned flag */
818 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
819 						   &cmdq->q.llq.atomic.prod);
820 		prod &= ~CMDQ_PROD_OWNED_FLAG;
821 
822 		/*
823 		 * c. Wait for any gathered work to be written to the queue.
824 		 * Note that we read our own entries so that we have the control
825 		 * dependency required by (d).
826 		 */
827 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
828 
829 		/*
830 		 * d. Advance the hardware prod pointer
831 		 * Control dependency ordering from the entries becoming valid.
832 		 */
833 		writel_relaxed(prod, cmdq->q.prod_reg);
834 
835 		/*
836 		 * e. Tell the next owner we're done
837 		 * Make sure we've updated the hardware first, so that we don't
838 		 * race to update prod and potentially move it backwards.
839 		 */
840 		atomic_set_release(&cmdq->owner_prod, prod);
841 	}
842 
843 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
844 	if (sync) {
845 		llq.prod = queue_inc_prod_n(&llq, n);
846 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
847 		if (ret) {
848 			dev_err_ratelimited(smmu->dev,
849 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
850 					    llq.prod,
851 					    readl_relaxed(cmdq->q.prod_reg),
852 					    readl_relaxed(cmdq->q.cons_reg));
853 		}
854 
855 		/*
856 		 * Try to unlock the cmdq lock. This will fail if we're the last
857 		 * reader, in which case we can safely update cmdq->q.llq.cons
858 		 */
859 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
860 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
861 			arm_smmu_cmdq_shared_unlock(cmdq);
862 		}
863 	}
864 
865 	local_irq_restore(flags);
866 	return ret;
867 }
868 
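/* Build a single command and insert it, optionally followed by a CMD_SYNC. */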
869 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
870 				     struct arm_smmu_cmdq_ent *ent,
871 				     bool sync)
872 {
873 	u64 cmd[CMDQ_ENT_DWORDS];
874 
875 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
876 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
877 			 ent->opcode);
878 		return -EINVAL;
879 	}
880 
881 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
882 }
883 
884 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
885 				   struct arm_smmu_cmdq_ent *ent)
886 {
887 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
888 }
889 
890 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
891 					     struct arm_smmu_cmdq_ent *ent)
892 {
893 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
894 }
895 
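/*
 * Add a command to a batch, flushing the batch to the command queue once it
 * fills up. Implementations with ARM_SMMU_OPT_CMDQ_FORCE_SYNC submit with a
 * CMD_SYNC one entry early.
 */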
896 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
897 				    struct arm_smmu_cmdq_batch *cmds,
898 				    struct arm_smmu_cmdq_ent *cmd)
899 {
900 	int index;
901 
902 	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
903 	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
904 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
905 		cmds->num = 0;
906 	}
907 
908 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
909 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
910 		cmds->num = 0;
911 	}
912 
913 	index = cmds->num * CMDQ_ENT_DWORDS;
914 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
915 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
916 			 cmd->opcode);
917 		return;
918 	}
919 
920 	cmds->num++;
921 }
922 
923 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
924 				      struct arm_smmu_cmdq_batch *cmds)
925 {
926 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
927 }
928 
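/*
 * Respond to a stalled transaction that was previously reported via the event
 * queue, by issuing a CMD_RESUME with the matching stream ID and stall tag.
 */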
929 static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
930 				   struct iommu_page_response *resp)
931 {
932 	struct arm_smmu_cmdq_ent cmd = {0};
933 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
934 	int sid = master->streams[0].id;
935 
936 	if (WARN_ON(!master->stall_enabled))
937 		return;
938 
939 	cmd.opcode		= CMDQ_OP_RESUME;
940 	cmd.resume.sid		= sid;
941 	cmd.resume.stag		= resp->grpid;
942 	switch (resp->code) {
943 	case IOMMU_PAGE_RESP_INVALID:
944 	case IOMMU_PAGE_RESP_FAILURE:
945 		cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
946 		break;
947 	case IOMMU_PAGE_RESP_SUCCESS:
948 		cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
949 		break;
950 	default:
951 		break;
952 	}
953 
954 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
955 	/*
956 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
957 	 * RESUME consumption guarantees that the stalled transaction will be
958 	 * terminated... at some point in the future. PRI_RESP is fire and
959 	 * forget.
960 	 */
961 }
962 
963 /* Context descriptor manipulation functions */
964 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
965 {
966 	struct arm_smmu_cmdq_ent cmd = {
967 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
968 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
969 		.tlbi.asid = asid,
970 	};
971 
972 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
973 }
974 
975 /*
976  * Based on the value of ent, report which bits of the STE the HW will access. It
977  * would be nice if this was complete according to the spec, but minimally it
978  * has to capture the bits this driver uses.
979  */
980 static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent,
981 				  struct arm_smmu_ste *used_bits)
982 {
983 	unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent->data[0]));
984 
985 	used_bits->data[0] = cpu_to_le64(STRTAB_STE_0_V);
986 	if (!(ent->data[0] & cpu_to_le64(STRTAB_STE_0_V)))
987 		return;
988 
989 	used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
990 
991 	/* S1 translates */
992 	if (cfg & BIT(0)) {
993 		used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
994 						  STRTAB_STE_0_S1CTXPTR_MASK |
995 						  STRTAB_STE_0_S1CDMAX);
996 		used_bits->data[1] |=
997 			cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
998 				    STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
999 				    STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
1000 				    STRTAB_STE_1_EATS);
1001 		used_bits->data[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
1002 	}
1003 
1004 	/* S2 translates */
1005 	if (cfg & BIT(1)) {
1006 		used_bits->data[1] |=
1007 			cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
1008 		used_bits->data[2] |=
1009 			cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
1010 				    STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
1011 				    STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R);
1012 		used_bits->data[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
1013 	}
1014 
1015 	if (cfg == STRTAB_STE_0_CFG_BYPASS)
1016 		used_bits->data[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1017 }
1018 
1019 /*
1020  * Figure out if we can do a hitless update of entry to become target. Returns a
1021  * bit mask where each set bit indicates a qword that needs to be updated disruptively.
1022  * unused_update is an intermediate value of entry that has unused bits set to
1023  * their new values.
1024  */
1025 static u8 arm_smmu_entry_qword_diff(const struct arm_smmu_ste *entry,
1026 				    const struct arm_smmu_ste *target,
1027 				    struct arm_smmu_ste *unused_update)
1028 {
1029 	struct arm_smmu_ste target_used = {};
1030 	struct arm_smmu_ste cur_used = {};
1031 	u8 used_qword_diff = 0;
1032 	unsigned int i;
1033 
1034 	arm_smmu_get_ste_used(entry, &cur_used);
1035 	arm_smmu_get_ste_used(target, &target_used);
1036 
1037 	for (i = 0; i != ARRAY_SIZE(target_used.data); i++) {
1038 		/*
1039 		 * Check that masks are up to date; the make functions are not
1040 		 * allowed to set a bit to 1 if the used function doesn't say it
1041 		 * is used.
1042 		 */
1043 		WARN_ON_ONCE(target->data[i] & ~target_used.data[i]);
1044 
1045 		/* Bits can change because they are not currently being used */
1046 		unused_update->data[i] = (entry->data[i] & cur_used.data[i]) |
1047 					 (target->data[i] & ~cur_used.data[i]);
1048 		/*
1049 		 * Each bit indicates that a used bit in a qword needs to be
1050 		 * changed after unused_update is applied.
1051 		 */
1052 		if ((unused_update->data[i] & target_used.data[i]) !=
1053 		    target->data[i])
1054 			used_qword_diff |= 1 << i;
1055 	}
1056 	return used_qword_diff;
1057 }
1058 
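/*
 * Write qwords [start, start + len) of the entry and, if anything changed,
 * invalidate the cached STE for this sid. Returns true if any qword was
 * updated.
 */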
1059 static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid,
1060 		      struct arm_smmu_ste *entry,
1061 		      const struct arm_smmu_ste *target, unsigned int start,
1062 		      unsigned int len)
1063 {
1064 	bool changed = false;
1065 	unsigned int i;
1066 
1067 	for (i = start; len != 0; len--, i++) {
1068 		if (entry->data[i] != target->data[i]) {
1069 			WRITE_ONCE(entry->data[i], target->data[i]);
1070 			changed = true;
1071 		}
1072 	}
1073 
1074 	if (changed)
1075 		arm_smmu_sync_ste_for_sid(smmu, sid);
1076 	return changed;
1077 }
1078 
1079 /*
1080  * Update the STE/CD to the target configuration. The transition from the
1081  * current entry to the target entry takes place over multiple steps that
1082  * attempt to make the transition hitless if possible. This function takes care
1083  * not to create a situation where the HW can perceive a corrupted entry. HW is
1084  * only required to have a 64 bit atomicity with stores from the CPU, while
1085  * only required to have 64 bit atomicity with stores from the CPU, while
1086  * entries are several 64 bit values in size.
1087  * The difference between the current value and the target value is analyzed to
1088  * determine which of three updates are required - disruptive, hitless or no
1089  * change.
1090  *
1091  * In the most general disruptive case we can make any update in three steps:
1092  *  - Disrupting the entry (V=0)
1093  *  - Fill the now-unused qwords, except qword 0 which contains V
1094  *  - Make qword 0 have the final value and valid (V=1) with a single 64
1095  *    bit store
1096  *
1097  * However this disrupts the HW while it is happening. There are several
1098  * interesting cases where a STE/CD can be updated without disturbing the HW
1099  * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
1100  * because the used bits don't intersect. We can detect this by calculating how
1101  * many 64 bit values need updating after adjusting the unused bits, and can then
1102  * skip the V=0 process. This relies on the IGNORED behavior described in the
1103  * specification.
1104  */
1105 static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
1106 			       struct arm_smmu_ste *entry,
1107 			       const struct arm_smmu_ste *target)
1108 {
1109 	unsigned int num_entry_qwords = ARRAY_SIZE(target->data);
1110 	struct arm_smmu_device *smmu = master->smmu;
1111 	struct arm_smmu_ste unused_update;
1112 	u8 used_qword_diff;
1113 
1114 	used_qword_diff =
1115 		arm_smmu_entry_qword_diff(entry, target, &unused_update);
1116 	if (hweight8(used_qword_diff) == 1) {
1117 		/*
1118 		 * Only one qword needs its used bits to be changed. This is a
1119 		 * hitless update: update all bits the current STE is ignoring
1120 		 * to their new values, then update a single "critical qword" to
1121 		 * change the STE, and finally zero out any bits that are now unused
1122 		 * in the target configuration.
1123 		 */
1124 		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
1125 
1126 		/*
1127 		 * Skip writing unused bits in the critical qword since we'll be
1128 		 * writing it in the next step anyway. This can save a sync
1129 		 * when the only change is in that qword.
1130 		 */
1131 		unused_update.data[critical_qword_index] =
1132 			entry->data[critical_qword_index];
1133 		entry_set(smmu, sid, entry, &unused_update, 0, num_entry_qwords);
1134 		entry_set(smmu, sid, entry, target, critical_qword_index, 1);
1135 		entry_set(smmu, sid, entry, target, 0, num_entry_qwords);
1136 	} else if (used_qword_diff) {
1137 		/*
1138 		 * At least two qwords need their in-use bits to be changed. This
1139 		 * requires a breaking update: zero the V bit, write all qwords
1140 		 * but 0, then set qword 0.
1141 		 */
1142 		unused_update.data[0] = entry->data[0] &
1143 					cpu_to_le64(~STRTAB_STE_0_V);
1144 		entry_set(smmu, sid, entry, &unused_update, 0, 1);
1145 		entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1);
1146 		entry_set(smmu, sid, entry, target, 0, 1);
1147 	} else {
1148 		/*
1149 		 * No in-use bit changed. Sanity check that all unused bits are 0
1150 		 * in the entry. The target was already sanity checked by
1151 		 * arm_smmu_entry_qword_diff().
1152 		 */
1153 		WARN_ON_ONCE(
1154 			entry_set(smmu, sid, entry, target, 0, num_entry_qwords));
1155 	}
1156 
1157 	/* It's likely that we'll want to use the new STE soon */
1158 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
1159 		struct arm_smmu_cmdq_ent
1160 			prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
1161 					 .prefetch = {
1162 						 .sid = sid,
1163 					 } };
1164 
1165 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1166 	}
1167 }
1168 
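/*
 * Invalidate any cached copies of the CD for this ssid across all of the
 * master's stream IDs.
 */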
1169 static void arm_smmu_sync_cd(struct arm_smmu_master *master,
1170 			     int ssid, bool leaf)
1171 {
1172 	size_t i;
1173 	struct arm_smmu_cmdq_batch cmds;
1174 	struct arm_smmu_device *smmu = master->smmu;
1175 	struct arm_smmu_cmdq_ent cmd = {
1176 		.opcode	= CMDQ_OP_CFGI_CD,
1177 		.cfgi	= {
1178 			.ssid	= ssid,
1179 			.leaf	= leaf,
1180 		},
1181 	};
1182 
1183 	cmds.num = 0;
1184 	for (i = 0; i < master->num_streams; i++) {
1185 		cmd.cfgi.sid = master->streams[i].id;
1186 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1187 	}
1188 
1189 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1190 }
1191 
1192 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1193 					struct arm_smmu_l1_ctx_desc *l1_desc)
1194 {
1195 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1196 
1197 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1198 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1199 	if (!l1_desc->l2ptr) {
1200 		dev_warn(smmu->dev,
1201 			 "failed to allocate context descriptor table\n");
1202 		return -ENOMEM;
1203 	}
1204 	return 0;
1205 }
1206 
1207 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1208 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1209 {
1210 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1211 		  CTXDESC_L1_DESC_V;
1212 
1213 	/* See comment in arm_smmu_write_ctx_desc() */
1214 	WRITE_ONCE(*dst, cpu_to_le64(val));
1215 }
1216 
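/*
 * Return a pointer to the CD for this ssid, allocating and installing a
 * level-2 table on demand when a two-level CD table is in use.
 */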
1217 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
1218 {
1219 	__le64 *l1ptr;
1220 	unsigned int idx;
1221 	struct arm_smmu_l1_ctx_desc *l1_desc;
1222 	struct arm_smmu_device *smmu = master->smmu;
1223 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1224 
1225 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1226 		return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
1227 
1228 	idx = ssid >> CTXDESC_SPLIT;
1229 	l1_desc = &cd_table->l1_desc[idx];
1230 	if (!l1_desc->l2ptr) {
1231 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1232 			return NULL;
1233 
1234 		l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1235 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1236 		/* An invalid L1CD can be cached */
1237 		arm_smmu_sync_cd(master, ssid, false);
1238 	}
1239 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1240 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1241 }
1242 
1243 int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
1244 			    struct arm_smmu_ctx_desc *cd)
1245 {
1246 	/*
1247 	 * This function handles the following cases:
1248 	 *
1249 	 * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
1250 	 * (2) Install a secondary CD, for SID+SSID traffic.
1251 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1252 	 *     CD, then invalidate the old entry and mappings.
1253 	 * (4) Quiesce the context without clearing the valid bit. Disable
1254 	 *     translation, and ignore any translation fault.
1255 	 * (5) Remove a secondary CD.
1256 	 */
1257 	u64 val;
1258 	bool cd_live;
1259 	__le64 *cdptr;
1260 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1261 	struct arm_smmu_device *smmu = master->smmu;
1262 
1263 	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
1264 		return -E2BIG;
1265 
1266 	cdptr = arm_smmu_get_cd_ptr(master, ssid);
1267 	if (!cdptr)
1268 		return -ENOMEM;
1269 
1270 	val = le64_to_cpu(cdptr[0]);
1271 	cd_live = !!(val & CTXDESC_CD_0_V);
1272 
1273 	if (!cd) { /* (5) */
1274 		val = 0;
1275 	} else if (cd == &quiet_cd) { /* (4) */
1276 		if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1277 			val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R);
1278 		val |= CTXDESC_CD_0_TCR_EPD0;
1279 	} else if (cd_live) { /* (3) */
1280 		val &= ~CTXDESC_CD_0_ASID;
1281 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1282 		/*
1283 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1284 		 * this substream's traffic
1285 		 */
1286 	} else { /* (1) and (2) */
1287 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1288 		cdptr[2] = 0;
1289 		cdptr[3] = cpu_to_le64(cd->mair);
1290 
1291 		/*
1292 		 * STE may be live, and the SMMU might read dwords of this CD in any
1293 		 * order. Ensure that it observes valid values before reading
1294 		 * V=1.
1295 		 */
1296 		arm_smmu_sync_cd(master, ssid, true);
1297 
1298 		val = cd->tcr |
1299 #ifdef __BIG_ENDIAN
1300 			CTXDESC_CD_0_ENDI |
1301 #endif
1302 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1303 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1304 			CTXDESC_CD_0_AA64 |
1305 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1306 			CTXDESC_CD_0_V;
1307 
1308 		if (cd_table->stall_enabled)
1309 			val |= CTXDESC_CD_0_S;
1310 	}
1311 
1312 	/*
1313 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1314 	 * "Configuration structures and configuration invalidation completion"
1315 	 *
1316 	 *   The size of single-copy atomic reads made by the SMMU is
1317 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1318 	 *   field within an aligned 64-bit span of a structure can be altered
1319 	 *   without first making the structure invalid.
1320 	 */
1321 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1322 	arm_smmu_sync_cd(master, ssid, true);
1323 	return 0;
1324 }
1325 
1326 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1327 {
1328 	int ret;
1329 	size_t l1size;
1330 	size_t max_contexts;
1331 	struct arm_smmu_device *smmu = master->smmu;
1332 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1333 
1334 	cd_table->stall_enabled = master->stall_enabled;
1335 	cd_table->s1cdmax = master->ssid_bits;
1336 	max_contexts = 1 << cd_table->s1cdmax;
1337 
1338 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1339 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1340 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1341 		cd_table->num_l1_ents = max_contexts;
1342 
1343 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1344 	} else {
1345 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1346 		cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
1347 						  CTXDESC_L2_ENTRIES);
1348 
1349 		cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
1350 					      sizeof(*cd_table->l1_desc),
1351 					      GFP_KERNEL);
1352 		if (!cd_table->l1_desc)
1353 			return -ENOMEM;
1354 
1355 		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1356 	}
1357 
1358 	cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
1359 					   GFP_KERNEL);
1360 	if (!cd_table->cdtab) {
1361 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1362 		ret = -ENOMEM;
1363 		goto err_free_l1;
1364 	}
1365 
1366 	return 0;
1367 
1368 err_free_l1:
1369 	if (cd_table->l1_desc) {
1370 		devm_kfree(smmu->dev, cd_table->l1_desc);
1371 		cd_table->l1_desc = NULL;
1372 	}
1373 	return ret;
1374 }
1375 
1376 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1377 {
1378 	int i;
1379 	size_t size, l1size;
1380 	struct arm_smmu_device *smmu = master->smmu;
1381 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1382 
1383 	if (cd_table->l1_desc) {
1384 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1385 
1386 		for (i = 0; i < cd_table->num_l1_ents; i++) {
1387 			if (!cd_table->l1_desc[i].l2ptr)
1388 				continue;
1389 
1390 			dmam_free_coherent(smmu->dev, size,
1391 					   cd_table->l1_desc[i].l2ptr,
1392 					   cd_table->l1_desc[i].l2ptr_dma);
1393 		}
1394 		devm_kfree(smmu->dev, cd_table->l1_desc);
1395 		cd_table->l1_desc = NULL;
1396 
1397 		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1398 	} else {
1399 		l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1400 	}
1401 
1402 	dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
1403 	cd_table->cdtab_dma = 0;
1404 	cd_table->cdtab = NULL;
1405 }
1406 
1407 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1408 {
1409 	bool free;
1410 	struct arm_smmu_ctx_desc *old_cd;
1411 
1412 	if (!cd->asid)
1413 		return false;
1414 
1415 	free = refcount_dec_and_test(&cd->refs);
1416 	if (free) {
1417 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1418 		WARN_ON(old_cd != cd);
1419 	}
1420 	return free;
1421 }
1422 
1423 /* Stream table manipulation functions */
1424 static void
1425 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1426 {
1427 	u64 val = 0;
1428 
1429 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1430 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1431 
1432 	/* See comment in arm_smmu_write_ctx_desc() */
1433 	WRITE_ONCE(*dst, cpu_to_le64(val));
1434 }
1435 
1436 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1437 {
1438 	struct arm_smmu_cmdq_ent cmd = {
1439 		.opcode	= CMDQ_OP_CFGI_STE,
1440 		.cfgi	= {
1441 			.sid	= sid,
1442 			.leaf	= true,
1443 		},
1444 	};
1445 
1446 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1447 }
1448 
1449 static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
1450 {
1451 	memset(target, 0, sizeof(*target));
1452 	target->data[0] = cpu_to_le64(
1453 		STRTAB_STE_0_V |
1454 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
1455 }
1456 
1457 static void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
1458 				     struct arm_smmu_ste *target)
1459 {
1460 	memset(target, 0, sizeof(*target));
1461 	target->data[0] = cpu_to_le64(
1462 		STRTAB_STE_0_V |
1463 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
1464 
1465 	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1466 		target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1467 							 STRTAB_STE_1_SHCFG_INCOMING));
1468 }
1469 
1470 static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
1471 				      struct arm_smmu_master *master)
1472 {
1473 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1474 	struct arm_smmu_device *smmu = master->smmu;
1475 
1476 	memset(target, 0, sizeof(*target));
1477 	target->data[0] = cpu_to_le64(
1478 		STRTAB_STE_0_V |
1479 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1480 		FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
1481 		(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1482 		FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
1483 
1484 	target->data[1] = cpu_to_le64(
1485 		FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1486 		FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1487 		FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1488 		FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1489 		((smmu->features & ARM_SMMU_FEAT_STALLS &&
1490 		  !master->stall_enabled) ?
1491 			 STRTAB_STE_1_S1STALLD :
1492 			 0) |
1493 		FIELD_PREP(STRTAB_STE_1_EATS,
1494 			   master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1495 
1496 	if (smmu->features & ARM_SMMU_FEAT_E2H) {
1497 		/*
1498 		 * To support BTM the streamworld needs to match the
1499 		 * configuration of the CPU so that the ASID broadcasts are
1500 		 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
1501 		 * or NS-EL1 (guest). Since an SVA domain can be installed in a
1502 		 * PASID this should always use a BTM compatible configuration
1503 		 * if the HW supports it.
1504 		 */
1505 		target->data[1] |= cpu_to_le64(
1506 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
1507 	} else {
1508 		target->data[1] |= cpu_to_le64(
1509 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1510 
1511 		/*
1512 		 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
1513 		 * arm_smmu_domain_alloc_id()
1514 		 */
1515 		target->data[2] =
1516 			cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
1517 	}
1518 }
1519 
1520 static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
1521 					struct arm_smmu_master *master,
1522 					struct arm_smmu_domain *smmu_domain)
1523 {
1524 	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
1525 	const struct io_pgtable_cfg *pgtbl_cfg =
1526 		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1527 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
1528 		&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1529 	u64 vtcr_val;
1530 	struct arm_smmu_device *smmu = master->smmu;
1531 
1532 	memset(target, 0, sizeof(*target));
1533 	target->data[0] = cpu_to_le64(
1534 		STRTAB_STE_0_V |
1535 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
1536 
1537 	target->data[1] = cpu_to_le64(
1538 		FIELD_PREP(STRTAB_STE_1_EATS,
1539 			   master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1540 
1541 	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1542 		target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1543 							  STRTAB_STE_1_SHCFG_INCOMING));
1544 
1545 	vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1546 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1547 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1548 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1549 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1550 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1551 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1552 	target->data[2] = cpu_to_le64(
1553 		FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1554 		FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
1555 		STRTAB_STE_2_S2AA64 |
1556 #ifdef __BIG_ENDIAN
1557 		STRTAB_STE_2_S2ENDI |
1558 #endif
1559 		STRTAB_STE_2_S2PTW |
1560 		STRTAB_STE_2_S2R);
1561 
1562 	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
1563 				      STRTAB_STE_3_S2TTB_MASK);
1564 }
1565 
1566 /*
1567  * This can safely directly manipulate the STE memory without a sync sequence
1568  * because the STE table has not been installed in the SMMU yet.
1569  */
1570 static void arm_smmu_init_initial_stes(struct arm_smmu_device *smmu,
1571 				       struct arm_smmu_ste *strtab,
1572 				       unsigned int nent)
1573 {
1574 	unsigned int i;
1575 
1576 	for (i = 0; i < nent; ++i) {
1577 		if (disable_bypass)
1578 			arm_smmu_make_abort_ste(strtab);
1579 		else
1580 			arm_smmu_make_bypass_ste(smmu, strtab);
1581 		strtab++;
1582 	}
1583 }
1584 
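/*
 * Lazily allocate the level-2 stream table covering this sid, initialise its
 * STEs to abort/bypass and publish it through the level-1 descriptor.
 */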
1585 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1586 {
1587 	size_t size;
1588 	void *strtab;
1589 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1590 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1591 
1592 	if (desc->l2ptr)
1593 		return 0;
1594 
1595 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1596 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1597 
1598 	desc->span = STRTAB_SPLIT + 1;
1599 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1600 					  GFP_KERNEL);
1601 	if (!desc->l2ptr) {
1602 		dev_err(smmu->dev,
1603 			"failed to allocate l2 stream table for SID %u\n",
1604 			sid);
1605 		return -ENOMEM;
1606 	}
1607 
1608 	arm_smmu_init_initial_stes(smmu, desc->l2ptr, 1 << STRTAB_SPLIT);
1609 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1610 	return 0;
1611 }
1612 
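/* Look up the master owning a stream ID. Caller must hold streams_mutex. */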
1613 static struct arm_smmu_master *
1614 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1615 {
1616 	struct rb_node *node;
1617 	struct arm_smmu_stream *stream;
1618 
1619 	lockdep_assert_held(&smmu->streams_mutex);
1620 
1621 	node = smmu->streams.rb_node;
1622 	while (node) {
1623 		stream = rb_entry(node, struct arm_smmu_stream, node);
1624 		if (stream->id < sid)
1625 			node = node->rb_right;
1626 		else if (stream->id > sid)
1627 			node = node->rb_left;
1628 		else
1629 			return stream->master;
1630 	}
1631 
1632 	return NULL;
1633 }
1634 
1635 /* IRQ and event handlers */
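/*
 * Turn a stall event into an iommu_fault page request and report it to the
 * core fault handler. Non-stall faults are left for the caller to log.
 */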
1636 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1637 {
1638 	int ret = 0;
1639 	u32 perm = 0;
1640 	struct arm_smmu_master *master;
1641 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1642 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1643 	struct iopf_fault fault_evt = { };
1644 	struct iommu_fault *flt = &fault_evt.fault;
1645 
1646 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1647 	case EVT_ID_TRANSLATION_FAULT:
1648 	case EVT_ID_ADDR_SIZE_FAULT:
1649 	case EVT_ID_ACCESS_FAULT:
1650 	case EVT_ID_PERMISSION_FAULT:
1651 		break;
1652 	default:
1653 		return -EOPNOTSUPP;
1654 	}
1655 
1656 	/* Stage-2 is always pinned at the moment */
1657 	if (evt[1] & EVTQ_1_S2)
1658 		return -EFAULT;
1659 
1660 	if (!(evt[1] & EVTQ_1_STALL))
1661 		return -EOPNOTSUPP;
1662 
1663 	if (evt[1] & EVTQ_1_RnW)
1664 		perm |= IOMMU_FAULT_PERM_READ;
1665 	else
1666 		perm |= IOMMU_FAULT_PERM_WRITE;
1667 
1668 	if (evt[1] & EVTQ_1_InD)
1669 		perm |= IOMMU_FAULT_PERM_EXEC;
1670 
1671 	if (evt[1] & EVTQ_1_PnU)
1672 		perm |= IOMMU_FAULT_PERM_PRIV;
1673 
1674 	flt->type = IOMMU_FAULT_PAGE_REQ;
1675 	flt->prm = (struct iommu_fault_page_request) {
1676 		.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1677 		.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1678 		.perm = perm,
1679 		.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1680 	};
1681 
1682 	if (ssid_valid) {
1683 		flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1684 		flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1685 	}
1686 
1687 	mutex_lock(&smmu->streams_mutex);
1688 	master = arm_smmu_find_master(smmu, sid);
1689 	if (!master) {
1690 		ret = -EINVAL;
1691 		goto out_unlock;
1692 	}
1693 
1694 	iommu_report_device_fault(master->dev, &fault_evt);
1695 out_unlock:
1696 	mutex_unlock(&smmu->streams_mutex);
1697 	return ret;
1698 }
1699 
1700 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1701 {
1702 	int i, ret;
1703 	struct arm_smmu_device *smmu = dev;
1704 	struct arm_smmu_queue *q = &smmu->evtq.q;
1705 	struct arm_smmu_ll_queue *llq = &q->llq;
1706 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1707 				      DEFAULT_RATELIMIT_BURST);
1708 	u64 evt[EVTQ_ENT_DWORDS];
1709 
1710 	do {
1711 		while (!queue_remove_raw(q, evt)) {
1712 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1713 
1714 			ret = arm_smmu_handle_evt(smmu, evt);
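			/* Only dump events that were not handled, and rate-limit the output */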
1715 			if (!ret || !__ratelimit(&rs))
1716 				continue;
1717 
1718 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1719 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1720 				dev_info(smmu->dev, "\t0x%016llx\n",
1721 					 (unsigned long long)evt[i]);
1722 
1723 			cond_resched();
1724 		}
1725 
1726 		/*
1727 		 * Not much we can do on overflow, so scream and pretend we're
1728 		 * trying harder.
1729 		 */
1730 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1731 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1732 	} while (!queue_empty(llq));
1733 
1734 	/* Sync our overflow flag, as we believe we're up to speed */
1735 	queue_sync_cons_ovf(q);
1736 	return IRQ_HANDLED;
1737 }
1738 
1739 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1740 {
1741 	u32 sid, ssid;
1742 	u16 grpid;
1743 	bool ssv, last;
1744 
1745 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1746 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1747 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1748 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1749 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1750 
1751 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1752 	dev_info(smmu->dev,
1753 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1754 		 sid, ssid, grpid, last ? "L" : "",
1755 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1756 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1757 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1758 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1759 		 evt[1] & PRIQ_1_ADDR_MASK);
1760 
1761 	if (last) {
1762 		struct arm_smmu_cmdq_ent cmd = {
1763 			.opcode			= CMDQ_OP_PRI_RESP,
1764 			.substream_valid	= ssv,
1765 			.pri			= {
1766 				.sid	= sid,
1767 				.ssid	= ssid,
1768 				.grpid	= grpid,
1769 				.resp	= PRI_RESP_DENY,
1770 			},
1771 		};
1772 
1773 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1774 	}
1775 }
1776 
1777 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1778 {
1779 	struct arm_smmu_device *smmu = dev;
1780 	struct arm_smmu_queue *q = &smmu->priq.q;
1781 	struct arm_smmu_ll_queue *llq = &q->llq;
1782 	u64 evt[PRIQ_ENT_DWORDS];
1783 
1784 	do {
1785 		while (!queue_remove_raw(q, evt))
1786 			arm_smmu_handle_ppr(smmu, evt);
1787 
1788 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1789 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1790 	} while (!queue_empty(llq));
1791 
1792 	/* Sync our overflow flag, as we believe we're up to speed */
1793 	queue_sync_cons_ovf(q);
1794 	return IRQ_HANDLED;
1795 }
1796 
1797 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1798 
1799 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1800 {
1801 	u32 gerror, gerrorn, active;
1802 	struct arm_smmu_device *smmu = dev;
1803 
1804 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1805 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1806 
1807 	active = gerror ^ gerrorn;
1808 	if (!(active & GERROR_ERR_MASK))
1809 		return IRQ_NONE; /* No errors pending */
1810 
1811 	dev_warn(smmu->dev,
1812 		 "unexpected global error reported (0x%08x), this could be serious\n",
1813 		 active);
1814 
1815 	if (active & GERROR_SFM_ERR) {
1816 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1817 		arm_smmu_device_disable(smmu);
1818 	}
1819 
1820 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1821 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1822 
1823 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1824 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1825 
1826 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1827 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1828 
1829 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1830 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1831 
1832 	if (active & GERROR_PRIQ_ABT_ERR)
1833 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1834 
1835 	if (active & GERROR_EVTQ_ABT_ERR)
1836 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1837 
1838 	if (active & GERROR_CMDQ_ERR)
1839 		arm_smmu_cmdq_skip_err(smmu);
1840 
1841 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1842 	return IRQ_HANDLED;
1843 }
1844 
1845 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1846 {
1847 	struct arm_smmu_device *smmu = dev;
1848 
1849 	arm_smmu_evtq_thread(irq, dev);
1850 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1851 		arm_smmu_priq_thread(irq, dev);
1852 
1853 	return IRQ_HANDLED;
1854 }
1855 
1856 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1857 {
1858 	arm_smmu_gerror_handler(irq, dev);
1859 	return IRQ_WAKE_THREAD;
1860 }
1861 
1862 static void
1863 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1864 			struct arm_smmu_cmdq_ent *cmd)
1865 {
1866 	size_t log2_span;
1867 	size_t span_mask;
1868 	/* ATC invalidates are always on 4096-byte pages */
1869 	size_t inval_grain_shift = 12;
1870 	unsigned long page_start, page_end;
1871 
1872 	/*
1873 	 * ATS and PASID:
1874 	 *
1875 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1876 	 * prefix. In that case all ATC entries within the address range are
1877 	 * invalidated, including those that were requested with a PASID! There
1878 	 * is no way to invalidate only entries without PASID.
1879 	 *
1880 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1881 	 * traffic), translation requests without PASID create ATC entries
1882 	 * without PASID, which must be invalidated with substream_valid clear.
1883 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1884 	 * ATC entries within the address range.
1885 	 */
1886 	*cmd = (struct arm_smmu_cmdq_ent) {
1887 		.opcode			= CMDQ_OP_ATC_INV,
1888 		.substream_valid	= (ssid != IOMMU_NO_PASID),
1889 		.atc.ssid		= ssid,
1890 	};
1891 
1892 	if (!size) {
1893 		cmd->atc.size = ATC_INV_SIZE_ALL;
1894 		return;
1895 	}
1896 
1897 	page_start	= iova >> inval_grain_shift;
1898 	page_end	= (iova + size - 1) >> inval_grain_shift;
1899 
1900 	/*
1901 	 * In an ATS Invalidate Request, the address must be aligned on the
1902 	 * range size, which must be a power of two number of page sizes. We
1903 	 * thus have to choose between grossly over-invalidating the region, or
1904 	 * splitting the invalidation into multiple commands. For simplicity
1905 	 * we'll go with the first solution, but should refine it in the future
1906 	 * if multiple commands are shown to be more efficient.
1907 	 *
1908 	 * Find the smallest power of two that covers the range. The most
1909 	 * significant differing bit between the start and end addresses,
1910 	 * fls(start ^ end), indicates the required span. For example:
1911 	 *
1912 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1913 	 *		x = 0b1000 ^ 0b1011 = 0b11
1914 	 *		span = 1 << fls(x) = 4
1915 	 *
1916 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1917 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1918 	 *		span = 1 << fls(x) = 16
1919 	 */
1920 	log2_span	= fls_long(page_start ^ page_end);
1921 	span_mask	= (1ULL << log2_span) - 1;
1922 
1923 	page_start	&= ~span_mask;
1924 
1925 	cmd->atc.addr	= page_start << inval_grain_shift;
1926 	cmd->atc.size	= log2_span;
1927 }
1928 
1929 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1930 {
1931 	int i;
1932 	struct arm_smmu_cmdq_ent cmd;
1933 	struct arm_smmu_cmdq_batch cmds;
1934 
1935 	arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
1936 
1937 	cmds.num = 0;
1938 	for (i = 0; i < master->num_streams; i++) {
1939 		cmd.atc.sid = master->streams[i].id;
1940 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1941 	}
1942 
1943 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1944 }
1945 
1946 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1947 			    unsigned long iova, size_t size)
1948 {
1949 	int i;
1950 	unsigned long flags;
1951 	struct arm_smmu_cmdq_ent cmd;
1952 	struct arm_smmu_master *master;
1953 	struct arm_smmu_cmdq_batch cmds;
1954 
1955 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1956 		return 0;
1957 
1958 	/*
1959 	 * Ensure that we've completed prior invalidation of the main TLBs
1960 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1961 	 * arm_smmu_enable_ats():
1962 	 *
1963 	 *	// unmap()			// arm_smmu_enable_ats()
1964 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1965 	 *	smp_mb();			[...]
1966 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1967 	 *
1968 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1969 	 * ATS was enabled at the PCI device before completion of the TLBI.
1970 	 */
1971 	smp_mb();
1972 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1973 		return 0;
1974 
1975 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1976 
1977 	cmds.num = 0;
1978 
1979 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1980 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1981 		if (!master->ats_enabled)
1982 			continue;
1983 
1984 		for (i = 0; i < master->num_streams; i++) {
1985 			cmd.atc.sid = master->streams[i].id;
1986 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1987 		}
1988 	}
1989 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1990 
1991 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1992 }
1993 
1994 /* IO_PGTABLE API */
1995 static void arm_smmu_tlb_inv_context(void *cookie)
1996 {
1997 	struct arm_smmu_domain *smmu_domain = cookie;
1998 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1999 	struct arm_smmu_cmdq_ent cmd;
2000 
2001 	/*
2002 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2003 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
2004 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2005 	 * insertion to guarantee those are observed before the TLBI. Do be
2006 	 * careful, 007.
2007 	 */
2008 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2009 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
2010 	} else {
2011 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
2012 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2013 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
2014 	}
2015 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2016 }
2017 
2018 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
2019 				     unsigned long iova, size_t size,
2020 				     size_t granule,
2021 				     struct arm_smmu_domain *smmu_domain)
2022 {
2023 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2024 	unsigned long end = iova + size, num_pages = 0, tg = 0;
2025 	size_t inv_range = granule;
2026 	struct arm_smmu_cmdq_batch cmds;
2027 
2028 	if (!size)
2029 		return;
2030 
2031 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2032 		/* Get the leaf page size */
2033 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2034 
2035 		num_pages = size >> tg;
2036 
2037 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
2038 		cmd->tlbi.tg = (tg - 10) / 2;
2039 
2040 		/*
2041 		 * Determine what level the granule is at. For non-leaf, both
2042 		 * io-pgtable and SVA pass a nominal last-level granule because
2043 		 * they don't know what level(s) actually apply, so ignore that
2044 		 * and leave TTL=0. However for various errata reasons we still
2045 		 * want to use a range command, so avoid the SVA corner case
2046 		 * where both scale and num could be 0 as well.
2047 		 */
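		/* e.g. a 2MB leaf with 4K pages (tg = 12): ttl = 4 - (21 - 3) / 9 = 2 */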
2048 		if (cmd->tlbi.leaf)
2049 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
2050 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
2051 			num_pages++;
2052 	}
2053 
2054 	cmds.num = 0;
2055 
2056 	while (iova < end) {
2057 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2058 			/*
2059 			 * On each iteration of the loop, the range is 5 bits
2060 			 * worth of the aligned size remaining.
2061 			 * The range in pages is:
2062 			 *
2063 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
2064 			 */
2065 			unsigned long scale, num;
2066 
2067 			/* Determine the power of 2 multiple number of pages */
2068 			scale = __ffs(num_pages);
2069 			cmd->tlbi.scale = scale;
2070 
2071 			/* Determine how many chunks of 2^scale size we have */
2072 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2073 			cmd->tlbi.num = num - 1;
2074 
2075 			/* range is num * 2^scale * pgsize */
2076 			inv_range = num << (scale + tg);
2077 
2078 			/* Clear out the lower order bits for the next iteration */
2079 			num_pages -= num << scale;
2080 		}
2081 
2082 		cmd->tlbi.addr = iova;
2083 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
2084 		iova += inv_range;
2085 	}
2086 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
2087 }
2088 
2089 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
2090 					  size_t granule, bool leaf,
2091 					  struct arm_smmu_domain *smmu_domain)
2092 {
2093 	struct arm_smmu_cmdq_ent cmd = {
2094 		.tlbi = {
2095 			.leaf	= leaf,
2096 		},
2097 	};
2098 
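	/* Stage 1 invalidates by ASID (VA ops), stage 2 by VMID (IPA op) */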
2099 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2100 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2101 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
2102 		cmd.tlbi.asid	= smmu_domain->cd.asid;
2103 	} else {
2104 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
2105 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2106 	}
2107 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2108 
2109 	/*
2110 	 * Unfortunately, this can't be leaf-only since we may have
2111 	 * zapped an entire table.
2112 	 */
2113 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
2114 }
2115 
2116 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
2117 				 size_t granule, bool leaf,
2118 				 struct arm_smmu_domain *smmu_domain)
2119 {
2120 	struct arm_smmu_cmdq_ent cmd = {
2121 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2122 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
2123 		.tlbi = {
2124 			.asid	= asid,
2125 			.leaf	= leaf,
2126 		},
2127 	};
2128 
2129 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2130 }
2131 
2132 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2133 					 unsigned long iova, size_t granule,
2134 					 void *cookie)
2135 {
2136 	struct arm_smmu_domain *smmu_domain = cookie;
2137 	struct iommu_domain *domain = &smmu_domain->domain;
2138 
2139 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2140 }
2141 
2142 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2143 				  size_t granule, void *cookie)
2144 {
2145 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2146 }
2147 
2148 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2149 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2150 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2151 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2152 };
2153 
2154 /* IOMMU API */
2155 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2156 {
2157 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2158 
2159 	switch (cap) {
2160 	case IOMMU_CAP_CACHE_COHERENCY:
2161 		/* Assume that a coherent TCU implies coherent TBUs */
2162 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2163 	case IOMMU_CAP_NOEXEC:
2164 	case IOMMU_CAP_DEFERRED_FLUSH:
2165 		return true;
2166 	default:
2167 		return false;
2168 	}
2169 }
2170 
2171 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2172 {
2174 	if (type == IOMMU_DOMAIN_SVA)
2175 		return arm_smmu_sva_domain_alloc();
2176 	return ERR_PTR(-EOPNOTSUPP);
2177 }
2178 
2179 static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
2180 {
2181 	struct arm_smmu_domain *smmu_domain;
2182 
2183 	/*
2184 	 * Allocate the domain and initialise some of its data structures.
2185 	 * We can't really do anything meaningful until we've added a
2186 	 * master.
2187 	 */
2188 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2189 	if (!smmu_domain)
2190 		return ERR_PTR(-ENOMEM);
2191 
2192 	mutex_init(&smmu_domain->init_mutex);
2193 	INIT_LIST_HEAD(&smmu_domain->devices);
2194 	spin_lock_init(&smmu_domain->devices_lock);
2195 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2196 
2197 	if (dev) {
2198 		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2199 		int ret;
2200 
2201 		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
2202 		if (ret) {
2203 			kfree(smmu_domain);
2204 			return ERR_PTR(ret);
2205 		}
2206 	}
2207 	return &smmu_domain->domain;
2208 }
2209 
2210 static void arm_smmu_domain_free(struct iommu_domain *domain)
2211 {
2212 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2213 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2214 
2215 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2216 
2217 	/* Free the ASID or VMID */
2218 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2219 		/* Prevent SVA from touching the CD while we're freeing it */
2220 		mutex_lock(&arm_smmu_asid_lock);
2221 		arm_smmu_free_asid(&smmu_domain->cd);
2222 		mutex_unlock(&arm_smmu_asid_lock);
2223 	} else {
2224 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2225 		if (cfg->vmid)
2226 			ida_free(&smmu->vmid_map, cfg->vmid);
2227 	}
2228 
2229 	kfree(smmu_domain);
2230 }
2231 
2232 static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
2233 				       struct arm_smmu_domain *smmu_domain,
2234 				       struct io_pgtable_cfg *pgtbl_cfg)
2235 {
2236 	int ret;
2237 	u32 asid;
2238 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2239 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2240 
2241 	refcount_set(&cd->refs, 1);
2242 
2243 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2244 	mutex_lock(&arm_smmu_asid_lock);
2245 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
2246 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2247 	if (ret)
2248 		goto out_unlock;
2249 
2250 	cd->asid	= (u16)asid;
2251 	cd->ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2252 	cd->tcr		= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2253 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2254 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2255 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2256 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2257 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2258 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2259 	cd->mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2260 
2261 	mutex_unlock(&arm_smmu_asid_lock);
2262 	return 0;
2263 
2264 out_unlock:
2265 	mutex_unlock(&arm_smmu_asid_lock);
2266 	return ret;
2267 }
2268 
2269 static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
2270 				       struct arm_smmu_domain *smmu_domain,
2271 				       struct io_pgtable_cfg *pgtbl_cfg)
2272 {
2273 	int vmid;
2274 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2275 
2276 	/* Reserve VMID 0 for stage-2 bypass STEs */
2277 	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2278 			       GFP_KERNEL);
2279 	if (vmid < 0)
2280 		return vmid;
2281 
2282 	cfg->vmid	= (u16)vmid;
2283 	return 0;
2284 }
2285 
2286 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
2287 				    struct arm_smmu_device *smmu)
2288 {
2289 	int ret;
2290 	unsigned long ias, oas;
2291 	enum io_pgtable_fmt fmt;
2292 	struct io_pgtable_cfg pgtbl_cfg;
2293 	struct io_pgtable_ops *pgtbl_ops;
2294 	int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
2295 				 struct arm_smmu_domain *smmu_domain,
2296 				 struct io_pgtable_cfg *pgtbl_cfg);
2297 
2298 	/* Restrict the stage to what we can actually support */
2299 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2300 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2301 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2302 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2303 
2304 	switch (smmu_domain->stage) {
2305 	case ARM_SMMU_DOMAIN_S1:
2306 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2307 		ias = min_t(unsigned long, ias, VA_BITS);
2308 		oas = smmu->ias;
2309 		fmt = ARM_64_LPAE_S1;
2310 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2311 		break;
2312 	case ARM_SMMU_DOMAIN_S2:
2313 		ias = smmu->ias;
2314 		oas = smmu->oas;
2315 		fmt = ARM_64_LPAE_S2;
2316 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2317 		break;
2318 	default:
2319 		return -EINVAL;
2320 	}
2321 
2322 	pgtbl_cfg = (struct io_pgtable_cfg) {
2323 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2324 		.ias		= ias,
2325 		.oas		= oas,
2326 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2327 		.tlb		= &arm_smmu_flush_ops,
2328 		.iommu_dev	= smmu->dev,
2329 	};
2330 
2331 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2332 	if (!pgtbl_ops)
2333 		return -ENOMEM;
2334 
2335 	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2336 	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2337 	smmu_domain->domain.geometry.force_aperture = true;
2338 
2339 	ret = finalise_stage_fn(smmu, smmu_domain, &pgtbl_cfg);
2340 	if (ret < 0) {
2341 		free_io_pgtable_ops(pgtbl_ops);
2342 		return ret;
2343 	}
2344 
2345 	smmu_domain->pgtbl_ops = pgtbl_ops;
2346 	smmu_domain->smmu = smmu;
2347 	return 0;
2348 }
2349 
2350 static struct arm_smmu_ste *
2351 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2352 {
2353 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2354 
2355 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2356 		unsigned int idx1, idx2;
2357 
2358 		/* Two-level walk */
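		/* idx1 indexes the L1 descriptor, idx2 the STE within its L2 table */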
2359 		idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2360 		idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
2361 		return &cfg->l1_desc[idx1].l2ptr[idx2];
2362 	} else {
2363 		/* Simple linear lookup */
2364 		return (struct arm_smmu_ste *)
2365 			&cfg->strtab[sid * STRTAB_STE_DWORDS];
2366 	}
2367 }
2368 
2369 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
2370 					 const struct arm_smmu_ste *target)
2371 {
2372 	int i, j;
2373 	struct arm_smmu_device *smmu = master->smmu;
2374 
2375 	for (i = 0; i < master->num_streams; ++i) {
2376 		u32 sid = master->streams[i].id;
2377 		struct arm_smmu_ste *step =
2378 			arm_smmu_get_step_for_sid(smmu, sid);
2379 
2380 		/* Bridged PCI devices may end up with duplicated IDs */
2381 		for (j = 0; j < i; j++)
2382 			if (master->streams[j].id == sid)
2383 				break;
2384 		if (j < i)
2385 			continue;
2386 
2387 		arm_smmu_write_ste(master, sid, step, target);
2388 	}
2389 }
2390 
2391 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2392 {
2393 	struct device *dev = master->dev;
2394 	struct arm_smmu_device *smmu = master->smmu;
2395 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2396 
2397 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2398 		return false;
2399 
2400 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2401 		return false;
2402 
2403 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2404 }
2405 
2406 static void arm_smmu_enable_ats(struct arm_smmu_master *master,
2407 				struct arm_smmu_domain *smmu_domain)
2408 {
2409 	size_t stu;
2410 	struct pci_dev *pdev;
2411 	struct arm_smmu_device *smmu = master->smmu;
2412 
2413 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2414 	if (!master->ats_enabled)
2415 		return;
2416 
2417 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2418 	stu = __ffs(smmu->pgsize_bitmap);
2419 	pdev = to_pci_dev(master->dev);
2420 
2421 	atomic_inc(&smmu_domain->nr_ats_masters);
2422 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2423 	if (pci_enable_ats(pdev, stu))
2424 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2425 }
2426 
2427 static void arm_smmu_disable_ats(struct arm_smmu_master *master,
2428 				 struct arm_smmu_domain *smmu_domain)
2429 {
2430 	if (!master->ats_enabled)
2431 		return;
2432 
2433 	pci_disable_ats(to_pci_dev(master->dev));
2434 	/*
2435 	 * Ensure ATS is disabled at the endpoint before we issue the
2436 	 * ATC invalidation via the SMMU.
2437 	 */
2438 	wmb();
2439 	arm_smmu_atc_inv_master(master);
2440 	atomic_dec(&smmu_domain->nr_ats_masters);
2441 }
2442 
2443 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2444 {
2445 	int ret;
2446 	int features;
2447 	int num_pasids;
2448 	struct pci_dev *pdev;
2449 
2450 	if (!dev_is_pci(master->dev))
2451 		return -ENODEV;
2452 
2453 	pdev = to_pci_dev(master->dev);
2454 
2455 	features = pci_pasid_features(pdev);
2456 	if (features < 0)
2457 		return features;
2458 
2459 	num_pasids = pci_max_pasids(pdev);
2460 	if (num_pasids <= 0)
2461 		return num_pasids;
2462 
2463 	ret = pci_enable_pasid(pdev, features);
2464 	if (ret) {
2465 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2466 		return ret;
2467 	}
2468 
2469 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2470 				  master->smmu->ssid_bits);
2471 	return 0;
2472 }
2473 
2474 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2475 {
2476 	struct pci_dev *pdev;
2477 
2478 	if (!dev_is_pci(master->dev))
2479 		return;
2480 
2481 	pdev = to_pci_dev(master->dev);
2482 
2483 	if (!pdev->pasid_enabled)
2484 		return;
2485 
2486 	master->ssid_bits = 0;
2487 	pci_disable_pasid(pdev);
2488 }
2489 
2490 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2491 {
2492 	struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev);
2493 	struct arm_smmu_domain *smmu_domain;
2494 	unsigned long flags;
2495 
2496 	if (!domain || !(domain->type & __IOMMU_DOMAIN_PAGING))
2497 		return;
2498 
2499 	smmu_domain = to_smmu_domain(domain);
2500 	arm_smmu_disable_ats(master, smmu_domain);
2501 
2502 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2503 	list_del_init(&master->domain_head);
2504 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2505 
2506 	master->ats_enabled = false;
2507 }
2508 
2509 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2510 {
2511 	int ret = 0;
2512 	unsigned long flags;
2513 	struct arm_smmu_ste target;
2514 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2515 	struct arm_smmu_device *smmu;
2516 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2517 	struct arm_smmu_master *master;
2518 
2519 	if (!fwspec)
2520 		return -ENOENT;
2521 
2522 	master = dev_iommu_priv_get(dev);
2523 	smmu = master->smmu;
2524 
2525 	/*
2526 	 * Checking that SVA is disabled ensures that this device isn't bound to
2527 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2528 	 * be removed concurrently since we're holding the group mutex.
2529 	 */
2530 	if (arm_smmu_master_sva_enabled(master)) {
2531 		dev_err(dev, "cannot attach - SVA enabled\n");
2532 		return -EBUSY;
2533 	}
2534 
2535 	mutex_lock(&smmu_domain->init_mutex);
2536 
2537 	if (!smmu_domain->smmu)
2538 		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
2539 	else if (smmu_domain->smmu != smmu)
2540 		ret = -EINVAL;
2541 
2542 	mutex_unlock(&smmu_domain->init_mutex);
2543 	if (ret)
2544 		return ret;
2545 
2546 	/*
2547 	 * Prevent arm_smmu_share_asid() from trying to change the ASID
2548 	 * of either the old or new domain while we are working on it.
2549 	 * This allows the STE and the smmu_domain->devices list to
2550 	 * be inconsistent during this routine.
2551 	 */
2552 	mutex_lock(&arm_smmu_asid_lock);
2553 
2554 	arm_smmu_detach_dev(master);
2555 
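	/* Determine ATS support before the STE is written and arm_smmu_enable_ats() reads it */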
2556 	master->ats_enabled = arm_smmu_ats_supported(master);
2557 
2558 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2559 	list_add(&master->domain_head, &smmu_domain->devices);
2560 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2561 
2562 	switch (smmu_domain->stage) {
2563 	case ARM_SMMU_DOMAIN_S1:
2564 		if (!master->cd_table.cdtab) {
2565 			ret = arm_smmu_alloc_cd_tables(master);
2566 			if (ret)
2567 				goto out_list_del;
2568 		} else {
2569 			/*
2570 			 * arm_smmu_write_ctx_desc() relies on the entry being
2571 			 * invalid to work, so clear any existing entry.
2572 			 */
2573 			ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
2574 						      NULL);
2575 			if (ret)
2576 				goto out_list_del;
2577 		}
2578 
2579 		ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
2580 		if (ret)
2581 			goto out_list_del;
2582 
2583 		arm_smmu_make_cdtable_ste(&target, master);
2584 		arm_smmu_install_ste_for_dev(master, &target);
2585 		break;
2586 	case ARM_SMMU_DOMAIN_S2:
2587 		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain);
2588 		arm_smmu_install_ste_for_dev(master, &target);
2589 		if (master->cd_table.cdtab)
2590 			arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
2591 						      NULL);
2592 		break;
2593 	}
2594 
2595 	arm_smmu_enable_ats(master, smmu_domain);
2596 	goto out_unlock;
2597 
2598 out_list_del:
2599 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2600 	list_del_init(&master->domain_head);
2601 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2602 
2603 out_unlock:
2604 	mutex_unlock(&arm_smmu_asid_lock);
2605 	return ret;
2606 }
2607 
2608 static int arm_smmu_attach_dev_ste(struct device *dev,
2609 				   struct arm_smmu_ste *ste)
2610 {
2611 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2612 
2613 	if (arm_smmu_master_sva_enabled(master))
2614 		return -EBUSY;
2615 
2616 	/*
2617 	 * Do not allow any ASID to be changed while we are working on the STE,
2618 	 * otherwise we could miss invalidations.
2619 	 */
2620 	mutex_lock(&arm_smmu_asid_lock);
2621 
2622 	/*
2623 	 * The SMMU does not support enabling ATS with bypass/abort. When the
2624 	 * STE is in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests
2625 	 * and Translated transactions are denied as though ATS is disabled for
2626 	 * the stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2627 	 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2628 	 */
2629 	arm_smmu_detach_dev(master);
2630 
2631 	arm_smmu_install_ste_for_dev(master, ste);
2632 	mutex_unlock(&arm_smmu_asid_lock);
2633 
2634 	/*
2635 	 * This has to be done after removing the master from the
2636 	 * arm_smmu_domain->devices to avoid racing with arm_smmu_share_asid()
2637 	 * updating the same context descriptor.
2638 	 */
2639 	if (master->cd_table.cdtab)
2640 		arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
2641 	return 0;
2642 }
2643 
2644 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
2645 					struct device *dev)
2646 {
2647 	struct arm_smmu_ste ste;
2648 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2649 
2650 	arm_smmu_make_bypass_ste(master->smmu, &ste);
2651 	return arm_smmu_attach_dev_ste(dev, &ste);
2652 }
2653 
2654 static const struct iommu_domain_ops arm_smmu_identity_ops = {
2655 	.attach_dev = arm_smmu_attach_dev_identity,
2656 };
2657 
2658 static struct iommu_domain arm_smmu_identity_domain = {
2659 	.type = IOMMU_DOMAIN_IDENTITY,
2660 	.ops = &arm_smmu_identity_ops,
2661 };
2662 
2663 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
2664 					struct device *dev)
2665 {
2666 	struct arm_smmu_ste ste;
2667 
2668 	arm_smmu_make_abort_ste(&ste);
2669 	return arm_smmu_attach_dev_ste(dev, &ste);
2670 }
2671 
2672 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
2673 	.attach_dev = arm_smmu_attach_dev_blocked,
2674 };
2675 
2676 static struct iommu_domain arm_smmu_blocked_domain = {
2677 	.type = IOMMU_DOMAIN_BLOCKED,
2678 	.ops = &arm_smmu_blocked_ops,
2679 };
2680 
2681 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2682 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2683 			      int prot, gfp_t gfp, size_t *mapped)
2684 {
2685 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2686 
2687 	if (!ops)
2688 		return -ENODEV;
2689 
2690 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2691 }
2692 
2693 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2694 				   size_t pgsize, size_t pgcount,
2695 				   struct iommu_iotlb_gather *gather)
2696 {
2697 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2698 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2699 
2700 	if (!ops)
2701 		return 0;
2702 
2703 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2704 }
2705 
2706 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2707 {
2708 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2709 
2710 	if (smmu_domain->smmu)
2711 		arm_smmu_tlb_inv_context(smmu_domain);
2712 }
2713 
2714 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2715 				struct iommu_iotlb_gather *gather)
2716 {
2717 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2718 
2719 	if (!gather->pgsize)
2720 		return;
2721 
2722 	arm_smmu_tlb_inv_range_domain(gather->start,
2723 				      gather->end - gather->start + 1,
2724 				      gather->pgsize, true, smmu_domain);
2725 }
2726 
2727 static phys_addr_t
2728 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2729 {
2730 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2731 
2732 	if (!ops)
2733 		return 0;
2734 
2735 	return ops->iova_to_phys(ops, iova);
2736 }
2737 
2738 static struct platform_driver arm_smmu_driver;
2739 
2740 static
2741 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2742 {
2743 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2744 							  fwnode);
2745 	put_device(dev);
2746 	return dev ? dev_get_drvdata(dev) : NULL;
2747 }
2748 
2749 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2750 {
2751 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2752 
2753 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2754 		limit *= 1UL << STRTAB_SPLIT;
2755 
2756 	return sid < limit;
2757 }
2758 
2759 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2760 {
2761 	/* Check the SID is in range of the SMMU and our stream table */
2762 	if (!arm_smmu_sid_in_range(smmu, sid))
2763 		return -ERANGE;
2764 
2765 	/* Ensure l2 strtab is initialised */
2766 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2767 		return arm_smmu_init_l2_strtab(smmu, sid);
2768 
2769 	return 0;
2770 }
2771 
2772 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2773 				  struct arm_smmu_master *master)
2774 {
2775 	int i;
2776 	int ret = 0;
2777 	struct arm_smmu_stream *new_stream, *cur_stream;
2778 	struct rb_node **new_node, *parent_node = NULL;
2779 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2780 
2781 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2782 				  GFP_KERNEL);
2783 	if (!master->streams)
2784 		return -ENOMEM;
2785 	master->num_streams = fwspec->num_ids;
2786 
2787 	mutex_lock(&smmu->streams_mutex);
2788 	for (i = 0; i < fwspec->num_ids; i++) {
2789 		u32 sid = fwspec->ids[i];
2790 
2791 		new_stream = &master->streams[i];
2792 		new_stream->id = sid;
2793 		new_stream->master = master;
2794 
2795 		ret = arm_smmu_init_sid_strtab(smmu, sid);
2796 		if (ret)
2797 			break;
2798 
2799 		/* Insert into SID tree */
2800 		new_node = &(smmu->streams.rb_node);
2801 		while (*new_node) {
2802 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2803 					      node);
2804 			parent_node = *new_node;
2805 			if (cur_stream->id > new_stream->id) {
2806 				new_node = &((*new_node)->rb_left);
2807 			} else if (cur_stream->id < new_stream->id) {
2808 				new_node = &((*new_node)->rb_right);
2809 			} else {
2810 				dev_warn(master->dev,
2811 					 "stream %u already in tree\n",
2812 					 cur_stream->id);
2813 				ret = -EINVAL;
2814 				break;
2815 			}
2816 		}
2817 		if (ret)
2818 			break;
2819 
2820 		rb_link_node(&new_stream->node, parent_node, new_node);
2821 		rb_insert_color(&new_stream->node, &smmu->streams);
2822 	}
2823 
2824 	if (ret) {
2825 		for (i--; i >= 0; i--)
2826 			rb_erase(&master->streams[i].node, &smmu->streams);
2827 		kfree(master->streams);
2828 	}
2829 	mutex_unlock(&smmu->streams_mutex);
2830 
2831 	return ret;
2832 }
2833 
2834 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2835 {
2836 	int i;
2837 	struct arm_smmu_device *smmu = master->smmu;
2838 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2839 
2840 	if (!smmu || !master->streams)
2841 		return;
2842 
2843 	mutex_lock(&smmu->streams_mutex);
2844 	for (i = 0; i < fwspec->num_ids; i++)
2845 		rb_erase(&master->streams[i].node, &smmu->streams);
2846 	mutex_unlock(&smmu->streams_mutex);
2847 
2848 	kfree(master->streams);
2849 }
2850 
2851 static struct iommu_ops arm_smmu_ops;
2852 
2853 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2854 {
2855 	int ret;
2856 	struct arm_smmu_device *smmu;
2857 	struct arm_smmu_master *master;
2858 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2859 
2860 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2861 		return ERR_PTR(-EBUSY);
2862 
2863 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2864 	if (!smmu)
2865 		return ERR_PTR(-ENODEV);
2866 
2867 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2868 	if (!master)
2869 		return ERR_PTR(-ENOMEM);
2870 
2871 	master->dev = dev;
2872 	master->smmu = smmu;
2873 	INIT_LIST_HEAD(&master->bonds);
2874 	INIT_LIST_HEAD(&master->domain_head);
2875 	dev_iommu_priv_set(dev, master);
2876 
2877 	ret = arm_smmu_insert_master(smmu, master);
2878 	if (ret)
2879 		goto err_free_master;
2880 
2881 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2882 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2883 
2884 	/*
2885 	 * Note that PASID must be enabled before, and disabled after ATS:
2886 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2887 	 *
2888 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2889 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2890 	 *   are changed.
2891 	 */
2892 	arm_smmu_enable_pasid(master);
2893 
2894 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2895 		master->ssid_bits = min_t(u8, master->ssid_bits,
2896 					  CTXDESC_LINEAR_CDMAX);
2897 
2898 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2899 	     device_property_read_bool(dev, "dma-can-stall")) ||
2900 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2901 		master->stall_enabled = true;
2902 
2903 	return &smmu->iommu;
2904 
2905 err_free_master:
2906 	kfree(master);
2907 	return ERR_PTR(ret);
2908 }
2909 
2910 static void arm_smmu_release_device(struct device *dev)
2911 {
2912 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2913 
2914 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2915 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2916 
2917 	/* Put the STE back to what arm_smmu_init_strtab() sets */
2918 	if (disable_bypass && !dev->iommu->require_direct)
2919 		arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
2920 	else
2921 		arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
2922 
2923 	arm_smmu_disable_pasid(master);
2924 	arm_smmu_remove_master(master);
2925 	if (master->cd_table.cdtab)
2926 		arm_smmu_free_cd_tables(master);
2927 	kfree(master);
2928 }
2929 
2930 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2931 {
2932 	struct iommu_group *group;
2933 
2934 	/*
2935 	 * We don't support devices sharing stream IDs other than PCI RID
2936 	 * aliases, since the necessary ID-to-device lookup becomes rather
2937 	 * impractical given a potentially sparse 32-bit stream ID space.
2938 	 */
2939 	if (dev_is_pci(dev))
2940 		group = pci_device_group(dev);
2941 	else
2942 		group = generic_device_group(dev);
2943 
2944 	return group;
2945 }
2946 
2947 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2948 {
2949 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2950 	int ret = 0;
2951 
2952 	mutex_lock(&smmu_domain->init_mutex);
2953 	if (smmu_domain->smmu)
2954 		ret = -EPERM;
2955 	else
2956 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2957 	mutex_unlock(&smmu_domain->init_mutex);
2958 
2959 	return ret;
2960 }
2961 
2962 static int arm_smmu_of_xlate(struct device *dev,
2963 			     const struct of_phandle_args *args)
2964 {
2965 	return iommu_fwspec_add_ids(dev, args->args, 1);
2966 }
2967 
2968 static void arm_smmu_get_resv_regions(struct device *dev,
2969 				      struct list_head *head)
2970 {
2971 	struct iommu_resv_region *region;
2972 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2973 
2974 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2975 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2976 	if (!region)
2977 		return;
2978 
2979 	list_add_tail(&region->list, head);
2980 
2981 	iommu_dma_get_resv_regions(dev, head);
2982 }
2983 
2984 static int arm_smmu_dev_enable_feature(struct device *dev,
2985 				       enum iommu_dev_features feat)
2986 {
2987 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2988 
2989 	if (!master)
2990 		return -ENODEV;
2991 
2992 	switch (feat) {
2993 	case IOMMU_DEV_FEAT_IOPF:
2994 		if (!arm_smmu_master_iopf_supported(master))
2995 			return -EINVAL;
2996 		if (master->iopf_enabled)
2997 			return -EBUSY;
2998 		master->iopf_enabled = true;
2999 		return 0;
3000 	case IOMMU_DEV_FEAT_SVA:
3001 		if (!arm_smmu_master_sva_supported(master))
3002 			return -EINVAL;
3003 		if (arm_smmu_master_sva_enabled(master))
3004 			return -EBUSY;
3005 		return arm_smmu_master_enable_sva(master);
3006 	default:
3007 		return -EINVAL;
3008 	}
3009 }
3010 
3011 static int arm_smmu_dev_disable_feature(struct device *dev,
3012 					enum iommu_dev_features feat)
3013 {
3014 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3015 
3016 	if (!master)
3017 		return -EINVAL;
3018 
3019 	switch (feat) {
3020 	case IOMMU_DEV_FEAT_IOPF:
3021 		if (!master->iopf_enabled)
3022 			return -EINVAL;
3023 		if (master->sva_enabled)
3024 			return -EBUSY;
3025 		master->iopf_enabled = false;
3026 		return 0;
3027 	case IOMMU_DEV_FEAT_SVA:
3028 		if (!arm_smmu_master_sva_enabled(master))
3029 			return -EINVAL;
3030 		return arm_smmu_master_disable_sva(master);
3031 	default:
3032 		return -EINVAL;
3033 	}
3034 }
3035 
3036 /*
3037  * The HiSilicon PCIe tune and trace device can be used to trace TLP headers on
3038  * the PCIe link and save the data to memory by DMA. The hardware only supports
3039  * identity mappings.
3040  */
3041 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
3042 					 (pdev)->device == 0xa12e)
3043 
3044 static int arm_smmu_def_domain_type(struct device *dev)
3045 {
3046 	if (dev_is_pci(dev)) {
3047 		struct pci_dev *pdev = to_pci_dev(dev);
3048 
3049 		if (IS_HISI_PTT_DEVICE(pdev))
3050 			return IOMMU_DOMAIN_IDENTITY;
3051 	}
3052 
3053 	return 0;
3054 }
3055 
3056 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
3057 {
3058 	struct iommu_domain *domain;
3059 
3060 	domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
3061 	if (WARN_ON(IS_ERR(domain)) || !domain)
3062 		return;
3063 
3064 	arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
3065 }
3066 
3067 static struct iommu_ops arm_smmu_ops = {
3068 	.identity_domain	= &arm_smmu_identity_domain,
3069 	.blocked_domain		= &arm_smmu_blocked_domain,
3070 	.capable		= arm_smmu_capable,
3071 	.domain_alloc		= arm_smmu_domain_alloc,
3072 	.domain_alloc_paging    = arm_smmu_domain_alloc_paging,
3073 	.probe_device		= arm_smmu_probe_device,
3074 	.release_device		= arm_smmu_release_device,
3075 	.device_group		= arm_smmu_device_group,
3076 	.of_xlate		= arm_smmu_of_xlate,
3077 	.get_resv_regions	= arm_smmu_get_resv_regions,
3078 	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
3079 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
3080 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
3081 	.page_response		= arm_smmu_page_response,
3082 	.def_domain_type	= arm_smmu_def_domain_type,
3083 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
3084 	.owner			= THIS_MODULE,
3085 	.default_domain_ops = &(const struct iommu_domain_ops) {
3086 		.attach_dev		= arm_smmu_attach_dev,
3087 		.map_pages		= arm_smmu_map_pages,
3088 		.unmap_pages		= arm_smmu_unmap_pages,
3089 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
3090 		.iotlb_sync		= arm_smmu_iotlb_sync,
3091 		.iova_to_phys		= arm_smmu_iova_to_phys,
3092 		.enable_nesting		= arm_smmu_enable_nesting,
3093 		.free			= arm_smmu_domain_free,
3094 	}
3095 };
3096 
3097 /* Probing and initialisation functions */
3098 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3099 				   struct arm_smmu_queue *q,
3100 				   void __iomem *page,
3101 				   unsigned long prod_off,
3102 				   unsigned long cons_off,
3103 				   size_t dwords, const char *name)
3104 {
3105 	size_t qsz;
3106 
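	/* Shrink the queue on allocation failure, giving up once it is smaller than a page */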
3107 	do {
3108 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3109 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3110 					      GFP_KERNEL);
3111 		if (q->base || qsz < PAGE_SIZE)
3112 			break;
3113 
3114 		q->llq.max_n_shift--;
3115 	} while (1);
3116 
3117 	if (!q->base) {
3118 		dev_err(smmu->dev,
3119 			"failed to allocate queue (0x%zx bytes) for %s\n",
3120 			qsz, name);
3121 		return -ENOMEM;
3122 	}
3123 
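	/* Warn if the queue base is not naturally aligned to the queue size */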
3124 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
3125 		dev_info(smmu->dev, "allocated %u entries for %s\n",
3126 			 1 << q->llq.max_n_shift, name);
3127 	}
3128 
3129 	q->prod_reg	= page + prod_off;
3130 	q->cons_reg	= page + cons_off;
3131 	q->ent_dwords	= dwords;
3132 
3133 	q->q_base  = Q_BASE_RWA;
3134 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3135 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3136 
3137 	q->llq.prod = q->llq.cons = 0;
3138 	return 0;
3139 }
3140 
3141 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
3142 {
3143 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
3144 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3145 
3146 	atomic_set(&cmdq->owner_prod, 0);
3147 	atomic_set(&cmdq->lock, 0);
3148 
3149 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
3150 							      GFP_KERNEL);
3151 	if (!cmdq->valid_map)
3152 		return -ENOMEM;
3153 
3154 	return 0;
3155 }
3156 
3157 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3158 {
3159 	int ret;
3160 
3161 	/* cmdq */
3162 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
3163 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
3164 				      CMDQ_ENT_DWORDS, "cmdq");
3165 	if (ret)
3166 		return ret;
3167 
3168 	ret = arm_smmu_cmdq_init(smmu);
3169 	if (ret)
3170 		return ret;
3171 
3172 	/* evtq */
3173 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
3174 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
3175 				      EVTQ_ENT_DWORDS, "evtq");
3176 	if (ret)
3177 		return ret;
3178 
3179 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
3180 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
3181 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
3182 		if (!smmu->evtq.iopf)
3183 			return -ENOMEM;
3184 	}
3185 
3186 	/* priq */
3187 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3188 		return 0;
3189 
3190 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
3191 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
3192 				       PRIQ_ENT_DWORDS, "priq");
3193 }
3194 
3195 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
3196 {
3197 	unsigned int i;
3198 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3199 	void *strtab = smmu->strtab_cfg.strtab;
3200 
3201 	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
3202 				    sizeof(*cfg->l1_desc), GFP_KERNEL);
3203 	if (!cfg->l1_desc)
3204 		return -ENOMEM;
3205 
3206 	for (i = 0; i < cfg->num_l1_ents; ++i) {
3207 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3208 		strtab += STRTAB_L1_DESC_DWORDS << 3;
3209 	}
3210 
3211 	return 0;
3212 }
3213 
3214 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3215 {
3216 	void *strtab;
3217 	u64 reg;
3218 	u32 size, l1size;
3219 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3220 
3221 	/* Calculate the L1 size, capped to the SIDSIZE. */
3222 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3223 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3224 	cfg->num_l1_ents = 1 << size;
3225 
3226 	size += STRTAB_SPLIT;
3227 	if (size < smmu->sid_bits)
3228 		dev_warn(smmu->dev,
3229 			 "2-level strtab only covers %u/%u bits of SID\n",
3230 			 size, smmu->sid_bits);
3231 
3232 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3233 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3234 				     GFP_KERNEL);
3235 	if (!strtab) {
3236 		dev_err(smmu->dev,
3237 			"failed to allocate l1 stream table (%u bytes)\n",
3238 			l1size);
3239 		return -ENOMEM;
3240 	}
3241 	cfg->strtab = strtab;
3242 
3243 	/* Configure strtab_base_cfg for 2 levels */
3244 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3245 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3246 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3247 	cfg->strtab_base_cfg = reg;
3248 
3249 	return arm_smmu_init_l1_strtab(smmu);
3250 }
3251 
3252 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3253 {
3254 	void *strtab;
3255 	u64 reg;
3256 	u32 size;
3257 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3258 
3259 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3260 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3261 				     GFP_KERNEL);
3262 	if (!strtab) {
3263 		dev_err(smmu->dev,
3264 			"failed to allocate linear stream table (%u bytes)\n",
3265 			size);
3266 		return -ENOMEM;
3267 	}
3268 	cfg->strtab = strtab;
3269 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3270 
3271 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3272 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3273 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3274 	cfg->strtab_base_cfg = reg;
3275 
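	/* Initialise every STE to the default abort/bypass behaviour */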
3276 	arm_smmu_init_initial_stes(smmu, strtab, cfg->num_l1_ents);
3277 	return 0;
3278 }
3279 
3280 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3281 {
3282 	u64 reg;
3283 	int ret;
3284 
3285 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3286 		ret = arm_smmu_init_strtab_2lvl(smmu);
3287 	else
3288 		ret = arm_smmu_init_strtab_linear(smmu);
3289 
3290 	if (ret)
3291 		return ret;
3292 
3293 	/* Set the strtab base address */
3294 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3295 	reg |= STRTAB_BASE_RA;
3296 	smmu->strtab_cfg.strtab_base = reg;
3297 
3298 	ida_init(&smmu->vmid_map);
3299 
3300 	return 0;
3301 }
3302 
3303 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3304 {
3305 	int ret;
3306 
3307 	mutex_init(&smmu->streams_mutex);
3308 	smmu->streams = RB_ROOT;
3309 
3310 	ret = arm_smmu_init_queues(smmu);
3311 	if (ret)
3312 		return ret;
3313 
3314 	return arm_smmu_init_strtab(smmu);
3315 }
3316 
3317 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3318 				   unsigned int reg_off, unsigned int ack_off)
3319 {
3320 	u32 reg;
3321 
3322 	writel_relaxed(val, smmu->base + reg_off);
3323 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3324 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3325 }
3326 
3327 /* GBPA is "special" */
3328 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3329 {
3330 	int ret;
3331 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3332 
3333 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3334 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3335 	if (ret)
3336 		return ret;
3337 
3338 	reg &= ~clr;
3339 	reg |= set;
3340 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3341 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3342 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3343 
3344 	if (ret)
3345 		dev_err(smmu->dev, "GBPA not responding to update\n");
3346 	return ret;
3347 }
3348 
3349 static void arm_smmu_free_msis(void *data)
3350 {
3351 	struct device *dev = data;
3352 
3353 	platform_device_msi_free_irqs_all(dev);
3354 }
3355 
3356 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3357 {
3358 	phys_addr_t doorbell;
3359 	struct device *dev = msi_desc_to_dev(desc);
3360 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3361 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3362 
3363 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3364 	doorbell &= MSI_CFG0_ADDR_MASK;
3365 
3366 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3367 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3368 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3369 }
3370 
3371 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3372 {
3373 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3374 	struct device *dev = smmu->dev;
3375 
3376 	/* Clear the MSI address regs */
3377 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3378 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3379 
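	/* The PRIQ MSI vector is only needed when PRI is implemented */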
3380 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3381 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3382 	else
3383 		nvec--;
3384 
3385 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3386 		return;
3387 
3388 	if (!dev->msi.domain) {
3389 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3390 		return;
3391 	}
3392 
3393 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3394 	ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3395 	if (ret) {
3396 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3397 		return;
3398 	}
3399 
3400 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3401 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3402 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3403 
3404 	/* Add callback to free MSIs on teardown */
3405 	devm_add_action(dev, arm_smmu_free_msis, dev);
3406 }
3407 
3408 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3409 {
3410 	int irq, ret;
3411 
3412 	arm_smmu_setup_msis(smmu);
3413 
3414 	/* Request interrupt lines */
3415 	irq = smmu->evtq.q.irq;
3416 	if (irq) {
3417 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3418 						arm_smmu_evtq_thread,
3419 						IRQF_ONESHOT,
3420 						"arm-smmu-v3-evtq", smmu);
3421 		if (ret < 0)
3422 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3423 	} else {
3424 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3425 	}
3426 
3427 	irq = smmu->gerr_irq;
3428 	if (irq) {
3429 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3430 				       0, "arm-smmu-v3-gerror", smmu);
3431 		if (ret < 0)
3432 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3433 	} else {
3434 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3435 	}
3436 
3437 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3438 		irq = smmu->priq.q.irq;
3439 		if (irq) {
3440 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3441 							arm_smmu_priq_thread,
3442 							IRQF_ONESHOT,
3443 							"arm-smmu-v3-priq",
3444 							smmu);
3445 			if (ret < 0)
3446 				dev_warn(smmu->dev,
3447 					 "failed to enable priq irq\n");
3448 		} else {
3449 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3450 		}
3451 	}
3452 }
3453 
3454 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3455 {
3456 	int ret, irq;
3457 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3458 
3459 	/* Disable IRQs first */
3460 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3461 				      ARM_SMMU_IRQ_CTRLACK);
3462 	if (ret) {
3463 		dev_err(smmu->dev, "failed to disable irqs\n");
3464 		return ret;
3465 	}
3466 
3467 	irq = smmu->combined_irq;
3468 	if (irq) {
3469 		/*
3470 		 * Cavium ThunderX2 implementation doesn't support unique irq
3471 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3472 		 */
3473 		ret = devm_request_threaded_irq(smmu->dev, irq,
3474 					arm_smmu_combined_irq_handler,
3475 					arm_smmu_combined_irq_thread,
3476 					IRQF_ONESHOT,
3477 					"arm-smmu-v3-combined-irq", smmu);
3478 		if (ret < 0)
3479 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3480 	} else
3481 		arm_smmu_setup_unique_irqs(smmu);
3482 
3483 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3484 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3485 
3486 	/* Enable interrupt generation on the SMMU */
3487 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3488 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3489 	if (ret)
3490 		dev_warn(smmu->dev, "failed to enable irqs\n");
3491 
3492 	return 0;
3493 }
3494 
3495 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3496 {
3497 	int ret;
3498 
3499 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3500 	if (ret)
3501 		dev_err(smmu->dev, "failed to clear cr0\n");
3502 
3503 	return ret;
3504 }
3505 
3506 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3507 {
3508 	int ret;
3509 	u32 reg, enables;
3510 	struct arm_smmu_cmdq_ent cmd;
3511 
3512 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3513 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3514 	if (reg & CR0_SMMUEN) {
3515 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3516 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3517 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3518 	}
3519 
3520 	ret = arm_smmu_device_disable(smmu);
3521 	if (ret)
3522 		return ret;
3523 
3524 	/* CR1 (table and queue memory attributes) */
3525 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3526 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3527 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3528 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3529 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3530 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3531 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3532 
3533 	/* CR2 (PTM, RECINVSID and, if supported, E2H) */
3534 	reg = CR2_PTM | CR2_RECINVSID;
3535 
3536 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3537 		reg |= CR2_E2H;
3538 
3539 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3540 
3541 	/* Stream table */
3542 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3543 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3544 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3545 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3546 
3547 	/* Command queue */
3548 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3549 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3550 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3551 
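	/*
	 * Enable the command queue first: the configuration and TLB
	 * invalidation commands below have to be issued (and synced) before
	 * the remaining queues and the SMMU itself are switched on.
	 */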
3552 	enables = CR0_CMDQEN;
3553 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3554 				      ARM_SMMU_CR0ACK);
3555 	if (ret) {
3556 		dev_err(smmu->dev, "failed to enable command queue\n");
3557 		return ret;
3558 	}
3559 
3560 	/* Invalidate any cached configuration */
3561 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3562 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3563 
3564 	/* Invalidate any stale TLB entries */
3565 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3566 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3567 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3568 	}
3569 
3570 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3571 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3572 
3573 	/* Event queue */
3574 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3575 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3576 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3577 
3578 	enables |= CR0_EVTQEN;
3579 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3580 				      ARM_SMMU_CR0ACK);
3581 	if (ret) {
3582 		dev_err(smmu->dev, "failed to enable event queue\n");
3583 		return ret;
3584 	}
3585 
3586 	/* PRI queue */
3587 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3588 		writeq_relaxed(smmu->priq.q.q_base,
3589 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3590 		writel_relaxed(smmu->priq.q.llq.prod,
3591 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3592 		writel_relaxed(smmu->priq.q.llq.cons,
3593 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3594 
3595 		enables |= CR0_PRIQEN;
3596 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3597 					      ARM_SMMU_CR0ACK);
3598 		if (ret) {
3599 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3600 			return ret;
3601 		}
3602 	}
3603 
3604 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3605 		enables |= CR0_ATSCHK;
3606 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3607 					      ARM_SMMU_CR0ACK);
3608 		if (ret) {
3609 			dev_err(smmu->dev, "failed to enable ATS check\n");
3610 			return ret;
3611 		}
3612 	}
3613 
3614 	ret = arm_smmu_setup_irqs(smmu);
3615 	if (ret) {
3616 		dev_err(smmu->dev, "failed to setup irqs\n");
3617 		return ret;
3618 	}
3619 
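	/*
	 * In a crash-dump kernel, devices may still be performing DMA set up
	 * by the crashed kernel. Leaving the event and PRI queues disabled
	 * avoids having to service a stream of faults for those stale
	 * mappings.
	 */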
3620 	if (is_kdump_kernel())
3621 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3622 
3623 	/* Enable the SMMU interface, or ensure bypass */
3624 	if (!bypass || disable_bypass) {
3625 		enables |= CR0_SMMUEN;
3626 	} else {
3627 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3628 		if (ret)
3629 			return ret;
3630 	}
3631 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3632 				      ARM_SMMU_CR0ACK);
3633 	if (ret) {
3634 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3635 		return ret;
3636 	}
3637 
3638 	return 0;
3639 }
3640 
3641 #define IIDR_IMPLEMENTER_ARM		0x43b
3642 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
3643 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
3644 
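/*
 * Use the IIDR to identify the implementer, product, variant and revision
 * so that known errata can be worked around, typically by masking out
 * features the hardware advertises but cannot deliver reliably.
 */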
3645 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3646 {
3647 	u32 reg;
3648 	unsigned int implementer, productid, variant, revision;
3649 
3650 	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3651 	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3652 	productid = FIELD_GET(IIDR_PRODUCTID, reg);
3653 	variant = FIELD_GET(IIDR_VARIANT, reg);
3654 	revision = FIELD_GET(IIDR_REVISION, reg);
3655 
3656 	switch (implementer) {
3657 	case IIDR_IMPLEMENTER_ARM:
3658 		switch (productid) {
3659 		case IIDR_PRODUCTID_ARM_MMU_600:
3660 			/* Arm erratum 1076982 */
3661 			if (variant == 0 && revision <= 2)
3662 				smmu->features &= ~ARM_SMMU_FEAT_SEV;
3663 			/* Arm erratum 1209401 */
3664 			if (variant < 2)
3665 				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3666 			break;
3667 		case IIDR_PRODUCTID_ARM_MMU_700:
3668 			/* Arm erratum 2812531 */
3669 			smmu->features &= ~ARM_SMMU_FEAT_BTM;
3670 			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3671 			/* Arm errata 2268618, 2812531 */
3672 			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3673 			break;
3674 		}
3675 		break;
3676 	}
3677 }
3678 
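/*
 * Probe the ID registers: IDR0/1/3/5 tell us which optional features are
 * implemented, how large the queues may be, the StreamID/SubstreamID
 * widths, and the supported input/output address sizes and page granules.
 */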
3679 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3680 {
3681 	u32 reg;
3682 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3683 
3684 	/* IDR0 */
3685 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3686 
3687 	/* 2-level structures */
3688 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3689 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3690 
3691 	if (reg & IDR0_CD2L)
3692 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3693 
3694 	/*
3695 	 * Translation table endianness.
3696 	 * We currently require the same endianness as the CPU, but this
3697 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3698 	 */
3699 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3700 	case IDR0_TTENDIAN_MIXED:
3701 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3702 		break;
3703 #ifdef __BIG_ENDIAN
3704 	case IDR0_TTENDIAN_BE:
3705 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3706 		break;
3707 #else
3708 	case IDR0_TTENDIAN_LE:
3709 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3710 		break;
3711 #endif
3712 	default:
3713 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3714 		return -ENXIO;
3715 	}
3716 
3717 	/* Boolean feature flags */
3718 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3719 		smmu->features |= ARM_SMMU_FEAT_PRI;
3720 
3721 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3722 		smmu->features |= ARM_SMMU_FEAT_ATS;
3723 
3724 	if (reg & IDR0_SEV)
3725 		smmu->features |= ARM_SMMU_FEAT_SEV;
3726 
3727 	if (reg & IDR0_MSI) {
3728 		smmu->features |= ARM_SMMU_FEAT_MSI;
3729 		if (coherent && !disable_msipolling)
3730 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3731 	}
3732 
3733 	if (reg & IDR0_HYP) {
3734 		smmu->features |= ARM_SMMU_FEAT_HYP;
3735 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3736 			smmu->features |= ARM_SMMU_FEAT_E2H;
3737 	}
3738 
3739 	/*
3740 	 * The coherency feature as set by FW is used in preference to the ID
3741 	 * register, but warn on mismatch.
3742 	 */
3743 	if (!!(reg & IDR0_COHACC) != coherent)
3744 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3745 			 coherent ? "true" : "false");
3746 
3747 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3748 	case IDR0_STALL_MODEL_FORCE:
3749 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3750 		fallthrough;
3751 	case IDR0_STALL_MODEL_STALL:
3752 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3753 	}
3754 
3755 	if (reg & IDR0_S1P)
3756 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3757 
3758 	if (reg & IDR0_S2P)
3759 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3760 
3761 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3762 		dev_err(smmu->dev, "no translation support!\n");
3763 		return -ENXIO;
3764 	}
3765 
3766 	/* We only support the AArch64 table format at present */
3767 	switch (FIELD_GET(IDR0_TTF, reg)) {
3768 	case IDR0_TTF_AARCH32_64:
3769 		smmu->ias = 40;
3770 		fallthrough;
3771 	case IDR0_TTF_AARCH64:
3772 		break;
3773 	default:
3774 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3775 		return -ENXIO;
3776 	}
3777 
3778 	/* ASID/VMID sizes */
3779 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3780 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3781 
3782 	/* IDR1 */
3783 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3784 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3785 		dev_err(smmu->dev, "embedded implementation not supported\n");
3786 		return -ENXIO;
3787 	}
3788 
3789 	if (reg & IDR1_ATTR_TYPES_OVR)
3790 		smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;
3791 
3792 	/* Queue sizes, capped to ensure natural alignment */
3793 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3794 					     FIELD_GET(IDR1_CMDQS, reg));
3795 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3796 		/*
3797 		 * We don't support splitting up batches, so one batch of
3798 		 * commands plus an extra sync needs to fit inside the command
3799 		 * queue. There's also no way we can handle the weird alignment
3800 		 * restrictions on the base pointer for a unit-length queue.
3801 		 */
3802 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3803 			CMDQ_BATCH_ENTRIES);
3804 		return -ENXIO;
3805 	}
3806 
3807 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3808 					     FIELD_GET(IDR1_EVTQS, reg));
3809 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3810 					     FIELD_GET(IDR1_PRIQS, reg));
3811 
3812 	/* SID/SSID sizes */
3813 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3814 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3815 	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3816 
3817 	/*
3818 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3819 	 * table, use a linear table instead.
3820 	 */
3821 	if (smmu->sid_bits <= STRTAB_SPLIT)
3822 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3823 
3824 	/* IDR3 */
3825 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3826 	if (FIELD_GET(IDR3_RIL, reg))
3827 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3828 
3829 	/* IDR5 */
3830 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3831 
3832 	/* Maximum number of outstanding stalls */
3833 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3834 
3835 	/* Page sizes (each granule also implies its block mapping sizes) */
3836 	if (reg & IDR5_GRAN64K)
3837 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3838 	if (reg & IDR5_GRAN16K)
3839 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3840 	if (reg & IDR5_GRAN4K)
3841 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3842 
3843 	/* Input address size */
3844 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3845 		smmu->features |= ARM_SMMU_FEAT_VAX;
3846 
3847 	/* Output address size */
3848 	switch (FIELD_GET(IDR5_OAS, reg)) {
3849 	case IDR5_OAS_32_BIT:
3850 		smmu->oas = 32;
3851 		break;
3852 	case IDR5_OAS_36_BIT:
3853 		smmu->oas = 36;
3854 		break;
3855 	case IDR5_OAS_40_BIT:
3856 		smmu->oas = 40;
3857 		break;
3858 	case IDR5_OAS_42_BIT:
3859 		smmu->oas = 42;
3860 		break;
3861 	case IDR5_OAS_44_BIT:
3862 		smmu->oas = 44;
3863 		break;
3864 	case IDR5_OAS_52_BIT:
3865 		smmu->oas = 52;
3866 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3867 		break;
3868 	default:
3869 		dev_info(smmu->dev,
3870 			"unknown output address size. Truncating to 48-bit\n");
3871 		fallthrough;
3872 	case IDR5_OAS_48_BIT:
3873 		smmu->oas = 48;
3874 	}
3875 
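	/*
	 * arm_smmu_ops is shared between all SMMU instances probed so far;
	 * -1UL is treated as the "not yet initialised" value, after which
	 * each instance's page sizes are accumulated into the global bitmap.
	 */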
3876 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3877 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3878 	else
3879 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3880 
3881 	/* Set the DMA mask for our table walker */
3882 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3883 		dev_warn(smmu->dev,
3884 			 "failed to set DMA mask for table walker\n");
3885 
3886 	smmu->ias = max(smmu->ias, smmu->oas);
3887 
3888 	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3889 	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3890 		smmu->features |= ARM_SMMU_FEAT_NESTING;
3891 
3892 	arm_smmu_device_iidr_probe(smmu);
3893 
3894 	if (arm_smmu_sva_supported(smmu))
3895 		smmu->features |= ARM_SMMU_FEAT_SVA;
3896 
3897 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3898 		 smmu->ias, smmu->oas, smmu->features);
3899 	return 0;
3900 }
3901 
3902 #ifdef CONFIG_ACPI
3903 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3904 {
3905 	switch (model) {
3906 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3907 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3908 		break;
3909 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3910 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3911 		break;
3912 	}
3913 
3914 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3915 }
3916 
3917 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3918 				      struct arm_smmu_device *smmu)
3919 {
3920 	struct acpi_iort_smmu_v3 *iort_smmu;
3921 	struct device *dev = smmu->dev;
3922 	struct acpi_iort_node *node;
3923 
3924 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3925 
3926 	/* Retrieve SMMUv3 specific data */
3927 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3928 
3929 	acpi_smmu_get_options(iort_smmu->model, smmu);
3930 
3931 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3932 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3933 
3934 	return 0;
3935 }
3936 #else
3937 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3938 					     struct arm_smmu_device *smmu)
3939 {
3940 	return -ENODEV;
3941 }
3942 #endif
3943 
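/*
 * Sanity-check the devicetree binding. For illustration only (the addresses,
 * node names and StreamID below are made up, and interrupt properties are
 * omitted), a minimal coherent SMMU and one master might be described as:
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		#iommu-cells = <1>;
 *		dma-coherent;
 *	};
 *
 *	master@1000000 {
 *		iommus = <&smmu 0x100>;
 *	};
 *
 * "#iommu-cells" must be exactly 1 (one StreamID per phandle argument) and
 * "dma-coherent" is what sets ARM_SMMU_FEAT_COHERENCY below.
 */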
3944 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3945 				    struct arm_smmu_device *smmu)
3946 {
3947 	struct device *dev = &pdev->dev;
3948 	u32 cells;
3949 	int ret = -EINVAL;
3950 
3951 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3952 		dev_err(dev, "missing #iommu-cells property\n");
3953 	else if (cells != 1)
3954 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3955 	else
3956 		ret = 0;
3957 
3958 	parse_driver_options(smmu);
3959 
3960 	if (of_dma_is_coherent(dev->of_node))
3961 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3962 
3963 	return ret;
3964 }
3965 
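/*
 * The SMMUv3 register map spans two 64K pages. Implementations with
 * ARM_SMMU_OPT_PAGE0_REGS_ONLY (e.g. Cavium CN99xx) make the page 1
 * registers accessible at the corresponding page 0 offsets, so only 64K of
 * MMIO is claimed and page1 simply aliases the base mapping.
 */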
3966 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3967 {
3968 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3969 		return SZ_64K;
3970 	else
3971 		return SZ_128K;
3972 }
3973 
3974 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3975 				      resource_size_t size)
3976 {
3977 	struct resource res = DEFINE_RES_MEM(start, size);
3978 
3979 	return devm_ioremap_resource(dev, &res);
3980 }
3981 
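/*
 * ACPI IORT may describe Reserved Memory Range (RMR) nodes covering memory
 * that devices are already using when the driver takes over, such as a
 * firmware-configured framebuffer. Install bypass STEs for the associated
 * StreamIDs so that this traffic keeps working until a domain is attached.
 */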
3982 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3983 {
3984 	struct list_head rmr_list;
3985 	struct iommu_resv_region *e;
3986 
3987 	INIT_LIST_HEAD(&rmr_list);
3988 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3989 
3990 	list_for_each_entry(e, &rmr_list, list) {
3991 		struct iommu_iort_rmr_data *rmr;
3992 		int ret, i;
3993 
3994 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3995 		for (i = 0; i < rmr->num_sids; i++) {
3996 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3997 			if (ret) {
3998 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3999 					rmr->sids[i]);
4000 				continue;
4001 			}
4002 
4003 			/*
4004 			 * STE table is not programmed to HW, see
4005 			 * arm_smmu_initial_bypass_stes()
4006 			 */
4007 			arm_smmu_make_bypass_ste(smmu,
4008 				arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
4009 		}
4010 	}
4011 
4012 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4013 }
4014 
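/*
 * Main probe path: gather firmware configuration (DT or ACPI/IORT), map the
 * register pages, pick up the wired interrupts, read the ID registers,
 * initialise the in-memory structures, install any RMR bypass STEs, reset
 * the device and finally register with the IOMMU core.
 */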
4015 static int arm_smmu_device_probe(struct platform_device *pdev)
4016 {
4017 	int irq, ret;
4018 	struct resource *res;
4019 	resource_size_t ioaddr;
4020 	struct arm_smmu_device *smmu;
4021 	struct device *dev = &pdev->dev;
4022 	bool bypass;
4023 
4024 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4025 	if (!smmu)
4026 		return -ENOMEM;
4027 	smmu->dev = dev;
4028 
4029 	if (dev->of_node) {
4030 		ret = arm_smmu_device_dt_probe(pdev, smmu);
4031 	} else {
4032 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
4033 		if (ret == -ENODEV)
4034 			return ret;
4035 	}
4036 
4037 	/* Set bypass mode according to firmware probing result */
4038 	bypass = !!ret;
4039 
4040 	/* Base address */
4041 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
4042 	if (!res)
4043 		return -EINVAL;
4044 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4045 		dev_err(dev, "MMIO region too small (%pr)\n", res);
4046 		return -EINVAL;
4047 	}
4048 	ioaddr = res->start;
4049 
4050 	/*
4051 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4052 	 * the PMCG registers which are reserved by the PMU driver.
4053 	 */
4054 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4055 	if (IS_ERR(smmu->base))
4056 		return PTR_ERR(smmu->base);
4057 
4058 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
4059 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4060 					       ARM_SMMU_REG_SZ);
4061 		if (IS_ERR(smmu->page1))
4062 			return PTR_ERR(smmu->page1);
4063 	} else {
4064 		smmu->page1 = smmu->base;
4065 	}
4066 
4067 	/* Interrupt lines */
4068 
4069 	irq = platform_get_irq_byname_optional(pdev, "combined");
4070 	if (irq > 0)
4071 		smmu->combined_irq = irq;
4072 	else {
4073 		irq = platform_get_irq_byname_optional(pdev, "eventq");
4074 		if (irq > 0)
4075 			smmu->evtq.q.irq = irq;
4076 
4077 		irq = platform_get_irq_byname_optional(pdev, "priq");
4078 		if (irq > 0)
4079 			smmu->priq.q.irq = irq;
4080 
4081 		irq = platform_get_irq_byname_optional(pdev, "gerror");
4082 		if (irq > 0)
4083 			smmu->gerr_irq = irq;
4084 	}
4085 	/* Probe the h/w */
4086 	ret = arm_smmu_device_hw_probe(smmu);
4087 	if (ret)
4088 		return ret;
4089 
4090 	/* Initialise in-memory data structures */
4091 	ret = arm_smmu_init_structures(smmu);
4092 	if (ret)
4093 		return ret;
4094 
4095 	/* Record our private device structure */
4096 	platform_set_drvdata(pdev, smmu);
4097 
4098 	/* Check for RMRs and install bypass STEs if any */
4099 	arm_smmu_rmr_install_bypass_ste(smmu);
4100 
4101 	/* Reset the device */
4102 	ret = arm_smmu_device_reset(smmu, bypass);
4103 	if (ret)
4104 		return ret;
4105 
4106 	/* And we're up. Go go go! */
4107 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4108 				     "smmu3.%pa", &ioaddr);
4109 	if (ret)
4110 		return ret;
4111 
4112 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
4113 	if (ret) {
4114 		dev_err(dev, "Failed to register iommu\n");
4115 		iommu_device_sysfs_remove(&smmu->iommu);
4116 		return ret;
4117 	}
4118 
4119 	return 0;
4120 }
4121 
4122 static void arm_smmu_device_remove(struct platform_device *pdev)
4123 {
4124 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4125 
4126 	iommu_device_unregister(&smmu->iommu);
4127 	iommu_device_sysfs_remove(&smmu->iommu);
4128 	arm_smmu_device_disable(smmu);
4129 	iopf_queue_free(smmu->evtq.iopf);
4130 	ida_destroy(&smmu->vmid_map);
4131 }
4132 
4133 static void arm_smmu_device_shutdown(struct platform_device *pdev)
4134 {
4135 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4136 
4137 	arm_smmu_device_disable(smmu);
4138 }
4139 
4140 static const struct of_device_id arm_smmu_of_match[] = {
4141 	{ .compatible = "arm,smmu-v3", },
4142 	{ },
4143 };
4144 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4145 
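/*
 * Make sure any outstanding SVA mmu_notifier callbacks have completed
 * before the module text goes away.
 */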
4146 static void arm_smmu_driver_unregister(struct platform_driver *drv)
4147 {
4148 	arm_smmu_sva_notifier_synchronize();
4149 	platform_driver_unregister(drv);
4150 }
4151 
4152 static struct platform_driver arm_smmu_driver = {
4153 	.driver	= {
4154 		.name			= "arm-smmu-v3",
4155 		.of_match_table		= arm_smmu_of_match,
4156 		.suppress_bind_attrs	= true,
4157 	},
4158 	.probe	= arm_smmu_device_probe,
4159 	.remove_new = arm_smmu_device_remove,
4160 	.shutdown = arm_smmu_device_shutdown,
4161 };
4162 module_driver(arm_smmu_driver, platform_driver_register,
4163 	      arm_smmu_driver_unregister);
4164 
4165 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4166 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4167 MODULE_ALIAS("platform:arm-smmu-v3");
4168 MODULE_LICENSE("GPL v2");
4169