// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>
#include <kunit/visibility.h>

#include "arm-smmu-v3.h"
#include "../../dma-iommu.h"

static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");

enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};

#define NUM_ENTRY_QWORDS 8
static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));

static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
	{ 0, NULL},
};

static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
				    struct arm_smmu_device *smmu);
static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
						arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

/* Low-level queue manipulation functions */
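/*
 * Queue pointers pack three fields: Q_IDX() extracts the entry index,
 * Q_WRP() the wrap bit that toggles on every pass over the queue, and
 * Q_OVF() the overflow flag mirrored from the hardware registers. Equal
 * indices with equal wrap bits mean "empty"; equal indices with differing
 * wrap bits mean "full". Incrementing the combined {wrap, index} value lets
 * the carry out of the index field flip the wrap bit automatically.
 */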
static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
{
	u32 space, prod, cons;

	prod = Q_IDX(q, q->prod);
	cons = Q_IDX(q, q->cons);

	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
		space = (1 << q->max_n_shift) - (prod - cons);
	else
		space = cons - prod;

	return space >= n;
}

static bool queue_full(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}

static bool queue_empty(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}

static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}

static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}

static void queue_inc_cons(struct arm_smmu_ll_queue *q)
{
	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}

static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
{
	struct arm_smmu_ll_queue *llq = &q->llq;

	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
		return;

	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
		      Q_IDX(llq, llq->cons);
	queue_sync_cons_out(q);
}

static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
	u32 prod;
	int ret = 0;

	/*
	 * We can't use the _relaxed() variant here, as we must prevent
	 * speculative reads of the queue before we have determined that
	 * prod has indeed moved.
	 */
	prod = readl(q->prod_reg);

	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
		ret = -EOVERFLOW;

	q->llq.prod = prod;
	return ret;
}

static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
{
	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
}

static void queue_poll_init(struct arm_smmu_device *smmu,
			    struct arm_smmu_queue_poll *qp)
{
	qp->delay = 1;
	qp->spin_cnt = 0;
	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
}

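/*
 * Poll with a progressively heavier back-off: use WFE when the SMMU can
 * send SEV events, otherwise spin with cpu_relax() for a while before
 * falling back to an exponentially growing udelay(), all bounded by
 * qp->timeout.
 */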
static int queue_poll(struct arm_smmu_queue_poll *qp)
{
	if (ktime_compare(ktime_get(), qp->timeout) > 0)
		return -ETIMEDOUT;

	if (qp->wfe) {
		wfe();
	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
		cpu_relax();
	} else {
		udelay(qp->delay);
		qp->delay *= 2;
		qp->spin_cnt = 0;
	}

	return 0;
}

static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = cpu_to_le64(*src++);
}

static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = le64_to_cpu(*src++);
}

static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
	if (queue_empty(&q->llq))
		return -EAGAIN;

	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
	queue_inc_cons(&q->llq);
	queue_sync_cons_out(q);
	return 0;
}

/* High-level queue accessors */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		break;
	case CMDQ_OP_CFGI_CD:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		fallthrough;
	case CMDQ_OP_TLBI_EL2_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_TLBI_EL2_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_RESUME:
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
		break;
	case CMDQ_OP_CMD_SYNC:
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
{
	return &smmu->cmdq;
}

static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
					 struct arm_smmu_queue *q, u32 prod)
{
	struct arm_smmu_cmdq_ent ent = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	/*
	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
	 * payload, so the write will zero the entire command on that platform.
	 */
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
				   q->ent_dwords * 8;
	}

	arm_smmu_cmdq_build_cmd(cmd, &ent);
}

static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
				     struct arm_smmu_queue *q)
{
	static const char * const cerror_str[] = {
		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
	};

	int i;
	u64 cmd[CMDQ_ENT_DWORDS];
	u32 cons = readl_relaxed(q->cons_reg);
	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
	struct arm_smmu_cmdq_ent cmd_sync = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");

	switch (idx) {
	case CMDQ_ERR_CERROR_ABT_IDX:
		dev_err(smmu->dev, "retrying command fetch\n");
		return;
	case CMDQ_ERR_CERROR_NONE_IDX:
		return;
	case CMDQ_ERR_CERROR_ATC_INV_IDX:
		/*
		 * ATC Invalidation Completion timeout. CONS is still pointing
		 * at the CMD_SYNC. Attempt to complete other pending commands
		 * by repeating the CMD_SYNC, though we might well end up back
		 * here since the ATC invalidation may still be pending.
		 */
		return;
	case CMDQ_ERR_CERROR_ILL_IDX:
	default:
		break;
	}

	/*
	 * We may have concurrent producers, so we need to be careful
	 * not to touch any of the shadow cmdq state.
	 */
	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
	dev_err(smmu->dev, "skipping command in error state:\n");
	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);

	/* Convert the erroneous command into a CMD_SYNC */
	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);

	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}

static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
}

/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
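/*
 * Roughly, the shared lock is taken around CMD_SYNC completion in
 * arm_smmu_cmdq_issue_cmdlist() below:
 *
 *	arm_smmu_cmdq_shared_lock(cmdq);
 *	... insert the CMD_SYNC and wait for it to complete ...
 *	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 *		arm_smmu_cmdq_shared_unlock(cmdq);
 *	}
 *
 * while arm_smmu_cmdq_poll_until_not_full() takes the exclusive lock to
 * refresh the shadow cons value from the hardware register.
 */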
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}

static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}

static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
	if (atomic_read(&cmdq->lock) == 1)
		return false;

	arm_smmu_cmdq_shared_unlock(cmdq);
	return true;
}

#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})

#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	local_irq_restore(flags);					\
})


/*
 * Command queue insertion.
 * This is made fiddly by our attempts to achieve some sort of scalability
 * since there is one queue shared amongst all of the CPUs in the system.  If
 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 * then you'll *love* this monstrosity.
 *
 * The basic idea is to split the queue up into ranges of commands that are
 * owned by a given CPU; the owner may not have written all of the commands
 * itself, but is responsible for advancing the hardware prod pointer when
 * the time comes. The algorithm is roughly:
 *
 * 	1. Allocate some space in the queue. At this point we also discover
 *	   whether the head of the queue is currently owned by another CPU,
 *	   or whether we are the owner.
 *
 *	2. Write our commands into our allocated slots in the queue.
 *
 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 *
 *	4. If we are an owner:
 *		a. Wait for the previous owner to finish.
 *		b. Mark the queue head as unowned, which tells us the range
 *		   that we are responsible for publishing.
 *		c. Wait for all commands in our owned range to become valid.
 *		d. Advance the hardware prod pointer.
 *		e. Tell the next owner we've finished.
 *
 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
 *	   owner), then we need to stick around until it has completed:
 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 *		   to clear the first 4 bytes.
 *		b. Otherwise, we spin waiting for the hardware cons pointer to
 *		   advance past our command.
 *
 * The devil is in the details, particularly the use of locking for handling
 * SYNC completion and freeing up space in the queue before we think that it is
 * full.
 */
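/*
 * As an example of step (1): if three CPUs race to insert one command each
 * into an empty queue, the cmpxchg() loop in arm_smmu_cmdq_issue_cmdlist()
 * serialises them into consecutive slots. The CPU that allocated while
 * CMDQ_PROD_OWNED_FLAG was clear becomes the owner; the other two merely
 * write and validate their own slots, and the owner's single write to the
 * prod register publishes every command gathered up to the point where it
 * cleared the flag.
 */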
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}

/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}

/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}

/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(cmdq->q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}

/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}

/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(cmdq->q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 * 	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 * 			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}

static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
					 struct arm_smmu_ll_queue *llq)
{
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);

	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
}

static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
					u32 prod, int n)
{
	int i;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= prod,
	};

	for (i = 0; i < n; ++i) {
		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];

		prod = queue_inc_prod_n(&llq, i);
		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
	}
}

/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 */
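/*
 * For example, arm_smmu_write_ste() updates the STE in memory and then
 * pushes CFGI_STE + CMD_SYNC through this path: the dma_wmb() in step (3)
 * makes the new STE contents visible before the SMMU can consume the
 * command, and waiting for the CMD_SYNC tells the caller that the SMMU has
 * observed the update.
 */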
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				       u64 *cmds, int n, bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
	unsigned long flags;
	bool owner;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	struct arm_smmu_ll_queue llq, head;
	int ret = 0;

	llq.max_n_shift = cmdq->q.llq.max_n_shift;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
			local_irq_save(flags);
		}

		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync) |
					     CMDQ_PROD_OWNED_FLAG;

		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		llq.val = old;
	} while (1);
	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;

	/*
	 * 2. Write our commands into the queue
	 * Dependency ordering from the cmpxchg() loop above.
	 */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);

		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
		arm_smmu_cmdq_shared_lock(cmdq);
	}

	/* 3. Mark our slots as valid, ensuring commands are visible first */
	dma_wmb();
	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);

	/* 4. If we are the owner, take control of the SMMU hardware */
	if (owner) {
		/* a. Wait for previous owner to finish */
		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);

		/* b. Stop gathering work by clearing the owned flag */
		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
						   &cmdq->q.llq.atomic.prod);
		prod &= ~CMDQ_PROD_OWNED_FLAG;

		/*
		 * c. Wait for any gathered work to be written to the queue.
		 * Note that we read our own entries so that we have the control
		 * dependency required by (d).
		 */
		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);

		/*
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
		writel_relaxed(prod, cmdq->q.prod_reg);

		/*
		 * e. Tell the next owner we're done
		 * Make sure we've updated the hardware first, so that we don't
		 * race to update prod and potentially move it backwards.
		 */
		atomic_set_release(&cmdq->owner_prod, prod);
	}

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
			arm_smmu_cmdq_shared_unlock(cmdq);
		}
	}

	local_irq_restore(flags);
	return ret;
}

static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				     struct arm_smmu_cmdq_ent *ent,
				     bool sync)
{
	u64 cmd[CMDQ_ENT_DWORDS];

	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 ent->opcode);
		return -EINVAL;
	}

	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
}

static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				   struct arm_smmu_cmdq_ent *ent)
{
	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
}

static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
					     struct arm_smmu_cmdq_ent *ent)
{
	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}

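/*
 * Batch helpers: callers accumulate commands with arm_smmu_cmdq_batch_add()
 * and push them with a single CMD_SYNC via arm_smmu_cmdq_batch_submit(),
 * which keeps the number of prod updates and syncs down when invalidating
 * large address ranges.
 */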
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
				    struct arm_smmu_cmdq_batch *cmds,
				    struct arm_smmu_cmdq_ent *cmd)
{
	int index;

	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
		cmds->num = 0;
	}

	if (cmds->num == CMDQ_BATCH_ENTRIES) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
		cmds->num = 0;
	}

	index = cmds->num * CMDQ_ENT_DWORDS;
	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 cmd->opcode);
		return;
	}

	cmds->num++;
}

static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}

static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
				   struct iommu_page_response *resp)
{
	struct arm_smmu_cmdq_ent cmd = {0};
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	int sid = master->streams[0].id;

	if (WARN_ON(!master->stall_enabled))
		return;

	cmd.opcode		= CMDQ_OP_RESUME;
	cmd.resume.sid		= sid;
	cmd.resume.stag		= resp->grpid;
	switch (resp->code) {
	case IOMMU_PAGE_RESP_INVALID:
	case IOMMU_PAGE_RESP_FAILURE:
		cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
		break;
	case IOMMU_PAGE_RESP_SUCCESS:
		cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
		break;
	default:
		break;
	}

	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
	/*
	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
	 * RESUME consumption guarantees that the stalled transaction will be
	 * terminated... at some point in the future. PRI_RESP is fire and
	 * forget.
	 */
}

/* Context descriptor manipulation functions */
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
		.tlbi.asid = asid,
	};

	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}

/*
 * Based on the value of ent, report which bits of the STE the HW will access. It
 * would be nice if this was complete according to the spec, but minimally it
 * has to capture the bits this driver uses.
 */
VISIBLE_IF_KUNIT
void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
{
	unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));

	used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
	if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
		return;

	used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);

	/* S1 translates */
	if (cfg & BIT(0)) {
		used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
					    STRTAB_STE_0_S1CTXPTR_MASK |
					    STRTAB_STE_0_S1CDMAX);
		used_bits[1] |=
			cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
				    STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
				    STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
				    STRTAB_STE_1_EATS);
		used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
	}

	/* S2 translates */
	if (cfg & BIT(1)) {
		used_bits[1] |=
			cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
		used_bits[2] |=
			cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
				    STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
				    STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R);
		used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
	}

	if (cfg == STRTAB_STE_0_CFG_BYPASS)
		used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);

/*
 * Figure out if we can do a hitless update of entry to become target. Returns a
 * bit mask where 1 indicates that qword needs to be set disruptively.
 * unused_update is an intermediate value of entry that has unused bits set to
 * their new values.
 */
static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
				    const __le64 *entry, const __le64 *target,
				    __le64 *unused_update)
{
	__le64 target_used[NUM_ENTRY_QWORDS] = {};
	__le64 cur_used[NUM_ENTRY_QWORDS] = {};
	u8 used_qword_diff = 0;
	unsigned int i;

	writer->ops->get_used(entry, cur_used);
	writer->ops->get_used(target, target_used);

	for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
		/*
		 * Check that masks are up to date; the make functions are not
		 * allowed to set a bit to 1 if the used function doesn't say it
		 * is used.
		 */
		WARN_ON_ONCE(target[i] & ~target_used[i]);

		/* Bits can change because they are not currently being used */
		unused_update[i] = (entry[i] & cur_used[i]) |
				   (target[i] & ~cur_used[i]);
		/*
		 * Each bit indicates that a used bit in a qword needs to be
		 * changed after unused_update is applied.
		 */
		if ((unused_update[i] & target_used[i]) != target[i])
			used_qword_diff |= 1 << i;
	}
	return used_qword_diff;
}

static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
		      const __le64 *target, unsigned int start,
		      unsigned int len)
{
	bool changed = false;
	unsigned int i;

	for (i = start; len != 0; len--, i++) {
		if (entry[i] != target[i]) {
			WRITE_ONCE(entry[i], target[i]);
			changed = true;
		}
	}

	if (changed)
		writer->ops->sync(writer);
	return changed;
}

/*
 * Update the STE/CD to the target configuration. The transition from the
 * current entry to the target entry takes place over multiple steps that
 * attempt to make the transition hitless if possible. This function takes care
 * not to create a situation where the HW can perceive a corrupted entry. HW is
 * only required to have a 64 bit atomicity with stores from the CPU, while
 * entries are many 64 bit values big.
 *
 * The difference between the current value and the target value is analyzed to
 * determine which of three updates are required - disruptive, hitless or no
 * change.
 *
 * In the most general disruptive case we can make any update in three steps:
 *  - Disrupting the entry (V=0)
 *  - Fill now unused qwords, except qword 0 which contains V
 *  - Make qword 0 have the final value and valid (V=1) with a single 64
 *    bit store
 *
 * However this disrupts the HW while it is happening. There are several
 * interesting cases where a STE/CD can be updated without disturbing the HW
 * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
 * because the used bits don't intersect. We can detect this by calculating how
 * many 64 bit values need update after adjusting the unused bits and skip the
 * V=0 process. This relies on the IGNORED behavior described in the
 * specification.
 */
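/*
 * For example, flipping S1DSS in an otherwise identical cdtable STE only
 * changes used bits in qword 1, so used_qword_diff has a single bit set and
 * the update is done hitlessly; switching a live STE between stage-1 and
 * stage-2 translation touches several qwords and goes through the V=0
 * sequence instead.
 */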
VISIBLE_IF_KUNIT
void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
			  const __le64 *target)
{
	__le64 unused_update[NUM_ENTRY_QWORDS];
	u8 used_qword_diff;

	used_qword_diff =
		arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
	if (hweight8(used_qword_diff) == 1) {
		/*
		 * Only one qword needs its used bits to be changed. This is a
		 * hitless update, update all bits the current STE/CD is
		 * ignoring to their new values, then update a single "critical
		 * qword" to change the STE/CD and finally 0 out any bits that
		 * are now unused in the target configuration.
		 */
		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;

		/*
		 * Skip writing unused bits in the critical qword since we'll be
		 * writing it in the next step anyways. This can save a sync
		 * when the only change is in that qword.
		 */
		unused_update[critical_qword_index] =
			entry[critical_qword_index];
		entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
		entry_set(writer, entry, target, critical_qword_index, 1);
		entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
	} else if (used_qword_diff) {
		/*
		 * At least two qwords need their inuse bits to be changed. This
		 * requires a breaking update, zero the V bit, write all qwords
		 * but 0, then set qword 0
		 */
		unused_update[0] = 0;
		entry_set(writer, entry, unused_update, 0, 1);
		entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
		entry_set(writer, entry, target, 0, 1);
	} else {
		/*
		 * No inuse bit changed. Sanity check that all unused bits are 0
		 * in the entry. The target was already sanity checked by
		 * arm_smmu_entry_qword_diff().
		 */
		WARN_ON_ONCE(
			entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);

static void arm_smmu_sync_cd(struct arm_smmu_master *master,
			     int ssid, bool leaf)
{
	size_t i;
	struct arm_smmu_cmdq_batch cmds;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
		.cfgi	= {
			.ssid	= ssid,
			.leaf	= leaf,
		},
	};

	cmds.num = 0;
	for (i = 0; i < master->num_streams; i++) {
		cmd.cfgi.sid = master->streams[i].id;
		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
	}

	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}

static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
					struct arm_smmu_l1_ctx_desc *l1_desc)
{
	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
					     &l1_desc->l2ptr_dma, GFP_KERNEL);
	if (!l1_desc->l2ptr) {
		dev_warn(smmu->dev,
			 "failed to allocate context descriptor table\n");
		return -ENOMEM;
	}
	return 0;
}

static void arm_smmu_write_cd_l1_desc(__le64 *dst,
				      struct arm_smmu_l1_ctx_desc *l1_desc)
{
	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
		  CTXDESC_L1_DESC_V;

	/* The HW has 64 bit atomicity with stores to the L2 CD table */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
					u32 ssid)
{
	struct arm_smmu_l1_ctx_desc *l1_desc;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	if (!cd_table->cdtab)
		return NULL;

	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
		return (struct arm_smmu_cd *)(cd_table->cdtab +
					      ssid * CTXDESC_CD_DWORDS);

	l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES];
	if (!l1_desc->l2ptr)
		return NULL;
	return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES];
}

struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
					  u32 ssid)
{
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
	struct arm_smmu_device *smmu = master->smmu;

	might_sleep();
	iommu_group_mutex_assert(master->dev);

	if (!cd_table->cdtab) {
		if (arm_smmu_alloc_cd_tables(master))
			return NULL;
	}

	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
		unsigned int idx = ssid / CTXDESC_L2_ENTRIES;
		struct arm_smmu_l1_ctx_desc *l1_desc;

		l1_desc = &cd_table->l1_desc[idx];
		if (!l1_desc->l2ptr) {
			__le64 *l1ptr;

			if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
				return NULL;

			l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
			arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
			/* An invalid L1CD can be cached */
			arm_smmu_sync_cd(master, ssid, false);
		}
	}
	return arm_smmu_get_cd_ptr(master, ssid);
}

struct arm_smmu_cd_writer {
	struct arm_smmu_entry_writer writer;
	unsigned int ssid;
};

VISIBLE_IF_KUNIT
void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
{
	used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
	if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
		return;
	memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));

	/*
	 * If EPD0 is set by the make function it means
	 * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
	 */
	if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
		used_bits[0] &= ~cpu_to_le64(
			CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
			CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
			CTXDESC_CD_0_TCR_SH0);
		used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);

static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
	struct arm_smmu_cd_writer *cd_writer =
		container_of(writer, struct arm_smmu_cd_writer, writer);

	arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
}

static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
	.sync = arm_smmu_cd_writer_sync_entry,
	.get_used = arm_smmu_get_cd_used,
};

void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
			     struct arm_smmu_cd *cdptr,
			     const struct arm_smmu_cd *target)
{
	struct arm_smmu_cd_writer cd_writer = {
		.writer = {
			.ops = &arm_smmu_cd_writer_ops,
			.master = master,
		},
		.ssid = ssid,
	};

	arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
}

void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
			 struct arm_smmu_master *master,
			 struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
	const struct io_pgtable_cfg *pgtbl_cfg =
		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
		&pgtbl_cfg->arm_lpae_s1_cfg.tcr;

	memset(target, 0, sizeof(*target));

	target->data[0] = cpu_to_le64(
		FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
		FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
		FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
		FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
		FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
#ifdef __BIG_ENDIAN
		CTXDESC_CD_0_ENDI |
#endif
		CTXDESC_CD_0_TCR_EPD1 |
		CTXDESC_CD_0_V |
		FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
		CTXDESC_CD_0_AA64 |
		(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
		CTXDESC_CD_0_R |
		CTXDESC_CD_0_A |
		CTXDESC_CD_0_ASET |
		FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
		);
	target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
				      CTXDESC_CD_1_TTB0_MASK);
	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);

void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
{
	struct arm_smmu_cd target = {};
	struct arm_smmu_cd *cdptr;

	if (!master->cd_table.cdtab)
		return;
	cdptr = arm_smmu_get_cd_ptr(master, ssid);
	if (WARN_ON(!cdptr))
		return;
	arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
}

static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
{
	int ret;
	size_t l1size;
	size_t max_contexts;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	cd_table->s1cdmax = master->ssid_bits;
	max_contexts = 1 << cd_table->s1cdmax;

	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
	    max_contexts <= CTXDESC_L2_ENTRIES) {
		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
		cd_table->num_l1_ents = max_contexts;

		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
	} else {
		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
		cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
						  CTXDESC_L2_ENTRIES);

		cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
					      sizeof(*cd_table->l1_desc),
					      GFP_KERNEL);
		if (!cd_table->l1_desc)
			return -ENOMEM;

		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	}

	cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
					   GFP_KERNEL);
	if (!cd_table->cdtab) {
		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
		ret = -ENOMEM;
		goto err_free_l1;
	}

	return 0;

err_free_l1:
	if (cd_table->l1_desc) {
		devm_kfree(smmu->dev, cd_table->l1_desc);
		cd_table->l1_desc = NULL;
	}
	return ret;
}

static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
{
	int i;
	size_t size, l1size;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	if (cd_table->l1_desc) {
		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

		for (i = 0; i < cd_table->num_l1_ents; i++) {
			if (!cd_table->l1_desc[i].l2ptr)
				continue;

			dmam_free_coherent(smmu->dev, size,
					   cd_table->l1_desc[i].l2ptr,
					   cd_table->l1_desc[i].l2ptr_dma);
		}
		devm_kfree(smmu->dev, cd_table->l1_desc);
		cd_table->l1_desc = NULL;

		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	} else {
		l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
	}

	dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
	cd_table->cdtab_dma = 0;
	cd_table->cdtab = NULL;
}

bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
{
	bool free;
	struct arm_smmu_ctx_desc *old_cd;

	if (!cd->asid)
		return false;

	free = refcount_dec_and_test(&cd->refs);
	if (free) {
		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
		WARN_ON(old_cd != cd);
	}
	return free;
}

/* Stream table manipulation functions */
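/*
 * With a two-level stream table, each L1 descriptor covers a span of
 * 1 << STRTAB_SPLIT STEs; the L2 tables are allocated lazily by
 * arm_smmu_init_l2_strtab() when a master using one of those SIDs is added.
 */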
static void
arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
{
	u64 val = 0;

	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;

	/* The HW has 64 bit atomicity with stores to the L2 STE table */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

struct arm_smmu_ste_writer {
	struct arm_smmu_entry_writer writer;
	u32 sid;
};

static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
	struct arm_smmu_ste_writer *ste_writer =
		container_of(writer, struct arm_smmu_ste_writer, writer);
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_STE,
		.cfgi	= {
			.sid	= ste_writer->sid,
			.leaf	= true,
		},
	};

	arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
}

static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
	.sync = arm_smmu_ste_writer_sync_entry,
	.get_used = arm_smmu_get_ste_used,
};

static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
			       struct arm_smmu_ste *ste,
			       const struct arm_smmu_ste *target)
{
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ste_writer ste_writer = {
		.writer = {
			.ops = &arm_smmu_ste_writer_ops,
			.master = master,
		},
		.sid = sid,
	};

	arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);

	/* It's likely that we'll want to use the new STE soon */
	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
		struct arm_smmu_cmdq_ent
			prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
					 .prefetch = {
						 .sid = sid,
					 } };

		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
	}
}

VISIBLE_IF_KUNIT
void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
{
	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);

VISIBLE_IF_KUNIT
void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
			      struct arm_smmu_ste *target)
{
	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));

	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
		target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
							 STRTAB_STE_1_SHCFG_INCOMING));
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);

VISIBLE_IF_KUNIT
void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
			       struct arm_smmu_master *master)
{
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
	struct arm_smmu_device *smmu = master->smmu;

	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
		FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
		(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
		FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));

	target->data[1] = cpu_to_le64(
		FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
		FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
		FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
		FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
		((smmu->features & ARM_SMMU_FEAT_STALLS &&
		  !master->stall_enabled) ?
			 STRTAB_STE_1_S1STALLD :
			 0) |
		FIELD_PREP(STRTAB_STE_1_EATS,
			   master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));

	if (smmu->features & ARM_SMMU_FEAT_E2H) {
		/*
		 * To support BTM the streamworld needs to match the
		 * configuration of the CPU so that the ASID broadcasts are
		 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
		 * or NS-EL1 (guest). Since an SVA domain can be installed in a
		 * PASID this should always use a BTM compatible configuration
		 * if the HW supports it.
		 */
		target->data[1] |= cpu_to_le64(
			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
	} else {
		target->data[1] |= cpu_to_le64(
			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));

		/*
		 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
		 * arm_smmu_domain_alloc_id()
		 */
		target->data[2] =
			cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);

VISIBLE_IF_KUNIT
void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
				 struct arm_smmu_master *master,
				 struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
	const struct io_pgtable_cfg *pgtbl_cfg =
		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
		&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	u64 vtcr_val;
	struct arm_smmu_device *smmu = master->smmu;

	memset(target, 0, sizeof(*target));
	target->data[0] = cpu_to_le64(
		STRTAB_STE_0_V |
		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));

	target->data[1] = cpu_to_le64(
		FIELD_PREP(STRTAB_STE_1_EATS,
			   master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));

	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
		target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
							  STRTAB_STE_1_SHCFG_INCOMING));

	vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
		   FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
	target->data[2] = cpu_to_le64(
		FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
		FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
		STRTAB_STE_2_S2AA64 |
#ifdef __BIG_ENDIAN
		STRTAB_STE_2_S2ENDI |
#endif
		STRTAB_STE_2_S2PTW |
		STRTAB_STE_2_S2R);

	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
				      STRTAB_STE_3_S2TTB_MASK);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);

/*
 * This can safely directly manipulate the STE memory without a sync sequence
 * because the STE table has not been installed in the SMMU yet.
 */
static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
				       unsigned int nent)
{
	unsigned int i;

	for (i = 0; i < nent; ++i) {
		arm_smmu_make_abort_ste(strtab);
		strtab++;
	}
}

static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
	size_t size;
	void *strtab;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];

	if (desc->l2ptr)
		return 0;

	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];

	desc->span = STRTAB_SPLIT + 1;
	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
					  GFP_KERNEL);
	if (!desc->l2ptr) {
		dev_err(smmu->dev,
			"failed to allocate l2 stream table for SID %u\n",
			sid);
		return -ENOMEM;
	}

	arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
	arm_smmu_write_strtab_l1_desc(strtab, desc);
	return 0;
}

static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
{
	struct rb_node *node;
	struct arm_smmu_stream *stream;

	lockdep_assert_held(&smmu->streams_mutex);

	node = smmu->streams.rb_node;
	while (node) {
		stream = rb_entry(node, struct arm_smmu_stream, node);
		if (stream->id < sid)
			node = node->rb_right;
		else if (stream->id > sid)
			node = node->rb_left;
		else
			return stream->master;
	}

	return NULL;
}

/* IRQ and event handlers */
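/*
 * Only stall-capable faults are forwarded to the IOPF layer as page
 * requests; anything else is just logged by the event queue thread.
 */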
1705 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1706 {
1707 	int ret = 0;
1708 	u32 perm = 0;
1709 	struct arm_smmu_master *master;
1710 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1711 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1712 	struct iopf_fault fault_evt = { };
1713 	struct iommu_fault *flt = &fault_evt.fault;
1714 
1715 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1716 	case EVT_ID_TRANSLATION_FAULT:
1717 	case EVT_ID_ADDR_SIZE_FAULT:
1718 	case EVT_ID_ACCESS_FAULT:
1719 	case EVT_ID_PERMISSION_FAULT:
1720 		break;
1721 	default:
1722 		return -EOPNOTSUPP;
1723 	}
1724 
1725 	/* Stage-2 is always pinned at the moment */
1726 	if (evt[1] & EVTQ_1_S2)
1727 		return -EFAULT;
1728 
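	/*
	 * Only stalled transactions can be handled here as IOPF page requests;
	 * anything else is passed back for the event queue thread to log.
	 */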
1729 	if (!(evt[1] & EVTQ_1_STALL))
1730 		return -EOPNOTSUPP;
1731 
1732 	if (evt[1] & EVTQ_1_RnW)
1733 		perm |= IOMMU_FAULT_PERM_READ;
1734 	else
1735 		perm |= IOMMU_FAULT_PERM_WRITE;
1736 
1737 	if (evt[1] & EVTQ_1_InD)
1738 		perm |= IOMMU_FAULT_PERM_EXEC;
1739 
1740 	if (evt[1] & EVTQ_1_PnU)
1741 		perm |= IOMMU_FAULT_PERM_PRIV;
1742 
1743 	flt->type = IOMMU_FAULT_PAGE_REQ;
1744 	flt->prm = (struct iommu_fault_page_request) {
1745 		.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1746 		.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1747 		.perm = perm,
1748 		.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1749 	};
1750 
1751 	if (ssid_valid) {
1752 		flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1753 		flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1754 	}
1755 
1756 	mutex_lock(&smmu->streams_mutex);
1757 	master = arm_smmu_find_master(smmu, sid);
1758 	if (!master) {
1759 		ret = -EINVAL;
1760 		goto out_unlock;
1761 	}
1762 
1763 	iommu_report_device_fault(master->dev, &fault_evt);
1764 out_unlock:
1765 	mutex_unlock(&smmu->streams_mutex);
1766 	return ret;
1767 }
1768 
1769 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1770 {
1771 	int i, ret;
1772 	struct arm_smmu_device *smmu = dev;
1773 	struct arm_smmu_queue *q = &smmu->evtq.q;
1774 	struct arm_smmu_ll_queue *llq = &q->llq;
1775 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1776 				      DEFAULT_RATELIMIT_BURST);
1777 	u64 evt[EVTQ_ENT_DWORDS];
1778 
1779 	do {
1780 		while (!queue_remove_raw(q, evt)) {
1781 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1782 
1783 			ret = arm_smmu_handle_evt(smmu, evt);
1784 			if (!ret || !__ratelimit(&rs))
1785 				continue;
1786 
1787 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1788 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1789 				dev_info(smmu->dev, "\t0x%016llx\n",
1790 					 (unsigned long long)evt[i]);
1791 
1792 			cond_resched();
1793 		}
1794 
1795 		/*
1796 		 * Not much we can do on overflow, so scream and pretend we're
1797 		 * trying harder.
1798 		 */
1799 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1800 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1801 	} while (!queue_empty(llq));
1802 
1803 	/* Sync our overflow flag, as we believe we're up to speed */
1804 	queue_sync_cons_ovf(q);
1805 	return IRQ_HANDLED;
1806 }
1807 
1808 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1809 {
1810 	u32 sid, ssid;
1811 	u16 grpid;
1812 	bool ssv, last;
1813 
1814 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1815 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1816 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1817 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1818 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1819 
1820 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1821 	dev_info(smmu->dev,
1822 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1823 		 sid, ssid, grpid, last ? "L" : "",
1824 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1825 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1826 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1827 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1828 		 evt[1] & PRIQ_1_ADDR_MASK);
1829 
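	/*
	 * We don't handle PRI requests, so respond to the last request in the
	 * group with a denial, letting the endpoint complete the Page Request
	 * Group.
	 */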
1830 	if (last) {
1831 		struct arm_smmu_cmdq_ent cmd = {
1832 			.opcode			= CMDQ_OP_PRI_RESP,
1833 			.substream_valid	= ssv,
1834 			.pri			= {
1835 				.sid	= sid,
1836 				.ssid	= ssid,
1837 				.grpid	= grpid,
1838 				.resp	= PRI_RESP_DENY,
1839 			},
1840 		};
1841 
1842 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1843 	}
1844 }
1845 
1846 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1847 {
1848 	struct arm_smmu_device *smmu = dev;
1849 	struct arm_smmu_queue *q = &smmu->priq.q;
1850 	struct arm_smmu_ll_queue *llq = &q->llq;
1851 	u64 evt[PRIQ_ENT_DWORDS];
1852 
1853 	do {
1854 		while (!queue_remove_raw(q, evt))
1855 			arm_smmu_handle_ppr(smmu, evt);
1856 
1857 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1858 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1859 	} while (!queue_empty(llq));
1860 
1861 	/* Sync our overflow flag, as we believe we're up to speed */
1862 	queue_sync_cons_ovf(q);
1863 	return IRQ_HANDLED;
1864 }
1865 
1866 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1867 
1868 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1869 {
1870 	u32 gerror, gerrorn, active;
1871 	struct arm_smmu_device *smmu = dev;
1872 
1873 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1874 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1875 
1876 	active = gerror ^ gerrorn;
1877 	if (!(active & GERROR_ERR_MASK))
1878 		return IRQ_NONE; /* No errors pending */
1879 
1880 	dev_warn(smmu->dev,
1881 		 "unexpected global error reported (0x%08x), this could be serious\n",
1882 		 active);
1883 
1884 	if (active & GERROR_SFM_ERR) {
1885 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1886 		arm_smmu_device_disable(smmu);
1887 	}
1888 
1889 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1890 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1891 
1892 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1893 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1894 
1895 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1896 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1897 
1898 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1899 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1900 
1901 	if (active & GERROR_PRIQ_ABT_ERR)
1902 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1903 
1904 	if (active & GERROR_EVTQ_ABT_ERR)
1905 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1906 
1907 	if (active & GERROR_CMDQ_ERR)
1908 		arm_smmu_cmdq_skip_err(smmu);
1909 
1910 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1911 	return IRQ_HANDLED;
1912 }
1913 
1914 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1915 {
1916 	struct arm_smmu_device *smmu = dev;
1917 
1918 	arm_smmu_evtq_thread(irq, dev);
1919 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1920 		arm_smmu_priq_thread(irq, dev);
1921 
1922 	return IRQ_HANDLED;
1923 }
1924 
1925 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1926 {
1927 	arm_smmu_gerror_handler(irq, dev);
1928 	return IRQ_WAKE_THREAD;
1929 }
1930 
1931 static void
1932 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1933 			struct arm_smmu_cmdq_ent *cmd)
1934 {
1935 	size_t log2_span;
1936 	size_t span_mask;
1937 	/* ATC invalidates are always on 4096-byte pages */
1938 	size_t inval_grain_shift = 12;
1939 	unsigned long page_start, page_end;
1940 
1941 	/*
1942 	 * ATS and PASID:
1943 	 *
1944 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1945 	 * prefix. In that case all ATC entries within the address range are
1946 	 * invalidated, including those that were requested with a PASID! There
1947 	 * is no way to invalidate only entries without PASID.
1948 	 *
1949 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1950 	 * traffic), translation requests without PASID create ATC entries
1951 	 * without PASID, which must be invalidated with substream_valid clear.
1952 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1953 	 * ATC entries within the address range.
1954 	 */
1955 	*cmd = (struct arm_smmu_cmdq_ent) {
1956 		.opcode			= CMDQ_OP_ATC_INV,
1957 		.substream_valid	= (ssid != IOMMU_NO_PASID),
1958 		.atc.ssid		= ssid,
1959 	};
1960 
1961 	if (!size) {
1962 		cmd->atc.size = ATC_INV_SIZE_ALL;
1963 		return;
1964 	}
1965 
1966 	page_start	= iova >> inval_grain_shift;
1967 	page_end	= (iova + size - 1) >> inval_grain_shift;
1968 
1969 	/*
1970 	 * In an ATS Invalidate Request, the address must be aligned on the
1971 	 * range size, which must be a power of two number of page sizes. We
1972 	 * thus have to choose between grossly over-invalidating the region, or
1973 	 * splitting the invalidation into multiple commands. For simplicity
1974 	 * we'll go with the first solution, but should refine it in the future
1975 	 * if multiple commands are shown to be more efficient.
1976 	 *
1977 	 * Find the smallest power of two that covers the range. The most
1978 	 * significant differing bit between the start and end addresses,
1979 	 * fls(start ^ end), indicates the required span. For example:
1980 	 *
1981 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1982 	 *		x = 0b1000 ^ 0b1011 = 0b11
1983 	 *		span = 1 << fls(x) = 4
1984 	 *
1985 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1986 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1987 	 *		span = 1 << fls(x) = 16
1988 	 */
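	/*
	 * Informational, continuing the second example above: x = 0b1101, so
	 * log2_span = fls_long(x) = 4 and span_mask = 0b1111. page_start is
	 * rounded down from 7 to 0, giving atc.addr = 0 and atc.size = 4,
	 * i.e. a 16-page invalidation covering [0; 15].
	 */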
1989 	log2_span	= fls_long(page_start ^ page_end);
1990 	span_mask	= (1ULL << log2_span) - 1;
1991 
1992 	page_start	&= ~span_mask;
1993 
1994 	cmd->atc.addr	= page_start << inval_grain_shift;
1995 	cmd->atc.size	= log2_span;
1996 }
1997 
1998 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1999 {
2000 	int i;
2001 	struct arm_smmu_cmdq_ent cmd;
2002 	struct arm_smmu_cmdq_batch cmds;
2003 
2004 	arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
2005 
2006 	cmds.num = 0;
2007 	for (i = 0; i < master->num_streams; i++) {
2008 		cmd.atc.sid = master->streams[i].id;
2009 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
2010 	}
2011 
2012 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
2013 }
2014 
2015 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
2016 			    unsigned long iova, size_t size)
2017 {
2018 	int i;
2019 	unsigned long flags;
2020 	struct arm_smmu_cmdq_ent cmd;
2021 	struct arm_smmu_master *master;
2022 	struct arm_smmu_cmdq_batch cmds;
2023 
2024 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
2025 		return 0;
2026 
2027 	/*
2028 	 * Ensure that we've completed prior invalidation of the main TLBs
2029 	 * before we read 'nr_ats_masters' in case of a concurrent call to
2030 	 * arm_smmu_enable_ats():
2031 	 *
2032 	 *	// unmap()			// arm_smmu_enable_ats()
2033 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
2034 	 *	smp_mb();			[...]
2035 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
2036 	 *
2037 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
2038 	 * ATS was enabled at the PCI device before completion of the TLBI.
2039 	 */
2040 	smp_mb();
2041 	if (!atomic_read(&smmu_domain->nr_ats_masters))
2042 		return 0;
2043 
2044 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
2045 
2046 	cmds.num = 0;
2047 
2048 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2049 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
2050 		if (!master->ats_enabled)
2051 			continue;
2052 
2053 		for (i = 0; i < master->num_streams; i++) {
2054 			cmd.atc.sid = master->streams[i].id;
2055 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
2056 		}
2057 	}
2058 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2059 
2060 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
2061 }
2062 
2063 /* IO_PGTABLE API */
2064 static void arm_smmu_tlb_inv_context(void *cookie)
2065 {
2066 	struct arm_smmu_domain *smmu_domain = cookie;
2067 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2068 	struct arm_smmu_cmdq_ent cmd;
2069 
2070 	/*
2071 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2072 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
2073 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2074 	 * insertion to guarantee those are observed before the TLBI. Do be
2075 	 * careful, 007.
2076 	 */
2077 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2078 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
2079 	} else {
2080 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
2081 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2082 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
2083 	}
2084 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2085 }
2086 
2087 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
2088 				     unsigned long iova, size_t size,
2089 				     size_t granule,
2090 				     struct arm_smmu_domain *smmu_domain)
2091 {
2092 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2093 	unsigned long end = iova + size, num_pages = 0, tg = 0;
2094 	size_t inv_range = granule;
2095 	struct arm_smmu_cmdq_batch cmds;
2096 
2097 	if (!size)
2098 		return;
2099 
2100 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2101 		/* Get the leaf page size */
2102 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2103 
2104 		num_pages = size >> tg;
2105 
2106 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
2107 		cmd->tlbi.tg = (tg - 10) / 2;
2108 
2109 		/*
2110 		 * Determine what level the granule is at. For non-leaf, both
2111 		 * io-pgtable and SVA pass a nominal last-level granule because
2112 		 * they don't know what level(s) actually apply, so ignore that
2113 		 * and leave TTL=0. However for various errata reasons we still
2114 		 * want to use a range command, so avoid the SVA corner case
2115 		 * where both scale and num could be 0 as well.
2116 		 */
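		/*
		 * Informational example, assuming 4KB leaf pages (tg = 12): a
		 * 4KB leaf granule gives ttl = 4 - ((12 - 3) / (12 - 3)) = 3,
		 * while a 2MB block granule gives
		 * ttl = 4 - ((21 - 3) / (12 - 3)) = 2.
		 */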
2117 		if (cmd->tlbi.leaf)
2118 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
2119 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
2120 			num_pages++;
2121 	}
2122 
2123 	cmds.num = 0;
2124 
2125 	while (iova < end) {
2126 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2127 			/*
2128 			 * On each iteration of the loop, the range is 5 bits
2129 			 * worth of the aligned size remaining.
2130 			 * The range in pages is:
2131 			 *
2132 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
2133 			 */
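			/*
			 * Informational example: num_pages = 0x23 (35 pages)
			 * is covered by two commands, the first with scale 0,
			 * num 3 (3 pages) and the second with scale 5, num 1
			 * (32 pages).
			 */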
2134 			unsigned long scale, num;
2135 
2136 			/* Determine the power of 2 multiple number of pages */
2137 			scale = __ffs(num_pages);
2138 			cmd->tlbi.scale = scale;
2139 
2140 			/* Determine how many chunks of 2^scale size we have */
2141 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2142 			cmd->tlbi.num = num - 1;
2143 
2144 			/* range is num * 2^scale * pgsize */
2145 			inv_range = num << (scale + tg);
2146 
2147 			/* Clear out the lower order bits for the next iteration */
2148 			num_pages -= num << scale;
2149 		}
2150 
2151 		cmd->tlbi.addr = iova;
2152 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
2153 		iova += inv_range;
2154 	}
2155 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
2156 }
2157 
2158 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
2159 					  size_t granule, bool leaf,
2160 					  struct arm_smmu_domain *smmu_domain)
2161 {
2162 	struct arm_smmu_cmdq_ent cmd = {
2163 		.tlbi = {
2164 			.leaf	= leaf,
2165 		},
2166 	};
2167 
2168 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2169 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2170 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
2171 		cmd.tlbi.asid	= smmu_domain->cd.asid;
2172 	} else {
2173 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
2174 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2175 	}
2176 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2177 
2178 	/*
2179 	 * Unfortunately, this can't be leaf-only since we may have
2180 	 * zapped an entire table.
2181 	 */
2182 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
2183 }
2184 
2185 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
2186 				 size_t granule, bool leaf,
2187 				 struct arm_smmu_domain *smmu_domain)
2188 {
2189 	struct arm_smmu_cmdq_ent cmd = {
2190 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2191 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
2192 		.tlbi = {
2193 			.asid	= asid,
2194 			.leaf	= leaf,
2195 		},
2196 	};
2197 
2198 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2199 }
2200 
2201 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2202 					 unsigned long iova, size_t granule,
2203 					 void *cookie)
2204 {
2205 	struct arm_smmu_domain *smmu_domain = cookie;
2206 	struct iommu_domain *domain = &smmu_domain->domain;
2207 
2208 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2209 }
2210 
2211 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2212 				  size_t granule, void *cookie)
2213 {
2214 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2215 }
2216 
2217 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2218 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2219 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2220 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2221 };
2222 
2223 /* IOMMU API */
2224 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2225 {
2226 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2227 
2228 	switch (cap) {
2229 	case IOMMU_CAP_CACHE_COHERENCY:
2230 		/* Assume that a coherent TCU implies coherent TBUs */
2231 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2232 	case IOMMU_CAP_NOEXEC:
2233 	case IOMMU_CAP_DEFERRED_FLUSH:
2234 		return true;
2235 	default:
2236 		return false;
2237 	}
2238 }
2239 
2240 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2241 {
2242 
2243 	if (type == IOMMU_DOMAIN_SVA)
2244 		return arm_smmu_sva_domain_alloc();
2245 	return ERR_PTR(-EOPNOTSUPP);
2246 }
2247 
2248 static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
2249 {
2250 	struct arm_smmu_domain *smmu_domain;
2251 
2252 	/*
2253 	 * Allocate the domain and initialise some of its data structures.
2254 	 * We can't really do anything meaningful until we've added a
2255 	 * master.
2256 	 */
2257 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2258 	if (!smmu_domain)
2259 		return ERR_PTR(-ENOMEM);
2260 
2261 	mutex_init(&smmu_domain->init_mutex);
2262 	INIT_LIST_HEAD(&smmu_domain->devices);
2263 	spin_lock_init(&smmu_domain->devices_lock);
2264 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2265 
2266 	if (dev) {
2267 		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2268 		int ret;
2269 
2270 		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
2271 		if (ret) {
2272 			kfree(smmu_domain);
2273 			return ERR_PTR(ret);
2274 		}
2275 	}
2276 	return &smmu_domain->domain;
2277 }
2278 
2279 static void arm_smmu_domain_free(struct iommu_domain *domain)
2280 {
2281 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2282 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2283 
2284 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2285 
2286 	/* Free the ASID or VMID */
2287 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2288 		/* Prevent SVA from touching the CD while we're freeing it */
2289 		mutex_lock(&arm_smmu_asid_lock);
2290 		arm_smmu_free_asid(&smmu_domain->cd);
2291 		mutex_unlock(&arm_smmu_asid_lock);
2292 	} else {
2293 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2294 		if (cfg->vmid)
2295 			ida_free(&smmu->vmid_map, cfg->vmid);
2296 	}
2297 
2298 	kfree(smmu_domain);
2299 }
2300 
2301 static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
2302 				       struct arm_smmu_domain *smmu_domain)
2303 {
2304 	int ret;
2305 	u32 asid;
2306 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2307 
2308 	refcount_set(&cd->refs, 1);
2309 
2310 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2311 	mutex_lock(&arm_smmu_asid_lock);
2312 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
2313 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2314 	cd->asid	= (u16)asid;
2315 	mutex_unlock(&arm_smmu_asid_lock);
2316 	return ret;
2317 }
2318 
2319 static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
2320 				       struct arm_smmu_domain *smmu_domain)
2321 {
2322 	int vmid;
2323 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2324 
2325 	/* Reserve VMID 0 for stage-2 bypass STEs */
2326 	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2327 			       GFP_KERNEL);
2328 	if (vmid < 0)
2329 		return vmid;
2330 
2331 	cfg->vmid	= (u16)vmid;
2332 	return 0;
2333 }
2334 
2335 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
2336 				    struct arm_smmu_device *smmu)
2337 {
2338 	int ret;
2339 	unsigned long ias, oas;
2340 	enum io_pgtable_fmt fmt;
2341 	struct io_pgtable_cfg pgtbl_cfg;
2342 	struct io_pgtable_ops *pgtbl_ops;
2343 	int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
2344 				 struct arm_smmu_domain *smmu_domain);
2345 
2346 	/* Restrict the stage to what we can actually support */
2347 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2348 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2349 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2350 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2351 
2352 	switch (smmu_domain->stage) {
2353 	case ARM_SMMU_DOMAIN_S1:
2354 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2355 		ias = min_t(unsigned long, ias, VA_BITS);
2356 		oas = smmu->ias;
2357 		fmt = ARM_64_LPAE_S1;
2358 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2359 		break;
2360 	case ARM_SMMU_DOMAIN_S2:
2361 		ias = smmu->ias;
2362 		oas = smmu->oas;
2363 		fmt = ARM_64_LPAE_S2;
2364 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2365 		break;
2366 	default:
2367 		return -EINVAL;
2368 	}
2369 
2370 	pgtbl_cfg = (struct io_pgtable_cfg) {
2371 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2372 		.ias		= ias,
2373 		.oas		= oas,
2374 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2375 		.tlb		= &arm_smmu_flush_ops,
2376 		.iommu_dev	= smmu->dev,
2377 	};
2378 
2379 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2380 	if (!pgtbl_ops)
2381 		return -ENOMEM;
2382 
2383 	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2384 	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2385 	smmu_domain->domain.geometry.force_aperture = true;
2386 
2387 	ret = finalise_stage_fn(smmu, smmu_domain);
2388 	if (ret < 0) {
2389 		free_io_pgtable_ops(pgtbl_ops);
2390 		return ret;
2391 	}
2392 
2393 	smmu_domain->pgtbl_ops = pgtbl_ops;
2394 	smmu_domain->smmu = smmu;
2395 	return 0;
2396 }
2397 
2398 static struct arm_smmu_ste *
2399 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2400 {
2401 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2402 
2403 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2404 		unsigned int idx1, idx2;
2405 
2406 		/* Two-level walk */
2407 		idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2408 		idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
2409 		return &cfg->l1_desc[idx1].l2ptr[idx2];
2410 	} else {
2411 		/* Simple linear lookup */
2412 		return (struct arm_smmu_ste *)
2413 			&cfg->strtab[sid * STRTAB_STE_DWORDS];
2414 	}
2415 }
2416 
2417 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
2418 					 const struct arm_smmu_ste *target)
2419 {
2420 	int i, j;
2421 	struct arm_smmu_device *smmu = master->smmu;
2422 
2423 	for (i = 0; i < master->num_streams; ++i) {
2424 		u32 sid = master->streams[i].id;
2425 		struct arm_smmu_ste *step =
2426 			arm_smmu_get_step_for_sid(smmu, sid);
2427 
2428 		/* Bridged PCI devices may end up with duplicated IDs */
2429 		for (j = 0; j < i; j++)
2430 			if (master->streams[j].id == sid)
2431 				break;
2432 		if (j < i)
2433 			continue;
2434 
2435 		arm_smmu_write_ste(master, sid, step, target);
2436 	}
2437 }
2438 
2439 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2440 {
2441 	struct device *dev = master->dev;
2442 	struct arm_smmu_device *smmu = master->smmu;
2443 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2444 
2445 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2446 		return false;
2447 
2448 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2449 		return false;
2450 
2451 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2452 }
2453 
2454 static void arm_smmu_enable_ats(struct arm_smmu_master *master,
2455 				struct arm_smmu_domain *smmu_domain)
2456 {
2457 	size_t stu;
2458 	struct pci_dev *pdev;
2459 	struct arm_smmu_device *smmu = master->smmu;
2460 
2461 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2462 	if (!master->ats_enabled)
2463 		return;
2464 
2465 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2466 	stu = __ffs(smmu->pgsize_bitmap);
2467 	pdev = to_pci_dev(master->dev);
2468 
2469 	atomic_inc(&smmu_domain->nr_ats_masters);
2470 	/*
2471 	 * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
2472 	 */
2473 	arm_smmu_atc_inv_master(master);
2474 	if (pci_enable_ats(pdev, stu))
2475 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2476 }
2477 
2478 static void arm_smmu_disable_ats(struct arm_smmu_master *master,
2479 				 struct arm_smmu_domain *smmu_domain)
2480 {
2481 	if (!master->ats_enabled)
2482 		return;
2483 
2484 	pci_disable_ats(to_pci_dev(master->dev));
2485 	/*
2486 	 * Ensure ATS is disabled at the endpoint before we issue the
2487 	 * ATC invalidation via the SMMU.
2488 	 */
2489 	wmb();
2490 	arm_smmu_atc_inv_master(master);
2491 	atomic_dec(&smmu_domain->nr_ats_masters);
2492 }
2493 
2494 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2495 {
2496 	int ret;
2497 	int features;
2498 	int num_pasids;
2499 	struct pci_dev *pdev;
2500 
2501 	if (!dev_is_pci(master->dev))
2502 		return -ENODEV;
2503 
2504 	pdev = to_pci_dev(master->dev);
2505 
2506 	features = pci_pasid_features(pdev);
2507 	if (features < 0)
2508 		return features;
2509 
2510 	num_pasids = pci_max_pasids(pdev);
2511 	if (num_pasids <= 0)
2512 		return num_pasids;
2513 
2514 	ret = pci_enable_pasid(pdev, features);
2515 	if (ret) {
2516 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2517 		return ret;
2518 	}
2519 
2520 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2521 				  master->smmu->ssid_bits);
2522 	return 0;
2523 }
2524 
2525 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2526 {
2527 	struct pci_dev *pdev;
2528 
2529 	if (!dev_is_pci(master->dev))
2530 		return;
2531 
2532 	pdev = to_pci_dev(master->dev);
2533 
2534 	if (!pdev->pasid_enabled)
2535 		return;
2536 
2537 	master->ssid_bits = 0;
2538 	pci_disable_pasid(pdev);
2539 }
2540 
2541 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2542 {
2543 	struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev);
2544 	struct arm_smmu_domain *smmu_domain;
2545 	unsigned long flags;
2546 
2547 	if (!domain || !(domain->type & __IOMMU_DOMAIN_PAGING))
2548 		return;
2549 
2550 	smmu_domain = to_smmu_domain(domain);
2551 	arm_smmu_disable_ats(master, smmu_domain);
2552 
2553 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2554 	list_del_init(&master->domain_head);
2555 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2556 
2557 	master->ats_enabled = false;
2558 }
2559 
2560 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2561 {
2562 	int ret = 0;
2563 	unsigned long flags;
2564 	struct arm_smmu_ste target;
2565 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2566 	struct arm_smmu_device *smmu;
2567 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2568 	struct arm_smmu_master *master;
2569 	struct arm_smmu_cd *cdptr;
2570 
2571 	if (!fwspec)
2572 		return -ENOENT;
2573 
2574 	master = dev_iommu_priv_get(dev);
2575 	smmu = master->smmu;
2576 
2577 	/*
2578 	 * Checking that SVA is disabled ensures that this device isn't bound to
2579 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2580 	 * be removed concurrently since we're holding the group mutex.
2581 	 */
2582 	if (arm_smmu_master_sva_enabled(master)) {
2583 		dev_err(dev, "cannot attach - SVA enabled\n");
2584 		return -EBUSY;
2585 	}
2586 
2587 	mutex_lock(&smmu_domain->init_mutex);
2588 
2589 	if (!smmu_domain->smmu) {
2590 		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
2591 	} else if (smmu_domain->smmu != smmu)
2592 		ret = -EINVAL;
2593 
2594 	mutex_unlock(&smmu_domain->init_mutex);
2595 	if (ret)
2596 		return ret;
2597 
2598 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2599 		cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
2600 		if (!cdptr)
2601 			return -ENOMEM;
2602 	}
2603 
2604 	/*
2605 	 * Prevent arm_smmu_share_asid() from trying to change the ASID
2606 	 * of either the old or new domain while we are working on it.
2607 	 * This allows the STE and the smmu_domain->devices list to
2608 	 * be inconsistent during this routine.
2609 	 */
2610 	mutex_lock(&arm_smmu_asid_lock);
2611 
2612 	arm_smmu_detach_dev(master);
2613 
2614 	master->ats_enabled = arm_smmu_ats_supported(master);
2615 
2616 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2617 	list_add(&master->domain_head, &smmu_domain->devices);
2618 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2619 
2620 	switch (smmu_domain->stage) {
2621 	case ARM_SMMU_DOMAIN_S1: {
2622 		struct arm_smmu_cd target_cd;
2623 
2624 		arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
2625 		arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
2626 					&target_cd);
2627 		arm_smmu_make_cdtable_ste(&target, master);
2628 		arm_smmu_install_ste_for_dev(master, &target);
2629 		break;
2630 	}
2631 	case ARM_SMMU_DOMAIN_S2:
2632 		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain);
2633 		arm_smmu_install_ste_for_dev(master, &target);
2634 		arm_smmu_clear_cd(master, IOMMU_NO_PASID);
2635 		break;
2636 	}
2637 
2638 	arm_smmu_enable_ats(master, smmu_domain);
2639 	mutex_unlock(&arm_smmu_asid_lock);
2640 	return 0;
2641 }
2642 
2643 static int arm_smmu_attach_dev_ste(struct device *dev,
2644 				   struct arm_smmu_ste *ste)
2645 {
2646 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2647 
2648 	if (arm_smmu_master_sva_enabled(master))
2649 		return -EBUSY;
2650 
2651 	/*
2652 	 * Do not allow any ASID to be changed while we are working on the STE,
2653 	 * otherwise we could miss invalidations.
2654 	 */
2655 	mutex_lock(&arm_smmu_asid_lock);
2656 
2657 	/*
2658 	 * The SMMU does not support enabling ATS with bypass/abort. When the
2659 	 * STE is in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests
2660 	 * and Translated transactions are denied as though ATS is disabled for
2661 	 * the stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2662 	 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2663 	 */
2664 	arm_smmu_detach_dev(master);
2665 
2666 	arm_smmu_install_ste_for_dev(master, ste);
2667 	mutex_unlock(&arm_smmu_asid_lock);
2668 
2669 	/*
2670 	 * This has to be done after removing the master from the
2671 	 * arm_smmu_domain->devices to avoid races updating the same context
2672 	 * descriptor from arm_smmu_share_asid().
2673 	 */
2674 	arm_smmu_clear_cd(master, IOMMU_NO_PASID);
2675 	return 0;
2676 }
2677 
2678 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
2679 					struct device *dev)
2680 {
2681 	struct arm_smmu_ste ste;
2682 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2683 
2684 	arm_smmu_make_bypass_ste(master->smmu, &ste);
2685 	return arm_smmu_attach_dev_ste(dev, &ste);
2686 }
2687 
2688 static const struct iommu_domain_ops arm_smmu_identity_ops = {
2689 	.attach_dev = arm_smmu_attach_dev_identity,
2690 };
2691 
2692 static struct iommu_domain arm_smmu_identity_domain = {
2693 	.type = IOMMU_DOMAIN_IDENTITY,
2694 	.ops = &arm_smmu_identity_ops,
2695 };
2696 
2697 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
2698 					struct device *dev)
2699 {
2700 	struct arm_smmu_ste ste;
2701 
2702 	arm_smmu_make_abort_ste(&ste);
2703 	return arm_smmu_attach_dev_ste(dev, &ste);
2704 }
2705 
2706 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
2707 	.attach_dev = arm_smmu_attach_dev_blocked,
2708 };
2709 
2710 static struct iommu_domain arm_smmu_blocked_domain = {
2711 	.type = IOMMU_DOMAIN_BLOCKED,
2712 	.ops = &arm_smmu_blocked_ops,
2713 };
2714 
2715 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2716 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2717 			      int prot, gfp_t gfp, size_t *mapped)
2718 {
2719 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2720 
2721 	if (!ops)
2722 		return -ENODEV;
2723 
2724 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2725 }
2726 
2727 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2728 				   size_t pgsize, size_t pgcount,
2729 				   struct iommu_iotlb_gather *gather)
2730 {
2731 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2732 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2733 
2734 	if (!ops)
2735 		return 0;
2736 
2737 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2738 }
2739 
2740 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2741 {
2742 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2743 
2744 	if (smmu_domain->smmu)
2745 		arm_smmu_tlb_inv_context(smmu_domain);
2746 }
2747 
2748 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2749 				struct iommu_iotlb_gather *gather)
2750 {
2751 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2752 
2753 	if (!gather->pgsize)
2754 		return;
2755 
2756 	arm_smmu_tlb_inv_range_domain(gather->start,
2757 				      gather->end - gather->start + 1,
2758 				      gather->pgsize, true, smmu_domain);
2759 }
2760 
2761 static phys_addr_t
2762 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2763 {
2764 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2765 
2766 	if (!ops)
2767 		return 0;
2768 
2769 	return ops->iova_to_phys(ops, iova);
2770 }
2771 
2772 static struct platform_driver arm_smmu_driver;
2773 
2774 static
2775 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2776 {
2777 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2778 							  fwnode);
2779 	put_device(dev);
2780 	return dev ? dev_get_drvdata(dev) : NULL;
2781 }
2782 
2783 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2784 {
2785 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2786 
2787 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2788 		limit *= 1UL << STRTAB_SPLIT;
2789 
2790 	return sid < limit;
2791 }
2792 
2793 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2794 {
2795 	/* Check that the SID is in range of the SMMU and our stream table */
2796 	if (!arm_smmu_sid_in_range(smmu, sid))
2797 		return -ERANGE;
2798 
2799 	/* Ensure l2 strtab is initialised */
2800 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2801 		return arm_smmu_init_l2_strtab(smmu, sid);
2802 
2803 	return 0;
2804 }
2805 
2806 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2807 				  struct arm_smmu_master *master)
2808 {
2809 	int i;
2810 	int ret = 0;
2811 	struct arm_smmu_stream *new_stream, *cur_stream;
2812 	struct rb_node **new_node, *parent_node = NULL;
2813 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2814 
2815 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2816 				  GFP_KERNEL);
2817 	if (!master->streams)
2818 		return -ENOMEM;
2819 	master->num_streams = fwspec->num_ids;
2820 
2821 	mutex_lock(&smmu->streams_mutex);
2822 	for (i = 0; i < fwspec->num_ids; i++) {
2823 		u32 sid = fwspec->ids[i];
2824 
2825 		new_stream = &master->streams[i];
2826 		new_stream->id = sid;
2827 		new_stream->master = master;
2828 
2829 		ret = arm_smmu_init_sid_strtab(smmu, sid);
2830 		if (ret)
2831 			break;
2832 
2833 		/* Insert into SID tree */
2834 		new_node = &(smmu->streams.rb_node);
2835 		while (*new_node) {
2836 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2837 					      node);
2838 			parent_node = *new_node;
2839 			if (cur_stream->id > new_stream->id) {
2840 				new_node = &((*new_node)->rb_left);
2841 			} else if (cur_stream->id < new_stream->id) {
2842 				new_node = &((*new_node)->rb_right);
2843 			} else {
2844 				dev_warn(master->dev,
2845 					 "stream %u already in tree\n",
2846 					 cur_stream->id);
2847 				ret = -EINVAL;
2848 				break;
2849 			}
2850 		}
2851 		if (ret)
2852 			break;
2853 
2854 		rb_link_node(&new_stream->node, parent_node, new_node);
2855 		rb_insert_color(&new_stream->node, &smmu->streams);
2856 	}
2857 
2858 	if (ret) {
2859 		for (i--; i >= 0; i--)
2860 			rb_erase(&master->streams[i].node, &smmu->streams);
2861 		kfree(master->streams);
2862 	}
2863 	mutex_unlock(&smmu->streams_mutex);
2864 
2865 	return ret;
2866 }
2867 
2868 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2869 {
2870 	int i;
2871 	struct arm_smmu_device *smmu = master->smmu;
2872 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2873 
2874 	if (!smmu || !master->streams)
2875 		return;
2876 
2877 	mutex_lock(&smmu->streams_mutex);
2878 	for (i = 0; i < fwspec->num_ids; i++)
2879 		rb_erase(&master->streams[i].node, &smmu->streams);
2880 	mutex_unlock(&smmu->streams_mutex);
2881 
2882 	kfree(master->streams);
2883 }
2884 
2885 static struct iommu_ops arm_smmu_ops;
2886 
2887 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2888 {
2889 	int ret;
2890 	struct arm_smmu_device *smmu;
2891 	struct arm_smmu_master *master;
2892 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2893 
2894 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2895 		return ERR_PTR(-EBUSY);
2896 
2897 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2898 	if (!smmu)
2899 		return ERR_PTR(-ENODEV);
2900 
2901 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2902 	if (!master)
2903 		return ERR_PTR(-ENOMEM);
2904 
2905 	master->dev = dev;
2906 	master->smmu = smmu;
2907 	INIT_LIST_HEAD(&master->bonds);
2908 	INIT_LIST_HEAD(&master->domain_head);
2909 	dev_iommu_priv_set(dev, master);
2910 
2911 	ret = arm_smmu_insert_master(smmu, master);
2912 	if (ret)
2913 		goto err_free_master;
2914 
2915 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2916 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2917 
2918 	/*
2919 	 * Note that PASID must be enabled before, and disabled after ATS:
2920 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2921 	 *
2922 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2923 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2924 	 *   are changed.
2925 	 */
2926 	arm_smmu_enable_pasid(master);
2927 
2928 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2929 		master->ssid_bits = min_t(u8, master->ssid_bits,
2930 					  CTXDESC_LINEAR_CDMAX);
2931 
2932 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2933 	     device_property_read_bool(dev, "dma-can-stall")) ||
2934 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2935 		master->stall_enabled = true;
2936 
2937 	return &smmu->iommu;
2938 
2939 err_free_master:
2940 	kfree(master);
2941 	return ERR_PTR(ret);
2942 }
2943 
2944 static void arm_smmu_release_device(struct device *dev)
2945 {
2946 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2947 
2948 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2949 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2950 
2951 	/* Put the STE back to what arm_smmu_init_strtab() sets */
2952 	if (dev->iommu->require_direct)
2953 		arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
2954 	else
2955 		arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
2956 
2957 	arm_smmu_disable_pasid(master);
2958 	arm_smmu_remove_master(master);
2959 	if (master->cd_table.cdtab)
2960 		arm_smmu_free_cd_tables(master);
2961 	kfree(master);
2962 }
2963 
2964 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2965 {
2966 	struct iommu_group *group;
2967 
2968 	/*
2969 	 * We don't support devices sharing stream IDs other than PCI RID
2970 	 * aliases, since the necessary ID-to-device lookup becomes rather
2971 	 * impractical given a potential sparse 32-bit stream ID space.
2972 	 */
2973 	if (dev_is_pci(dev))
2974 		group = pci_device_group(dev);
2975 	else
2976 		group = generic_device_group(dev);
2977 
2978 	return group;
2979 }
2980 
2981 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2982 {
2983 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2984 	int ret = 0;
2985 
2986 	mutex_lock(&smmu_domain->init_mutex);
2987 	if (smmu_domain->smmu)
2988 		ret = -EPERM;
2989 	else
2990 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2991 	mutex_unlock(&smmu_domain->init_mutex);
2992 
2993 	return ret;
2994 }
2995 
2996 static int arm_smmu_of_xlate(struct device *dev,
2997 			     const struct of_phandle_args *args)
2998 {
2999 	return iommu_fwspec_add_ids(dev, args->args, 1);
3000 }
3001 
3002 static void arm_smmu_get_resv_regions(struct device *dev,
3003 				      struct list_head *head)
3004 {
3005 	struct iommu_resv_region *region;
3006 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
3007 
3008 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
3009 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
3010 	if (!region)
3011 		return;
3012 
3013 	list_add_tail(&region->list, head);
3014 
3015 	iommu_dma_get_resv_regions(dev, head);
3016 }
3017 
3018 static int arm_smmu_dev_enable_feature(struct device *dev,
3019 				       enum iommu_dev_features feat)
3020 {
3021 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3022 
3023 	if (!master)
3024 		return -ENODEV;
3025 
3026 	switch (feat) {
3027 	case IOMMU_DEV_FEAT_IOPF:
3028 		if (!arm_smmu_master_iopf_supported(master))
3029 			return -EINVAL;
3030 		if (master->iopf_enabled)
3031 			return -EBUSY;
3032 		master->iopf_enabled = true;
3033 		return 0;
3034 	case IOMMU_DEV_FEAT_SVA:
3035 		if (!arm_smmu_master_sva_supported(master))
3036 			return -EINVAL;
3037 		if (arm_smmu_master_sva_enabled(master))
3038 			return -EBUSY;
3039 		return arm_smmu_master_enable_sva(master);
3040 	default:
3041 		return -EINVAL;
3042 	}
3043 }
3044 
3045 static int arm_smmu_dev_disable_feature(struct device *dev,
3046 					enum iommu_dev_features feat)
3047 {
3048 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3049 
3050 	if (!master)
3051 		return -EINVAL;
3052 
3053 	switch (feat) {
3054 	case IOMMU_DEV_FEAT_IOPF:
3055 		if (!master->iopf_enabled)
3056 			return -EINVAL;
3057 		if (master->sva_enabled)
3058 			return -EBUSY;
3059 		master->iopf_enabled = false;
3060 		return 0;
3061 	case IOMMU_DEV_FEAT_SVA:
3062 		if (!arm_smmu_master_sva_enabled(master))
3063 			return -EINVAL;
3064 		return arm_smmu_master_disable_sva(master);
3065 	default:
3066 		return -EINVAL;
3067 	}
3068 }
3069 
3070 /*
3071  * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
3072  * PCIe link and save the data to memory by DMA. The hardware is restricted to
3073  * using identity mapping only.
3074  */
3075 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
3076 					 (pdev)->device == 0xa12e)
3077 
3078 static int arm_smmu_def_domain_type(struct device *dev)
3079 {
3080 	if (dev_is_pci(dev)) {
3081 		struct pci_dev *pdev = to_pci_dev(dev);
3082 
3083 		if (IS_HISI_PTT_DEVICE(pdev))
3084 			return IOMMU_DOMAIN_IDENTITY;
3085 	}
3086 
3087 	return 0;
3088 }
3089 
3090 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
3091 				      struct iommu_domain *domain)
3092 {
3093 	arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
3094 }
3095 
3096 static struct iommu_ops arm_smmu_ops = {
3097 	.identity_domain	= &arm_smmu_identity_domain,
3098 	.blocked_domain		= &arm_smmu_blocked_domain,
3099 	.capable		= arm_smmu_capable,
3100 	.domain_alloc		= arm_smmu_domain_alloc,
3101 	.domain_alloc_paging    = arm_smmu_domain_alloc_paging,
3102 	.probe_device		= arm_smmu_probe_device,
3103 	.release_device		= arm_smmu_release_device,
3104 	.device_group		= arm_smmu_device_group,
3105 	.of_xlate		= arm_smmu_of_xlate,
3106 	.get_resv_regions	= arm_smmu_get_resv_regions,
3107 	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
3108 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
3109 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
3110 	.page_response		= arm_smmu_page_response,
3111 	.def_domain_type	= arm_smmu_def_domain_type,
3112 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
3113 	.owner			= THIS_MODULE,
3114 	.default_domain_ops = &(const struct iommu_domain_ops) {
3115 		.attach_dev		= arm_smmu_attach_dev,
3116 		.map_pages		= arm_smmu_map_pages,
3117 		.unmap_pages		= arm_smmu_unmap_pages,
3118 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
3119 		.iotlb_sync		= arm_smmu_iotlb_sync,
3120 		.iova_to_phys		= arm_smmu_iova_to_phys,
3121 		.enable_nesting		= arm_smmu_enable_nesting,
3122 		.free			= arm_smmu_domain_free,
3123 	}
3124 };
3125 
3126 /* Probing and initialisation functions */
3127 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3128 				   struct arm_smmu_queue *q,
3129 				   void __iomem *page,
3130 				   unsigned long prod_off,
3131 				   unsigned long cons_off,
3132 				   size_t dwords, const char *name)
3133 {
3134 	size_t qsz;
3135 
3136 	do {
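		/*
		 * Queue size in bytes: (1 << max_n_shift) entries, each of
		 * 'dwords' 64-bit words. If the allocation fails, halve the
		 * number of entries and retry with a smaller queue, giving up
		 * once the size drops below a page.
		 */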
3137 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3138 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3139 					      GFP_KERNEL);
3140 		if (q->base || qsz < PAGE_SIZE)
3141 			break;
3142 
3143 		q->llq.max_n_shift--;
3144 	} while (1);
3145 
3146 	if (!q->base) {
3147 		dev_err(smmu->dev,
3148 			"failed to allocate queue (0x%zx bytes) for %s\n",
3149 			qsz, name);
3150 		return -ENOMEM;
3151 	}
3152 
3153 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
3154 		dev_info(smmu->dev, "allocated %u entries for %s\n",
3155 			 1 << q->llq.max_n_shift, name);
3156 	}
3157 
3158 	q->prod_reg	= page + prod_off;
3159 	q->cons_reg	= page + cons_off;
3160 	q->ent_dwords	= dwords;
3161 
3162 	q->q_base  = Q_BASE_RWA;
3163 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3164 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3165 
3166 	q->llq.prod = q->llq.cons = 0;
3167 	return 0;
3168 }
3169 
3170 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
3171 {
3172 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
3173 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3174 
3175 	atomic_set(&cmdq->owner_prod, 0);
3176 	atomic_set(&cmdq->lock, 0);
3177 
3178 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
3179 							      GFP_KERNEL);
3180 	if (!cmdq->valid_map)
3181 		return -ENOMEM;
3182 
3183 	return 0;
3184 }
3185 
3186 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3187 {
3188 	int ret;
3189 
3190 	/* cmdq */
3191 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
3192 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
3193 				      CMDQ_ENT_DWORDS, "cmdq");
3194 	if (ret)
3195 		return ret;
3196 
3197 	ret = arm_smmu_cmdq_init(smmu);
3198 	if (ret)
3199 		return ret;
3200 
3201 	/* evtq */
3202 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
3203 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
3204 				      EVTQ_ENT_DWORDS, "evtq");
3205 	if (ret)
3206 		return ret;
3207 
3208 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
3209 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
3210 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
3211 		if (!smmu->evtq.iopf)
3212 			return -ENOMEM;
3213 	}
3214 
3215 	/* priq */
3216 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3217 		return 0;
3218 
3219 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
3220 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
3221 				       PRIQ_ENT_DWORDS, "priq");
3222 }
3223 
3224 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
3225 {
3226 	unsigned int i;
3227 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3228 	void *strtab = smmu->strtab_cfg.strtab;
3229 
3230 	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
3231 				    sizeof(*cfg->l1_desc), GFP_KERNEL);
3232 	if (!cfg->l1_desc)
3233 		return -ENOMEM;
3234 
3235 	for (i = 0; i < cfg->num_l1_ents; ++i) {
3236 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3237 		strtab += STRTAB_L1_DESC_DWORDS << 3;
3238 	}
3239 
3240 	return 0;
3241 }
3242 
3243 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3244 {
3245 	void *strtab;
3246 	u64 reg;
3247 	u32 size, l1size;
3248 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3249 
3250 	/* Calculate the L1 size, capped to the SIDSIZE. */
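	/*
	 * That is, log2 of the number of L1 descriptors that fit in a
	 * 2^STRTAB_L1_SZ_SHIFT-byte table, never covering more StreamID bits
	 * than the hardware provides.
	 */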
3251 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3252 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3253 	cfg->num_l1_ents = 1 << size;
3254 
3255 	size += STRTAB_SPLIT;
3256 	if (size < smmu->sid_bits)
3257 		dev_warn(smmu->dev,
3258 			 "2-level strtab only covers %u/%u bits of SID\n",
3259 			 size, smmu->sid_bits);
3260 
3261 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3262 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3263 				     GFP_KERNEL);
3264 	if (!strtab) {
3265 		dev_err(smmu->dev,
3266 			"failed to allocate l1 stream table (%u bytes)\n",
3267 			l1size);
3268 		return -ENOMEM;
3269 	}
3270 	cfg->strtab = strtab;
3271 
3272 	/* Configure strtab_base_cfg for 2 levels */
3273 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3274 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3275 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3276 	cfg->strtab_base_cfg = reg;
3277 
3278 	return arm_smmu_init_l1_strtab(smmu);
3279 }
3280 
3281 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3282 {
3283 	void *strtab;
3284 	u64 reg;
3285 	u32 size;
3286 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3287 
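	/* One STE of STRTAB_STE_DWORDS 64-bit words per possible StreamID. */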
3288 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3289 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3290 				     GFP_KERNEL);
3291 	if (!strtab) {
3292 		dev_err(smmu->dev,
3293 			"failed to allocate linear stream table (%u bytes)\n",
3294 			size);
3295 		return -ENOMEM;
3296 	}
3297 	cfg->strtab = strtab;
3298 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3299 
3300 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3301 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3302 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3303 	cfg->strtab_base_cfg = reg;
3304 
3305 	arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents);
3306 	return 0;
3307 }
3308 
3309 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3310 {
3311 	u64 reg;
3312 	int ret;
3313 
3314 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3315 		ret = arm_smmu_init_strtab_2lvl(smmu);
3316 	else
3317 		ret = arm_smmu_init_strtab_linear(smmu);
3318 
3319 	if (ret)
3320 		return ret;
3321 
3322 	/* Set the strtab base address */
3323 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3324 	reg |= STRTAB_BASE_RA;
3325 	smmu->strtab_cfg.strtab_base = reg;
3326 
3327 	ida_init(&smmu->vmid_map);
3328 
3329 	return 0;
3330 }
3331 
3332 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3333 {
3334 	int ret;
3335 
3336 	mutex_init(&smmu->streams_mutex);
3337 	smmu->streams = RB_ROOT;
3338 
3339 	ret = arm_smmu_init_queues(smmu);
3340 	if (ret)
3341 		return ret;
3342 
3343 	return arm_smmu_init_strtab(smmu);
3344 }
3345 
3346 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3347 				   unsigned int reg_off, unsigned int ack_off)
3348 {
3349 	u32 reg;
3350 
3351 	writel_relaxed(val, smmu->base + reg_off);
3352 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3353 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3354 }
3355 
3356 /* GBPA is "special" */
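/*
 * Updates are performed by polling GBPA.Update until it reads back as zero,
 * writing the new fields with Update set, and then polling again until the
 * SMMU clears Update to acknowledge the change, which is what the helper
 * below does.
 */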
3357 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3358 {
3359 	int ret;
3360 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3361 
3362 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3363 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3364 	if (ret)
3365 		return ret;
3366 
3367 	reg &= ~clr;
3368 	reg |= set;
3369 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3370 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3371 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3372 
3373 	if (ret)
3374 		dev_err(smmu->dev, "GBPA not responding to update\n");
3375 	return ret;
3376 }
3377 
3378 static void arm_smmu_free_msis(void *data)
3379 {
3380 	struct device *dev = data;
3381 
3382 	platform_device_msi_free_irqs_all(dev);
3383 }
3384 
3385 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3386 {
3387 	phys_addr_t doorbell;
3388 	struct device *dev = msi_desc_to_dev(desc);
3389 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3390 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3391 
3392 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3393 	doorbell &= MSI_CFG0_ADDR_MASK;
3394 
3395 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3396 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3397 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3398 }
3399 
3400 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3401 {
3402 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3403 	struct device *dev = smmu->dev;
3404 
3405 	/* Clear the MSI address regs */
3406 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3407 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3408 
3409 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3410 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3411 	else
3412 		nvec--;
3413 
3414 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3415 		return;
3416 
3417 	if (!dev->msi.domain) {
3418 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3419 		return;
3420 	}
3421 
3422 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3423 	ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3424 	if (ret) {
3425 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3426 		return;
3427 	}
3428 
3429 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3430 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3431 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3432 
3433 	/* Add callback to free MSIs on teardown */
3434 	devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
3435 }
3436 
3437 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3438 {
3439 	int irq, ret;
3440 
3441 	arm_smmu_setup_msis(smmu);
3442 
3443 	/* Request interrupt lines */
3444 	irq = smmu->evtq.q.irq;
3445 	if (irq) {
3446 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3447 						arm_smmu_evtq_thread,
3448 						IRQF_ONESHOT,
3449 						"arm-smmu-v3-evtq", smmu);
3450 		if (ret < 0)
3451 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3452 	} else {
3453 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3454 	}
3455 
3456 	irq = smmu->gerr_irq;
3457 	if (irq) {
3458 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3459 				       0, "arm-smmu-v3-gerror", smmu);
3460 		if (ret < 0)
3461 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3462 	} else {
3463 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3464 	}
3465 
3466 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3467 		irq = smmu->priq.q.irq;
3468 		if (irq) {
3469 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3470 							arm_smmu_priq_thread,
3471 							IRQF_ONESHOT,
3472 							"arm-smmu-v3-priq",
3473 							smmu);
3474 			if (ret < 0)
3475 				dev_warn(smmu->dev,
3476 					 "failed to enable priq irq\n");
3477 		} else {
3478 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3479 		}
3480 	}
3481 }
3482 
3483 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3484 {
3485 	int ret, irq;
3486 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3487 
3488 	/* Disable IRQs first */
3489 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3490 				      ARM_SMMU_IRQ_CTRLACK);
3491 	if (ret) {
3492 		dev_err(smmu->dev, "failed to disable irqs\n");
3493 		return ret;
3494 	}
3495 
3496 	irq = smmu->combined_irq;
3497 	if (irq) {
3498 		/*
3499 		 * Cavium ThunderX2 implementation doesn't support unique irq
3500 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3501 		 */
3502 		ret = devm_request_threaded_irq(smmu->dev, irq,
3503 					arm_smmu_combined_irq_handler,
3504 					arm_smmu_combined_irq_thread,
3505 					IRQF_ONESHOT,
3506 					"arm-smmu-v3-combined-irq", smmu);
3507 		if (ret < 0)
3508 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3509 	} else {
3510 		arm_smmu_setup_unique_irqs(smmu);
	}
3511 
3512 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3513 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3514 
3515 	/* Enable interrupt generation on the SMMU */
3516 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3517 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3518 	if (ret)
3519 		dev_warn(smmu->dev, "failed to enable irqs\n");
3520 
3521 	return 0;
3522 }
3523 
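/* Clear CR0 (and wait for CR0ACK) to disable the SMMU and all of its queues. */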
3524 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3525 {
3526 	int ret;
3527 
3528 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3529 	if (ret)
3530 		dev_err(smmu->dev, "failed to clear cr0\n");
3531 
3532 	return ret;
3533 }
3534 
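/*
 * Bring the SMMU up from a known state: disable it (aborting incoming
 * traffic via GBPA if it was left enabled), program the table/queue memory
 * attributes and the stream table base, then enable the command queue first
 * so that the configuration and TLB invalidation commands can be issued
 * before the event queue, PRI queue, ATS checking and interrupts are set up.
 * SMMUEN is written last; in a kdump kernel the event and PRI queues are
 * left disabled.
 */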
3535 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
3536 {
3537 	int ret;
3538 	u32 reg, enables;
3539 	struct arm_smmu_cmdq_ent cmd;
3540 
3541 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3542 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3543 	if (reg & CR0_SMMUEN) {
3544 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3545 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3546 	}
3547 
3548 	ret = arm_smmu_device_disable(smmu);
3549 	if (ret)
3550 		return ret;
3551 
3552 	/* CR1 (table and queue memory attributes) */
3553 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3554 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3555 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3556 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3557 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3558 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3559 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3560 
3561 	/* CR2 (private TLB maintenance, record invalid StreamIDs, E2H if supported) */
3562 	reg = CR2_PTM | CR2_RECINVSID;
3563 
3564 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3565 		reg |= CR2_E2H;
3566 
3567 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3568 
3569 	/* Stream table */
3570 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3571 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3572 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3573 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3574 
3575 	/* Command queue */
3576 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3577 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3578 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3579 
3580 	enables = CR0_CMDQEN;
3581 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3582 				      ARM_SMMU_CR0ACK);
3583 	if (ret) {
3584 		dev_err(smmu->dev, "failed to enable command queue\n");
3585 		return ret;
3586 	}
3587 
3588 	/* Invalidate any cached configuration */
3589 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3590 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3591 
3592 	/* Invalidate any stale TLB entries */
3593 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3594 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3595 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3596 	}
3597 
3598 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3599 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3600 
3601 	/* Event queue */
3602 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3603 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3604 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3605 
3606 	enables |= CR0_EVTQEN;
3607 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3608 				      ARM_SMMU_CR0ACK);
3609 	if (ret) {
3610 		dev_err(smmu->dev, "failed to enable event queue\n");
3611 		return ret;
3612 	}
3613 
3614 	/* PRI queue */
3615 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3616 		writeq_relaxed(smmu->priq.q.q_base,
3617 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3618 		writel_relaxed(smmu->priq.q.llq.prod,
3619 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3620 		writel_relaxed(smmu->priq.q.llq.cons,
3621 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3622 
3623 		enables |= CR0_PRIQEN;
3624 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3625 					      ARM_SMMU_CR0ACK);
3626 		if (ret) {
3627 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3628 			return ret;
3629 		}
3630 	}
3631 
3632 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3633 		enables |= CR0_ATSCHK;
3634 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3635 					      ARM_SMMU_CR0ACK);
3636 		if (ret) {
3637 			dev_err(smmu->dev, "failed to enable ATS check\n");
3638 			return ret;
3639 		}
3640 	}
3641 
3642 	ret = arm_smmu_setup_irqs(smmu);
3643 	if (ret) {
3644 		dev_err(smmu->dev, "failed to setup irqs\n");
3645 		return ret;
3646 	}
3647 
3648 	if (is_kdump_kernel())
3649 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3650 
3651 	/* Enable the SMMU interface */
3652 	enables |= CR0_SMMUEN;
3653 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3654 				      ARM_SMMU_CR0ACK);
3655 	if (ret) {
3656 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3657 		return ret;
3658 	}
3659 
3660 	return 0;
3661 }
3662 
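/*
 * Implementations with known errata, identified via the IIDR register, have
 * the affected features masked out below.
 */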
3663 #define IIDR_IMPLEMENTER_ARM		0x43b
3664 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
3665 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
3666 
3667 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3668 {
3669 	u32 reg;
3670 	unsigned int implementer, productid, variant, revision;
3671 
3672 	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3673 	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3674 	productid = FIELD_GET(IIDR_PRODUCTID, reg);
3675 	variant = FIELD_GET(IIDR_VARIANT, reg);
3676 	revision = FIELD_GET(IIDR_REVISION, reg);
3677 
3678 	switch (implementer) {
3679 	case IIDR_IMPLEMENTER_ARM:
3680 		switch (productid) {
3681 		case IIDR_PRODUCTID_ARM_MMU_600:
3682 			/* Arm erratum 1076982 */
3683 			if (variant == 0 && revision <= 2)
3684 				smmu->features &= ~ARM_SMMU_FEAT_SEV;
3685 			/* Arm erratum 1209401 */
3686 			if (variant < 2)
3687 				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3688 			break;
3689 		case IIDR_PRODUCTID_ARM_MMU_700:
3690 			/* Arm erratum 2812531 */
3691 			smmu->features &= ~ARM_SMMU_FEAT_BTM;
3692 			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3693 			/* Arm errata 2268618, 2812531 */
3694 			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3695 			break;
3696 		}
3697 		break;
3698 	}
3699 }
3700 
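/*
 * Read the ID registers (IDR0/1/3/5) and convert what the hardware
 * advertises into feature flags, queue sizes, ASID/VMID/SID/SSID widths and
 * the supported page and address sizes, then apply any IIDR-based errata
 * workarounds.
 */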
3701 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3702 {
3703 	u32 reg;
3704 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3705 
3706 	/* IDR0 */
3707 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3708 
3709 	/* 2-level structures */
3710 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3711 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3712 
3713 	if (reg & IDR0_CD2L)
3714 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3715 
3716 	/*
3717 	 * Translation table endianness.
3718 	 * We currently require the same endianness as the CPU, but this
3719 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3720 	 */
3721 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3722 	case IDR0_TTENDIAN_MIXED:
3723 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3724 		break;
3725 #ifdef __BIG_ENDIAN
3726 	case IDR0_TTENDIAN_BE:
3727 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3728 		break;
3729 #else
3730 	case IDR0_TTENDIAN_LE:
3731 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3732 		break;
3733 #endif
3734 	default:
3735 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3736 		return -ENXIO;
3737 	}
3738 
3739 	/* Boolean feature flags */
3740 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3741 		smmu->features |= ARM_SMMU_FEAT_PRI;
3742 
3743 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3744 		smmu->features |= ARM_SMMU_FEAT_ATS;
3745 
3746 	if (reg & IDR0_SEV)
3747 		smmu->features |= ARM_SMMU_FEAT_SEV;
3748 
3749 	if (reg & IDR0_MSI) {
3750 		smmu->features |= ARM_SMMU_FEAT_MSI;
3751 		if (coherent && !disable_msipolling)
3752 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3753 	}
3754 
3755 	if (reg & IDR0_HYP) {
3756 		smmu->features |= ARM_SMMU_FEAT_HYP;
3757 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3758 			smmu->features |= ARM_SMMU_FEAT_E2H;
3759 	}
3760 
3761 	/*
3762 	 * The coherency feature as set by FW is used in preference to the ID
3763 	 * register, but warn on mismatch.
3764 	 */
3765 	if (!!(reg & IDR0_COHACC) != coherent)
3766 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3767 			 coherent ? "true" : "false");
3768 
3769 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3770 	case IDR0_STALL_MODEL_FORCE:
3771 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3772 		fallthrough;
3773 	case IDR0_STALL_MODEL_STALL:
3774 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3775 	}
3776 
3777 	if (reg & IDR0_S1P)
3778 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3779 
3780 	if (reg & IDR0_S2P)
3781 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3782 
3783 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3784 		dev_err(smmu->dev, "no translation support!\n");
3785 		return -ENXIO;
3786 	}
3787 
3788 	/* We only support the AArch64 table format at present */
3789 	switch (FIELD_GET(IDR0_TTF, reg)) {
3790 	case IDR0_TTF_AARCH32_64:
3791 		smmu->ias = 40;
3792 		fallthrough;
3793 	case IDR0_TTF_AARCH64:
3794 		break;
3795 	default:
3796 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3797 		return -ENXIO;
3798 	}
3799 
3800 	/* ASID/VMID sizes */
3801 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3802 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3803 
3804 	/* IDR1 */
3805 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3806 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3807 		dev_err(smmu->dev, "embedded implementation not supported\n");
3808 		return -ENXIO;
3809 	}
3810 
3811 	if (reg & IDR1_ATTR_TYPES_OVR)
3812 		smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;
3813 
3814 	/* Queue sizes, capped to ensure natural alignment */
3815 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3816 					     FIELD_GET(IDR1_CMDQS, reg));
3817 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3818 		/*
3819 		 * We don't support splitting up batches, so one batch of
3820 		 * commands plus an extra sync needs to fit inside the command
3821 		 * queue. There's also no way we can handle the weird alignment
3822 		 * restrictions on the base pointer for a unit-length queue.
3823 		 */
3824 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3825 			CMDQ_BATCH_ENTRIES);
3826 		return -ENXIO;
3827 	}
3828 
3829 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3830 					     FIELD_GET(IDR1_EVTQS, reg));
3831 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3832 					     FIELD_GET(IDR1_PRIQS, reg));
3833 
3834 	/* SID/SSID sizes */
3835 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3836 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3837 	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3838 
3839 	/*
3840 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3841 	 * table, use a linear table instead.
3842 	 */
3843 	if (smmu->sid_bits <= STRTAB_SPLIT)
3844 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3845 
3846 	/* IDR3 */
3847 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3848 	if (FIELD_GET(IDR3_RIL, reg))
3849 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3850 
3851 	/* IDR5 */
3852 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3853 
3854 	/* Maximum number of outstanding stalls */
3855 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3856 
3857 	/* Page sizes */
3858 	if (reg & IDR5_GRAN64K)
3859 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3860 	if (reg & IDR5_GRAN16K)
3861 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3862 	if (reg & IDR5_GRAN4K)
3863 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3864 
3865 	/* Input address size */
3866 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3867 		smmu->features |= ARM_SMMU_FEAT_VAX;
3868 
3869 	/* Output address size */
3870 	switch (FIELD_GET(IDR5_OAS, reg)) {
3871 	case IDR5_OAS_32_BIT:
3872 		smmu->oas = 32;
3873 		break;
3874 	case IDR5_OAS_36_BIT:
3875 		smmu->oas = 36;
3876 		break;
3877 	case IDR5_OAS_40_BIT:
3878 		smmu->oas = 40;
3879 		break;
3880 	case IDR5_OAS_42_BIT:
3881 		smmu->oas = 42;
3882 		break;
3883 	case IDR5_OAS_44_BIT:
3884 		smmu->oas = 44;
3885 		break;
3886 	case IDR5_OAS_52_BIT:
3887 		smmu->oas = 52;
3888 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3889 		break;
3890 	default:
3891 		dev_info(smmu->dev,
3892 			"unknown output address size. Truncating to 48-bit\n");
3893 		fallthrough;
3894 	case IDR5_OAS_48_BIT:
3895 		smmu->oas = 48;
3896 	}
3897 
3898 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3899 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3900 	else
3901 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3902 
3903 	/* Set the DMA mask for our table walker */
3904 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3905 		dev_warn(smmu->dev,
3906 			 "failed to set DMA mask for table walker\n");
3907 
3908 	smmu->ias = max(smmu->ias, smmu->oas);
3909 
3910 	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3911 	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3912 		smmu->features |= ARM_SMMU_FEAT_NESTING;
3913 
3914 	arm_smmu_device_iidr_probe(smmu);
3915 
3916 	if (arm_smmu_sva_supported(smmu))
3917 		smmu->features |= ARM_SMMU_FEAT_SVA;
3918 
3919 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3920 		 smmu->ias, smmu->oas, smmu->features);
3921 	return 0;
3922 }
3923 
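/*
 * ACPI probing: implementation quirks are keyed off the IORT SMMUv3 node's
 * model field, and coherency is taken from the COHACC override flag in
 * preference to IDR0.
 */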
3924 #ifdef CONFIG_ACPI
3925 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3926 {
3927 	switch (model) {
3928 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3929 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3930 		break;
3931 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3932 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3933 		break;
3934 	}
3935 
3936 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3937 }
3938 
3939 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3940 				      struct arm_smmu_device *smmu)
3941 {
3942 	struct acpi_iort_smmu_v3 *iort_smmu;
3943 	struct device *dev = smmu->dev;
3944 	struct acpi_iort_node *node;
3945 
3946 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3947 
3948 	/* Retrieve SMMUv3 specific data */
3949 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3950 
3951 	acpi_smmu_get_options(iort_smmu->model, smmu);
3952 
3953 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3954 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3955 
3956 	return 0;
3957 }
3958 #else
3959 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3960 					     struct arm_smmu_device *smmu)
3961 {
3962 	return -ENODEV;
3963 }
3964 #endif
3965 
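/*
 * Device-tree probing: the SMMU node must have #iommu-cells = 1 (a single
 * StreamID cell); driver options and DMA coherency also come from the DT.
 */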
3966 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3967 				    struct arm_smmu_device *smmu)
3968 {
3969 	struct device *dev = &pdev->dev;
3970 	u32 cells;
3971 	int ret = -EINVAL;
3972 
3973 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3974 		dev_err(dev, "missing #iommu-cells property\n");
3975 	else if (cells != 1)
3976 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3977 	else
3978 		ret = 0;
3979 
3980 	parse_driver_options(smmu);
3981 
3982 	if (of_dma_is_coherent(dev->of_node))
3983 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3984 
3985 	return ret;
3986 }
3987 
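/*
 * Implementations flagged with PAGE0_REGS_ONLY expose only register page 0,
 * so their MMIO region is 64K rather than the usual 128K (pages 0 and 1).
 */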
3988 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3989 {
3990 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3991 		return SZ_64K;
3992 	else
3993 		return SZ_128K;
3994 }
3995 
3996 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3997 				      resource_size_t size)
3998 {
3999 	struct resource res = DEFINE_RES_MEM(start, size);
4000 
4001 	return devm_ioremap_resource(dev, &res);
4002 }
4003 
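/*
 * IORT RMR nodes describe StreamIDs whose firmware-established mappings must
 * keep working, so install bypass STEs for them before the SMMU is enabled.
 */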
4004 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
4005 {
4006 	struct list_head rmr_list;
4007 	struct iommu_resv_region *e;
4008 
4009 	INIT_LIST_HEAD(&rmr_list);
4010 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4011 
4012 	list_for_each_entry(e, &rmr_list, list) {
4013 		struct iommu_iort_rmr_data *rmr;
4014 		int ret, i;
4015 
4016 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
4017 		for (i = 0; i < rmr->num_sids; i++) {
4018 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
4019 			if (ret) {
4020 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
4021 					rmr->sids[i]);
4022 				continue;
4023 			}
4024 
4025 			/*
4026 			 * The stream table is not live yet; these bypass STEs take
4027 			 * effect when the device is reset later in probe.
4028 			 */
4029 			arm_smmu_make_bypass_ste(smmu,
4030 				arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
4031 		}
4032 	}
4033 
4034 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4035 }
4036 
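/*
 * Probe path: gather firmware configuration (DT or ACPI), map the register
 * pages, pick up the interrupt lines, probe the hardware ID registers,
 * allocate the in-memory structures and install any RMR bypass STEs, then
 * reset/enable the device and register it with the IOMMU core.
 */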
4037 static int arm_smmu_device_probe(struct platform_device *pdev)
4038 {
4039 	int irq, ret;
4040 	struct resource *res;
4041 	resource_size_t ioaddr;
4042 	struct arm_smmu_device *smmu;
4043 	struct device *dev = &pdev->dev;
4044 
4045 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4046 	if (!smmu)
4047 		return -ENOMEM;
4048 	smmu->dev = dev;
4049 
4050 	if (dev->of_node) {
4051 		ret = arm_smmu_device_dt_probe(pdev, smmu);
4052 	} else {
4053 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
4054 	}
4055 	if (ret)
4056 		return ret;
4057 
4058 	/* Base address */
4059 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
4060 	if (!res)
4061 		return -EINVAL;
4062 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4063 		dev_err(dev, "MMIO region too small (%pr)\n", res);
4064 		return -EINVAL;
4065 	}
4066 	ioaddr = res->start;
4067 
4068 	/*
4069 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4070 	 * the PMCG registers which are reserved by the PMU driver.
4071 	 */
4072 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4073 	if (IS_ERR(smmu->base))
4074 		return PTR_ERR(smmu->base);
4075 
4076 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
4077 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4078 					       ARM_SMMU_REG_SZ);
4079 		if (IS_ERR(smmu->page1))
4080 			return PTR_ERR(smmu->page1);
4081 	} else {
4082 		smmu->page1 = smmu->base;
4083 	}
4084 
4085 	/* Interrupt lines */
4086 
4087 	irq = platform_get_irq_byname_optional(pdev, "combined");
4088 	if (irq > 0) {
4089 		smmu->combined_irq = irq;
4090 	} else {
4091 		irq = platform_get_irq_byname_optional(pdev, "eventq");
4092 		if (irq > 0)
4093 			smmu->evtq.q.irq = irq;
4094 
4095 		irq = platform_get_irq_byname_optional(pdev, "priq");
4096 		if (irq > 0)
4097 			smmu->priq.q.irq = irq;
4098 
4099 		irq = platform_get_irq_byname_optional(pdev, "gerror");
4100 		if (irq > 0)
4101 			smmu->gerr_irq = irq;
4102 	}
4103 	/* Probe the h/w */
4104 	ret = arm_smmu_device_hw_probe(smmu);
4105 	if (ret)
4106 		return ret;
4107 
4108 	/* Initialise in-memory data structures */
4109 	ret = arm_smmu_init_structures(smmu);
4110 	if (ret)
4111 		return ret;
4112 
4113 	/* Record our private device structure */
4114 	platform_set_drvdata(pdev, smmu);
4115 
4116 	/* Check for RMRs and install bypass STEs if any */
4117 	arm_smmu_rmr_install_bypass_ste(smmu);
4118 
4119 	/* Reset the device */
4120 	ret = arm_smmu_device_reset(smmu);
4121 	if (ret)
4122 		return ret;
4123 
4124 	/* And we're up. Go go go! */
4125 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4126 				     "smmu3.%pa", &ioaddr);
4127 	if (ret)
4128 		return ret;
4129 
4130 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
4131 	if (ret) {
4132 		dev_err(dev, "Failed to register iommu\n");
4133 		iommu_device_sysfs_remove(&smmu->iommu);
4134 		return ret;
4135 	}
4136 
4137 	return 0;
4138 }
4139 
4140 static void arm_smmu_device_remove(struct platform_device *pdev)
4141 {
4142 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4143 
4144 	iommu_device_unregister(&smmu->iommu);
4145 	iommu_device_sysfs_remove(&smmu->iommu);
4146 	arm_smmu_device_disable(smmu);
4147 	iopf_queue_free(smmu->evtq.iopf);
4148 	ida_destroy(&smmu->vmid_map);
4149 }
4150 
4151 static void arm_smmu_device_shutdown(struct platform_device *pdev)
4152 {
4153 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4154 
4155 	arm_smmu_device_disable(smmu);
4156 }
4157 
4158 static const struct of_device_id arm_smmu_of_match[] = {
4159 	{ .compatible = "arm,smmu-v3", },
4160 	{ },
4161 };
4162 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4163 
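/*
 * Wait for any outstanding SVA mmu_notifier callbacks to finish before the
 * driver's notifier ops can disappear, then unregister the driver.
 */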
4164 static void arm_smmu_driver_unregister(struct platform_driver *drv)
4165 {
4166 	arm_smmu_sva_notifier_synchronize();
4167 	platform_driver_unregister(drv);
4168 }
4169 
4170 static struct platform_driver arm_smmu_driver = {
4171 	.driver	= {
4172 		.name			= "arm-smmu-v3",
4173 		.of_match_table		= arm_smmu_of_match,
4174 		.suppress_bind_attrs	= true,
4175 	},
4176 	.probe	= arm_smmu_device_probe,
4177 	.remove_new = arm_smmu_device_remove,
4178 	.shutdown = arm_smmu_device_shutdown,
4179 };
4180 module_driver(arm_smmu_driver, platform_driver_register,
4181 	      arm_smmu_driver_unregister);
4182 
4183 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4184 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4185 MODULE_ALIAS("platform:arm-smmu-v3");
4186 MODULE_LICENSE("GPL v2");
4187