xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision ee057c8c194b9283f4137b253b70e292693a39f0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 #include <kunit/visibility.h>
30 #include <uapi/linux/iommufd.h>
31 
32 #include "arm-smmu-v3.h"
33 #include "../../dma-iommu.h"
34 
35 static bool disable_msipolling;
36 module_param(disable_msipolling, bool, 0444);
37 MODULE_PARM_DESC(disable_msipolling,
38 	"Disable MSI-based polling for CMD_SYNC completion.");
39 
40 static struct iommu_ops arm_smmu_ops;
41 static struct iommu_dirty_ops arm_smmu_dirty_ops;
42 
43 enum arm_smmu_msi_index {
44 	EVTQ_MSI_INDEX,
45 	GERROR_MSI_INDEX,
46 	PRIQ_MSI_INDEX,
47 	ARM_SMMU_MAX_MSIS,
48 };
49 
50 #define NUM_ENTRY_QWORDS 8
51 static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
52 static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));
53 
54 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
55 	[EVTQ_MSI_INDEX] = {
56 		ARM_SMMU_EVTQ_IRQ_CFG0,
57 		ARM_SMMU_EVTQ_IRQ_CFG1,
58 		ARM_SMMU_EVTQ_IRQ_CFG2,
59 	},
60 	[GERROR_MSI_INDEX] = {
61 		ARM_SMMU_GERROR_IRQ_CFG0,
62 		ARM_SMMU_GERROR_IRQ_CFG1,
63 		ARM_SMMU_GERROR_IRQ_CFG2,
64 	},
65 	[PRIQ_MSI_INDEX] = {
66 		ARM_SMMU_PRIQ_IRQ_CFG0,
67 		ARM_SMMU_PRIQ_IRQ_CFG1,
68 		ARM_SMMU_PRIQ_IRQ_CFG2,
69 	},
70 };
71 
72 struct arm_smmu_option_prop {
73 	u32 opt;
74 	const char *prop;
75 };
76 
77 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
78 DEFINE_MUTEX(arm_smmu_asid_lock);
79 
80 static struct arm_smmu_option_prop arm_smmu_options[] = {
81 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
82 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
83 	{ 0, NULL},
84 };
85 
86 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
87 				    struct arm_smmu_device *smmu, u32 flags);
88 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
89 
90 static void parse_driver_options(struct arm_smmu_device *smmu)
91 {
92 	int i = 0;
93 
94 	do {
95 		if (of_property_read_bool(smmu->dev->of_node,
96 						arm_smmu_options[i].prop)) {
97 			smmu->options |= arm_smmu_options[i].opt;
98 			dev_notice(smmu->dev, "option %s\n",
99 				arm_smmu_options[i].prop);
100 		}
101 	} while (arm_smmu_options[++i].opt);
102 }
103 
104 /* Low-level queue manipulation functions */
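/*
 * A note on the prod/cons encoding used by the helpers below: the Q_IDX,
 * Q_WRP and Q_OVF macros treat the low max_n_shift bits of a pointer as the
 * queue index, the next bit up as the wrap flag that flips on every pass
 * through the queue, and the top bit as the hardware overflow flag. As a
 * rough sketch, with max_n_shift == 8 the queue holds 256 entries and
 * prod == 0x105 means index 5 with the wrap bit set, so an equal index but a
 * differing wrap bit is what distinguishes "full" from "empty" below.
 */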
105 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
106 {
107 	u32 space, prod, cons;
108 
109 	prod = Q_IDX(q, q->prod);
110 	cons = Q_IDX(q, q->cons);
111 
112 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
113 		space = (1 << q->max_n_shift) - (prod - cons);
114 	else
115 		space = cons - prod;
116 
117 	return space >= n;
118 }
119 
120 static bool queue_full(struct arm_smmu_ll_queue *q)
121 {
122 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
123 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
124 }
125 
126 static bool queue_empty(struct arm_smmu_ll_queue *q)
127 {
128 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
129 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
130 }
131 
132 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
133 {
134 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
135 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
136 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
137 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
138 }
139 
140 static void queue_sync_cons_out(struct arm_smmu_queue *q)
141 {
142 	/*
143 	 * Ensure that all CPU accesses (reads and writes) to the queue
144 	 * are complete before we update the cons pointer.
145 	 */
146 	__iomb();
147 	writel_relaxed(q->llq.cons, q->cons_reg);
148 }
149 
150 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
151 {
152 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
153 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
154 }
155 
156 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
157 {
158 	struct arm_smmu_ll_queue *llq = &q->llq;
159 
160 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
161 		return;
162 
163 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
164 		      Q_IDX(llq, llq->cons);
165 	queue_sync_cons_out(q);
166 }
167 
168 static int queue_sync_prod_in(struct arm_smmu_queue *q)
169 {
170 	u32 prod;
171 	int ret = 0;
172 
173 	/*
174 	 * We can't use the _relaxed() variant here, as we must prevent
175 	 * speculative reads of the queue before we have determined that
176 	 * prod has indeed moved.
177 	 */
178 	prod = readl(q->prod_reg);
179 
180 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
181 		ret = -EOVERFLOW;
182 
183 	q->llq.prod = prod;
184 	return ret;
185 }
186 
187 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
188 {
189 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
190 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
191 }
192 
193 static void queue_poll_init(struct arm_smmu_device *smmu,
194 			    struct arm_smmu_queue_poll *qp)
195 {
196 	qp->delay = 1;
197 	qp->spin_cnt = 0;
198 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
199 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
200 }
201 
202 static int queue_poll(struct arm_smmu_queue_poll *qp)
203 {
204 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
205 		return -ETIMEDOUT;
206 
207 	if (qp->wfe) {
208 		wfe();
209 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
210 		cpu_relax();
211 	} else {
212 		udelay(qp->delay);
213 		qp->delay *= 2;
214 		qp->spin_cnt = 0;
215 	}
216 
217 	return 0;
218 }
219 
220 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
221 {
222 	int i;
223 
224 	for (i = 0; i < n_dwords; ++i)
225 		*dst++ = cpu_to_le64(*src++);
226 }
227 
228 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
229 {
230 	int i;
231 
232 	for (i = 0; i < n_dwords; ++i)
233 		*dst++ = le64_to_cpu(*src++);
234 }
235 
236 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
237 {
238 	if (queue_empty(&q->llq))
239 		return -EAGAIN;
240 
241 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
242 	queue_inc_cons(&q->llq);
243 	queue_sync_cons_out(q);
244 	return 0;
245 }
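/*
 * Consumers (arm_smmu_evtq_thread() below, for instance) typically drain the
 * queue with while (!queue_remove_raw(q, ent)) and then call
 * queue_sync_prod_in() to pick up any entries that arrived in the meantime.
 */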
246 
247 /* High-level queue accessors */
248 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
249 {
250 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
251 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
252 
253 	switch (ent->opcode) {
254 	case CMDQ_OP_TLBI_EL2_ALL:
255 	case CMDQ_OP_TLBI_NSNH_ALL:
256 		break;
257 	case CMDQ_OP_PREFETCH_CFG:
258 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
259 		break;
260 	case CMDQ_OP_CFGI_CD:
261 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
262 		fallthrough;
263 	case CMDQ_OP_CFGI_STE:
264 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
265 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
266 		break;
267 	case CMDQ_OP_CFGI_CD_ALL:
268 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
269 		break;
270 	case CMDQ_OP_CFGI_ALL:
271 		/* Cover the entire SID range */
272 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
273 		break;
274 	case CMDQ_OP_TLBI_NH_VA:
275 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
276 		fallthrough;
277 	case CMDQ_OP_TLBI_EL2_VA:
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
279 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
280 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
282 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
283 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
284 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
285 		break;
286 	case CMDQ_OP_TLBI_S2_IPA:
287 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
289 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
290 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
291 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
292 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
293 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
294 		break;
295 	case CMDQ_OP_TLBI_NH_ASID:
296 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
297 		fallthrough;
298 	case CMDQ_OP_TLBI_S12_VMALL:
299 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
300 		break;
301 	case CMDQ_OP_TLBI_EL2_ASID:
302 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
303 		break;
304 	case CMDQ_OP_ATC_INV:
305 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
306 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
307 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
308 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
309 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
310 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
311 		break;
312 	case CMDQ_OP_PRI_RESP:
313 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
314 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
315 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
316 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
317 		switch (ent->pri.resp) {
318 		case PRI_RESP_DENY:
319 		case PRI_RESP_FAIL:
320 		case PRI_RESP_SUCC:
321 			break;
322 		default:
323 			return -EINVAL;
324 		}
325 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
326 		break;
327 	case CMDQ_OP_RESUME:
328 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
329 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
330 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
331 		break;
332 	case CMDQ_OP_CMD_SYNC:
333 		if (ent->sync.msiaddr) {
334 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
335 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
336 		} else {
337 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
338 		}
339 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
340 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
341 		break;
342 	default:
343 		return -ENOENT;
344 	}
345 
346 	return 0;
347 }
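/*
 * Typical usage (see arm_smmu_tlb_inv_asid() below): callers fill a
 * struct arm_smmu_cmdq_ent on the stack and hand it to one of the issue
 * helpers, which builds the raw command and inserts it, e.g.:
 *
 *	struct arm_smmu_cmdq_ent cmd = {
 *		.opcode    = CMDQ_OP_TLBI_NH_ASID,
 *		.tlbi.asid = asid,
 *	};
 *
 *	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 */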
348 
349 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
350 {
351 	return &smmu->cmdq;
352 }
353 
354 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
355 					 struct arm_smmu_queue *q, u32 prod)
356 {
357 	struct arm_smmu_cmdq_ent ent = {
358 		.opcode = CMDQ_OP_CMD_SYNC,
359 	};
360 
361 	/*
362 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
363 	 * payload, so the write will zero the entire command on that platform.
364 	 */
365 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
366 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
367 				   q->ent_dwords * 8;
368 	}
369 
370 	arm_smmu_cmdq_build_cmd(cmd, &ent);
371 }
372 
373 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
374 				     struct arm_smmu_queue *q)
375 {
376 	static const char * const cerror_str[] = {
377 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
378 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
379 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
380 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
381 	};
382 
383 	int i;
384 	u64 cmd[CMDQ_ENT_DWORDS];
385 	u32 cons = readl_relaxed(q->cons_reg);
386 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
387 	struct arm_smmu_cmdq_ent cmd_sync = {
388 		.opcode = CMDQ_OP_CMD_SYNC,
389 	};
390 
391 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
392 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
393 
394 	switch (idx) {
395 	case CMDQ_ERR_CERROR_ABT_IDX:
396 		dev_err(smmu->dev, "retrying command fetch\n");
397 		return;
398 	case CMDQ_ERR_CERROR_NONE_IDX:
399 		return;
400 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
401 		/*
402 		 * ATC Invalidation Completion timeout. CONS is still pointing
403 		 * at the CMD_SYNC. Attempt to complete other pending commands
404 		 * by repeating the CMD_SYNC, though we might well end up back
405 		 * here since the ATC invalidation may still be pending.
406 		 */
407 		return;
408 	case CMDQ_ERR_CERROR_ILL_IDX:
409 	default:
410 		break;
411 	}
412 
413 	/*
414 	 * We may have concurrent producers, so we need to be careful
415 	 * not to touch any of the shadow cmdq state.
416 	 */
417 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
418 	dev_err(smmu->dev, "skipping command in error state:\n");
419 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
420 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
421 
422 	/* Convert the erroneous command into a CMD_SYNC */
423 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
424 
425 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
426 }
427 
428 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
429 {
430 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
431 }
432 
433 /*
434  * Command queue locking.
435  * This is a form of bastardised rwlock with the following major changes:
436  *
437  * - The only LOCK routines are exclusive_trylock() and shared_lock().
438  *   Neither have barrier semantics, and instead provide only a control
439  *   dependency.
440  *
441  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
442  *   fails if the caller appears to be the last lock holder (yes, this is
443  *   racy). All successful UNLOCK routines have RELEASE semantics.
444  */
445 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
446 {
447 	int val;
448 
449 	/*
450 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
451 	 * lock counter. When held in exclusive state, the lock counter is set
452 	 * to INT_MIN so these increments won't hurt as the value will remain
453 	 * negative.
454 	 */
455 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
456 		return;
457 
458 	do {
459 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
460 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
461 }
462 
463 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
464 {
465 	(void)atomic_dec_return_release(&cmdq->lock);
466 }
467 
468 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
469 {
470 	if (atomic_read(&cmdq->lock) == 1)
471 		return false;
472 
473 	arm_smmu_cmdq_shared_unlock(cmdq);
474 	return true;
475 }
476 
477 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
478 ({									\
479 	bool __ret;							\
480 	local_irq_save(flags);						\
481 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
482 	if (!__ret)							\
483 		local_irq_restore(flags);				\
484 	__ret;								\
485 })
486 
487 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
488 ({									\
489 	atomic_set_release(&cmdq->lock, 0);				\
490 	local_irq_restore(flags);					\
491 })
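/*
 * To summarise the cmdq->lock encoding used above: 0 means unlocked, a
 * positive value counts the current shared holders, and a negative value
 * (INT_MIN plus any shared increments that raced in) means the lock is held
 * exclusively. This is why shared_lock() can usually get away with a plain
 * increment and why shared_tryunlock() treats a value of 1 as "we appear to
 * be the last reader".
 */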
492 
493 
494 /*
495  * Command queue insertion.
496  * This is made fiddly by our attempts to achieve some sort of scalability
497  * since there is one queue shared amongst all of the CPUs in the system.  If
498  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
499  * then you'll *love* this monstrosity.
500  *
501  * The basic idea is to split the queue up into ranges of commands that are
502  * owned by a given CPU; the owner may not have written all of the commands
503  * itself, but is responsible for advancing the hardware prod pointer when
504  * the time comes. The algorithm is roughly:
505  *
506  * 	1. Allocate some space in the queue. At this point we also discover
507  *	   whether the head of the queue is currently owned by another CPU,
508  *	   or whether we are the owner.
509  *
510  *	2. Write our commands into our allocated slots in the queue.
511  *
512  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
513  *
514  *	4. If we are an owner:
515  *		a. Wait for the previous owner to finish.
516  *		b. Mark the queue head as unowned, which tells us the range
517  *		   that we are responsible for publishing.
518  *		c. Wait for all commands in our owned range to become valid.
519  *		d. Advance the hardware prod pointer.
520  *		e. Tell the next owner we've finished.
521  *
522  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
523  *	   owner), then we need to stick around until it has completed:
524  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
525  *		   to clear the first 4 bytes.
526  *		b. Otherwise, we spin waiting for the hardware cons pointer to
527  *		   advance past our command.
528  *
529  * The devil is in the details, particularly the use of locking for handling
530  * SYNC completion and freeing up space in the queue before we think that it is
531  * full.
532  */
533 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
534 					       u32 sprod, u32 eprod, bool set)
535 {
536 	u32 swidx, sbidx, ewidx, ebidx;
537 	struct arm_smmu_ll_queue llq = {
538 		.max_n_shift	= cmdq->q.llq.max_n_shift,
539 		.prod		= sprod,
540 	};
541 
542 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
543 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
544 
545 	while (llq.prod != eprod) {
546 		unsigned long mask;
547 		atomic_long_t *ptr;
548 		u32 limit = BITS_PER_LONG;
549 
550 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
551 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
552 
553 		ptr = &cmdq->valid_map[swidx];
554 
555 		if ((swidx == ewidx) && (sbidx < ebidx))
556 			limit = ebidx;
557 
558 		mask = GENMASK(limit - 1, sbidx);
559 
560 		/*
561 		 * The valid bit is the inverse of the wrap bit. This means
562 		 * that a zero-initialised queue is invalid and, after marking
563 		 * all entries as valid, they become invalid again when we
564 		 * wrap.
565 		 */
566 		if (set) {
567 			atomic_long_xor(mask, ptr);
568 		} else { /* Poll */
569 			unsigned long valid;
570 
571 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
572 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
573 		}
574 
575 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
576 	}
577 }
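/*
 * A worked example of the valid-bit trick above: on the first lap through
 * the queue the wrap bit of prod is 0, so producers XOR their bits in the
 * valid_map from 0 to 1 and a polling owner waits for them to read back as
 * all-ones (valid == mask). Once the queue has wrapped, the wrap bit is 1,
 * the same XOR flips the bits back to 0, and the owner waits for all-zeroes
 * ((ULONG_MAX + 1) & mask == 0 in the expression above).
 */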
578 
579 /* Mark all entries in the range [sprod, eprod) as valid */
580 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
581 					u32 sprod, u32 eprod)
582 {
583 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
584 }
585 
586 /* Wait for all entries in the range [sprod, eprod) to become valid */
587 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
588 					 u32 sprod, u32 eprod)
589 {
590 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
591 }
592 
593 /* Wait for the command queue to become non-full */
594 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
595 					     struct arm_smmu_ll_queue *llq)
596 {
597 	unsigned long flags;
598 	struct arm_smmu_queue_poll qp;
599 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
600 	int ret = 0;
601 
602 	/*
603 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
604 	 * that fails, spin until somebody else updates it for us.
605 	 */
606 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
607 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
608 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
609 		llq->val = READ_ONCE(cmdq->q.llq.val);
610 		return 0;
611 	}
612 
613 	queue_poll_init(smmu, &qp);
614 	do {
615 		llq->val = READ_ONCE(cmdq->q.llq.val);
616 		if (!queue_full(llq))
617 			break;
618 
619 		ret = queue_poll(&qp);
620 	} while (!ret);
621 
622 	return ret;
623 }
624 
625 /*
626  * Wait until the SMMU signals a CMD_SYNC completion MSI.
627  * Must be called with the cmdq lock held in some capacity.
628  */
629 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
630 					  struct arm_smmu_ll_queue *llq)
631 {
632 	int ret = 0;
633 	struct arm_smmu_queue_poll qp;
634 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
635 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
636 
637 	queue_poll_init(smmu, &qp);
638 
639 	/*
640 	 * The MSI won't generate an event, since it's being written back
641 	 * into the command queue.
642 	 */
643 	qp.wfe = false;
644 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
645 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
646 	return ret;
647 }
648 
649 /*
650  * Wait until the SMMU cons index passes llq->prod.
651  * Must be called with the cmdq lock held in some capacity.
652  */
653 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
654 					       struct arm_smmu_ll_queue *llq)
655 {
656 	struct arm_smmu_queue_poll qp;
657 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
658 	u32 prod = llq->prod;
659 	int ret = 0;
660 
661 	queue_poll_init(smmu, &qp);
662 	llq->val = READ_ONCE(cmdq->q.llq.val);
663 	do {
664 		if (queue_consumed(llq, prod))
665 			break;
666 
667 		ret = queue_poll(&qp);
668 
669 		/*
670 		 * This needs to be a readl() so that our subsequent call
671 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
672 		 *
673 		 * Specifically, we need to ensure that we observe all
674 		 * shared_lock()s by other CMD_SYNCs that share our owner,
675 		 * so that a failing call to tryunlock() means that we're
676 		 * the last one out and therefore we can safely advance
677 		 * cmdq->q.llq.cons. Roughly speaking:
678 		 *
679 		 * CPU 0		CPU1			CPU2 (us)
680 		 *
681 		 * if (sync)
682 		 * 	shared_lock();
683 		 *
684 		 * dma_wmb();
685 		 * set_valid_map();
686 		 *
687 		 * 			if (owner) {
688 		 *				poll_valid_map();
689 		 *				<control dependency>
690 		 *				writel(prod_reg);
691 		 *
692 		 *						readl(cons_reg);
693 		 *						tryunlock();
694 		 *
695 		 * Requires us to see CPU 0's shared_lock() acquisition.
696 		 */
697 		llq->cons = readl(cmdq->q.cons_reg);
698 	} while (!ret);
699 
700 	return ret;
701 }
702 
703 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
704 					 struct arm_smmu_ll_queue *llq)
705 {
706 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
707 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
708 
709 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
710 }
711 
712 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
713 					u32 prod, int n)
714 {
715 	int i;
716 	struct arm_smmu_ll_queue llq = {
717 		.max_n_shift	= cmdq->q.llq.max_n_shift,
718 		.prod		= prod,
719 	};
720 
721 	for (i = 0; i < n; ++i) {
722 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
723 
724 		prod = queue_inc_prod_n(&llq, i);
725 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
726 	}
727 }
728 
729 /*
730  * This is the actual insertion function, and provides the following
731  * ordering guarantees to callers:
732  *
733  * - There is a dma_wmb() before publishing any commands to the queue.
734  *   This can be relied upon to order prior writes to data structures
735  *   in memory (such as a CD or an STE) before the command.
736  *
737  * - On completion of a CMD_SYNC, there is a control dependency.
738  *   This can be relied upon to order subsequent writes to memory (e.g.
739  *   freeing an IOVA) after completion of the CMD_SYNC.
740  *
741  * - Command insertion is totally ordered, so if two CPUs each race to
742  *   insert their own list of commands then all of the commands from one
743  *   CPU will appear before any of the commands from the other CPU.
744  */
745 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
746 				       u64 *cmds, int n, bool sync)
747 {
748 	u64 cmd_sync[CMDQ_ENT_DWORDS];
749 	u32 prod;
750 	unsigned long flags;
751 	bool owner;
752 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
753 	struct arm_smmu_ll_queue llq, head;
754 	int ret = 0;
755 
756 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
757 
758 	/* 1. Allocate some space in the queue */
759 	local_irq_save(flags);
760 	llq.val = READ_ONCE(cmdq->q.llq.val);
761 	do {
762 		u64 old;
763 
764 		while (!queue_has_space(&llq, n + sync)) {
765 			local_irq_restore(flags);
766 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
767 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
768 			local_irq_save(flags);
769 		}
770 
771 		head.cons = llq.cons;
772 		head.prod = queue_inc_prod_n(&llq, n + sync) |
773 					     CMDQ_PROD_OWNED_FLAG;
774 
775 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
776 		if (old == llq.val)
777 			break;
778 
779 		llq.val = old;
780 	} while (1);
781 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
782 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
783 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
784 
785 	/*
786 	 * 2. Write our commands into the queue
787 	 * Dependency ordering from the cmpxchg() loop above.
788 	 */
789 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
790 	if (sync) {
791 		prod = queue_inc_prod_n(&llq, n);
792 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
793 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
794 
795 		/*
796 		 * In order to determine completion of our CMD_SYNC, we must
797 		 * ensure that the queue can't wrap twice without us noticing.
798 		 * We achieve that by taking the cmdq lock as shared before
799 		 * marking our slot as valid.
800 		 */
801 		arm_smmu_cmdq_shared_lock(cmdq);
802 	}
803 
804 	/* 3. Mark our slots as valid, ensuring commands are visible first */
805 	dma_wmb();
806 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
807 
808 	/* 4. If we are the owner, take control of the SMMU hardware */
809 	if (owner) {
810 		/* a. Wait for previous owner to finish */
811 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
812 
813 		/* b. Stop gathering work by clearing the owned flag */
814 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
815 						   &cmdq->q.llq.atomic.prod);
816 		prod &= ~CMDQ_PROD_OWNED_FLAG;
817 
818 		/*
819 		 * c. Wait for any gathered work to be written to the queue.
820 		 * Note that we read our own entries so that we have the control
821 		 * dependency required by (d).
822 		 */
823 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
824 
825 		/*
826 		 * d. Advance the hardware prod pointer
827 		 * Control dependency ordering from the entries becoming valid.
828 		 */
829 		writel_relaxed(prod, cmdq->q.prod_reg);
830 
831 		/*
832 		 * e. Tell the next owner we're done
833 		 * Make sure we've updated the hardware first, so that we don't
834 		 * race to update prod and potentially move it backwards.
835 		 */
836 		atomic_set_release(&cmdq->owner_prod, prod);
837 	}
838 
839 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
840 	if (sync) {
841 		llq.prod = queue_inc_prod_n(&llq, n);
842 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
843 		if (ret) {
844 			dev_err_ratelimited(smmu->dev,
845 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
846 					    llq.prod,
847 					    readl_relaxed(cmdq->q.prod_reg),
848 					    readl_relaxed(cmdq->q.cons_reg));
849 		}
850 
851 		/*
852 		 * Try to unlock the cmdq lock. This will fail if we're the last
853 		 * reader, in which case we can safely update cmdq->q.llq.cons.
854 		 */
855 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
856 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
857 			arm_smmu_cmdq_shared_unlock(cmdq);
858 		}
859 	}
860 
861 	local_irq_restore(flags);
862 	return ret;
863 }
864 
865 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
866 				     struct arm_smmu_cmdq_ent *ent,
867 				     bool sync)
868 {
869 	u64 cmd[CMDQ_ENT_DWORDS];
870 
871 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
872 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
873 			 ent->opcode);
874 		return -EINVAL;
875 	}
876 
877 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
878 }
879 
880 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
881 				   struct arm_smmu_cmdq_ent *ent)
882 {
883 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
884 }
885 
886 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
887 					     struct arm_smmu_cmdq_ent *ent)
888 {
889 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
890 }
891 
892 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
893 				    struct arm_smmu_cmdq_batch *cmds,
894 				    struct arm_smmu_cmdq_ent *cmd)
895 {
896 	int index;
897 
898 	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
899 	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
900 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
901 		cmds->num = 0;
902 	}
903 
904 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
905 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
906 		cmds->num = 0;
907 	}
908 
909 	index = cmds->num * CMDQ_ENT_DWORDS;
910 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
911 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
912 			 cmd->opcode);
913 		return;
914 	}
915 
916 	cmds->num++;
917 }
918 
919 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
920 				      struct arm_smmu_cmdq_batch *cmds)
921 {
922 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
923 }
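/*
 * Sketch of the batching pattern (arm_smmu_sync_cd() below is a real user):
 * zero cmds.num, add one command per stream or page, then submit once so
 * that a single trailing CMD_SYNC covers the whole batch:
 *
 *	struct arm_smmu_cmdq_batch cmds;
 *
 *	cmds.num = 0;
 *	for (i = 0; i < master->num_streams; i++) {
 *		cmd.cfgi.sid = master->streams[i].id;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 */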
924 
925 static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
926 				   struct iommu_page_response *resp)
927 {
928 	struct arm_smmu_cmdq_ent cmd = {0};
929 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
930 	int sid = master->streams[0].id;
931 
932 	if (WARN_ON(!master->stall_enabled))
933 		return;
934 
935 	cmd.opcode		= CMDQ_OP_RESUME;
936 	cmd.resume.sid		= sid;
937 	cmd.resume.stag		= resp->grpid;
938 	switch (resp->code) {
939 	case IOMMU_PAGE_RESP_INVALID:
940 	case IOMMU_PAGE_RESP_FAILURE:
941 		cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
942 		break;
943 	case IOMMU_PAGE_RESP_SUCCESS:
944 		cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
945 		break;
946 	default:
947 		break;
948 	}
949 
950 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
951 	/*
952 	 * Don't send a SYNC; it doesn't do anything for RESUME or PRI_RESP.
953 	 * RESUME consumption guarantees that the stalled transaction will be
954 	 * terminated... at some point in the future. PRI_RESP is fire and
955 	 * forget.
956 	 */
957 }
958 
959 /* Context descriptor manipulation functions */
960 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
961 {
962 	struct arm_smmu_cmdq_ent cmd = {
963 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
964 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
965 		.tlbi.asid = asid,
966 	};
967 
968 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
969 }
970 
971 /*
972  * Based on the value of ent, report which bits of the STE the HW will access. It
973  * would be nice if this were complete according to the spec, but minimally it
974  * has to capture the bits this driver uses.
975  */
976 VISIBLE_IF_KUNIT
977 void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
978 {
979 	unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
980 
981 	used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
982 	if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
983 		return;
984 
985 	used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
986 
987 	/* S1 translates */
988 	if (cfg & BIT(0)) {
989 		used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
990 					    STRTAB_STE_0_S1CTXPTR_MASK |
991 					    STRTAB_STE_0_S1CDMAX);
992 		used_bits[1] |=
993 			cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
994 				    STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
995 				    STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
996 				    STRTAB_STE_1_EATS);
997 		used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
998 
999 		/*
1000 		 * See 13.5 Summary of attribute/permission configuration fields
1001 		 * for the SHCFG behavior.
1002 		 */
1003 		if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
1004 		    STRTAB_STE_1_S1DSS_BYPASS)
1005 			used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1006 	}
1007 
1008 	/* S2 translates */
1009 	if (cfg & BIT(1)) {
1010 		used_bits[1] |=
1011 			cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
1012 		used_bits[2] |=
1013 			cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
1014 				    STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
1015 				    STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R);
1016 		used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
1017 	}
1018 
1019 	if (cfg == STRTAB_STE_0_CFG_BYPASS)
1020 		used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1021 }
1022 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);
1023 
1024 /*
1025  * Figure out if we can do a hitless update of entry to become target. Returns a
1026  * bit mask in which each set bit marks a qword that needs to be set disruptively.
1027  * unused_update is an intermediate value of entry that has unused bits set to
1028  * their new values.
1029  */
1030 static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
1031 				    const __le64 *entry, const __le64 *target,
1032 				    __le64 *unused_update)
1033 {
1034 	__le64 target_used[NUM_ENTRY_QWORDS] = {};
1035 	__le64 cur_used[NUM_ENTRY_QWORDS] = {};
1036 	u8 used_qword_diff = 0;
1037 	unsigned int i;
1038 
1039 	writer->ops->get_used(entry, cur_used);
1040 	writer->ops->get_used(target, target_used);
1041 
1042 	for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
1043 		/*
1044 		 * Check that masks are up to date: the make functions are not
1045 		 * allowed to set a bit to 1 if the used function doesn't say it
1046 		 * is used.
1047 		 */
1048 		WARN_ON_ONCE(target[i] & ~target_used[i]);
1049 
1050 		/* Bits can change because they are not currently being used */
1051 		unused_update[i] = (entry[i] & cur_used[i]) |
1052 				   (target[i] & ~cur_used[i]);
1053 		/*
1054 		 * Each bit indicates that a used bit in a qword needs to be
1055 		 * changed after unused_update is applied.
1056 		 */
1057 		if ((unused_update[i] & target_used[i]) != target[i])
1058 			used_qword_diff |= 1 << i;
1059 	}
1060 	return used_qword_diff;
1061 }
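/*
 * For example, two configurations that disagree only in the used bits of
 * qword 1 yield used_qword_diff == BIT(1); hweight8() == 1, so
 * arm_smmu_write_entry() below can take the hitless path. A change that also
 * touches qword 2 sets two bits and forces the break-then-rewrite (V=0)
 * sequence instead.
 */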
1062 
1063 static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
1064 		      const __le64 *target, unsigned int start,
1065 		      unsigned int len)
1066 {
1067 	bool changed = false;
1068 	unsigned int i;
1069 
1070 	for (i = start; len != 0; len--, i++) {
1071 		if (entry[i] != target[i]) {
1072 			WRITE_ONCE(entry[i], target[i]);
1073 			changed = true;
1074 		}
1075 	}
1076 
1077 	if (changed)
1078 		writer->ops->sync(writer);
1079 	return changed;
1080 }
1081 
1082 /*
1083  * Update the STE/CD to the target configuration. The transition from the
1084  * current entry to the target entry takes place over multiple steps that
1085  * attempt to make the transition hitless if possible. This function takes care
1086  * not to create a situation where the HW can perceive a corrupted entry. HW is
1087  * only required to provide 64 bit atomicity for stores from the CPU, while
1088  * entries are several 64 bit values in size.
1089  *
1090  * The difference between the current value and the target value is analyzed to
1091  * determine which of three updates are required - disruptive, hitless or no
1092  * change.
1093  *
1094  * In the most general disruptive case we can make any update in three steps:
1095  *  - Disrupting the entry (V=0)
1096  *  - Fill now unused qwords, except qword 0 which contains V
1097  *  - Make qword 0 have the final value and valid (V=1) with a single 64
1098  *    bit store
1099  *
1100  * However this disrupts the HW while it is happening. There are several
1101  * interesting cases where a STE/CD can be updated without disturbing the HW
1102  * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
1103  * because the used bits don't intersect. We can detect this by calculating how
1104  * many 64 bit values need update after adjusting the unused bits and skip the
1105  * V=0 process. This relies on the IGNORED behavior described in the
1106  * specification.
1107  */
1108 VISIBLE_IF_KUNIT
1109 void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
1110 			  const __le64 *target)
1111 {
1112 	__le64 unused_update[NUM_ENTRY_QWORDS];
1113 	u8 used_qword_diff;
1114 
1115 	used_qword_diff =
1116 		arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
1117 	if (hweight8(used_qword_diff) == 1) {
1118 		/*
1119 		 * Only one qword needs its used bits to be changed. This is a
1120 		 * hitless update: update all bits the current STE/CD is
1121 		 * ignoring to their new values, then update a single "critical
1122 		 * qword" to change the STE/CD, and finally zero out any bits that
1123 		 * are now unused in the target configuration.
1124 		 */
1125 		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
1126 
1127 		/*
1128 		 * Skip writing unused bits in the critical qword since we'll be
1129 		 * writing it in the next step anyway. This can save a sync
1130 		 * when the only change is in that qword.
1131 		 */
1132 		unused_update[critical_qword_index] =
1133 			entry[critical_qword_index];
1134 		entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
1135 		entry_set(writer, entry, target, critical_qword_index, 1);
1136 		entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
1137 	} else if (used_qword_diff) {
1138 		/*
1139 		 * At least two qwords need their in-use bits to be changed. This
1140 		 * requires a breaking update: zero the V bit, write all qwords
1141 		 * but 0, then set qword 0.
1142 		 */
1143 		unused_update[0] = 0;
1144 		entry_set(writer, entry, unused_update, 0, 1);
1145 		entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
1146 		entry_set(writer, entry, target, 0, 1);
1147 	} else {
1148 		/*
1149 		 * No in-use bit changed. Sanity check that all unused bits are 0
1150 		 * in the entry. The target was already sanity checked by
1151 		 * arm_smmu_entry_qword_diff().
1152 		 */
1153 		WARN_ON_ONCE(
1154 			entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
1155 	}
1156 }
1157 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
1158 
1159 static void arm_smmu_sync_cd(struct arm_smmu_master *master,
1160 			     int ssid, bool leaf)
1161 {
1162 	size_t i;
1163 	struct arm_smmu_cmdq_batch cmds;
1164 	struct arm_smmu_device *smmu = master->smmu;
1165 	struct arm_smmu_cmdq_ent cmd = {
1166 		.opcode	= CMDQ_OP_CFGI_CD,
1167 		.cfgi	= {
1168 			.ssid	= ssid,
1169 			.leaf	= leaf,
1170 		},
1171 	};
1172 
1173 	cmds.num = 0;
1174 	for (i = 0; i < master->num_streams; i++) {
1175 		cmd.cfgi.sid = master->streams[i].id;
1176 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1177 	}
1178 
1179 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1180 }
1181 
1182 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1183 					struct arm_smmu_l1_ctx_desc *l1_desc)
1184 {
1185 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1186 
1187 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1188 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1189 	if (!l1_desc->l2ptr) {
1190 		dev_warn(smmu->dev,
1191 			 "failed to allocate context descriptor table\n");
1192 		return -ENOMEM;
1193 	}
1194 	return 0;
1195 }
1196 
1197 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1198 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1199 {
1200 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1201 		  CTXDESC_L1_DESC_V;
1202 
1203 	/* The HW has 64 bit atomicity with stores to the L2 CD table */
1204 	WRITE_ONCE(*dst, cpu_to_le64(val));
1205 }
1206 
1207 struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
1208 					u32 ssid)
1209 {
1210 	struct arm_smmu_l1_ctx_desc *l1_desc;
1211 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1212 
1213 	if (!cd_table->cdtab)
1214 		return NULL;
1215 
1216 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1217 		return (struct arm_smmu_cd *)(cd_table->cdtab +
1218 					      ssid * CTXDESC_CD_DWORDS);
1219 
1220 	l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES];
1221 	if (!l1_desc->l2ptr)
1222 		return NULL;
1223 	return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES];
1224 }
1225 
1226 static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
1227 						 u32 ssid)
1228 {
1229 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1230 	struct arm_smmu_device *smmu = master->smmu;
1231 
1232 	might_sleep();
1233 	iommu_group_mutex_assert(master->dev);
1234 
1235 	if (!cd_table->cdtab) {
1236 		if (arm_smmu_alloc_cd_tables(master))
1237 			return NULL;
1238 	}
1239 
1240 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
1241 		unsigned int idx = ssid / CTXDESC_L2_ENTRIES;
1242 		struct arm_smmu_l1_ctx_desc *l1_desc;
1243 
1244 		l1_desc = &cd_table->l1_desc[idx];
1245 		if (!l1_desc->l2ptr) {
1246 			__le64 *l1ptr;
1247 
1248 			if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1249 				return NULL;
1250 
1251 			l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1252 			arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1253 			/* An invalid L1CD can be cached */
1254 			arm_smmu_sync_cd(master, ssid, false);
1255 		}
1256 	}
1257 	return arm_smmu_get_cd_ptr(master, ssid);
1258 }
1259 
1260 struct arm_smmu_cd_writer {
1261 	struct arm_smmu_entry_writer writer;
1262 	unsigned int ssid;
1263 };
1264 
1265 VISIBLE_IF_KUNIT
1266 void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
1267 {
1268 	used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
1269 	if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
1270 		return;
1271 	memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
1272 
1273 	/*
1274 	 * If EPD0 is set by the make function, it means
1275 	 * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED.
1276 	 */
1277 	if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
1278 		used_bits[0] &= ~cpu_to_le64(
1279 			CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
1280 			CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
1281 			CTXDESC_CD_0_TCR_SH0);
1282 		used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
1283 	}
1284 }
1285 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);
1286 
1287 static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1288 {
1289 	struct arm_smmu_cd_writer *cd_writer =
1290 		container_of(writer, struct arm_smmu_cd_writer, writer);
1291 
1292 	arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
1293 }
1294 
1295 static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
1296 	.sync = arm_smmu_cd_writer_sync_entry,
1297 	.get_used = arm_smmu_get_cd_used,
1298 };
1299 
1300 void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
1301 			     struct arm_smmu_cd *cdptr,
1302 			     const struct arm_smmu_cd *target)
1303 {
1304 	bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1305 	bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1306 	struct arm_smmu_cd_writer cd_writer = {
1307 		.writer = {
1308 			.ops = &arm_smmu_cd_writer_ops,
1309 			.master = master,
1310 		},
1311 		.ssid = ssid,
1312 	};
1313 
1314 	if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) {
1315 		if (cur_valid)
1316 			master->cd_table.used_ssids--;
1317 		else
1318 			master->cd_table.used_ssids++;
1319 	}
1320 
1321 	arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
1322 }
1323 
1324 void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
1325 			 struct arm_smmu_master *master,
1326 			 struct arm_smmu_domain *smmu_domain)
1327 {
1328 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
1329 	const struct io_pgtable_cfg *pgtbl_cfg =
1330 		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1331 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
1332 		&pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1333 
1334 	memset(target, 0, sizeof(*target));
1335 
1336 	target->data[0] = cpu_to_le64(
1337 		FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1338 		FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1339 		FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1340 		FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1341 		FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1342 #ifdef __BIG_ENDIAN
1343 		CTXDESC_CD_0_ENDI |
1344 #endif
1345 		CTXDESC_CD_0_TCR_EPD1 |
1346 		CTXDESC_CD_0_V |
1347 		FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1348 		CTXDESC_CD_0_AA64 |
1349 		(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
1350 		CTXDESC_CD_0_R |
1351 		CTXDESC_CD_0_A |
1352 		CTXDESC_CD_0_ASET |
1353 		FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
1354 		);
1355 
1356 	/* To enable dirty flag update, set both Access flag and dirty state update */
1357 	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
1358 		target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
1359 					       CTXDESC_CD_0_TCR_HD);
1360 
1361 	target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
1362 				      CTXDESC_CD_1_TTB0_MASK);
1363 	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
1364 }
1365 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);
1366 
1367 void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
1368 {
1369 	struct arm_smmu_cd target = {};
1370 	struct arm_smmu_cd *cdptr;
1371 
1372 	if (!master->cd_table.cdtab)
1373 		return;
1374 	cdptr = arm_smmu_get_cd_ptr(master, ssid);
1375 	if (WARN_ON(!cdptr))
1376 		return;
1377 	arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
1378 }
1379 
1380 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1381 {
1382 	int ret;
1383 	size_t l1size;
1384 	size_t max_contexts;
1385 	struct arm_smmu_device *smmu = master->smmu;
1386 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1387 
1388 	cd_table->s1cdmax = master->ssid_bits;
1389 	max_contexts = 1 << cd_table->s1cdmax;
1390 
1391 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1392 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1393 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1394 		cd_table->num_l1_ents = max_contexts;
1395 
1396 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1397 	} else {
1398 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1399 		cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
1400 						  CTXDESC_L2_ENTRIES);
1401 
1402 		cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
1403 					      sizeof(*cd_table->l1_desc),
1404 					      GFP_KERNEL);
1405 		if (!cd_table->l1_desc)
1406 			return -ENOMEM;
1407 
1408 		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1409 	}
1410 
1411 	cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
1412 					   GFP_KERNEL);
1413 	if (!cd_table->cdtab) {
1414 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1415 		ret = -ENOMEM;
1416 		goto err_free_l1;
1417 	}
1418 
1419 	return 0;
1420 
1421 err_free_l1:
1422 	if (cd_table->l1_desc) {
1423 		devm_kfree(smmu->dev, cd_table->l1_desc);
1424 		cd_table->l1_desc = NULL;
1425 	}
1426 	return ret;
1427 }
1428 
1429 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1430 {
1431 	int i;
1432 	size_t size, l1size;
1433 	struct arm_smmu_device *smmu = master->smmu;
1434 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1435 
1436 	if (cd_table->l1_desc) {
1437 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1438 
1439 		for (i = 0; i < cd_table->num_l1_ents; i++) {
1440 			if (!cd_table->l1_desc[i].l2ptr)
1441 				continue;
1442 
1443 			dmam_free_coherent(smmu->dev, size,
1444 					   cd_table->l1_desc[i].l2ptr,
1445 					   cd_table->l1_desc[i].l2ptr_dma);
1446 		}
1447 		devm_kfree(smmu->dev, cd_table->l1_desc);
1448 		cd_table->l1_desc = NULL;
1449 
1450 		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1451 	} else {
1452 		l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1453 	}
1454 
1455 	dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
1456 	cd_table->cdtab_dma = 0;
1457 	cd_table->cdtab = NULL;
1458 }
1459 
1460 /* Stream table manipulation functions */
1461 static void arm_smmu_write_strtab_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma)
1462 {
1463 	u64 val = 0;
1464 
1465 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1);
1466 	val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1467 
1468 	/* The HW has 64 bit atomicity with stores to the L2 STE table */
1469 	WRITE_ONCE(*dst, cpu_to_le64(val));
1470 }
1471 
1472 struct arm_smmu_ste_writer {
1473 	struct arm_smmu_entry_writer writer;
1474 	u32 sid;
1475 };
1476 
1477 static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1478 {
1479 	struct arm_smmu_ste_writer *ste_writer =
1480 		container_of(writer, struct arm_smmu_ste_writer, writer);
1481 	struct arm_smmu_cmdq_ent cmd = {
1482 		.opcode	= CMDQ_OP_CFGI_STE,
1483 		.cfgi	= {
1484 			.sid	= ste_writer->sid,
1485 			.leaf	= true,
1486 		},
1487 	};
1488 
1489 	arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
1490 }
1491 
1492 static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
1493 	.sync = arm_smmu_ste_writer_sync_entry,
1494 	.get_used = arm_smmu_get_ste_used,
1495 };
1496 
1497 static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
1498 			       struct arm_smmu_ste *ste,
1499 			       const struct arm_smmu_ste *target)
1500 {
1501 	struct arm_smmu_device *smmu = master->smmu;
1502 	struct arm_smmu_ste_writer ste_writer = {
1503 		.writer = {
1504 			.ops = &arm_smmu_ste_writer_ops,
1505 			.master = master,
1506 		},
1507 		.sid = sid,
1508 	};
1509 
1510 	arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);
1511 
1512 	/* It's likely that we'll want to use the new STE soon */
1513 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
1514 		struct arm_smmu_cmdq_ent
1515 			prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
1516 					 .prefetch = {
1517 						 .sid = sid,
1518 					 } };
1519 
1520 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1521 	}
1522 }
1523 
1524 VISIBLE_IF_KUNIT
1525 void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
1526 {
1527 	memset(target, 0, sizeof(*target));
1528 	target->data[0] = cpu_to_le64(
1529 		STRTAB_STE_0_V |
1530 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
1531 }
1532 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);
1533 
1534 VISIBLE_IF_KUNIT
1535 void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
1536 			      struct arm_smmu_ste *target)
1537 {
1538 	memset(target, 0, sizeof(*target));
1539 	target->data[0] = cpu_to_le64(
1540 		STRTAB_STE_0_V |
1541 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
1542 
1543 	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1544 		target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1545 							 STRTAB_STE_1_SHCFG_INCOMING));
1546 }
1547 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);
1548 
1549 VISIBLE_IF_KUNIT
1550 void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
1551 			       struct arm_smmu_master *master, bool ats_enabled,
1552 			       unsigned int s1dss)
1553 {
1554 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1555 	struct arm_smmu_device *smmu = master->smmu;
1556 
1557 	memset(target, 0, sizeof(*target));
1558 	target->data[0] = cpu_to_le64(
1559 		STRTAB_STE_0_V |
1560 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1561 		FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
1562 		(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1563 		FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
1564 
1565 	target->data[1] = cpu_to_le64(
1566 		FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
1567 		FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1568 		FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1569 		FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1570 		((smmu->features & ARM_SMMU_FEAT_STALLS &&
1571 		  !master->stall_enabled) ?
1572 			 STRTAB_STE_1_S1STALLD :
1573 			 0) |
1574 		FIELD_PREP(STRTAB_STE_1_EATS,
1575 			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1576 
1577 	if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) &&
1578 	    s1dss == STRTAB_STE_1_S1DSS_BYPASS)
1579 		target->data[1] |= cpu_to_le64(FIELD_PREP(
1580 			STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
1581 
1582 	if (smmu->features & ARM_SMMU_FEAT_E2H) {
1583 		/*
1584 		 * To support BTM the streamworld needs to match the
1585 		 * configuration of the CPU so that the ASID broadcasts are
1586 		 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
1587 		 * or NS-EL1 (guest). Since an SVA domain can be installed in a
1588 		 * PASID this should always use a BTM compatible configuration
1589 		 * if the HW supports it.
1590 		 */
1591 		target->data[1] |= cpu_to_le64(
1592 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
1593 	} else {
1594 		target->data[1] |= cpu_to_le64(
1595 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1596 
1597 		/*
1598 		 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
1599 		 * arm_smmu_domain_alloc_id()
1600 		 */
1601 		target->data[2] =
1602 			cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
1603 	}
1604 }
1605 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
1606 
1607 VISIBLE_IF_KUNIT
1608 void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
1609 				 struct arm_smmu_master *master,
1610 				 struct arm_smmu_domain *smmu_domain,
1611 				 bool ats_enabled)
1612 {
1613 	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
1614 	const struct io_pgtable_cfg *pgtbl_cfg =
1615 		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1616 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
1617 		&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1618 	u64 vtcr_val;
1619 	struct arm_smmu_device *smmu = master->smmu;
1620 
1621 	memset(target, 0, sizeof(*target));
1622 	target->data[0] = cpu_to_le64(
1623 		STRTAB_STE_0_V |
1624 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
1625 
1626 	target->data[1] = cpu_to_le64(
1627 		FIELD_PREP(STRTAB_STE_1_EATS,
1628 			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1629 
1630 	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1631 		target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1632 							  STRTAB_STE_1_SHCFG_INCOMING));
1633 
1634 	vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1635 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1636 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1637 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1638 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1639 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1640 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1641 	target->data[2] = cpu_to_le64(
1642 		FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1643 		FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
1644 		STRTAB_STE_2_S2AA64 |
1645 #ifdef __BIG_ENDIAN
1646 		STRTAB_STE_2_S2ENDI |
1647 #endif
1648 		STRTAB_STE_2_S2PTW |
1649 		STRTAB_STE_2_S2R);
1650 
1651 	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
1652 				      STRTAB_STE_3_S2TTB_MASK);
1653 }
1654 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);
1655 
1656 /*
1657  * This can safely directly manipulate the STE memory without a sync sequence
1658  * because the STE table has not been installed in the SMMU yet.
1659  */
1660 static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
1661 				       unsigned int nent)
1662 {
1663 	unsigned int i;
1664 
1665 	for (i = 0; i < nent; ++i) {
1666 		arm_smmu_make_abort_ste(strtab);
1667 		strtab++;
1668 	}
1669 }
1670 
1671 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1672 {
1673 	size_t size;
1674 	void *strtab;
1675 	dma_addr_t l2ptr_dma;
1676 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1677 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1678 
1679 	if (desc->l2ptr)
1680 		return 0;
1681 
1682 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1683 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1684 
1685 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &l2ptr_dma,
1686 					  GFP_KERNEL);
1687 	if (!desc->l2ptr) {
1688 		dev_err(smmu->dev,
1689 			"failed to allocate l2 stream table for SID %u\n",
1690 			sid);
1691 		return -ENOMEM;
1692 	}
1693 
1694 	arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1695 	arm_smmu_write_strtab_l1_desc(strtab, l2ptr_dma);
1696 	return 0;
1697 }
1698 
1699 static struct arm_smmu_master *
1700 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1701 {
1702 	struct rb_node *node;
1703 	struct arm_smmu_stream *stream;
1704 
1705 	lockdep_assert_held(&smmu->streams_mutex);
1706 
1707 	node = smmu->streams.rb_node;
1708 	while (node) {
1709 		stream = rb_entry(node, struct arm_smmu_stream, node);
1710 		if (stream->id < sid)
1711 			node = node->rb_right;
1712 		else if (stream->id > sid)
1713 			node = node->rb_left;
1714 		else
1715 			return stream->master;
1716 	}
1717 
1718 	return NULL;
1719 }
1720 
1721 /* IRQ and event handlers */
1722 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1723 {
1724 	int ret = 0;
1725 	u32 perm = 0;
1726 	struct arm_smmu_master *master;
1727 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1728 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1729 	struct iopf_fault fault_evt = { };
1730 	struct iommu_fault *flt = &fault_evt.fault;
1731 
1732 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1733 	case EVT_ID_TRANSLATION_FAULT:
1734 	case EVT_ID_ADDR_SIZE_FAULT:
1735 	case EVT_ID_ACCESS_FAULT:
1736 	case EVT_ID_PERMISSION_FAULT:
1737 		break;
1738 	default:
1739 		return -EOPNOTSUPP;
1740 	}
1741 
1742 	/* Stage-2 is always pinned at the moment */
1743 	if (evt[1] & EVTQ_1_S2)
1744 		return -EFAULT;
1745 
1746 	if (!(evt[1] & EVTQ_1_STALL))
1747 		return -EOPNOTSUPP;
1748 
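	/* Map the EVTQ_1 access attributes onto the iopf permission flags */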
1749 	if (evt[1] & EVTQ_1_RnW)
1750 		perm |= IOMMU_FAULT_PERM_READ;
1751 	else
1752 		perm |= IOMMU_FAULT_PERM_WRITE;
1753 
1754 	if (evt[1] & EVTQ_1_InD)
1755 		perm |= IOMMU_FAULT_PERM_EXEC;
1756 
1757 	if (evt[1] & EVTQ_1_PnU)
1758 		perm |= IOMMU_FAULT_PERM_PRIV;
1759 
1760 	flt->type = IOMMU_FAULT_PAGE_REQ;
1761 	flt->prm = (struct iommu_fault_page_request) {
1762 		.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1763 		.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1764 		.perm = perm,
1765 		.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1766 	};
1767 
1768 	if (ssid_valid) {
1769 		flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1770 		flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1771 	}
1772 
1773 	mutex_lock(&smmu->streams_mutex);
1774 	master = arm_smmu_find_master(smmu, sid);
1775 	if (!master) {
1776 		ret = -EINVAL;
1777 		goto out_unlock;
1778 	}
1779 
1780 	iommu_report_device_fault(master->dev, &fault_evt);
1781 out_unlock:
1782 	mutex_unlock(&smmu->streams_mutex);
1783 	return ret;
1784 }
1785 
1786 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1787 {
1788 	int i, ret;
1789 	struct arm_smmu_device *smmu = dev;
1790 	struct arm_smmu_queue *q = &smmu->evtq.q;
1791 	struct arm_smmu_ll_queue *llq = &q->llq;
1792 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1793 				      DEFAULT_RATELIMIT_BURST);
1794 	u64 evt[EVTQ_ENT_DWORDS];
1795 
1796 	do {
1797 		while (!queue_remove_raw(q, evt)) {
1798 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1799 
1800 			ret = arm_smmu_handle_evt(smmu, evt);
1801 			if (!ret || !__ratelimit(&rs))
1802 				continue;
1803 
1804 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1805 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1806 				dev_info(smmu->dev, "\t0x%016llx\n",
1807 					 (unsigned long long)evt[i]);
1808 
1809 			cond_resched();
1810 		}
1811 
1812 		/*
1813 		 * Not much we can do on overflow, so scream and pretend we're
1814 		 * trying harder.
1815 		 */
1816 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1817 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1818 	} while (!queue_empty(llq));
1819 
1820 	/* Sync our overflow flag, as we believe we're up to speed */
1821 	queue_sync_cons_ovf(q);
1822 	return IRQ_HANDLED;
1823 }
1824 
1825 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1826 {
1827 	u32 sid, ssid;
1828 	u16 grpid;
1829 	bool ssv, last;
1830 
1831 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1832 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1833 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1834 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1835 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1836 
1837 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1838 	dev_info(smmu->dev,
1839 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1840 		 sid, ssid, grpid, last ? "L" : "",
1841 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1842 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1843 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1844 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1845 		 evt[1] & PRIQ_1_ADDR_MASK);
1846 
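	/*
	 * Only the last request in a PRG requires a response; deny it, since
	 * PRI page requests are not actually handled here.
	 */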
1847 	if (last) {
1848 		struct arm_smmu_cmdq_ent cmd = {
1849 			.opcode			= CMDQ_OP_PRI_RESP,
1850 			.substream_valid	= ssv,
1851 			.pri			= {
1852 				.sid	= sid,
1853 				.ssid	= ssid,
1854 				.grpid	= grpid,
1855 				.resp	= PRI_RESP_DENY,
1856 			},
1857 		};
1858 
1859 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1860 	}
1861 }
1862 
1863 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1864 {
1865 	struct arm_smmu_device *smmu = dev;
1866 	struct arm_smmu_queue *q = &smmu->priq.q;
1867 	struct arm_smmu_ll_queue *llq = &q->llq;
1868 	u64 evt[PRIQ_ENT_DWORDS];
1869 
1870 	do {
1871 		while (!queue_remove_raw(q, evt))
1872 			arm_smmu_handle_ppr(smmu, evt);
1873 
1874 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1875 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1876 	} while (!queue_empty(llq));
1877 
1878 	/* Sync our overflow flag, as we believe we're up to speed */
1879 	queue_sync_cons_ovf(q);
1880 	return IRQ_HANDLED;
1881 }
1882 
1883 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1884 
1885 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1886 {
1887 	u32 gerror, gerrorn, active;
1888 	struct arm_smmu_device *smmu = dev;
1889 
1890 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1891 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1892 
1893 	active = gerror ^ gerrorn;
1894 	if (!(active & GERROR_ERR_MASK))
1895 		return IRQ_NONE; /* No errors pending */
1896 
1897 	dev_warn(smmu->dev,
1898 		 "unexpected global error reported (0x%08x), this could be serious\n",
1899 		 active);
1900 
1901 	if (active & GERROR_SFM_ERR) {
1902 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1903 		arm_smmu_device_disable(smmu);
1904 	}
1905 
1906 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1907 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1908 
1909 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1910 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1911 
1912 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1913 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1914 
1915 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1916 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1917 
1918 	if (active & GERROR_PRIQ_ABT_ERR)
1919 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1920 
1921 	if (active & GERROR_EVTQ_ABT_ERR)
1922 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1923 
1924 	if (active & GERROR_CMDQ_ERR)
1925 		arm_smmu_cmdq_skip_err(smmu);
1926 
1927 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1928 	return IRQ_HANDLED;
1929 }
1930 
1931 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1932 {
1933 	struct arm_smmu_device *smmu = dev;
1934 
1935 	arm_smmu_evtq_thread(irq, dev);
1936 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1937 		arm_smmu_priq_thread(irq, dev);
1938 
1939 	return IRQ_HANDLED;
1940 }
1941 
1942 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1943 {
1944 	arm_smmu_gerror_handler(irq, dev);
1945 	return IRQ_WAKE_THREAD;
1946 }
1947 
1948 static void
1949 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1950 			struct arm_smmu_cmdq_ent *cmd)
1951 {
1952 	size_t log2_span;
1953 	size_t span_mask;
1954 	/* ATC invalidates are always on 4096-byte pages */
1955 	size_t inval_grain_shift = 12;
1956 	unsigned long page_start, page_end;
1957 
1958 	/*
1959 	 * ATS and PASID:
1960 	 *
1961 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1962 	 * prefix. In that case all ATC entries within the address range are
1963 	 * invalidated, including those that were requested with a PASID! There
1964 	 * is no way to invalidate only entries without PASID.
1965 	 *
1966 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1967 	 * traffic), translation requests without PASID create ATC entries
1968 	 * without PASID, which must be invalidated with substream_valid clear.
1969 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1970 	 * ATC entries within the address range.
1971 	 */
1972 	*cmd = (struct arm_smmu_cmdq_ent) {
1973 		.opcode			= CMDQ_OP_ATC_INV,
1974 		.substream_valid	= (ssid != IOMMU_NO_PASID),
1975 		.atc.ssid		= ssid,
1976 	};
1977 
1978 	if (!size) {
1979 		cmd->atc.size = ATC_INV_SIZE_ALL;
1980 		return;
1981 	}
1982 
1983 	page_start	= iova >> inval_grain_shift;
1984 	page_end	= (iova + size - 1) >> inval_grain_shift;
1985 
1986 	/*
1987 	 * In an ATS Invalidate Request, the address must be aligned on the
1988 	 * range size, which must be a power of two number of page sizes. We
1989 	 * thus have to choose between grossly over-invalidating the region, or
1990 	 * splitting the invalidation into multiple commands. For simplicity
1991 	 * we'll go with the first solution, but should refine it in the future
1992 	 * if multiple commands are shown to be more efficient.
1993 	 *
1994 	 * Find the smallest power of two that covers the range. The most
1995 	 * significant differing bit between the start and end addresses,
1996 	 * fls(start ^ end), indicates the required span. For example:
1997 	 *
1998 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1999 	 *		x = 0b1000 ^ 0b1011 = 0b11
2000 	 *		span = 1 << fls(x) = 4
2001 	 *
2002 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
2003 	 *		x = 0b0111 ^ 0b1010 = 0b1101
2004 	 *		span = 1 << fls(x) = 16
2005 	 */
2006 	log2_span	= fls_long(page_start ^ page_end);
2007 	span_mask	= (1ULL << log2_span) - 1;
2008 
2009 	page_start	&= ~span_mask;
2010 
2011 	cmd->atc.addr	= page_start << inval_grain_shift;
2012 	cmd->atc.size	= log2_span;
2013 }
2014 
2015 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
2016 				   ioasid_t ssid)
2017 {
2018 	int i;
2019 	struct arm_smmu_cmdq_ent cmd;
2020 	struct arm_smmu_cmdq_batch cmds;
2021 
2022 	arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
2023 
2024 	cmds.num = 0;
2025 	for (i = 0; i < master->num_streams; i++) {
2026 		cmd.atc.sid = master->streams[i].id;
2027 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
2028 	}
2029 
2030 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
2031 }
2032 
2033 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
2034 			    unsigned long iova, size_t size)
2035 {
2036 	struct arm_smmu_master_domain *master_domain;
2037 	int i;
2038 	unsigned long flags;
2039 	struct arm_smmu_cmdq_ent cmd;
2040 	struct arm_smmu_cmdq_batch cmds;
2041 
2042 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
2043 		return 0;
2044 
2045 	/*
2046 	 * Ensure that we've completed prior invalidation of the main TLBs
2047 	 * before we read 'nr_ats_masters' in case of a concurrent call to
2048 	 * arm_smmu_enable_ats():
2049 	 *
2050 	 *	// unmap()			// arm_smmu_enable_ats()
2051 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
2052 	 *	smp_mb();			[...]
2053 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
2054 	 *
2055 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
2056 	 * ATS was enabled at the PCI device before completion of the TLBI.
2057 	 */
2058 	smp_mb();
2059 	if (!atomic_read(&smmu_domain->nr_ats_masters))
2060 		return 0;
2061 
2062 	cmds.num = 0;
2063 
2064 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2065 	list_for_each_entry(master_domain, &smmu_domain->devices,
2066 			    devices_elm) {
2067 		struct arm_smmu_master *master = master_domain->master;
2068 
2069 		if (!master->ats_enabled)
2070 			continue;
2071 
2072 		arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, &cmd);
2073 
2074 		for (i = 0; i < master->num_streams; i++) {
2075 			cmd.atc.sid = master->streams[i].id;
2076 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
2077 		}
2078 	}
2079 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2080 
2081 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
2082 }
2083 
2084 /* IO_PGTABLE API */
2085 static void arm_smmu_tlb_inv_context(void *cookie)
2086 {
2087 	struct arm_smmu_domain *smmu_domain = cookie;
2088 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2089 	struct arm_smmu_cmdq_ent cmd;
2090 
2091 	/*
2092 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2093 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
2094 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2095 	 * insertion to guarantee those are observed before the TLBI. Do be
2096 	 * careful, 007.
2097 	 */
2098 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2099 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
2100 	} else {
2101 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
2102 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2103 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
2104 	}
2105 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
2106 }
2107 
2108 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
2109 				     unsigned long iova, size_t size,
2110 				     size_t granule,
2111 				     struct arm_smmu_domain *smmu_domain)
2112 {
2113 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2114 	unsigned long end = iova + size, num_pages = 0, tg = 0;
2115 	size_t inv_range = granule;
2116 	struct arm_smmu_cmdq_batch cmds;
2117 
2118 	if (!size)
2119 		return;
2120 
2121 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2122 		/* Get the leaf page size */
2123 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2124 
2125 		num_pages = size >> tg;
2126 
2127 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
2128 		cmd->tlbi.tg = (tg - 10) / 2;
2129 
2130 		/*
2131 		 * Determine what level the granule is at. For non-leaf, both
2132 		 * io-pgtable and SVA pass a nominal last-level granule because
2133 		 * they don't know what level(s) actually apply, so ignore that
2134 		 * and leave TTL=0. However for various errata reasons we still
2135 		 * want to use a range command, so avoid the SVA corner case
2136 		 * where both scale and num could be 0 as well.
2137 		 */
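		/*
		 * Informative example: with 4KiB leaf pages (tg == 12) and a
		 * 2MiB block being invalidated (ilog2(granule) == 21) the
		 * formula below gives ttl = 4 - (18 / 9) = 2, i.e. a level-2
		 * entry in the 4KiB translation regime.
		 */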
2138 		if (cmd->tlbi.leaf)
2139 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
2140 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
2141 			num_pages++;
2142 	}
2143 
2144 	cmds.num = 0;
2145 
2146 	while (iova < end) {
2147 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2148 			/*
2149 			 * On each iteration of the loop, the range is 5 bits
2150 			 * worth of the aligned size remaining.
2151 			 * The range in pages is:
2152 			 *
2153 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
2154 			 */
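			/*
			 * Informative example: invalidating 35 4KiB pages
			 * (num_pages == 0b100011) takes two commands; the
			 * first has scale = 0, num = 3 (3 pages), the second
			 * scale = 5, num = 1 (the remaining 32 pages).
			 */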
2155 			unsigned long scale, num;
2156 
2157 			/* Determine the power of 2 multiple number of pages */
2158 			scale = __ffs(num_pages);
2159 			cmd->tlbi.scale = scale;
2160 
2161 			/* Determine how many chunks of 2^scale size we have */
2162 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2163 			cmd->tlbi.num = num - 1;
2164 
2165 			/* range is num * 2^scale * pgsize */
2166 			inv_range = num << (scale + tg);
2167 
2168 			/* Clear out the lower order bits for the next iteration */
2169 			num_pages -= num << scale;
2170 		}
2171 
2172 		cmd->tlbi.addr = iova;
2173 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
2174 		iova += inv_range;
2175 	}
2176 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
2177 }
2178 
2179 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
2180 					  size_t granule, bool leaf,
2181 					  struct arm_smmu_domain *smmu_domain)
2182 {
2183 	struct arm_smmu_cmdq_ent cmd = {
2184 		.tlbi = {
2185 			.leaf	= leaf,
2186 		},
2187 	};
2188 
2189 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2190 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2191 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
2192 		cmd.tlbi.asid	= smmu_domain->cd.asid;
2193 	} else {
2194 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
2195 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2196 	}
2197 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2198 
2199 	/*
2200 	 * Unfortunately, this can't be leaf-only since we may have
2201 	 * zapped an entire table.
2202 	 */
2203 	arm_smmu_atc_inv_domain(smmu_domain, iova, size);
2204 }
2205 
2206 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
2207 				 size_t granule, bool leaf,
2208 				 struct arm_smmu_domain *smmu_domain)
2209 {
2210 	struct arm_smmu_cmdq_ent cmd = {
2211 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2212 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
2213 		.tlbi = {
2214 			.asid	= asid,
2215 			.leaf	= leaf,
2216 		},
2217 	};
2218 
2219 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2220 }
2221 
2222 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2223 					 unsigned long iova, size_t granule,
2224 					 void *cookie)
2225 {
2226 	struct arm_smmu_domain *smmu_domain = cookie;
2227 	struct iommu_domain *domain = &smmu_domain->domain;
2228 
2229 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2230 }
2231 
2232 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2233 				  size_t granule, void *cookie)
2234 {
2235 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2236 }
2237 
2238 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2239 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2240 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2241 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2242 };
2243 
2244 static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
2245 {
2246 	u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
2247 
2248 	return (smmu->features & features) == features;
2249 }
2250 
2251 /* IOMMU API */
2252 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2253 {
2254 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2255 
2256 	switch (cap) {
2257 	case IOMMU_CAP_CACHE_COHERENCY:
2258 		/* Assume that a coherent TCU implies coherent TBUs */
2259 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2260 	case IOMMU_CAP_NOEXEC:
2261 	case IOMMU_CAP_DEFERRED_FLUSH:
2262 		return true;
2263 	case IOMMU_CAP_DIRTY_TRACKING:
2264 		return arm_smmu_dbm_capable(master->smmu);
2265 	default:
2266 		return false;
2267 	}
2268 }
2269 
2270 struct arm_smmu_domain *arm_smmu_domain_alloc(void)
2271 {
2272 	struct arm_smmu_domain *smmu_domain;
2273 
2274 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2275 	if (!smmu_domain)
2276 		return ERR_PTR(-ENOMEM);
2277 
2278 	mutex_init(&smmu_domain->init_mutex);
2279 	INIT_LIST_HEAD(&smmu_domain->devices);
2280 	spin_lock_init(&smmu_domain->devices_lock);
2281 
2282 	return smmu_domain;
2283 }
2284 
2285 static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
2286 {
2287 	struct arm_smmu_domain *smmu_domain;
2288 
2289 	/*
2290 	 * Allocate the domain and initialise some of its data structures.
2291 	 * We can't really do anything meaningful until we've added a
2292 	 * master.
2293 	 */
2294 	smmu_domain = arm_smmu_domain_alloc();
2295 	if (IS_ERR(smmu_domain))
2296 		return ERR_CAST(smmu_domain);
2297 
2298 	if (dev) {
2299 		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2300 		int ret;
2301 
2302 		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0);
2303 		if (ret) {
2304 			kfree(smmu_domain);
2305 			return ERR_PTR(ret);
2306 		}
2307 	}
2308 	return &smmu_domain->domain;
2309 }
2310 
2311 static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
2312 {
2313 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2314 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2315 
2316 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2317 
2318 	/* Free the ASID or VMID */
2319 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2320 		/* Prevent SVA from touching the CD while we're freeing it */
2321 		mutex_lock(&arm_smmu_asid_lock);
2322 		xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
2323 		mutex_unlock(&arm_smmu_asid_lock);
2324 	} else {
2325 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2326 		if (cfg->vmid)
2327 			ida_free(&smmu->vmid_map, cfg->vmid);
2328 	}
2329 
2330 	kfree(smmu_domain);
2331 }
2332 
2333 static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
2334 				       struct arm_smmu_domain *smmu_domain)
2335 {
2336 	int ret;
2337 	u32 asid = 0;
2338 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2339 
2340 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2341 	mutex_lock(&arm_smmu_asid_lock);
2342 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
2343 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2344 	cd->asid	= (u16)asid;
2345 	mutex_unlock(&arm_smmu_asid_lock);
2346 	return ret;
2347 }
2348 
2349 static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
2350 				       struct arm_smmu_domain *smmu_domain)
2351 {
2352 	int vmid;
2353 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2354 
2355 	/* Reserve VMID 0 for stage-2 bypass STEs */
2356 	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2357 			       GFP_KERNEL);
2358 	if (vmid < 0)
2359 		return vmid;
2360 
2361 	cfg->vmid	= (u16)vmid;
2362 	return 0;
2363 }
2364 
2365 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
2366 				    struct arm_smmu_device *smmu, u32 flags)
2367 {
2368 	int ret;
2369 	enum io_pgtable_fmt fmt;
2370 	struct io_pgtable_cfg pgtbl_cfg;
2371 	struct io_pgtable_ops *pgtbl_ops;
2372 	int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
2373 				 struct arm_smmu_domain *smmu_domain);
2374 	bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
2375 
2376 	/* Restrict the stage to what we can actually support */
2377 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2378 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2379 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2380 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2381 
2382 	pgtbl_cfg = (struct io_pgtable_cfg) {
2383 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2384 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2385 		.tlb		= &arm_smmu_flush_ops,
2386 		.iommu_dev	= smmu->dev,
2387 	};
2388 
2389 	switch (smmu_domain->stage) {
2390 	case ARM_SMMU_DOMAIN_S1: {
2391 		unsigned long ias = (smmu->features &
2392 				     ARM_SMMU_FEAT_VAX) ? 52 : 48;
2393 
2394 		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
2395 		pgtbl_cfg.oas = smmu->ias;
2396 		if (enable_dirty)
2397 			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
2398 		fmt = ARM_64_LPAE_S1;
2399 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2400 		break;
2401 	}
2402 	case ARM_SMMU_DOMAIN_S2:
2403 		if (enable_dirty)
2404 			return -EOPNOTSUPP;
2405 		pgtbl_cfg.ias = smmu->ias;
2406 		pgtbl_cfg.oas = smmu->oas;
2407 		fmt = ARM_64_LPAE_S2;
2408 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2409 		break;
2410 	default:
2411 		return -EINVAL;
2412 	}
2413 
2414 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2415 	if (!pgtbl_ops)
2416 		return -ENOMEM;
2417 
2418 	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2419 	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2420 	smmu_domain->domain.geometry.force_aperture = true;
2421 	if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
2422 		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
2423 
2424 	ret = finalise_stage_fn(smmu, smmu_domain);
2425 	if (ret < 0) {
2426 		free_io_pgtable_ops(pgtbl_ops);
2427 		return ret;
2428 	}
2429 
2430 	smmu_domain->pgtbl_ops = pgtbl_ops;
2431 	smmu_domain->smmu = smmu;
2432 	return 0;
2433 }
2434 
2435 static struct arm_smmu_ste *
2436 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2437 {
2438 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2439 
2440 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2441 		unsigned int idx1, idx2;
2442 
2443 		/* Two-level walk */
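		/*
		 * e.g. with STRTAB_SPLIT == 8, SID 0x1234 selects L1
		 * descriptor 0x12 and STE 0x34 within its 256-entry L2 table.
		 */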
2444 		idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2445 		idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
2446 		return &cfg->l1_desc[idx1].l2ptr[idx2];
2447 	} else {
2448 		/* Simple linear lookup */
2449 		return (struct arm_smmu_ste *)&cfg
2450 			       ->strtab[sid * STRTAB_STE_DWORDS];
2451 	}
2452 }
2453 
2454 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
2455 					 const struct arm_smmu_ste *target)
2456 {
2457 	int i, j;
2458 	struct arm_smmu_device *smmu = master->smmu;
2459 
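	/*
	 * Record whether the STE being installed points at a CD table and has
	 * EATS enabled; arm_smmu_update_ste() and arm_smmu_set_pasid() rely on
	 * this when attaching or detaching PASIDs later.
	 */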
2460 	master->cd_table.in_ste =
2461 		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) ==
2462 		STRTAB_STE_0_CFG_S1_TRANS;
2463 	master->ste_ats_enabled =
2464 		FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) ==
2465 		STRTAB_STE_1_EATS_TRANS;
2466 
2467 	for (i = 0; i < master->num_streams; ++i) {
2468 		u32 sid = master->streams[i].id;
2469 		struct arm_smmu_ste *step =
2470 			arm_smmu_get_step_for_sid(smmu, sid);
2471 
2472 		/* Bridged PCI devices may end up with duplicated IDs */
2473 		for (j = 0; j < i; j++)
2474 			if (master->streams[j].id == sid)
2475 				break;
2476 		if (j < i)
2477 			continue;
2478 
2479 		arm_smmu_write_ste(master, sid, step, target);
2480 	}
2481 }
2482 
2483 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2484 {
2485 	struct device *dev = master->dev;
2486 	struct arm_smmu_device *smmu = master->smmu;
2487 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2488 
2489 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2490 		return false;
2491 
2492 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2493 		return false;
2494 
2495 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2496 }
2497 
2498 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2499 {
2500 	size_t stu;
2501 	struct pci_dev *pdev;
2502 	struct arm_smmu_device *smmu = master->smmu;
2503 
2504 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2505 	stu = __ffs(smmu->pgsize_bitmap);
2506 	pdev = to_pci_dev(master->dev);
2507 
2508 	/*
2509 	 * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
2510 	 */
2511 	arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2512 	if (pci_enable_ats(pdev, stu))
2513 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2514 }
2515 
2516 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2517 {
2518 	int ret;
2519 	int features;
2520 	int num_pasids;
2521 	struct pci_dev *pdev;
2522 
2523 	if (!dev_is_pci(master->dev))
2524 		return -ENODEV;
2525 
2526 	pdev = to_pci_dev(master->dev);
2527 
2528 	features = pci_pasid_features(pdev);
2529 	if (features < 0)
2530 		return features;
2531 
2532 	num_pasids = pci_max_pasids(pdev);
2533 	if (num_pasids <= 0)
2534 		return num_pasids;
2535 
2536 	ret = pci_enable_pasid(pdev, features);
2537 	if (ret) {
2538 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2539 		return ret;
2540 	}
2541 
2542 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2543 				  master->smmu->ssid_bits);
2544 	return 0;
2545 }
2546 
2547 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2548 {
2549 	struct pci_dev *pdev;
2550 
2551 	if (!dev_is_pci(master->dev))
2552 		return;
2553 
2554 	pdev = to_pci_dev(master->dev);
2555 
2556 	if (!pdev->pasid_enabled)
2557 		return;
2558 
2559 	master->ssid_bits = 0;
2560 	pci_disable_pasid(pdev);
2561 }
2562 
2563 static struct arm_smmu_master_domain *
2564 arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
2565 			    struct arm_smmu_master *master,
2566 			    ioasid_t ssid)
2567 {
2568 	struct arm_smmu_master_domain *master_domain;
2569 
2570 	lockdep_assert_held(&smmu_domain->devices_lock);
2571 
2572 	list_for_each_entry(master_domain, &smmu_domain->devices,
2573 			    devices_elm) {
2574 		if (master_domain->master == master &&
2575 		    master_domain->ssid == ssid)
2576 			return master_domain;
2577 	}
2578 	return NULL;
2579 }
2580 
2581 /*
2582  * If the domain uses the smmu_domain->devices list return the arm_smmu_domain
2583  * structure, otherwise NULL. These domains track attached devices so they can
2584  * issue invalidations.
2585  */
2586 static struct arm_smmu_domain *
2587 to_smmu_domain_devices(struct iommu_domain *domain)
2588 {
2589 	/* The domain can be NULL only when processing the first attach */
2590 	if (!domain)
2591 		return NULL;
2592 	if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
2593 	    domain->type == IOMMU_DOMAIN_SVA)
2594 		return to_smmu_domain(domain);
2595 	return NULL;
2596 }
2597 
2598 static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
2599 					  struct iommu_domain *domain,
2600 					  ioasid_t ssid)
2601 {
2602 	struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain);
2603 	struct arm_smmu_master_domain *master_domain;
2604 	unsigned long flags;
2605 
2606 	if (!smmu_domain)
2607 		return;
2608 
2609 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2610 	master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid);
2611 	if (master_domain) {
2612 		list_del(&master_domain->devices_elm);
2613 		kfree(master_domain);
2614 		if (master->ats_enabled)
2615 			atomic_dec(&smmu_domain->nr_ats_masters);
2616 	}
2617 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2618 }
2619 
2620 struct arm_smmu_attach_state {
2621 	/* Inputs */
2622 	struct iommu_domain *old_domain;
2623 	struct arm_smmu_master *master;
2624 	bool cd_needs_ats;
2625 	ioasid_t ssid;
2626 	/* Resulting state */
2627 	bool ats_enabled;
2628 };
2629 
2630 /*
2631  * Start the sequence to attach a domain to a master. The sequence contains three
2632  * steps:
2633  *  arm_smmu_attach_prepare()
2634  *  arm_smmu_install_ste_for_dev()
2635  *  arm_smmu_attach_commit()
2636  *
2637  * If prepare succeeds then the sequence must be completed. The STE installed
2638  * must set the STE.EATS field according to state.ats_enabled.
2639  *
2640  * If the device supports ATS then this determines if EATS should be enabled
2641  * in the STE, and starts sequencing EATS disable if required.
2642  *
2643  * The changes to STE.EATS and to the PCI ATS config space are sequenced by
2644  * this code so that they happen in the right order: PCI ATS is only ever
2645  * enabled while STE.EATS is enabled.
2646  *
2647  * new_domain can be a non-paging domain. In this case ATS will not be enabled,
2648  * and invalidations won't be tracked.
2649  */
2650 static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
2651 				   struct iommu_domain *new_domain)
2652 {
2653 	struct arm_smmu_master *master = state->master;
2654 	struct arm_smmu_master_domain *master_domain;
2655 	struct arm_smmu_domain *smmu_domain =
2656 		to_smmu_domain_devices(new_domain);
2657 	unsigned long flags;
2658 
2659 	/*
2660 	 * arm_smmu_share_asid() must not see two domains pointing to the same
2661 	 * arm_smmu_master_domain contents otherwise it could randomly write one
2662 	 * or the other to the CD.
2663 	 */
2664 	lockdep_assert_held(&arm_smmu_asid_lock);
2665 
2666 	if (smmu_domain || state->cd_needs_ats) {
2667 		/*
2668 		 * The SMMU does not support enabling ATS with bypass/abort.
2669 		 * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
2670 		 * Translation Requests and Translated transactions are denied
2671 		 * as though ATS is disabled for the stream (STE.EATS == 0b00),
2672 		 * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
2673 		 * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be
2674 		 * enabled if we have an arm_smmu_domain, and those always have
2675 		 * page tables.
2676 		 */
2677 		state->ats_enabled = arm_smmu_ats_supported(master);
2678 	}
2679 
2680 	if (smmu_domain) {
2681 		master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
2682 		if (!master_domain)
2683 			return -ENOMEM;
2684 		master_domain->master = master;
2685 		master_domain->ssid = state->ssid;
2686 
2687 		/*
2688 		 * During prepare we want the current smmu_domain and new
2689 		 * smmu_domain to be in the devices list before we change any
2690 		 * HW. This ensures that both domains will send ATS
2691 		 * invalidations to the master until we are done.
2692 		 *
2693 		 * It is tempting to make this list only track masters that are
2694 		 * using ATS, but arm_smmu_share_asid() also uses this to change
2695 		 * the ASID of a domain, unrelated to ATS.
2696 		 *
2697 		 * Notice if we are re-attaching the same domain then the list
2698 		 * will have two identical entries and commit will remove only
2699 		 * one of them.
2700 		 */
2701 		spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2702 		if (state->ats_enabled)
2703 			atomic_inc(&smmu_domain->nr_ats_masters);
2704 		list_add(&master_domain->devices_elm, &smmu_domain->devices);
2705 		spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2706 	}
2707 
2708 	if (!state->ats_enabled && master->ats_enabled) {
2709 		pci_disable_ats(to_pci_dev(master->dev));
2710 		/*
2711 		 * This is probably overkill, but the config write for disabling
2712 		 * ATS should complete before the STE is configured to generate
2713 		 * UR to avoid AER noise.
2714 		 */
2715 		wmb();
2716 	}
2717 	return 0;
2718 }
2719 
2720 /*
2721  * Commit is done after the STE/CD are configured with the EATS setting. It
2722  * completes synchronizing the PCI device's ATC and finishes manipulating the
2723  * smmu_domain->devices list.
2724  */
2725 static void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
2726 {
2727 	struct arm_smmu_master *master = state->master;
2728 
2729 	lockdep_assert_held(&arm_smmu_asid_lock);
2730 
2731 	if (state->ats_enabled && !master->ats_enabled) {
2732 		arm_smmu_enable_ats(master);
2733 	} else if (state->ats_enabled && master->ats_enabled) {
2734 		/*
2735 		 * The translation has changed, flush the ATC. At this point the
2736 		 * SMMU is translating for the new domain and both the old and
2737 		 * new domains will issue invalidations.
2738 		 */
2739 		arm_smmu_atc_inv_master(master, state->ssid);
2740 	} else if (!state->ats_enabled && master->ats_enabled) {
2741 		/* ATS is being switched off, invalidate the entire ATC */
2742 		arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2743 	}
2744 	master->ats_enabled = state->ats_enabled;
2745 
2746 	arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);
2747 }
2748 
2749 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2750 {
2751 	int ret = 0;
2752 	struct arm_smmu_ste target;
2753 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2754 	struct arm_smmu_device *smmu;
2755 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2756 	struct arm_smmu_attach_state state = {
2757 		.old_domain = iommu_get_domain_for_dev(dev),
2758 		.ssid = IOMMU_NO_PASID,
2759 	};
2760 	struct arm_smmu_master *master;
2761 	struct arm_smmu_cd *cdptr;
2762 
2763 	if (!fwspec)
2764 		return -ENOENT;
2765 
2766 	state.master = master = dev_iommu_priv_get(dev);
2767 	smmu = master->smmu;
2768 
2769 	mutex_lock(&smmu_domain->init_mutex);
2770 
2771 	if (!smmu_domain->smmu) {
2772 		ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
2773 	} else if (smmu_domain->smmu != smmu)
2774 		ret = -EINVAL;
2775 
2776 	mutex_unlock(&smmu_domain->init_mutex);
2777 	if (ret)
2778 		return ret;
2779 
2780 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2781 		cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
2782 		if (!cdptr)
2783 			return -ENOMEM;
2784 	} else if (arm_smmu_ssids_in_use(&master->cd_table))
2785 		return -EBUSY;
2786 
2787 	/*
2788 	 * Prevent arm_smmu_share_asid() from trying to change the ASID
2789 	 * of either the old or new domain while we are working on it.
2790 	 * This allows the STE and the smmu_domain->devices list to
2791 	 * be inconsistent during this routine.
2792 	 */
2793 	mutex_lock(&arm_smmu_asid_lock);
2794 
2795 	ret = arm_smmu_attach_prepare(&state, domain);
2796 	if (ret) {
2797 		mutex_unlock(&arm_smmu_asid_lock);
2798 		return ret;
2799 	}
2800 
2801 	switch (smmu_domain->stage) {
2802 	case ARM_SMMU_DOMAIN_S1: {
2803 		struct arm_smmu_cd target_cd;
2804 
2805 		arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
2806 		arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
2807 					&target_cd);
2808 		arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled,
2809 					  STRTAB_STE_1_S1DSS_SSID0);
2810 		arm_smmu_install_ste_for_dev(master, &target);
2811 		break;
2812 	}
2813 	case ARM_SMMU_DOMAIN_S2:
2814 		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain,
2815 					    state.ats_enabled);
2816 		arm_smmu_install_ste_for_dev(master, &target);
2817 		arm_smmu_clear_cd(master, IOMMU_NO_PASID);
2818 		break;
2819 	}
2820 
2821 	arm_smmu_attach_commit(&state);
2822 	mutex_unlock(&arm_smmu_asid_lock);
2823 	return 0;
2824 }
2825 
2826 static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
2827 				      struct device *dev, ioasid_t id)
2828 {
2829 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2830 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2831 	struct arm_smmu_device *smmu = master->smmu;
2832 	struct arm_smmu_cd target_cd;
2833 	int ret = 0;
2834 
2835 	mutex_lock(&smmu_domain->init_mutex);
2836 	if (!smmu_domain->smmu)
2837 		ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
2838 	else if (smmu_domain->smmu != smmu)
2839 		ret = -EINVAL;
2840 	mutex_unlock(&smmu_domain->init_mutex);
2841 	if (ret)
2842 		return ret;
2843 
2844 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
2845 		return -EINVAL;
2846 
2847 	/*
2848 	 * We can read cd.asid outside the lock because arm_smmu_set_pasid()
2849 	 * will fix it.
2850 	 */
2851 	arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
2852 	return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
2853 				  &target_cd);
2854 }
2855 
2856 static void arm_smmu_update_ste(struct arm_smmu_master *master,
2857 				struct iommu_domain *sid_domain,
2858 				bool ats_enabled)
2859 {
2860 	unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE;
2861 	struct arm_smmu_ste ste;
2862 
2863 	if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled)
2864 		return;
2865 
2866 	if (sid_domain->type == IOMMU_DOMAIN_IDENTITY)
2867 		s1dss = STRTAB_STE_1_S1DSS_BYPASS;
2868 	else
2869 		WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED);
2870 
2871 	/*
2872 	 * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior
2873 	 * using s1dss if necessary. If the cd_table is already installed then
2874 	 * the S1DSS is correct and this will just update the EATS. Otherwise it
2875 	 * installs the entire thing. This will be hitless.
2876 	 */
2877 	arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
2878 	arm_smmu_install_ste_for_dev(master, &ste);
2879 }
2880 
2881 int arm_smmu_set_pasid(struct arm_smmu_master *master,
2882 		       struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
2883 		       struct arm_smmu_cd *cd)
2884 {
2885 	struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
2886 	struct arm_smmu_attach_state state = {
2887 		.master = master,
2888 		/*
2889 		 * For now the core code prevents calling this when a domain is
2890 		 * already attached, so there is no need to set old_domain.
2891 		 */
2892 		.ssid = pasid,
2893 	};
2894 	struct arm_smmu_cd *cdptr;
2895 	int ret;
2896 
2897 	/* The core code validates pasid */
2898 
2899 	if (smmu_domain->smmu != master->smmu)
2900 		return -EINVAL;
2901 
2902 	if (!master->cd_table.in_ste &&
2903 	    sid_domain->type != IOMMU_DOMAIN_IDENTITY &&
2904 	    sid_domain->type != IOMMU_DOMAIN_BLOCKED)
2905 		return -EINVAL;
2906 
2907 	cdptr = arm_smmu_alloc_cd_ptr(master, pasid);
2908 	if (!cdptr)
2909 		return -ENOMEM;
2910 
2911 	mutex_lock(&arm_smmu_asid_lock);
2912 	ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain);
2913 	if (ret)
2914 		goto out_unlock;
2915 
2916 	/*
2917 	 * We don't want to take the asid_lock too early, so fix up the
2918 	 * caller-set ASID under the lock in case it changed.
2919 	 */
2920 	cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
2921 	cd->data[0] |= cpu_to_le64(
2922 		FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));
2923 
2924 	arm_smmu_write_cd_entry(master, pasid, cdptr, cd);
2925 	arm_smmu_update_ste(master, sid_domain, state.ats_enabled);
2926 
2927 	arm_smmu_attach_commit(&state);
2928 
2929 out_unlock:
2930 	mutex_unlock(&arm_smmu_asid_lock);
2931 	return ret;
2932 }
2933 
2934 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
2935 				      struct iommu_domain *domain)
2936 {
2937 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2938 	struct arm_smmu_domain *smmu_domain;
2939 
2940 	smmu_domain = to_smmu_domain(domain);
2941 
2942 	mutex_lock(&arm_smmu_asid_lock);
2943 	arm_smmu_clear_cd(master, pasid);
2944 	if (master->ats_enabled)
2945 		arm_smmu_atc_inv_master(master, pasid);
2946 	arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);
2947 	mutex_unlock(&arm_smmu_asid_lock);
2948 
2949 	/*
2950 	 * When the last user of the CD table goes away, downgrade the STE back
2951 	 * to a non-cd_table one.
2952 	 */
2953 	if (!arm_smmu_ssids_in_use(&master->cd_table)) {
2954 		struct iommu_domain *sid_domain =
2955 			iommu_get_domain_for_dev(master->dev);
2956 
2957 		if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
2958 		    sid_domain->type == IOMMU_DOMAIN_BLOCKED)
2959 			sid_domain->ops->attach_dev(sid_domain, dev);
2960 	}
2961 }
2962 
2963 static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
2964 				    struct device *dev,
2965 				    struct arm_smmu_ste *ste,
2966 				    unsigned int s1dss)
2967 {
2968 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2969 	struct arm_smmu_attach_state state = {
2970 		.master = master,
2971 		.old_domain = iommu_get_domain_for_dev(dev),
2972 		.ssid = IOMMU_NO_PASID,
2973 	};
2974 
2975 	/*
2976 	 * Do not allow any ASID to be changed while we are working on the STE,
2977 	 * otherwise we could miss invalidations.
2978 	 */
2979 	mutex_lock(&arm_smmu_asid_lock);
2980 
2981 	/*
2982 	 * If the CD table is not in use we can use the provided STE, otherwise
2983 	 * we use a cdtable STE with the provided S1DSS.
2984 	 */
2985 	if (arm_smmu_ssids_in_use(&master->cd_table)) {
2986 		/*
2987 		 * If a CD table has to be present then we need to run with ATS
2988 		 * on even though the RID will fail ATS queries with UR. This is
2989 		 * because we have no idea what the PASIDs need.
2990 		 */
2991 		state.cd_needs_ats = true;
2992 		arm_smmu_attach_prepare(&state, domain);
2993 		arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss);
2994 	} else {
2995 		arm_smmu_attach_prepare(&state, domain);
2996 	}
2997 	arm_smmu_install_ste_for_dev(master, ste);
2998 	arm_smmu_attach_commit(&state);
2999 	mutex_unlock(&arm_smmu_asid_lock);
3000 
3001 	/*
3002 	 * This has to be done after removing the master from the
3003 	 * arm_smmu_domain->devices to avoid races updating the same context
3004 	 * descriptor from arm_smmu_share_asid().
3005 	 */
3006 	arm_smmu_clear_cd(master, IOMMU_NO_PASID);
3007 }
3008 
3009 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
3010 					struct device *dev)
3011 {
3012 	struct arm_smmu_ste ste;
3013 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3014 
3015 	arm_smmu_make_bypass_ste(master->smmu, &ste);
3016 	arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
3017 	return 0;
3018 }
3019 
3020 static const struct iommu_domain_ops arm_smmu_identity_ops = {
3021 	.attach_dev = arm_smmu_attach_dev_identity,
3022 };
3023 
3024 static struct iommu_domain arm_smmu_identity_domain = {
3025 	.type = IOMMU_DOMAIN_IDENTITY,
3026 	.ops = &arm_smmu_identity_ops,
3027 };
3028 
3029 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
3030 					struct device *dev)
3031 {
3032 	struct arm_smmu_ste ste;
3033 
3034 	arm_smmu_make_abort_ste(&ste);
3035 	arm_smmu_attach_dev_ste(domain, dev, &ste,
3036 				STRTAB_STE_1_S1DSS_TERMINATE);
3037 	return 0;
3038 }
3039 
3040 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
3041 	.attach_dev = arm_smmu_attach_dev_blocked,
3042 };
3043 
3044 static struct iommu_domain arm_smmu_blocked_domain = {
3045 	.type = IOMMU_DOMAIN_BLOCKED,
3046 	.ops = &arm_smmu_blocked_ops,
3047 };
3048 
3049 static struct iommu_domain *
3050 arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
3051 			   struct iommu_domain *parent,
3052 			   const struct iommu_user_data *user_data)
3053 {
3054 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3055 	const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
3056 	struct arm_smmu_domain *smmu_domain;
3057 	int ret;
3058 
3059 	if (flags & ~PAGING_FLAGS)
3060 		return ERR_PTR(-EOPNOTSUPP);
3061 	if (parent || user_data)
3062 		return ERR_PTR(-EOPNOTSUPP);
3063 
3064 	smmu_domain = arm_smmu_domain_alloc();
3065 	if (IS_ERR(smmu_domain))
3066 		return ERR_CAST(smmu_domain);
3067 
3068 	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
3069 	smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
3070 	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags);
3071 	if (ret)
3072 		goto err_free;
3073 	return &smmu_domain->domain;
3074 
3075 err_free:
3076 	kfree(smmu_domain);
3077 	return ERR_PTR(ret);
3078 }
3079 
3080 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
3081 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
3082 			      int prot, gfp_t gfp, size_t *mapped)
3083 {
3084 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
3085 
3086 	if (!ops)
3087 		return -ENODEV;
3088 
3089 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
3090 }
3091 
3092 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
3093 				   size_t pgsize, size_t pgcount,
3094 				   struct iommu_iotlb_gather *gather)
3095 {
3096 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3097 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
3098 
3099 	if (!ops)
3100 		return 0;
3101 
3102 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
3103 }
3104 
3105 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
3106 {
3107 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3108 
3109 	if (smmu_domain->smmu)
3110 		arm_smmu_tlb_inv_context(smmu_domain);
3111 }
3112 
3113 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
3114 				struct iommu_iotlb_gather *gather)
3115 {
3116 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3117 
3118 	if (!gather->pgsize)
3119 		return;
3120 
3121 	arm_smmu_tlb_inv_range_domain(gather->start,
3122 				      gather->end - gather->start + 1,
3123 				      gather->pgsize, true, smmu_domain);
3124 }
3125 
3126 static phys_addr_t
3127 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
3128 {
3129 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
3130 
3131 	if (!ops)
3132 		return 0;
3133 
3134 	return ops->iova_to_phys(ops, iova);
3135 }
3136 
3137 static struct platform_driver arm_smmu_driver;
3138 
3139 static
3140 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
3141 {
3142 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
3143 							  fwnode);
3144 	put_device(dev);
3145 	return dev ? dev_get_drvdata(dev) : NULL;
3146 }
3147 
3148 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
3149 {
3150 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
3151 
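	/* Each L1 entry of a two-level table covers 1 << STRTAB_SPLIT SIDs */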
3152 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3153 		limit *= 1UL << STRTAB_SPLIT;
3154 
3155 	return sid < limit;
3156 }
3157 
3158 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
3159 {
3160 	/* Check the SIDs are in range of the SMMU and our stream table */
3161 	if (!arm_smmu_sid_in_range(smmu, sid))
3162 		return -ERANGE;
3163 
3164 	/* Ensure l2 strtab is initialised */
3165 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3166 		return arm_smmu_init_l2_strtab(smmu, sid);
3167 
3168 	return 0;
3169 }
3170 
3171 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
3172 				  struct arm_smmu_master *master)
3173 {
3174 	int i;
3175 	int ret = 0;
3176 	struct arm_smmu_stream *new_stream, *cur_stream;
3177 	struct rb_node **new_node, *parent_node = NULL;
3178 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
3179 
3180 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
3181 				  GFP_KERNEL);
3182 	if (!master->streams)
3183 		return -ENOMEM;
3184 	master->num_streams = fwspec->num_ids;
3185 
3186 	mutex_lock(&smmu->streams_mutex);
3187 	for (i = 0; i < fwspec->num_ids; i++) {
3188 		u32 sid = fwspec->ids[i];
3189 
3190 		new_stream = &master->streams[i];
3191 		new_stream->id = sid;
3192 		new_stream->master = master;
3193 
3194 		ret = arm_smmu_init_sid_strtab(smmu, sid);
3195 		if (ret)
3196 			break;
3197 
3198 		/* Insert into SID tree */
3199 		new_node = &(smmu->streams.rb_node);
3200 		while (*new_node) {
3201 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
3202 					      node);
3203 			parent_node = *new_node;
3204 			if (cur_stream->id > new_stream->id) {
3205 				new_node = &((*new_node)->rb_left);
3206 			} else if (cur_stream->id < new_stream->id) {
3207 				new_node = &((*new_node)->rb_right);
3208 			} else {
3209 				dev_warn(master->dev,
3210 					 "stream %u already in tree\n",
3211 					 cur_stream->id);
3212 				ret = -EINVAL;
3213 				break;
3214 			}
3215 		}
3216 		if (ret)
3217 			break;
3218 
3219 		rb_link_node(&new_stream->node, parent_node, new_node);
3220 		rb_insert_color(&new_stream->node, &smmu->streams);
3221 	}
3222 
3223 	if (ret) {
3224 		for (i--; i >= 0; i--)
3225 			rb_erase(&master->streams[i].node, &smmu->streams);
3226 		kfree(master->streams);
3227 	}
3228 	mutex_unlock(&smmu->streams_mutex);
3229 
3230 	return ret;
3231 }
3232 
3233 static void arm_smmu_remove_master(struct arm_smmu_master *master)
3234 {
3235 	int i;
3236 	struct arm_smmu_device *smmu = master->smmu;
3237 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
3238 
3239 	if (!smmu || !master->streams)
3240 		return;
3241 
3242 	mutex_lock(&smmu->streams_mutex);
3243 	for (i = 0; i < fwspec->num_ids; i++)
3244 		rb_erase(&master->streams[i].node, &smmu->streams);
3245 	mutex_unlock(&smmu->streams_mutex);
3246 
3247 	kfree(master->streams);
3248 }
3249 
3250 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
3251 {
3252 	int ret;
3253 	struct arm_smmu_device *smmu;
3254 	struct arm_smmu_master *master;
3255 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
3256 
3257 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
3258 		return ERR_PTR(-EBUSY);
3259 
3260 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
3261 	if (!smmu)
3262 		return ERR_PTR(-ENODEV);
3263 
3264 	master = kzalloc(sizeof(*master), GFP_KERNEL);
3265 	if (!master)
3266 		return ERR_PTR(-ENOMEM);
3267 
3268 	master->dev = dev;
3269 	master->smmu = smmu;
3270 	dev_iommu_priv_set(dev, master);
3271 
3272 	ret = arm_smmu_insert_master(smmu, master);
3273 	if (ret)
3274 		goto err_free_master;
3275 
3276 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
3277 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
3278 
3279 	/*
3280 	 * Note that PASID must be enabled before, and disabled after ATS:
3281 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
3282 	 *
3283 	 *   Behavior is undefined if this bit is Set and the value of the PASID
3284 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
3285 	 *   are changed.
3286 	 */
3287 	arm_smmu_enable_pasid(master);
3288 
3289 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
3290 		master->ssid_bits = min_t(u8, master->ssid_bits,
3291 					  CTXDESC_LINEAR_CDMAX);
3292 
3293 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
3294 	     device_property_read_bool(dev, "dma-can-stall")) ||
3295 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
3296 		master->stall_enabled = true;
3297 
3298 	return &smmu->iommu;
3299 
3300 err_free_master:
3301 	kfree(master);
3302 	return ERR_PTR(ret);
3303 }
3304 
3305 static void arm_smmu_release_device(struct device *dev)
3306 {
3307 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3308 
3309 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
3310 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
3311 
3312 	/* Put the STE back to what arm_smmu_init_strtab() sets */
3313 	if (dev->iommu->require_direct)
3314 		arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
3315 	else
3316 		arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
3317 
3318 	arm_smmu_disable_pasid(master);
3319 	arm_smmu_remove_master(master);
3320 	if (master->cd_table.cdtab)
3321 		arm_smmu_free_cd_tables(master);
3322 	kfree(master);
3323 }
3324 
3325 static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
3326 					 unsigned long iova, size_t size,
3327 					 unsigned long flags,
3328 					 struct iommu_dirty_bitmap *dirty)
3329 {
3330 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3331 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
3332 
3333 	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
3334 }
3335 
3336 static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
3337 				       bool enabled)
3338 {
3339 	/*
3340 	 * Dirty tracking is always enabled for these domains and the dirty
3341 	 * bitmap is cleared prior to set_dirty_tracking(), so nothing to do.
3342 	 */
3343 	return 0;
3344 }
3345 
3346 static struct iommu_group *arm_smmu_device_group(struct device *dev)
3347 {
3348 	struct iommu_group *group;
3349 
3350 	/*
3351 	 * We don't support devices sharing stream IDs other than PCI RID
3352 	 * aliases, since the necessary ID-to-device lookup becomes rather
3353 	 * impractical given a potential sparse 32-bit stream ID space.
3354 	 */
3355 	if (dev_is_pci(dev))
3356 		group = pci_device_group(dev);
3357 	else
3358 		group = generic_device_group(dev);
3359 
3360 	return group;
3361 }
3362 
3363 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
3364 {
3365 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3366 	int ret = 0;
3367 
3368 	mutex_lock(&smmu_domain->init_mutex);
3369 	if (smmu_domain->smmu)
3370 		ret = -EPERM;
3371 	else
3372 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
3373 	mutex_unlock(&smmu_domain->init_mutex);
3374 
3375 	return ret;
3376 }
3377 
3378 static int arm_smmu_of_xlate(struct device *dev,
3379 			     const struct of_phandle_args *args)
3380 {
3381 	return iommu_fwspec_add_ids(dev, args->args, 1);
3382 }
3383 
3384 static void arm_smmu_get_resv_regions(struct device *dev,
3385 				      struct list_head *head)
3386 {
3387 	struct iommu_resv_region *region;
3388 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
3389 
3390 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
3391 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
3392 	if (!region)
3393 		return;
3394 
3395 	list_add_tail(&region->list, head);
3396 
3397 	iommu_dma_get_resv_regions(dev, head);
3398 }
3399 
3400 static int arm_smmu_dev_enable_feature(struct device *dev,
3401 				       enum iommu_dev_features feat)
3402 {
3403 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3404 
3405 	if (!master)
3406 		return -ENODEV;
3407 
3408 	switch (feat) {
3409 	case IOMMU_DEV_FEAT_IOPF:
3410 		if (!arm_smmu_master_iopf_supported(master))
3411 			return -EINVAL;
3412 		if (master->iopf_enabled)
3413 			return -EBUSY;
3414 		master->iopf_enabled = true;
3415 		return 0;
3416 	case IOMMU_DEV_FEAT_SVA:
3417 		if (!arm_smmu_master_sva_supported(master))
3418 			return -EINVAL;
3419 		if (arm_smmu_master_sva_enabled(master))
3420 			return -EBUSY;
3421 		return arm_smmu_master_enable_sva(master);
3422 	default:
3423 		return -EINVAL;
3424 	}
3425 }
3426 
3427 static int arm_smmu_dev_disable_feature(struct device *dev,
3428 					enum iommu_dev_features feat)
3429 {
3430 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3431 
3432 	if (!master)
3433 		return -EINVAL;
3434 
3435 	switch (feat) {
3436 	case IOMMU_DEV_FEAT_IOPF:
3437 		if (!master->iopf_enabled)
3438 			return -EINVAL;
3439 		if (master->sva_enabled)
3440 			return -EBUSY;
3441 		master->iopf_enabled = false;
3442 		return 0;
3443 	case IOMMU_DEV_FEAT_SVA:
3444 		if (!arm_smmu_master_sva_enabled(master))
3445 			return -EINVAL;
3446 		return arm_smmu_master_disable_sva(master);
3447 	default:
3448 		return -EINVAL;
3449 	}
3450 }
3451 
3452 /*
3453  * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
3454  * PCIe link and save the data to memory by DMA. The hardware is restricted to
3455  * use identity mapping only.
3456  */
3457 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
3458 					 (pdev)->device == 0xa12e)
3459 
3460 static int arm_smmu_def_domain_type(struct device *dev)
3461 {
3462 	if (dev_is_pci(dev)) {
3463 		struct pci_dev *pdev = to_pci_dev(dev);
3464 
3465 		if (IS_HISI_PTT_DEVICE(pdev))
3466 			return IOMMU_DOMAIN_IDENTITY;
3467 	}
3468 
3469 	return 0;
3470 }
3471 
3472 static struct iommu_ops arm_smmu_ops = {
3473 	.identity_domain	= &arm_smmu_identity_domain,
3474 	.blocked_domain		= &arm_smmu_blocked_domain,
3475 	.capable		= arm_smmu_capable,
3476 	.domain_alloc_paging    = arm_smmu_domain_alloc_paging,
3477 	.domain_alloc_sva       = arm_smmu_sva_domain_alloc,
3478 	.domain_alloc_user	= arm_smmu_domain_alloc_user,
3479 	.probe_device		= arm_smmu_probe_device,
3480 	.release_device		= arm_smmu_release_device,
3481 	.device_group		= arm_smmu_device_group,
3482 	.of_xlate		= arm_smmu_of_xlate,
3483 	.get_resv_regions	= arm_smmu_get_resv_regions,
3484 	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
3485 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
3486 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
3487 	.page_response		= arm_smmu_page_response,
3488 	.def_domain_type	= arm_smmu_def_domain_type,
3489 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
3490 	.owner			= THIS_MODULE,
3491 	.default_domain_ops = &(const struct iommu_domain_ops) {
3492 		.attach_dev		= arm_smmu_attach_dev,
3493 		.set_dev_pasid		= arm_smmu_s1_set_dev_pasid,
3494 		.map_pages		= arm_smmu_map_pages,
3495 		.unmap_pages		= arm_smmu_unmap_pages,
3496 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
3497 		.iotlb_sync		= arm_smmu_iotlb_sync,
3498 		.iova_to_phys		= arm_smmu_iova_to_phys,
3499 		.enable_nesting		= arm_smmu_enable_nesting,
3500 		.free			= arm_smmu_domain_free_paging,
3501 	}
3502 };
3503 
3504 static struct iommu_dirty_ops arm_smmu_dirty_ops = {
3505 	.read_and_clear_dirty	= arm_smmu_read_and_clear_dirty,
3506 	.set_dirty_tracking     = arm_smmu_set_dirty_tracking,
3507 };
3508 
3509 /* Probing and initialisation functions */
3510 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3511 				   struct arm_smmu_queue *q,
3512 				   void __iomem *page,
3513 				   unsigned long prod_off,
3514 				   unsigned long cons_off,
3515 				   size_t dwords, const char *name)
3516 {
3517 	size_t qsz;
3518 
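	/*
	 * Try to allocate the queue (entries * dwords-per-entry * 8 bytes),
	 * halving the number of entries on failure until the allocation
	 * succeeds or the queue size drops below one page.
	 */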
3519 	do {
3520 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3521 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3522 					      GFP_KERNEL);
3523 		if (q->base || qsz < PAGE_SIZE)
3524 			break;
3525 
3526 		q->llq.max_n_shift--;
3527 	} while (1);
3528 
3529 	if (!q->base) {
3530 		dev_err(smmu->dev,
3531 			"failed to allocate queue (0x%zx bytes) for %s\n",
3532 			qsz, name);
3533 		return -ENOMEM;
3534 	}
3535 
3536 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
3537 		dev_info(smmu->dev, "allocated %u entries for %s\n",
3538 			 1 << q->llq.max_n_shift, name);
3539 	}
3540 
3541 	q->prod_reg	= page + prod_off;
3542 	q->cons_reg	= page + cons_off;
3543 	q->ent_dwords	= dwords;
3544 
3545 	q->q_base  = Q_BASE_RWA;
3546 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3547 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3548 
3549 	q->llq.prod = q->llq.cons = 0;
3550 	return 0;
3551 }
3552 
3553 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
3554 {
3555 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
3556 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3557 
3558 	atomic_set(&cmdq->owner_prod, 0);
3559 	atomic_set(&cmdq->lock, 0);
3560 
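	/*
	 * One bit per command queue entry, used by the lock-free command
	 * insertion code to track which slots currently hold valid commands.
	 */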
3561 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
3562 							      GFP_KERNEL);
3563 	if (!cmdq->valid_map)
3564 		return -ENOMEM;
3565 
3566 	return 0;
3567 }
3568 
3569 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3570 {
3571 	int ret;
3572 
3573 	/* cmdq */
3574 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
3575 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
3576 				      CMDQ_ENT_DWORDS, "cmdq");
3577 	if (ret)
3578 		return ret;
3579 
3580 	ret = arm_smmu_cmdq_init(smmu);
3581 	if (ret)
3582 		return ret;
3583 
3584 	/* evtq */
3585 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
3586 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
3587 				      EVTQ_ENT_DWORDS, "evtq");
3588 	if (ret)
3589 		return ret;
3590 
3591 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
3592 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
3593 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
3594 		if (!smmu->evtq.iopf)
3595 			return -ENOMEM;
3596 	}
3597 
3598 	/* priq */
3599 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3600 		return 0;
3601 
3602 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
3603 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
3604 				       PRIQ_ENT_DWORDS, "priq");
3605 }
3606 
3607 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3608 {
3609 	void *strtab;
3610 	u64 reg;
3611 	u32 size, l1size;
3612 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3613 
3614 	/* Calculate the L1 size, capped to the SIDSIZE. */
3615 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3616 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3617 	cfg->num_l1_ents = 1 << size;
3618 
3619 	size += STRTAB_SPLIT;
3620 	if (size < smmu->sid_bits)
3621 		dev_warn(smmu->dev,
3622 			 "2-level strtab only covers %u/%u bits of SID\n",
3623 			 size, smmu->sid_bits);
3624 
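	/* Each L1 descriptor occupies STRTAB_L1_DESC_DWORDS 64-bit words. */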
3625 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3626 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3627 				     GFP_KERNEL);
3628 	if (!strtab) {
3629 		dev_err(smmu->dev,
3630 			"failed to allocate l1 stream table (%u bytes)\n",
3631 			l1size);
3632 		return -ENOMEM;
3633 	}
3634 	cfg->strtab = strtab;
3635 
3636 	/* Configure strtab_base_cfg for 2 levels */
3637 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3638 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3639 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3640 	cfg->strtab_base_cfg = reg;
3641 
3642 	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
3643 				    sizeof(*cfg->l1_desc), GFP_KERNEL);
3644 	if (!cfg->l1_desc)
3645 		return -ENOMEM;
3646 
3647 	return 0;
3648 }
3649 
3650 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3651 {
3652 	void *strtab;
3653 	u64 reg;
3654 	u32 size;
3655 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3656 
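	/* One STE (STRTAB_STE_DWORDS dwords) for every possible stream ID. */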
3657 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3658 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3659 				     GFP_KERNEL);
3660 	if (!strtab) {
3661 		dev_err(smmu->dev,
3662 			"failed to allocate linear stream table (%u bytes)\n",
3663 			size);
3664 		return -ENOMEM;
3665 	}
3666 	cfg->strtab = strtab;
3667 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3668 
3669 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3670 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3671 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3672 	cfg->strtab_base_cfg = reg;
3673 
3674 	arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents);
3675 	return 0;
3676 }
3677 
3678 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3679 {
3680 	u64 reg;
3681 	int ret;
3682 
3683 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3684 		ret = arm_smmu_init_strtab_2lvl(smmu);
3685 	else
3686 		ret = arm_smmu_init_strtab_linear(smmu);
3687 
3688 	if (ret)
3689 		return ret;
3690 
3691 	/* Set the strtab base address */
3692 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3693 	reg |= STRTAB_BASE_RA;
3694 	smmu->strtab_cfg.strtab_base = reg;
3695 
3696 	ida_init(&smmu->vmid_map);
3697 
3698 	return 0;
3699 }
3700 
3701 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3702 {
3703 	int ret;
3704 
3705 	mutex_init(&smmu->streams_mutex);
3706 	smmu->streams = RB_ROOT;
3707 
3708 	ret = arm_smmu_init_queues(smmu);
3709 	if (ret)
3710 		return ret;
3711 
3712 	return arm_smmu_init_strtab(smmu);
3713 }
3714 
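/*
 * Write a control register and poll its acknowledgement register until the
 * new value is reflected back, or we time out.
 */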
3715 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3716 				   unsigned int reg_off, unsigned int ack_off)
3717 {
3718 	u32 reg;
3719 
3720 	writel_relaxed(val, smmu->base + reg_off);
3721 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3722 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3723 }
3724 
3725 /* GBPA is "special": it can only be updated via the GBPA_UPDATE handshake */
3726 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3727 {
3728 	int ret;
3729 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3730 
3731 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3732 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3733 	if (ret)
3734 		return ret;
3735 
3736 	reg &= ~clr;
3737 	reg |= set;
3738 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3739 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3740 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3741 
3742 	if (ret)
3743 		dev_err(smmu->dev, "GBPA not responding to update\n");
3744 	return ret;
3745 }
3746 
3747 static void arm_smmu_free_msis(void *data)
3748 {
3749 	struct device *dev = data;
3750 
3751 	platform_device_msi_free_irqs_all(dev);
3752 }
3753 
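/*
 * Program the doorbell address, payload and memory attributes for one of
 * the SMMU's MSIs.
 */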
3754 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3755 {
3756 	phys_addr_t doorbell;
3757 	struct device *dev = msi_desc_to_dev(desc);
3758 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3759 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3760 
3761 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3762 	doorbell &= MSI_CFG0_ADDR_MASK;
3763 
3764 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3765 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3766 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3767 }
3768 
3769 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3770 {
3771 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3772 	struct device *dev = smmu->dev;
3773 
3774 	/* Clear the MSI address regs */
3775 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3776 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3777 
3778 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3779 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3780 	else
3781 		nvec--;
3782 
3783 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3784 		return;
3785 
3786 	if (!dev->msi.domain) {
3787 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3788 		return;
3789 	}
3790 
3791 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3792 	ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3793 	if (ret) {
3794 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3795 		return;
3796 	}
3797 
3798 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3799 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3800 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3801 
3802 	/* Add callback to free MSIs on teardown */
3803 	devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
3804 }
3805 
3806 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3807 {
3808 	int irq, ret;
3809 
3810 	arm_smmu_setup_msis(smmu);
3811 
3812 	/* Request interrupt lines */
3813 	irq = smmu->evtq.q.irq;
3814 	if (irq) {
3815 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3816 						arm_smmu_evtq_thread,
3817 						IRQF_ONESHOT,
3818 						"arm-smmu-v3-evtq", smmu);
3819 		if (ret < 0)
3820 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3821 	} else {
3822 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3823 	}
3824 
3825 	irq = smmu->gerr_irq;
3826 	if (irq) {
3827 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3828 				       0, "arm-smmu-v3-gerror", smmu);
3829 		if (ret < 0)
3830 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3831 	} else {
3832 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3833 	}
3834 
3835 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3836 		irq = smmu->priq.q.irq;
3837 		if (irq) {
3838 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3839 							arm_smmu_priq_thread,
3840 							IRQF_ONESHOT,
3841 							"arm-smmu-v3-priq",
3842 							smmu);
3843 			if (ret < 0)
3844 				dev_warn(smmu->dev,
3845 					 "failed to enable priq irq\n");
3846 		} else {
3847 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3848 		}
3849 	}
3850 }
3851 
3852 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3853 {
3854 	int ret, irq;
3855 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3856 
3857 	/* Disable IRQs first */
3858 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3859 				      ARM_SMMU_IRQ_CTRLACK);
3860 	if (ret) {
3861 		dev_err(smmu->dev, "failed to disable irqs\n");
3862 		return ret;
3863 	}
3864 
3865 	irq = smmu->combined_irq;
3866 	if (irq) {
3867 		/*
3868 		 * Cavium ThunderX2 implementation doesn't support unique irq
3869 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3870 		 */
3871 		ret = devm_request_threaded_irq(smmu->dev, irq,
3872 					arm_smmu_combined_irq_handler,
3873 					arm_smmu_combined_irq_thread,
3874 					IRQF_ONESHOT,
3875 					"arm-smmu-v3-combined-irq", smmu);
3876 		if (ret < 0)
3877 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3878 	} else {
3879 		arm_smmu_setup_unique_irqs(smmu);
	}
3880 
3881 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3882 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3883 
3884 	/* Enable interrupt generation on the SMMU */
3885 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3886 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3887 	if (ret)
3888 		dev_warn(smmu->dev, "failed to enable irqs\n");
3889 
3890 	return 0;
3891 }
3892 
3893 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3894 {
3895 	int ret;
3896 
3897 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3898 	if (ret)
3899 		dev_err(smmu->dev, "failed to clear cr0\n");
3900 
3901 	return ret;
3902 }
3903 
3904 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
3905 {
3906 	int ret;
3907 	u32 reg, enables;
3908 	struct arm_smmu_cmdq_ent cmd;
3909 
3910 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3911 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3912 	if (reg & CR0_SMMUEN) {
3913 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3914 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3915 	}
3916 
3917 	ret = arm_smmu_device_disable(smmu);
3918 	if (ret)
3919 		return ret;
3920 
3921 	/* CR1 (table and queue memory attributes) */
3922 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3923 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3924 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3925 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3926 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3927 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3928 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3929 
3930 	/* CR2 (miscellaneous configuration) */
3931 	reg = CR2_PTM | CR2_RECINVSID;
3932 
3933 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3934 		reg |= CR2_E2H;
3935 
3936 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3937 
3938 	/* Stream table */
3939 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3940 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3941 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3942 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3943 
3944 	/* Command queue */
3945 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3946 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3947 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3948 
3949 	enables = CR0_CMDQEN;
3950 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3951 				      ARM_SMMU_CR0ACK);
3952 	if (ret) {
3953 		dev_err(smmu->dev, "failed to enable command queue\n");
3954 		return ret;
3955 	}
3956 
3957 	/* Invalidate any cached configuration */
3958 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3959 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3960 
3961 	/* Invalidate any stale TLB entries */
3962 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3963 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3964 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3965 	}
3966 
3967 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3968 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3969 
3970 	/* Event queue */
3971 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3972 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3973 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3974 
3975 	enables |= CR0_EVTQEN;
3976 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3977 				      ARM_SMMU_CR0ACK);
3978 	if (ret) {
3979 		dev_err(smmu->dev, "failed to enable event queue\n");
3980 		return ret;
3981 	}
3982 
3983 	/* PRI queue */
3984 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3985 		writeq_relaxed(smmu->priq.q.q_base,
3986 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3987 		writel_relaxed(smmu->priq.q.llq.prod,
3988 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3989 		writel_relaxed(smmu->priq.q.llq.cons,
3990 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3991 
3992 		enables |= CR0_PRIQEN;
3993 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3994 					      ARM_SMMU_CR0ACK);
3995 		if (ret) {
3996 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3997 			return ret;
3998 		}
3999 	}
4000 
4001 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
4002 		enables |= CR0_ATSCHK;
4003 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4004 					      ARM_SMMU_CR0ACK);
4005 		if (ret) {
4006 			dev_err(smmu->dev, "failed to enable ATS check\n");
4007 			return ret;
4008 		}
4009 	}
4010 
4011 	ret = arm_smmu_setup_irqs(smmu);
4012 	if (ret) {
4013 		dev_err(smmu->dev, "failed to setup irqs\n");
4014 		return ret;
4015 	}
4016 
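	/*
	 * In a kdump kernel, leave the event and PRI queues disabled so that
	 * the crash kernel is not flooded with faults caused by DMA left
	 * running by the previous kernel.
	 */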
4017 	if (is_kdump_kernel())
4018 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
4019 
4020 	/* Enable the SMMU interface */
4021 	enables |= CR0_SMMUEN;
4022 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4023 				      ARM_SMMU_CR0ACK);
4024 	if (ret) {
4025 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
4026 		return ret;
4027 	}
4028 
4029 	return 0;
4030 }
4031 
4032 #define IIDR_IMPLEMENTER_ARM		0x43b
4033 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
4034 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
4035 
4036 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
4037 {
4038 	u32 reg;
4039 	unsigned int implementer, productid, variant, revision;
4040 
4041 	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
4042 	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
4043 	productid = FIELD_GET(IIDR_PRODUCTID, reg);
4044 	variant = FIELD_GET(IIDR_VARIANT, reg);
4045 	revision = FIELD_GET(IIDR_REVISION, reg);
4046 
4047 	switch (implementer) {
4048 	case IIDR_IMPLEMENTER_ARM:
4049 		switch (productid) {
4050 		case IIDR_PRODUCTID_ARM_MMU_600:
4051 			/* Arm erratum 1076982 */
4052 			if (variant == 0 && revision <= 2)
4053 				smmu->features &= ~ARM_SMMU_FEAT_SEV;
4054 			/* Arm erratum 1209401 */
4055 			if (variant < 2)
4056 				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
4057 			break;
4058 		case IIDR_PRODUCTID_ARM_MMU_700:
4059 			/* Arm erratum 2812531 */
4060 			smmu->features &= ~ARM_SMMU_FEAT_BTM;
4061 			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
4062 			/* Arm errata 2268618, 2812531 */
4063 			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
4064 			break;
4065 		}
4066 		break;
4067 	}
4068 }
4069 
4070 static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg)
4071 {
4072 	u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD);
4073 	u32 hw_features = 0;
4074 
4075 	switch (FIELD_GET(IDR0_HTTU, reg)) {
4076 	case IDR0_HTTU_ACCESS_DIRTY:
4077 		hw_features |= ARM_SMMU_FEAT_HD;
4078 		fallthrough;
4079 	case IDR0_HTTU_ACCESS:
4080 		hw_features |= ARM_SMMU_FEAT_HA;
4081 	}
4082 
4083 	if (smmu->dev->of_node)
4084 		smmu->features |= hw_features;
4085 	else if (hw_features != fw_features)
4086 		/* ACPI IORT sets the HTTU bits */
4087 		dev_warn(smmu->dev,
4088 			 "IDR0.HTTU features (0x%x) overridden by FW configuration (0x%x)\n",
4089 			  hw_features, fw_features);
4090 }
4091 
4092 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
4093 {
4094 	u32 reg;
4095 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
4096 
4097 	/* IDR0 */
4098 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
4099 
4100 	/* 2-level structures */
4101 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
4102 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
4103 
4104 	if (reg & IDR0_CD2L)
4105 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
4106 
4107 	/*
4108 	 * Translation table endianness.
4109 	 * We currently require the same endianness as the CPU, but this
4110 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
4111 	 */
4112 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
4113 	case IDR0_TTENDIAN_MIXED:
4114 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
4115 		break;
4116 #ifdef __BIG_ENDIAN
4117 	case IDR0_TTENDIAN_BE:
4118 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
4119 		break;
4120 #else
4121 	case IDR0_TTENDIAN_LE:
4122 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
4123 		break;
4124 #endif
4125 	default:
4126 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
4127 		return -ENXIO;
4128 	}
4129 
4130 	/* Boolean feature flags */
4131 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
4132 		smmu->features |= ARM_SMMU_FEAT_PRI;
4133 
4134 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
4135 		smmu->features |= ARM_SMMU_FEAT_ATS;
4136 
4137 	if (reg & IDR0_SEV)
4138 		smmu->features |= ARM_SMMU_FEAT_SEV;
4139 
4140 	if (reg & IDR0_MSI) {
4141 		smmu->features |= ARM_SMMU_FEAT_MSI;
4142 		if (coherent && !disable_msipolling)
4143 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
4144 	}
4145 
4146 	if (reg & IDR0_HYP) {
4147 		smmu->features |= ARM_SMMU_FEAT_HYP;
4148 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
4149 			smmu->features |= ARM_SMMU_FEAT_E2H;
4150 	}
4151 
4152 	arm_smmu_get_httu(smmu, reg);
4153 
4154 	/*
4155 	 * The coherency feature as set by FW is used in preference to the ID
4156 	 * register, but warn on mismatch.
4157 	 */
4158 	if (!!(reg & IDR0_COHACC) != coherent)
4159 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
4160 			 coherent ? "true" : "false");
4161 
4162 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
4163 	case IDR0_STALL_MODEL_FORCE:
4164 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
4165 		fallthrough;
4166 	case IDR0_STALL_MODEL_STALL:
4167 		smmu->features |= ARM_SMMU_FEAT_STALLS;
4168 	}
4169 
4170 	if (reg & IDR0_S1P)
4171 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
4172 
4173 	if (reg & IDR0_S2P)
4174 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
4175 
4176 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
4177 		dev_err(smmu->dev, "no translation support!\n");
4178 		return -ENXIO;
4179 	}
4180 
4181 	/* We only support the AArch64 table format at present */
4182 	switch (FIELD_GET(IDR0_TTF, reg)) {
4183 	case IDR0_TTF_AARCH32_64:
4184 		smmu->ias = 40;
4185 		fallthrough;
4186 	case IDR0_TTF_AARCH64:
4187 		break;
4188 	default:
4189 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
4190 		return -ENXIO;
4191 	}
4192 
4193 	/* ASID/VMID sizes */
4194 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
4195 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
4196 
4197 	/* IDR1 */
4198 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
4199 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
4200 		dev_err(smmu->dev, "embedded implementation not supported\n");
4201 		return -ENXIO;
4202 	}
4203 
4204 	if (reg & IDR1_ATTR_TYPES_OVR)
4205 		smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;
4206 
4207 	/* Queue sizes, capped to ensure natural alignment */
4208 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
4209 					     FIELD_GET(IDR1_CMDQS, reg));
4210 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
4211 		/*
4212 		 * We don't support splitting up batches, so one batch of
4213 		 * commands plus an extra sync needs to fit inside the command
4214 		 * queue. There's also no way we can handle the weird alignment
4215 		 * restrictions on the base pointer for a unit-length queue.
4216 		 */
4217 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
4218 			CMDQ_BATCH_ENTRIES);
4219 		return -ENXIO;
4220 	}
4221 
4222 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
4223 					     FIELD_GET(IDR1_EVTQS, reg));
4224 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
4225 					     FIELD_GET(IDR1_PRIQS, reg));
4226 
4227 	/* SID/SSID sizes */
4228 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
4229 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
4230 	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
4231 
4232 	/*
4233 	 * If the SMMU supports fewer bits than would fill a single L2 stream
4234 	 * table, use a linear table instead.
4235 	 */
4236 	if (smmu->sid_bits <= STRTAB_SPLIT)
4237 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
4238 
4239 	/* IDR3 */
4240 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
4241 	if (FIELD_GET(IDR3_RIL, reg))
4242 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
4243 
4244 	/* IDR5 */
4245 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
4246 
4247 	/* Maximum number of outstanding stalls */
4248 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
4249 
4250 	/* Page sizes */
4251 	if (reg & IDR5_GRAN64K)
4252 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
4253 	if (reg & IDR5_GRAN16K)
4254 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
4255 	if (reg & IDR5_GRAN4K)
4256 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
4257 
4258 	/* Input address size */
4259 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
4260 		smmu->features |= ARM_SMMU_FEAT_VAX;
4261 
4262 	/* Output address size */
4263 	switch (FIELD_GET(IDR5_OAS, reg)) {
4264 	case IDR5_OAS_32_BIT:
4265 		smmu->oas = 32;
4266 		break;
4267 	case IDR5_OAS_36_BIT:
4268 		smmu->oas = 36;
4269 		break;
4270 	case IDR5_OAS_40_BIT:
4271 		smmu->oas = 40;
4272 		break;
4273 	case IDR5_OAS_42_BIT:
4274 		smmu->oas = 42;
4275 		break;
4276 	case IDR5_OAS_44_BIT:
4277 		smmu->oas = 44;
4278 		break;
4279 	case IDR5_OAS_52_BIT:
4280 		smmu->oas = 52;
4281 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
4282 		break;
4283 	default:
4284 		dev_info(smmu->dev,
4285 			"unknown output address size. Truncating to 48-bit\n");
4286 		fallthrough;
4287 	case IDR5_OAS_48_BIT:
4288 		smmu->oas = 48;
4289 	}
4290 
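	/*
	 * Merge this SMMU's supported page sizes into the ops-wide bitmap;
	 * a system may contain several SMMUs with differing support.
	 */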
4291 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
4292 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
4293 	else
4294 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
4295 
4296 	/* Set the DMA mask for our table walker */
4297 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
4298 		dev_warn(smmu->dev,
4299 			 "failed to set DMA mask for table walker\n");
4300 
4301 	smmu->ias = max(smmu->ias, smmu->oas);
4302 
4303 	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
4304 	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
4305 		smmu->features |= ARM_SMMU_FEAT_NESTING;
4306 
4307 	arm_smmu_device_iidr_probe(smmu);
4308 
4309 	if (arm_smmu_sva_supported(smmu))
4310 		smmu->features |= ARM_SMMU_FEAT_SVA;
4311 
4312 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
4313 		 smmu->ias, smmu->oas, smmu->features);
4314 	return 0;
4315 }
4316 
4317 #ifdef CONFIG_ACPI
4318 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
4319 {
4320 	switch (model) {
4321 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
4322 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
4323 		break;
4324 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
4325 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
4326 		break;
4327 	}
4328 
4329 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
4330 }
4331 
4332 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
4333 				      struct arm_smmu_device *smmu)
4334 {
4335 	struct acpi_iort_smmu_v3 *iort_smmu;
4336 	struct device *dev = smmu->dev;
4337 	struct acpi_iort_node *node;
4338 
4339 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
4340 
4341 	/* Retrieve SMMUv3 specific data */
4342 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
4343 
4344 	acpi_smmu_get_options(iort_smmu->model, smmu);
4345 
4346 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
4347 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
4348 
4349 	switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) {
4350 	case IDR0_HTTU_ACCESS_DIRTY:
4351 		smmu->features |= ARM_SMMU_FEAT_HD;
4352 		fallthrough;
4353 	case IDR0_HTTU_ACCESS:
4354 		smmu->features |= ARM_SMMU_FEAT_HA;
4355 	}
4356 
4357 	return 0;
4358 }
4359 #else
4360 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
4361 					     struct arm_smmu_device *smmu)
4362 {
4363 	return -ENODEV;
4364 }
4365 #endif
4366 
4367 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
4368 				    struct arm_smmu_device *smmu)
4369 {
4370 	struct device *dev = &pdev->dev;
4371 	u32 cells;
4372 	int ret = -EINVAL;
4373 
4374 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
4375 		dev_err(dev, "missing #iommu-cells property\n");
4376 	else if (cells != 1)
4377 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
4378 	else
4379 		ret = 0;
4380 
4381 	parse_driver_options(smmu);
4382 
4383 	if (of_dma_is_coherent(dev->of_node))
4384 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
4385 
4386 	return ret;
4387 }
4388 
4389 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
4390 {
4391 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
4392 		return SZ_64K;
4393 	else
4394 		return SZ_128K;
4395 }
4396 
4397 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
4398 				      resource_size_t size)
4399 {
4400 	struct resource res = DEFINE_RES_MEM(start, size);
4401 
4402 	return devm_ioremap_resource(dev, &res);
4403 }
4404 
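/*
 * Pre-configure bypass STEs for any stream IDs named by firmware Reserved
 * Memory Range (RMR) nodes, so that their ongoing DMA keeps working once the
 * SMMU is enabled.
 */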
4405 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
4406 {
4407 	struct list_head rmr_list;
4408 	struct iommu_resv_region *e;
4409 
4410 	INIT_LIST_HEAD(&rmr_list);
4411 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4412 
4413 	list_for_each_entry(e, &rmr_list, list) {
4414 		struct iommu_iort_rmr_data *rmr;
4415 		int ret, i;
4416 
4417 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
4418 		for (i = 0; i < rmr->num_sids; i++) {
4419 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
4420 			if (ret) {
4421 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
4422 					rmr->sids[i]);
4423 				continue;
4424 			}
4425 
4426 			/*
4427 			 * STE table is not programmed to HW, see
4428 			 * arm_smmu_init_initial_stes()
4429 			 */
4430 			arm_smmu_make_bypass_ste(smmu,
4431 				arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
4432 		}
4433 	}
4434 
4435 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4436 }
4437 
4438 static int arm_smmu_device_probe(struct platform_device *pdev)
4439 {
4440 	int irq, ret;
4441 	struct resource *res;
4442 	resource_size_t ioaddr;
4443 	struct arm_smmu_device *smmu;
4444 	struct device *dev = &pdev->dev;
4445 
4446 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4447 	if (!smmu)
4448 		return -ENOMEM;
4449 	smmu->dev = dev;
4450 
4451 	if (dev->of_node) {
4452 		ret = arm_smmu_device_dt_probe(pdev, smmu);
4453 	} else {
4454 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
4455 	}
4456 	if (ret)
4457 		return ret;
4458 
4459 	/* Base address */
4460 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
4461 	if (!res)
4462 		return -EINVAL;
4463 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4464 		dev_err(dev, "MMIO region too small (%pr)\n", res);
4465 		return -EINVAL;
4466 	}
4467 	ioaddr = res->start;
4468 
4469 	/*
4470 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4471 	 * the PMCG registers which are reserved by the PMU driver.
4472 	 */
4473 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4474 	if (IS_ERR(smmu->base))
4475 		return PTR_ERR(smmu->base);
4476 
4477 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
4478 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4479 					       ARM_SMMU_REG_SZ);
4480 		if (IS_ERR(smmu->page1))
4481 			return PTR_ERR(smmu->page1);
4482 	} else {
4483 		smmu->page1 = smmu->base;
4484 	}
4485 
4486 	/* Interrupt lines */
4487 
4488 	irq = platform_get_irq_byname_optional(pdev, "combined");
4489 	if (irq > 0) {
4490 		smmu->combined_irq = irq;
4491 	} else {
4492 		irq = platform_get_irq_byname_optional(pdev, "eventq");
4493 		if (irq > 0)
4494 			smmu->evtq.q.irq = irq;
4495 
4496 		irq = platform_get_irq_byname_optional(pdev, "priq");
4497 		if (irq > 0)
4498 			smmu->priq.q.irq = irq;
4499 
4500 		irq = platform_get_irq_byname_optional(pdev, "gerror");
4501 		if (irq > 0)
4502 			smmu->gerr_irq = irq;
4503 	}
4504 	/* Probe the h/w */
4505 	ret = arm_smmu_device_hw_probe(smmu);
4506 	if (ret)
4507 		return ret;
4508 
4509 	/* Initialise in-memory data structures */
4510 	ret = arm_smmu_init_structures(smmu);
4511 	if (ret)
4512 		return ret;
4513 
4514 	/* Record our private device structure */
4515 	platform_set_drvdata(pdev, smmu);
4516 
4517 	/* Check for RMRs and install bypass STEs if any */
4518 	arm_smmu_rmr_install_bypass_ste(smmu);
4519 
4520 	/* Reset the device */
4521 	ret = arm_smmu_device_reset(smmu);
4522 	if (ret)
4523 		return ret;
4524 
4525 	/* And we're up. Go go go! */
4526 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4527 				     "smmu3.%pa", &ioaddr);
4528 	if (ret)
4529 		return ret;
4530 
4531 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
4532 	if (ret) {
4533 		dev_err(dev, "Failed to register iommu\n");
4534 		iommu_device_sysfs_remove(&smmu->iommu);
4535 		return ret;
4536 	}
4537 
4538 	return 0;
4539 }
4540 
4541 static void arm_smmu_device_remove(struct platform_device *pdev)
4542 {
4543 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4544 
4545 	iommu_device_unregister(&smmu->iommu);
4546 	iommu_device_sysfs_remove(&smmu->iommu);
4547 	arm_smmu_device_disable(smmu);
4548 	iopf_queue_free(smmu->evtq.iopf);
4549 	ida_destroy(&smmu->vmid_map);
4550 }
4551 
4552 static void arm_smmu_device_shutdown(struct platform_device *pdev)
4553 {
4554 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4555 
4556 	arm_smmu_device_disable(smmu);
4557 }
4558 
4559 static const struct of_device_id arm_smmu_of_match[] = {
4560 	{ .compatible = "arm,smmu-v3", },
4561 	{ },
4562 };
4563 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4564 
4565 static void arm_smmu_driver_unregister(struct platform_driver *drv)
4566 {
4567 	arm_smmu_sva_notifier_synchronize();
4568 	platform_driver_unregister(drv);
4569 }
4570 
4571 static struct platform_driver arm_smmu_driver = {
4572 	.driver	= {
4573 		.name			= "arm-smmu-v3",
4574 		.of_match_table		= arm_smmu_of_match,
4575 		.suppress_bind_attrs	= true,
4576 	},
4577 	.probe	= arm_smmu_device_probe,
4578 	.remove_new = arm_smmu_device_remove,
4579 	.shutdown = arm_smmu_device_shutdown,
4580 };
4581 module_driver(arm_smmu_driver, platform_driver_register,
4582 	      arm_smmu_driver_unregister);
4583 
4584 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4585 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4586 MODULE_ALIAS("platform:arm-smmu-v3");
4587 MODULE_LICENSE("GPL v2");
4588