xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision b50ecc5aca4d18f1f0c4942f5c797bc85edef144)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 #include <kunit/visibility.h>
30 #include <uapi/linux/iommufd.h>
31 
32 #include "arm-smmu-v3.h"
33 #include "../../dma-iommu.h"
34 
35 static bool disable_msipolling;
36 module_param(disable_msipolling, bool, 0444);
37 MODULE_PARM_DESC(disable_msipolling,
38 	"Disable MSI-based polling for CMD_SYNC completion.");
39 
40 static struct iommu_ops arm_smmu_ops;
41 static struct iommu_dirty_ops arm_smmu_dirty_ops;
42 
43 enum arm_smmu_msi_index {
44 	EVTQ_MSI_INDEX,
45 	GERROR_MSI_INDEX,
46 	PRIQ_MSI_INDEX,
47 	ARM_SMMU_MAX_MSIS,
48 };
49 
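/*
 * STEs and CDs are both NUM_ENTRY_QWORDS (8 qwords, 64 bytes) in size, which
 * is what allows the shared entry-writer logic (arm_smmu_write_entry() and
 * friends) to treat them uniformly; the static_asserts below check this.
 */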
50 #define NUM_ENTRY_QWORDS 8
51 static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
52 static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));
53 
54 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
55 	[EVTQ_MSI_INDEX] = {
56 		ARM_SMMU_EVTQ_IRQ_CFG0,
57 		ARM_SMMU_EVTQ_IRQ_CFG1,
58 		ARM_SMMU_EVTQ_IRQ_CFG2,
59 	},
60 	[GERROR_MSI_INDEX] = {
61 		ARM_SMMU_GERROR_IRQ_CFG0,
62 		ARM_SMMU_GERROR_IRQ_CFG1,
63 		ARM_SMMU_GERROR_IRQ_CFG2,
64 	},
65 	[PRIQ_MSI_INDEX] = {
66 		ARM_SMMU_PRIQ_IRQ_CFG0,
67 		ARM_SMMU_PRIQ_IRQ_CFG1,
68 		ARM_SMMU_PRIQ_IRQ_CFG2,
69 	},
70 };
71 
72 struct arm_smmu_option_prop {
73 	u32 opt;
74 	const char *prop;
75 };
76 
77 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
78 DEFINE_MUTEX(arm_smmu_asid_lock);
79 
80 static struct arm_smmu_option_prop arm_smmu_options[] = {
81 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
82 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
83 	{ 0, NULL},
84 };
85 
86 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
87 				    struct arm_smmu_device *smmu, u32 flags);
88 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
89 
90 static void parse_driver_options(struct arm_smmu_device *smmu)
91 {
92 	int i = 0;
93 
94 	do {
95 		if (of_property_read_bool(smmu->dev->of_node,
96 						arm_smmu_options[i].prop)) {
97 			smmu->options |= arm_smmu_options[i].opt;
98 			dev_notice(smmu->dev, "option %s\n",
99 				arm_smmu_options[i].prop);
100 		}
101 	} while (arm_smmu_options[++i].opt);
102 }
103 
104 /* Low-level queue manipulation functions */
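/*
 * A note on the prod/cons encoding used throughout: each value packs the
 * queue index into the low max_n_shift bits (Q_IDX), a wrap bit directly
 * above it (Q_WRP) and, in bit 31, the overflow flag (Q_OVF), all defined in
 * arm-smmu-v3.h. Equal index and wrap bits mean the queue is empty; equal
 * index with differing wrap bits means it is full. For example, with
 * max_n_shift == 8, prod == 0x105 and cons == 0x005 describe a full
 * 256-entry queue.
 */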
105 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
106 {
107 	u32 space, prod, cons;
108 
109 	prod = Q_IDX(q, q->prod);
110 	cons = Q_IDX(q, q->cons);
111 
112 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
113 		space = (1 << q->max_n_shift) - (prod - cons);
114 	else
115 		space = cons - prod;
116 
117 	return space >= n;
118 }
119 
120 static bool queue_full(struct arm_smmu_ll_queue *q)
121 {
122 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
123 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
124 }
125 
126 static bool queue_empty(struct arm_smmu_ll_queue *q)
127 {
128 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
129 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
130 }
131 
132 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
133 {
134 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
135 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
136 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
137 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
138 }
139 
140 static void queue_sync_cons_out(struct arm_smmu_queue *q)
141 {
142 	/*
143 	 * Ensure that all CPU accesses (reads and writes) to the queue
144 	 * are complete before we update the cons pointer.
145 	 */
146 	__iomb();
147 	writel_relaxed(q->llq.cons, q->cons_reg);
148 }
149 
150 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
151 {
152 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
153 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
154 }
155 
156 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
157 {
158 	struct arm_smmu_ll_queue *llq = &q->llq;
159 
160 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
161 		return;
162 
163 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
164 		      Q_IDX(llq, llq->cons);
165 	queue_sync_cons_out(q);
166 }
167 
168 static int queue_sync_prod_in(struct arm_smmu_queue *q)
169 {
170 	u32 prod;
171 	int ret = 0;
172 
173 	/*
174 	 * We can't use the _relaxed() variant here, as we must prevent
175 	 * speculative reads of the queue before we have determined that
176 	 * prod has indeed moved.
177 	 */
178 	prod = readl(q->prod_reg);
179 
180 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
181 		ret = -EOVERFLOW;
182 
183 	q->llq.prod = prod;
184 	return ret;
185 }
186 
187 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
188 {
189 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
190 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
191 }
192 
193 static void queue_poll_init(struct arm_smmu_device *smmu,
194 			    struct arm_smmu_queue_poll *qp)
195 {
196 	qp->delay = 1;
197 	qp->spin_cnt = 0;
198 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
199 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
200 }
201 
202 static int queue_poll(struct arm_smmu_queue_poll *qp)
203 {
204 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
205 		return -ETIMEDOUT;
206 
207 	if (qp->wfe) {
208 		wfe();
209 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
210 		cpu_relax();
211 	} else {
212 		udelay(qp->delay);
213 		qp->delay *= 2;
214 		qp->spin_cnt = 0;
215 	}
216 
217 	return 0;
218 }
219 
220 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
221 {
222 	int i;
223 
224 	for (i = 0; i < n_dwords; ++i)
225 		*dst++ = cpu_to_le64(*src++);
226 }
227 
228 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
229 {
230 	int i;
231 
232 	for (i = 0; i < n_dwords; ++i)
233 		*dst++ = le64_to_cpu(*src++);
234 }
235 
236 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
237 {
238 	if (queue_empty(&q->llq))
239 		return -EAGAIN;
240 
241 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
242 	queue_inc_cons(&q->llq);
243 	queue_sync_cons_out(q);
244 	return 0;
245 }
246 
247 /* High-level queue accessors */
248 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
249 {
250 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
251 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
252 
253 	switch (ent->opcode) {
254 	case CMDQ_OP_TLBI_EL2_ALL:
255 	case CMDQ_OP_TLBI_NSNH_ALL:
256 		break;
257 	case CMDQ_OP_PREFETCH_CFG:
258 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
259 		break;
260 	case CMDQ_OP_CFGI_CD:
261 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
262 		fallthrough;
263 	case CMDQ_OP_CFGI_STE:
264 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
265 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
266 		break;
267 	case CMDQ_OP_CFGI_CD_ALL:
268 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
269 		break;
270 	case CMDQ_OP_CFGI_ALL:
271 		/* Cover the entire SID range */
272 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
273 		break;
274 	case CMDQ_OP_TLBI_NH_VA:
275 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
276 		fallthrough;
277 	case CMDQ_OP_TLBI_EL2_VA:
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
279 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
280 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
282 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
283 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
284 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
285 		break;
286 	case CMDQ_OP_TLBI_S2_IPA:
287 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
289 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
290 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
291 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
292 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
293 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
294 		break;
295 	case CMDQ_OP_TLBI_NH_ASID:
296 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
297 		fallthrough;
298 	case CMDQ_OP_TLBI_NH_ALL:
299 	case CMDQ_OP_TLBI_S12_VMALL:
300 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
301 		break;
302 	case CMDQ_OP_TLBI_EL2_ASID:
303 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
304 		break;
305 	case CMDQ_OP_ATC_INV:
306 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
307 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
308 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
309 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
310 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
311 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
312 		break;
313 	case CMDQ_OP_PRI_RESP:
314 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
315 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
316 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
317 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
318 		switch (ent->pri.resp) {
319 		case PRI_RESP_DENY:
320 		case PRI_RESP_FAIL:
321 		case PRI_RESP_SUCC:
322 			break;
323 		default:
324 			return -EINVAL;
325 		}
326 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
327 		break;
328 	case CMDQ_OP_RESUME:
329 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
330 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
331 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
332 		break;
333 	case CMDQ_OP_CMD_SYNC:
334 		if (ent->sync.msiaddr) {
335 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
336 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
337 		} else {
338 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
339 		}
340 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
341 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
342 		break;
343 	default:
344 		return -ENOENT;
345 	}
346 
347 	return 0;
348 }
349 
350 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
351 					       struct arm_smmu_cmdq_ent *ent)
352 {
353 	struct arm_smmu_cmdq *cmdq = NULL;
354 
355 	if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
356 		cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
357 
358 	return cmdq ?: &smmu->cmdq;
359 }
360 
361 static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
362 					     struct arm_smmu_cmdq *cmdq)
363 {
364 	if (cmdq == &smmu->cmdq)
365 		return false;
366 
367 	return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
368 }
369 
370 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
371 					 struct arm_smmu_cmdq *cmdq, u32 prod)
372 {
373 	struct arm_smmu_queue *q = &cmdq->q;
374 	struct arm_smmu_cmdq_ent ent = {
375 		.opcode = CMDQ_OP_CMD_SYNC,
376 	};
377 
378 	/*
379 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
380 	 * payload, so the write will zero the entire command on that platform.
381 	 */
382 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
383 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
384 				   q->ent_dwords * 8;
385 	}
386 
387 	arm_smmu_cmdq_build_cmd(cmd, &ent);
388 	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
389 		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
390 }
391 
392 void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
393 			      struct arm_smmu_cmdq *cmdq)
394 {
395 	static const char * const cerror_str[] = {
396 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
397 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
398 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
399 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
400 	};
401 	struct arm_smmu_queue *q = &cmdq->q;
402 
403 	int i;
404 	u64 cmd[CMDQ_ENT_DWORDS];
405 	u32 cons = readl_relaxed(q->cons_reg);
406 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
407 	struct arm_smmu_cmdq_ent cmd_sync = {
408 		.opcode = CMDQ_OP_CMD_SYNC,
409 	};
410 
411 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
412 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
413 
414 	switch (idx) {
415 	case CMDQ_ERR_CERROR_ABT_IDX:
416 		dev_err(smmu->dev, "retrying command fetch\n");
417 		return;
418 	case CMDQ_ERR_CERROR_NONE_IDX:
419 		return;
420 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
421 		/*
422 		 * ATC Invalidation Completion timeout. CONS is still pointing
423 		 * at the CMD_SYNC. Attempt to complete other pending commands
424 		 * by repeating the CMD_SYNC, though we might well end up back
425 		 * here since the ATC invalidation may still be pending.
426 		 */
427 		return;
428 	case CMDQ_ERR_CERROR_ILL_IDX:
429 	default:
430 		break;
431 	}
432 
433 	/*
434 	 * We may have concurrent producers, so we need to be careful
435 	 * not to touch any of the shadow cmdq state.
436 	 */
437 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
438 	dev_err(smmu->dev, "skipping command in error state:\n");
439 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
440 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
441 
442 	/* Convert the erroneous command into a CMD_SYNC */
443 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
444 	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
445 		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
446 
447 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
448 }
449 
450 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
451 {
452 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
453 }
454 
455 /*
456  * Command queue locking.
457  * This is a form of bastardised rwlock with the following major changes:
458  *
459  * - The only LOCK routines are exclusive_trylock() and shared_lock().
460  *   Neither have barrier semantics, and instead provide only a control
461  *   dependency.
462  *
463  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
464  *   fails if the caller appears to be the last lock holder (yes, this is
465  *   racy). All successful UNLOCK routines have RELEASE semantics.
466  */
467 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
468 {
469 	int val;
470 
471 	/*
472 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
473 	 * lock counter. When held in exclusive state, the lock counter is set
474 	 * to INT_MIN so these increments won't hurt as the value will remain
475 	 * negative.
476 	 */
477 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
478 		return;
479 
480 	do {
481 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
482 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
483 }
484 
485 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
486 {
487 	(void)atomic_dec_return_release(&cmdq->lock);
488 }
489 
490 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
491 {
492 	if (atomic_read(&cmdq->lock) == 1)
493 		return false;
494 
495 	arm_smmu_cmdq_shared_unlock(cmdq);
496 	return true;
497 }
498 
499 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
500 ({									\
501 	bool __ret;							\
502 	local_irq_save(flags);						\
503 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
504 	if (!__ret)							\
505 		local_irq_restore(flags);				\
506 	__ret;								\
507 })
508 
509 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
510 ({									\
511 	atomic_set_release(&cmdq->lock, 0);				\
512 	local_irq_restore(flags);					\
513 })
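/*
 * Usage pattern, for reference: CMD_SYNC issuers take the lock as shared
 * before marking their slots valid, so that the queue cannot wrap twice
 * while they wait for completion, whereas arm_smmu_cmdq_poll_until_not_full()
 * uses the exclusive trylock to refresh the cached cons pointer from the
 * hardware.
 */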
514 
515 
516 /*
517  * Command queue insertion.
518  * This is made fiddly by our attempts to achieve some sort of scalability
519  * since there is one queue shared amongst all of the CPUs in the system.  If
520  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
521  * then you'll *love* this monstrosity.
522  *
523  * The basic idea is to split the queue up into ranges of commands that are
524  * owned by a given CPU; the owner may not have written all of the commands
525  * itself, but is responsible for advancing the hardware prod pointer when
526  * the time comes. The algorithm is roughly:
527  *
528  * 	1. Allocate some space in the queue. At this point we also discover
529  *	   whether the head of the queue is currently owned by another CPU,
530  *	   or whether we are the owner.
531  *
532  *	2. Write our commands into our allocated slots in the queue.
533  *
534  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
535  *
536  *	4. If we are an owner:
537  *		a. Wait for the previous owner to finish.
538  *		b. Mark the queue head as unowned, which tells us the range
539  *		   that we are responsible for publishing.
540  *		c. Wait for all commands in our owned range to become valid.
541  *		d. Advance the hardware prod pointer.
542  *		e. Tell the next owner we've finished.
543  *
544  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
545  *	   owner), then we need to stick around until it has completed:
546  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
547  *		   to clear the first 4 bytes.
548  *		b. Otherwise, we spin waiting for the hardware cons pointer to
549  *		   advance past our command.
550  *
551  * The devil is in the details, particularly the use of locking for handling
552  * SYNC completion and freeing up space in the queue before we think that it is
553  * full.
554  */
555 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
556 					       u32 sprod, u32 eprod, bool set)
557 {
558 	u32 swidx, sbidx, ewidx, ebidx;
559 	struct arm_smmu_ll_queue llq = {
560 		.max_n_shift	= cmdq->q.llq.max_n_shift,
561 		.prod		= sprod,
562 	};
563 
564 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
565 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
566 
567 	while (llq.prod != eprod) {
568 		unsigned long mask;
569 		atomic_long_t *ptr;
570 		u32 limit = BITS_PER_LONG;
571 
572 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
573 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
574 
575 		ptr = &cmdq->valid_map[swidx];
576 
577 		if ((swidx == ewidx) && (sbidx < ebidx))
578 			limit = ebidx;
579 
580 		mask = GENMASK(limit - 1, sbidx);
581 
582 		/*
583 		 * The valid bit is the inverse of the wrap bit. This means
584 		 * that a zero-initialised queue is invalid and, after marking
585 		 * all entries as valid, they become invalid again when we
586 		 * wrap.
587 		 */
588 		if (set) {
589 			atomic_long_xor(mask, ptr);
590 		} else { /* Poll */
591 			unsigned long valid;
592 
593 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
594 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
595 		}
596 
597 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
598 	}
599 }
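/*
 * Worked example of the bitmap arithmetic above: with a 256-entry queue the
 * valid map spans four longs (on 64-bit). Marking the index range [60, 70)
 * touches bits 60-63 of word 0 on the first iteration (swidx != ewidx, so
 * limit stays at BITS_PER_LONG) and bits 0-5 of word 1 on the second
 * (swidx == ewidx, so limit is clamped to ebidx == 6).
 */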
600 
601 /* Mark all entries in the range [sprod, eprod) as valid */
602 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
603 					u32 sprod, u32 eprod)
604 {
605 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
606 }
607 
608 /* Wait for all entries in the range [sprod, eprod) to become valid */
609 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
610 					 u32 sprod, u32 eprod)
611 {
612 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
613 }
614 
615 /* Wait for the command queue to become non-full */
616 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
617 					     struct arm_smmu_cmdq *cmdq,
618 					     struct arm_smmu_ll_queue *llq)
619 {
620 	unsigned long flags;
621 	struct arm_smmu_queue_poll qp;
622 	int ret = 0;
623 
624 	/*
625 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
626 	 * that fails, spin until somebody else updates it for us.
627 	 */
628 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
629 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
630 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
631 		llq->val = READ_ONCE(cmdq->q.llq.val);
632 		return 0;
633 	}
634 
635 	queue_poll_init(smmu, &qp);
636 	do {
637 		llq->val = READ_ONCE(cmdq->q.llq.val);
638 		if (!queue_full(llq))
639 			break;
640 
641 		ret = queue_poll(&qp);
642 	} while (!ret);
643 
644 	return ret;
645 }
646 
647 /*
648  * Wait until the SMMU signals a CMD_SYNC completion MSI.
649  * Must be called with the cmdq lock held in some capacity.
650  */
651 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
652 					  struct arm_smmu_cmdq *cmdq,
653 					  struct arm_smmu_ll_queue *llq)
654 {
655 	int ret = 0;
656 	struct arm_smmu_queue_poll qp;
657 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
658 
659 	queue_poll_init(smmu, &qp);
660 
661 	/*
662 	 * The MSI won't generate an event, since it's being written back
663 	 * into the command queue.
664 	 */
665 	qp.wfe = false;
666 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
667 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
668 	return ret;
669 }
670 
671 /*
672  * Wait until the SMMU cons index passes llq->prod.
673  * Must be called with the cmdq lock held in some capacity.
674  */
675 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
676 					       struct arm_smmu_cmdq *cmdq,
677 					       struct arm_smmu_ll_queue *llq)
678 {
679 	struct arm_smmu_queue_poll qp;
680 	u32 prod = llq->prod;
681 	int ret = 0;
682 
683 	queue_poll_init(smmu, &qp);
684 	llq->val = READ_ONCE(cmdq->q.llq.val);
685 	do {
686 		if (queue_consumed(llq, prod))
687 			break;
688 
689 		ret = queue_poll(&qp);
690 
691 		/*
692 		 * This needs to be a readl() so that our subsequent call
693 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
694 		 *
695 		 * Specifically, we need to ensure that we observe all
696 		 * shared_lock()s by other CMD_SYNCs that share our owner,
697 		 * so that a failing call to tryunlock() means that we're
698 		 * the last one out and therefore we can safely advance
699 		 * cmdq->q.llq.cons. Roughly speaking:
700 		 *
701 		 * CPU 0		CPU1			CPU2 (us)
702 		 *
703 		 * if (sync)
704 		 * 	shared_lock();
705 		 *
706 		 * dma_wmb();
707 		 * set_valid_map();
708 		 *
709 		 * 			if (owner) {
710 		 *				poll_valid_map();
711 		 *				<control dependency>
712 		 *				writel(prod_reg);
713 		 *
714 		 *						readl(cons_reg);
715 		 *						tryunlock();
716 		 *
717 		 * Requires us to see CPU 0's shared_lock() acquisition.
718 		 */
719 		llq->cons = readl(cmdq->q.cons_reg);
720 	} while (!ret);
721 
722 	return ret;
723 }
724 
725 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
726 					 struct arm_smmu_cmdq *cmdq,
727 					 struct arm_smmu_ll_queue *llq)
728 {
729 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
730 	    !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
731 		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
732 
733 	return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
734 }
735 
736 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
737 					u32 prod, int n)
738 {
739 	int i;
740 	struct arm_smmu_ll_queue llq = {
741 		.max_n_shift	= cmdq->q.llq.max_n_shift,
742 		.prod		= prod,
743 	};
744 
745 	for (i = 0; i < n; ++i) {
746 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
747 
748 		prod = queue_inc_prod_n(&llq, i);
749 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
750 	}
751 }
752 
753 /*
754  * This is the actual insertion function, and provides the following
755  * ordering guarantees to callers:
756  *
757  * - There is a dma_wmb() before publishing any commands to the queue.
758  *   This can be relied upon to order prior writes to data structures
759  *   in memory (such as a CD or an STE) before the command.
760  *
761  * - On completion of a CMD_SYNC, there is a control dependency.
762  *   This can be relied upon to order subsequent writes to memory (e.g.
763  *   freeing an IOVA) after completion of the CMD_SYNC.
764  *
765  * - Command insertion is totally ordered, so if two CPUs each race to
766  *   insert their own list of commands then all of the commands from one
767  *   CPU will appear before any of the commands from the other CPU.
768  */
769 int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
770 				struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
771 				bool sync)
772 {
773 	u64 cmd_sync[CMDQ_ENT_DWORDS];
774 	u32 prod;
775 	unsigned long flags;
776 	bool owner;
777 	struct arm_smmu_ll_queue llq, head;
778 	int ret = 0;
779 
780 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
781 
782 	/* 1. Allocate some space in the queue */
783 	local_irq_save(flags);
784 	llq.val = READ_ONCE(cmdq->q.llq.val);
785 	do {
786 		u64 old;
787 
788 		while (!queue_has_space(&llq, n + sync)) {
789 			local_irq_restore(flags);
790 			if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
791 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
792 			local_irq_save(flags);
793 		}
794 
795 		head.cons = llq.cons;
796 		head.prod = queue_inc_prod_n(&llq, n + sync) |
797 					     CMDQ_PROD_OWNED_FLAG;
798 
799 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
800 		if (old == llq.val)
801 			break;
802 
803 		llq.val = old;
804 	} while (1);
805 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
806 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
807 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
808 
809 	/*
810 	 * 2. Write our commands into the queue
811 	 * Dependency ordering from the cmpxchg() loop above.
812 	 */
813 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
814 	if (sync) {
815 		prod = queue_inc_prod_n(&llq, n);
816 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
817 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
818 
819 		/*
820 		 * In order to determine completion of our CMD_SYNC, we must
821 		 * ensure that the queue can't wrap twice without us noticing.
822 		 * We achieve that by taking the cmdq lock as shared before
823 		 * marking our slot as valid.
824 		 */
825 		arm_smmu_cmdq_shared_lock(cmdq);
826 	}
827 
828 	/* 3. Mark our slots as valid, ensuring commands are visible first */
829 	dma_wmb();
830 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
831 
832 	/* 4. If we are the owner, take control of the SMMU hardware */
833 	if (owner) {
834 		/* a. Wait for previous owner to finish */
835 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
836 
837 		/* b. Stop gathering work by clearing the owned flag */
838 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
839 						   &cmdq->q.llq.atomic.prod);
840 		prod &= ~CMDQ_PROD_OWNED_FLAG;
841 
842 		/*
843 		 * c. Wait for any gathered work to be written to the queue.
844 		 * Note that we read our own entries so that we have the control
845 		 * dependency required by (d).
846 		 */
847 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
848 
849 		/*
850 		 * d. Advance the hardware prod pointer
851 		 * Control dependency ordering from the entries becoming valid.
852 		 */
853 		writel_relaxed(prod, cmdq->q.prod_reg);
854 
855 		/*
856 		 * e. Tell the next owner we're done
857 		 * Make sure we've updated the hardware first, so that we don't
858 		 * race to update prod and potentially move it backwards.
859 		 */
860 		atomic_set_release(&cmdq->owner_prod, prod);
861 	}
862 
863 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
864 	if (sync) {
865 		llq.prod = queue_inc_prod_n(&llq, n);
866 		ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
867 		if (ret) {
868 			dev_err_ratelimited(smmu->dev,
869 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
870 					    llq.prod,
871 					    readl_relaxed(cmdq->q.prod_reg),
872 					    readl_relaxed(cmdq->q.cons_reg));
873 		}
874 
875 		/*
876 		 * Try to unlock the cmdq lock. This will fail if we're the last
877 		 * reader, in which case we can safely update cmdq->q.llq.cons
878 		 */
879 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
880 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
881 			arm_smmu_cmdq_shared_unlock(cmdq);
882 		}
883 	}
884 
885 	local_irq_restore(flags);
886 	return ret;
887 }
888 
889 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
890 				     struct arm_smmu_cmdq_ent *ent,
891 				     bool sync)
892 {
893 	u64 cmd[CMDQ_ENT_DWORDS];
894 
895 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
896 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
897 			 ent->opcode);
898 		return -EINVAL;
899 	}
900 
901 	return arm_smmu_cmdq_issue_cmdlist(
902 		smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
903 }
904 
905 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
906 				   struct arm_smmu_cmdq_ent *ent)
907 {
908 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
909 }
910 
911 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
912 					     struct arm_smmu_cmdq_ent *ent)
913 {
914 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
915 }
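/*
 * For example (see arm_smmu_ste_writer_sync_entry() further down), syncing a
 * single STE boils down to:
 *
 *	struct arm_smmu_cmdq_ent cmd = {
 *		.opcode	= CMDQ_OP_CFGI_STE,
 *		.cfgi	= { .sid = sid, .leaf = true },
 *	};
 *
 *	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 *
 * which builds the command, inserts it together with a trailing CMD_SYNC via
 * arm_smmu_cmdq_issue_cmdlist() and waits for the sync to complete.
 */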
916 
917 static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
918 				     struct arm_smmu_cmdq_batch *cmds,
919 				     struct arm_smmu_cmdq_ent *ent)
920 {
921 	cmds->num = 0;
922 	cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
923 }
924 
925 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
926 				    struct arm_smmu_cmdq_batch *cmds,
927 				    struct arm_smmu_cmdq_ent *cmd)
928 {
929 	bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
930 	bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
931 			  (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
932 	int index;
933 
934 	if (force_sync || unsupported_cmd) {
935 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
936 					    cmds->num, true);
937 		arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
938 	}
939 
940 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
941 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
942 					    cmds->num, false);
943 		arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
944 	}
945 
946 	index = cmds->num * CMDQ_ENT_DWORDS;
947 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
948 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
949 			 cmd->opcode);
950 		return;
951 	}
952 
953 	cmds->num++;
954 }
955 
956 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
957 				      struct arm_smmu_cmdq_batch *cmds)
958 {
959 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
960 					   cmds->num, true);
961 }
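/*
 * The canonical batching pattern is arm_smmu_cmdq_batch_init(), a loop of
 * arm_smmu_cmdq_batch_add() and a final arm_smmu_cmdq_batch_submit(); see
 * arm_smmu_sync_cd() below for an example.
 */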
962 
963 static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
964 				   struct iommu_page_response *resp)
965 {
966 	struct arm_smmu_cmdq_ent cmd = {0};
967 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
968 	int sid = master->streams[0].id;
969 
970 	if (WARN_ON(!master->stall_enabled))
971 		return;
972 
973 	cmd.opcode		= CMDQ_OP_RESUME;
974 	cmd.resume.sid		= sid;
975 	cmd.resume.stag		= resp->grpid;
976 	switch (resp->code) {
977 	case IOMMU_PAGE_RESP_INVALID:
978 	case IOMMU_PAGE_RESP_FAILURE:
979 		cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
980 		break;
981 	case IOMMU_PAGE_RESP_SUCCESS:
982 		cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
983 		break;
984 	default:
985 		break;
986 	}
987 
988 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
989 	/*
990 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
991 	 * RESUME consumption guarantees that the stalled transaction will be
992 	 * terminated... at some point in the future. PRI_RESP is fire and
993 	 * forget.
994 	 */
995 }
996 
997 /* Context descriptor manipulation functions */
998 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
999 {
1000 	struct arm_smmu_cmdq_ent cmd = {
1001 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
1002 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
1003 		.tlbi.asid = asid,
1004 	};
1005 
1006 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1007 }
1008 
1009 /*
1010  * Based on the value of ent, report which bits of the STE the HW will access. It
1011  * would be nice if this was complete according to the spec, but minimally it
1012  * has to capture the bits this driver uses.
1013  */
1014 VISIBLE_IF_KUNIT
1015 void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
1016 {
1017 	unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
1018 
1019 	used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
1020 	if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
1021 		return;
1022 
1023 	used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
1024 
1025 	/* S1 translates */
1026 	if (cfg & BIT(0)) {
1027 		used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
1028 					    STRTAB_STE_0_S1CTXPTR_MASK |
1029 					    STRTAB_STE_0_S1CDMAX);
1030 		used_bits[1] |=
1031 			cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
1032 				    STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
1033 				    STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
1034 				    STRTAB_STE_1_EATS);
1035 		used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
1036 
1037 		/*
1038 		 * See 13.5 Summary of attribute/permission configuration fields
1039 		 * for the SHCFG behavior.
1040 		 */
1041 		if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
1042 		    STRTAB_STE_1_S1DSS_BYPASS)
1043 			used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1044 	}
1045 
1046 	/* S2 translates */
1047 	if (cfg & BIT(1)) {
1048 		used_bits[1] |=
1049 			cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
1050 				    STRTAB_STE_1_SHCFG);
1051 		used_bits[2] |=
1052 			cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
1053 				    STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
1054 				    STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S |
1055 				    STRTAB_STE_2_S2R);
1056 		used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
1057 	}
1058 
1059 	if (cfg == STRTAB_STE_0_CFG_BYPASS)
1060 		used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1061 }
1062 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);
1063 
1064 /*
1065  * Figure out if we can do a hitless update of entry to become target. Returns a
1066  * bit mask where a 1 indicates that the corresponding qword needs a disruptive update.
1067  * unused_update is an intermediate value of entry that has unused bits set to
1068  * their new values.
1069  */
1070 static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
1071 				    const __le64 *entry, const __le64 *target,
1072 				    __le64 *unused_update)
1073 {
1074 	__le64 target_used[NUM_ENTRY_QWORDS] = {};
1075 	__le64 cur_used[NUM_ENTRY_QWORDS] = {};
1076 	u8 used_qword_diff = 0;
1077 	unsigned int i;
1078 
1079 	writer->ops->get_used(entry, cur_used);
1080 	writer->ops->get_used(target, target_used);
1081 
1082 	for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
1083 		/*
1084 		 * Check that the masks are up to date; the make functions are not
1085 		 * allowed to set a bit to 1 if the used function doesn't say it
1086 		 * is used.
1087 		 */
1088 		WARN_ON_ONCE(target[i] & ~target_used[i]);
1089 
1090 		/* Bits can change because they are not currently being used */
1091 		unused_update[i] = (entry[i] & cur_used[i]) |
1092 				   (target[i] & ~cur_used[i]);
1093 		/*
1094 		 * Each bit indicates that a used bit in a qword needs to be
1095 		 * changed after unused_update is applied.
1096 		 */
1097 		if ((unused_update[i] & target_used[i]) != target[i])
1098 			used_qword_diff |= 1 << i;
1099 	}
1100 	return used_qword_diff;
1101 }
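/*
 * For example, if only qword 1 of an STE needs its in-use bits changed then
 * used_qword_diff is 0x2 and hweight8() == 1, so arm_smmu_write_entry() can
 * apply the update hitlessly. If qwords 0 and 2 both need changing it is 0x5
 * and a break-before-make sequence (V=0 first) is required.
 */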
1102 
1103 static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
1104 		      const __le64 *target, unsigned int start,
1105 		      unsigned int len)
1106 {
1107 	bool changed = false;
1108 	unsigned int i;
1109 
1110 	for (i = start; len != 0; len--, i++) {
1111 		if (entry[i] != target[i]) {
1112 			WRITE_ONCE(entry[i], target[i]);
1113 			changed = true;
1114 		}
1115 	}
1116 
1117 	if (changed)
1118 		writer->ops->sync(writer);
1119 	return changed;
1120 }
1121 
1122 /*
1123  * Update the STE/CD to the target configuration. The transition from the
1124  * current entry to the target entry takes place over multiple steps that
1125  * attempts to make the transition hitless if possible. This function takes care
1126  * not to create a situation where the HW can perceive a corrupted entry. HW is
1127  * only required to have a 64 bit atomicity with stores from the CPU, while
1128  * entries are many 64 bit values big.
1129  *
1130  * The difference between the current value and the target value is analyzed to
1131  * determine which of three updates are required - disruptive, hitless or no
1132  * change.
1133  *
1134  * In the most general disruptive case we can make any update in three steps:
1135  *  - Disrupting the entry (V=0)
1136  *  - Fill now unused qwords, except qword 0 which contains V
1137  *  - Make qword 0 have the final value and valid (V=1) with a single 64
1138  *    bit store
1139  *
1140  * However this disrupts the HW while it is happening. There are several
1141  * interesting cases where a STE/CD can be updated without disturbing the HW
1142  * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
1143  * because the used bits don't intersect. We can detect this by calculating how
1144  * many 64 bit values need update after adjusting the unused bits and skip the
1145  * V=0 process. This relies on the IGNORED behavior described in the
1146  * specification.
1147  */
1148 VISIBLE_IF_KUNIT
1149 void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
1150 			  const __le64 *target)
1151 {
1152 	__le64 unused_update[NUM_ENTRY_QWORDS];
1153 	u8 used_qword_diff;
1154 
1155 	used_qword_diff =
1156 		arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
1157 	if (hweight8(used_qword_diff) == 1) {
1158 		/*
1159 		 * Only one qword needs its used bits to be changed. This is a
1160 		 * hitless update, update all bits the current STE/CD is
1161 		 * ignoring to their new values, then update a single "critical
1162 		 * qword" to change the STE/CD and finally 0 out any bits that
1163 		 * are now unused in the target configuration.
1164 		 */
1165 		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
1166 
1167 		/*
1168 		 * Skip writing unused bits in the critical qword since we'll be
1169 		 * writing it in the next step anyways. This can save a sync
1170 		 * when the only change is in that qword.
1171 		 */
1172 		unused_update[critical_qword_index] =
1173 			entry[critical_qword_index];
1174 		entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
1175 		entry_set(writer, entry, target, critical_qword_index, 1);
1176 		entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
1177 	} else if (used_qword_diff) {
1178 		/*
1179 		 * At least two qwords need their inuse bits to be changed. This
1180 		 * requires a breaking update, zero the V bit, write all qwords
1181 		 * but 0, then set qword 0
1182 		 */
1183 		unused_update[0] = 0;
1184 		entry_set(writer, entry, unused_update, 0, 1);
1185 		entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
1186 		entry_set(writer, entry, target, 0, 1);
1187 	} else {
1188 		/*
1189 		 * No inuse bit changed. Sanity check that all unused bits are 0
1190 		 * in the entry. The target was already sanity checked by
1191 		 * arm_smmu_entry_qword_diff().
1192 		 */
1193 		WARN_ON_ONCE(
1194 			entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
1195 	}
1196 }
1197 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
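/*
 * In other words: when the current and target entries disagree only in the
 * used bits of a single qword, the switch-over is a single 64-bit store to
 * that qword (plus updates to bits the HW is currently ignoring); otherwise
 * the HW briefly observes a V=0 entry while the rest is rebuilt.
 */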
1198 
1199 static void arm_smmu_sync_cd(struct arm_smmu_master *master,
1200 			     int ssid, bool leaf)
1201 {
1202 	size_t i;
1203 	struct arm_smmu_cmdq_batch cmds;
1204 	struct arm_smmu_device *smmu = master->smmu;
1205 	struct arm_smmu_cmdq_ent cmd = {
1206 		.opcode	= CMDQ_OP_CFGI_CD,
1207 		.cfgi	= {
1208 			.ssid	= ssid,
1209 			.leaf	= leaf,
1210 		},
1211 	};
1212 
1213 	arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
1214 	for (i = 0; i < master->num_streams; i++) {
1215 		cmd.cfgi.sid = master->streams[i].id;
1216 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1217 	}
1218 
1219 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1220 }
1221 
1222 static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
1223 				      dma_addr_t l2ptr_dma)
1224 {
1225 	u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V;
1226 
1227 	/* The HW has 64 bit atomicity with stores to the L2 CD table */
1228 	WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
1229 }
1230 
1231 static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src)
1232 {
1233 	return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK;
1234 }
1235 
1236 struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
1237 					u32 ssid)
1238 {
1239 	struct arm_smmu_cdtab_l2 *l2;
1240 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1241 
1242 	if (!arm_smmu_cdtab_allocated(cd_table))
1243 		return NULL;
1244 
1245 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1246 		return &cd_table->linear.table[ssid];
1247 
1248 	l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
1249 	if (!l2)
1250 		return NULL;
1251 	return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)];
1252 }
1253 
1254 static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
1255 						 u32 ssid)
1256 {
1257 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1258 	struct arm_smmu_device *smmu = master->smmu;
1259 
1260 	might_sleep();
1261 	iommu_group_mutex_assert(master->dev);
1262 
1263 	if (!arm_smmu_cdtab_allocated(cd_table)) {
1264 		if (arm_smmu_alloc_cd_tables(master))
1265 			return NULL;
1266 	}
1267 
1268 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
1269 		unsigned int idx = arm_smmu_cdtab_l1_idx(ssid);
1270 		struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx];
1271 
1272 		if (!*l2ptr) {
1273 			dma_addr_t l2ptr_dma;
1274 
1275 			*l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr),
1276 						    &l2ptr_dma, GFP_KERNEL);
1277 			if (!*l2ptr)
1278 				return NULL;
1279 
1280 			arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx],
1281 						  l2ptr_dma);
1282 			/* An invalid L1CD can be cached */
1283 			arm_smmu_sync_cd(master, ssid, false);
1284 		}
1285 	}
1286 	return arm_smmu_get_cd_ptr(master, ssid);
1287 }
1288 
1289 struct arm_smmu_cd_writer {
1290 	struct arm_smmu_entry_writer writer;
1291 	unsigned int ssid;
1292 };
1293 
1294 VISIBLE_IF_KUNIT
1295 void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
1296 {
1297 	used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
1298 	if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
1299 		return;
1300 	memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
1301 
1302 	/*
1303 	 * If EPD0 is set by the make function it means
1304 	 * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
1305 	 */
1306 	if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
1307 		used_bits[0] &= ~cpu_to_le64(
1308 			CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
1309 			CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
1310 			CTXDESC_CD_0_TCR_SH0);
1311 		used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
1312 	}
1313 }
1314 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);
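/*
 * The EPD0 carve-out above means that a CD written by a make function with
 * EPD0 set (so TTB0 and its TCR fields are IGNORED) can replace a live CD
 * without those fields counting as used-bit changes, i.e. without forcing a
 * disruptive update.
 */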
1315 
1316 static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1317 {
1318 	struct arm_smmu_cd_writer *cd_writer =
1319 		container_of(writer, struct arm_smmu_cd_writer, writer);
1320 
1321 	arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
1322 }
1323 
1324 static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
1325 	.sync = arm_smmu_cd_writer_sync_entry,
1326 	.get_used = arm_smmu_get_cd_used,
1327 };
1328 
1329 void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
1330 			     struct arm_smmu_cd *cdptr,
1331 			     const struct arm_smmu_cd *target)
1332 {
1333 	bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1334 	bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1335 	struct arm_smmu_cd_writer cd_writer = {
1336 		.writer = {
1337 			.ops = &arm_smmu_cd_writer_ops,
1338 			.master = master,
1339 		},
1340 		.ssid = ssid,
1341 	};
1342 
1343 	if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) {
1344 		if (cur_valid)
1345 			master->cd_table.used_ssids--;
1346 		else
1347 			master->cd_table.used_ssids++;
1348 	}
1349 
1350 	arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
1351 }
1352 
1353 void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
1354 			 struct arm_smmu_master *master,
1355 			 struct arm_smmu_domain *smmu_domain)
1356 {
1357 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
1358 	const struct io_pgtable_cfg *pgtbl_cfg =
1359 		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1360 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
1361 		&pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1362 
1363 	memset(target, 0, sizeof(*target));
1364 
1365 	target->data[0] = cpu_to_le64(
1366 		FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1367 		FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1368 		FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1369 		FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1370 		FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1371 #ifdef __BIG_ENDIAN
1372 		CTXDESC_CD_0_ENDI |
1373 #endif
1374 		CTXDESC_CD_0_TCR_EPD1 |
1375 		CTXDESC_CD_0_V |
1376 		FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1377 		CTXDESC_CD_0_AA64 |
1378 		(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
1379 		CTXDESC_CD_0_R |
1380 		CTXDESC_CD_0_A |
1381 		CTXDESC_CD_0_ASET |
1382 		FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
1383 		);
1384 
1385 	/* To enable dirty flag update, set both Access flag and dirty state update */
1386 	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
1387 		target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
1388 					       CTXDESC_CD_0_TCR_HD);
1389 
1390 	target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
1391 				      CTXDESC_CD_1_TTB0_MASK);
1392 	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
1393 }
1394 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);
1395 
1396 void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
1397 {
1398 	struct arm_smmu_cd target = {};
1399 	struct arm_smmu_cd *cdptr;
1400 
1401 	if (!arm_smmu_cdtab_allocated(&master->cd_table))
1402 		return;
1403 	cdptr = arm_smmu_get_cd_ptr(master, ssid);
1404 	if (WARN_ON(!cdptr))
1405 		return;
1406 	arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
1407 }
1408 
1409 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1410 {
1411 	int ret;
1412 	size_t l1size;
1413 	size_t max_contexts;
1414 	struct arm_smmu_device *smmu = master->smmu;
1415 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1416 
1417 	cd_table->s1cdmax = master->ssid_bits;
1418 	max_contexts = 1 << cd_table->s1cdmax;
1419 
1420 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1421 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1422 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1423 		cd_table->linear.num_ents = max_contexts;
1424 
1425 		l1size = max_contexts * sizeof(struct arm_smmu_cd);
1426 		cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size,
1427 							    &cd_table->cdtab_dma,
1428 							    GFP_KERNEL);
1429 		if (!cd_table->linear.table)
1430 			return -ENOMEM;
1431 	} else {
1432 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1433 		cd_table->l2.num_l1_ents =
1434 			DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES);
1435 
1436 		cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents,
1437 					     sizeof(*cd_table->l2.l2ptrs),
1438 					     GFP_KERNEL);
1439 		if (!cd_table->l2.l2ptrs)
1440 			return -ENOMEM;
1441 
1442 		l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1);
1443 		cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
1444 							&cd_table->cdtab_dma,
1445 							GFP_KERNEL);
1446 		if (!cd_table->l2.l1tab) {
1447 			ret = -ENOMEM;
1448 			goto err_free_l2ptrs;
1449 		}
1450 	}
1451 	return 0;
1452 
1453 err_free_l2ptrs:
1454 	kfree(cd_table->l2.l2ptrs);
1455 	cd_table->l2.l2ptrs = NULL;
1456 	return ret;
1457 }
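/*
 * Sizing example for the 2-level case: with ssid_bits == 16, max_contexts is
 * 65536 and l2.num_l1_ents is DIV_ROUND_UP(65536, CTXDESC_L2_ENTRIES) L1
 * descriptors; each L2 table holds CTXDESC_L2_ENTRIES CDs and is allocated
 * lazily by arm_smmu_alloc_cd_ptr() above.
 */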
1458 
1459 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1460 {
1461 	int i;
1462 	struct arm_smmu_device *smmu = master->smmu;
1463 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1464 
1465 	if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
1466 		for (i = 0; i < cd_table->l2.num_l1_ents; i++) {
1467 			if (!cd_table->l2.l2ptrs[i])
1468 				continue;
1469 
1470 			dma_free_coherent(smmu->dev,
1471 					  sizeof(*cd_table->l2.l2ptrs[i]),
1472 					  cd_table->l2.l2ptrs[i],
1473 					  arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i]));
1474 		}
1475 		kfree(cd_table->l2.l2ptrs);
1476 
1477 		dma_free_coherent(smmu->dev,
1478 				  cd_table->l2.num_l1_ents *
1479 					  sizeof(struct arm_smmu_cdtab_l1),
1480 				  cd_table->l2.l1tab, cd_table->cdtab_dma);
1481 	} else {
1482 		dma_free_coherent(smmu->dev,
1483 				  cd_table->linear.num_ents *
1484 					  sizeof(struct arm_smmu_cd),
1485 				  cd_table->linear.table, cd_table->cdtab_dma);
1486 	}
1487 }
1488 
1489 /* Stream table manipulation functions */
1490 static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst,
1491 					  dma_addr_t l2ptr_dma)
1492 {
1493 	u64 val = 0;
1494 
1495 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1);
1496 	val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1497 
1498 	/* The HW has 64 bit atomicity with stores to the L2 STE table */
1499 	WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
1500 }
1501 
1502 struct arm_smmu_ste_writer {
1503 	struct arm_smmu_entry_writer writer;
1504 	u32 sid;
1505 };
1506 
1507 static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1508 {
1509 	struct arm_smmu_ste_writer *ste_writer =
1510 		container_of(writer, struct arm_smmu_ste_writer, writer);
1511 	struct arm_smmu_cmdq_ent cmd = {
1512 		.opcode	= CMDQ_OP_CFGI_STE,
1513 		.cfgi	= {
1514 			.sid	= ste_writer->sid,
1515 			.leaf	= true,
1516 		},
1517 	};
1518 
1519 	arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
1520 }
1521 
1522 static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
1523 	.sync = arm_smmu_ste_writer_sync_entry,
1524 	.get_used = arm_smmu_get_ste_used,
1525 };
1526 
1527 static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
1528 			       struct arm_smmu_ste *ste,
1529 			       const struct arm_smmu_ste *target)
1530 {
1531 	struct arm_smmu_device *smmu = master->smmu;
1532 	struct arm_smmu_ste_writer ste_writer = {
1533 		.writer = {
1534 			.ops = &arm_smmu_ste_writer_ops,
1535 			.master = master,
1536 		},
1537 		.sid = sid,
1538 	};
1539 
1540 	arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);
1541 
1542 	/* It's likely that we'll want to use the new STE soon */
1543 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
1544 		struct arm_smmu_cmdq_ent
1545 			prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
1546 					 .prefetch = {
1547 						 .sid = sid,
1548 					 } };
1549 
1550 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1551 	}
1552 }
1553 
1554 void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
1555 {
1556 	memset(target, 0, sizeof(*target));
1557 	target->data[0] = cpu_to_le64(
1558 		STRTAB_STE_0_V |
1559 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
1560 }
1561 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);
1562 
1563 VISIBLE_IF_KUNIT
1564 void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
1565 			      struct arm_smmu_ste *target)
1566 {
1567 	memset(target, 0, sizeof(*target));
1568 	target->data[0] = cpu_to_le64(
1569 		STRTAB_STE_0_V |
1570 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
1571 
1572 	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1573 		target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1574 							 STRTAB_STE_1_SHCFG_INCOMING));
1575 }
1576 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);
1577 
1578 VISIBLE_IF_KUNIT
1579 void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
1580 			       struct arm_smmu_master *master, bool ats_enabled,
1581 			       unsigned int s1dss)
1582 {
1583 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1584 	struct arm_smmu_device *smmu = master->smmu;
1585 
1586 	memset(target, 0, sizeof(*target));
1587 	target->data[0] = cpu_to_le64(
1588 		STRTAB_STE_0_V |
1589 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1590 		FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
1591 		(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1592 		FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
1593 
1594 	target->data[1] = cpu_to_le64(
1595 		FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
1596 		FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1597 		FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1598 		FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1599 		((smmu->features & ARM_SMMU_FEAT_STALLS &&
1600 		  !master->stall_enabled) ?
1601 			 STRTAB_STE_1_S1STALLD :
1602 			 0) |
1603 		FIELD_PREP(STRTAB_STE_1_EATS,
1604 			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1605 
1606 	if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) &&
1607 	    s1dss == STRTAB_STE_1_S1DSS_BYPASS)
1608 		target->data[1] |= cpu_to_le64(FIELD_PREP(
1609 			STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
1610 
1611 	if (smmu->features & ARM_SMMU_FEAT_E2H) {
1612 		/*
1613 		 * To support BTM the streamworld needs to match the
1614 		 * configuration of the CPU so that the ASID broadcasts are
1615 		 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
1616 		 * or NS-EL1 (guest). Since an SVA domain can be installed in a
1617 		 * PASID this should always use a BTM compatible configuration
1618 		 * if the HW supports it.
1619 		 */
1620 		target->data[1] |= cpu_to_le64(
1621 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
1622 	} else {
1623 		target->data[1] |= cpu_to_le64(
1624 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1625 
1626 		/*
1627 		 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
1628 		 * arm_smmu_domain_alloc_id()
1629 		 */
1630 		target->data[2] =
1631 			cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
1632 	}
1633 }
1634 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
1635 
1636 void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
1637 				 struct arm_smmu_master *master,
1638 				 struct arm_smmu_domain *smmu_domain,
1639 				 bool ats_enabled)
1640 {
1641 	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
1642 	const struct io_pgtable_cfg *pgtbl_cfg =
1643 		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1644 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
1645 		&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1646 	u64 vtcr_val;
1647 	struct arm_smmu_device *smmu = master->smmu;
1648 
1649 	memset(target, 0, sizeof(*target));
1650 	target->data[0] = cpu_to_le64(
1651 		STRTAB_STE_0_V |
1652 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
1653 
1654 	target->data[1] = cpu_to_le64(
1655 		FIELD_PREP(STRTAB_STE_1_EATS,
1656 			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1657 
1658 	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
1659 		target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB);
1660 	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1661 		target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1662 							  STRTAB_STE_1_SHCFG_INCOMING));
1663 
1664 	vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1665 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1666 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1667 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1668 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1669 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1670 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1671 	target->data[2] = cpu_to_le64(
1672 		FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1673 		FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
1674 		STRTAB_STE_2_S2AA64 |
1675 #ifdef __BIG_ENDIAN
1676 		STRTAB_STE_2_S2ENDI |
1677 #endif
1678 		STRTAB_STE_2_S2PTW |
1679 		(master->stall_enabled ? STRTAB_STE_2_S2S : 0) |
1680 		STRTAB_STE_2_S2R);
1681 
1682 	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
1683 				      STRTAB_STE_3_S2TTB_MASK);
1684 }
1685 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);
1686 
1687 /*
1688  * This can safely manipulate the STE memory directly, without a sync sequence,
1689  * because the STE table has not been installed in the SMMU yet.
1690  */
1691 static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
1692 				       unsigned int nent)
1693 {
1694 	unsigned int i;
1695 
1696 	for (i = 0; i < nent; ++i) {
1697 		arm_smmu_make_abort_ste(strtab);
1698 		strtab++;
1699 	}
1700 }
1701 
1702 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1703 {
1704 	dma_addr_t l2ptr_dma;
1705 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1706 	struct arm_smmu_strtab_l2 **l2table;
1707 
1708 	l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)];
1709 	if (*l2table)
1710 		return 0;
1711 
1712 	*l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table),
1713 				       &l2ptr_dma, GFP_KERNEL);
1714 	if (!*l2table) {
1715 		dev_err(smmu->dev,
1716 			"failed to allocate l2 stream table for SID %u\n",
1717 			sid);
1718 		return -ENOMEM;
1719 	}
1720 
1721 	arm_smmu_init_initial_stes((*l2table)->stes,
1722 				   ARRAY_SIZE((*l2table)->stes));
1723 	arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)],
1724 				      l2ptr_dma);
1725 	return 0;
1726 }
1727 
1728 static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
1729 {
1730 	struct arm_smmu_stream *stream_rhs =
1731 		rb_entry(rhs, struct arm_smmu_stream, node);
1732 	const u32 *sid_lhs = lhs;
1733 
1734 	if (*sid_lhs < stream_rhs->id)
1735 		return -1;
1736 	if (*sid_lhs > stream_rhs->id)
1737 		return 1;
1738 	return 0;
1739 }
1740 
1741 static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
1742 				     const struct rb_node *rhs)
1743 {
1744 	return arm_smmu_streams_cmp_key(
1745 		&rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
1746 }
1747 
1748 static struct arm_smmu_master *
1749 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1750 {
1751 	struct rb_node *node;
1752 
1753 	lockdep_assert_held(&smmu->streams_mutex);
1754 
1755 	node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
1756 	if (!node)
1757 		return NULL;
1758 	return rb_entry(node, struct arm_smmu_stream, node)->master;
1759 }
1760 
1761 /* IRQ and event handlers */
1762 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1763 {
1764 	int ret = 0;
1765 	u32 perm = 0;
1766 	struct arm_smmu_master *master;
1767 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1768 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1769 	struct iopf_fault fault_evt = { };
1770 	struct iommu_fault *flt = &fault_evt.fault;
1771 
1772 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1773 	case EVT_ID_TRANSLATION_FAULT:
1774 	case EVT_ID_ADDR_SIZE_FAULT:
1775 	case EVT_ID_ACCESS_FAULT:
1776 	case EVT_ID_PERMISSION_FAULT:
1777 		break;
1778 	default:
1779 		return -EOPNOTSUPP;
1780 	}
1781 
1782 	if (!(evt[1] & EVTQ_1_STALL))
1783 		return -EOPNOTSUPP;
1784 
1785 	if (evt[1] & EVTQ_1_RnW)
1786 		perm |= IOMMU_FAULT_PERM_READ;
1787 	else
1788 		perm |= IOMMU_FAULT_PERM_WRITE;
1789 
1790 	if (evt[1] & EVTQ_1_InD)
1791 		perm |= IOMMU_FAULT_PERM_EXEC;
1792 
1793 	if (evt[1] & EVTQ_1_PnU)
1794 		perm |= IOMMU_FAULT_PERM_PRIV;
1795 
1796 	flt->type = IOMMU_FAULT_PAGE_REQ;
1797 	flt->prm = (struct iommu_fault_page_request) {
1798 		.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1799 		.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1800 		.perm = perm,
1801 		.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1802 	};
1803 
1804 	if (ssid_valid) {
1805 		flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1806 		flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1807 	}
1808 
1809 	mutex_lock(&smmu->streams_mutex);
1810 	master = arm_smmu_find_master(smmu, sid);
1811 	if (!master) {
1812 		ret = -EINVAL;
1813 		goto out_unlock;
1814 	}
1815 
1816 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1817 out_unlock:
1818 	mutex_unlock(&smmu->streams_mutex);
1819 	return ret;
1820 }
1821 
1822 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1823 {
1824 	int i, ret;
1825 	struct arm_smmu_device *smmu = dev;
1826 	struct arm_smmu_queue *q = &smmu->evtq.q;
1827 	struct arm_smmu_ll_queue *llq = &q->llq;
1828 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1829 				      DEFAULT_RATELIMIT_BURST);
1830 	u64 evt[EVTQ_ENT_DWORDS];
1831 
1832 	do {
1833 		while (!queue_remove_raw(q, evt)) {
1834 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1835 
1836 			ret = arm_smmu_handle_evt(smmu, evt);
1837 			if (!ret || !__ratelimit(&rs))
1838 				continue;
1839 
1840 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1841 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1842 				dev_info(smmu->dev, "\t0x%016llx\n",
1843 					 (unsigned long long)evt[i]);
1844 
1845 			cond_resched();
1846 		}
1847 
1848 		/*
1849 		 * Not much we can do on overflow, so scream and pretend we're
1850 		 * trying harder.
1851 		 */
1852 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1853 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1854 	} while (!queue_empty(llq));
1855 
1856 	/* Sync our overflow flag, as we believe we're up to speed */
1857 	queue_sync_cons_ovf(q);
1858 	return IRQ_HANDLED;
1859 }
1860 
1861 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1862 {
1863 	u32 sid, ssid;
1864 	u16 grpid;
1865 	bool ssv, last;
1866 
1867 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1868 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1869 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1870 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1871 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1872 
1873 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1874 	dev_info(smmu->dev,
1875 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1876 		 sid, ssid, grpid, last ? "L" : "",
1877 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1878 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1879 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1880 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1881 		 evt[1] & PRIQ_1_ADDR_MASK);
1882 
1883 	if (last) {
1884 		struct arm_smmu_cmdq_ent cmd = {
1885 			.opcode			= CMDQ_OP_PRI_RESP,
1886 			.substream_valid	= ssv,
1887 			.pri			= {
1888 				.sid	= sid,
1889 				.ssid	= ssid,
1890 				.grpid	= grpid,
1891 				.resp	= PRI_RESP_DENY,
1892 			},
1893 		};
1894 
1895 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1896 	}
1897 }
1898 
1899 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1900 {
1901 	struct arm_smmu_device *smmu = dev;
1902 	struct arm_smmu_queue *q = &smmu->priq.q;
1903 	struct arm_smmu_ll_queue *llq = &q->llq;
1904 	u64 evt[PRIQ_ENT_DWORDS];
1905 
1906 	do {
1907 		while (!queue_remove_raw(q, evt))
1908 			arm_smmu_handle_ppr(smmu, evt);
1909 
1910 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1911 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1912 	} while (!queue_empty(llq));
1913 
1914 	/* Sync our overflow flag, as we believe we're up to speed */
1915 	queue_sync_cons_ovf(q);
1916 	return IRQ_HANDLED;
1917 }
1918 
1919 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1920 
1921 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1922 {
1923 	u32 gerror, gerrorn, active;
1924 	struct arm_smmu_device *smmu = dev;
1925 
1926 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1927 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1928 
1929 	active = gerror ^ gerrorn;
1930 	if (!(active & GERROR_ERR_MASK))
1931 		return IRQ_NONE; /* No errors pending */
1932 
1933 	dev_warn(smmu->dev,
1934 		 "unexpected global error reported (0x%08x), this could be serious\n",
1935 		 active);
1936 
1937 	if (active & GERROR_SFM_ERR) {
1938 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1939 		arm_smmu_device_disable(smmu);
1940 	}
1941 
1942 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1943 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1944 
1945 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1946 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1947 
1948 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1949 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1950 
1951 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1952 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1953 
1954 	if (active & GERROR_PRIQ_ABT_ERR)
1955 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1956 
1957 	if (active & GERROR_EVTQ_ABT_ERR)
1958 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1959 
1960 	if (active & GERROR_CMDQ_ERR)
1961 		arm_smmu_cmdq_skip_err(smmu);
1962 
1963 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1964 	return IRQ_HANDLED;
1965 }
1966 
1967 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1968 {
1969 	struct arm_smmu_device *smmu = dev;
1970 
1971 	arm_smmu_evtq_thread(irq, dev);
1972 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1973 		arm_smmu_priq_thread(irq, dev);
1974 
1975 	return IRQ_HANDLED;
1976 }
1977 
1978 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1979 {
1980 	arm_smmu_gerror_handler(irq, dev);
1981 	return IRQ_WAKE_THREAD;
1982 }
1983 
1984 static void
1985 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1986 			struct arm_smmu_cmdq_ent *cmd)
1987 {
1988 	size_t log2_span;
1989 	size_t span_mask;
1990 	/* ATC invalidates are always on 4096-byte pages */
1991 	size_t inval_grain_shift = 12;
1992 	unsigned long page_start, page_end;
1993 
1994 	/*
1995 	 * ATS and PASID:
1996 	 *
1997 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1998 	 * prefix. In that case all ATC entries within the address range are
1999 	 * invalidated, including those that were requested with a PASID! There
2000 	 * is no way to invalidate only entries without PASID.
2001 	 *
2002 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
2003 	 * traffic), translation requests without PASID create ATC entries
2004 	 * without PASID, which must be invalidated with substream_valid clear.
2005 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
2006 	 * ATC entries within the address range.
2007 	 */
2008 	*cmd = (struct arm_smmu_cmdq_ent) {
2009 		.opcode			= CMDQ_OP_ATC_INV,
2010 		.substream_valid	= (ssid != IOMMU_NO_PASID),
2011 		.atc.ssid		= ssid,
2012 	};
2013 
2014 	if (!size) {
2015 		cmd->atc.size = ATC_INV_SIZE_ALL;
2016 		return;
2017 	}
2018 
2019 	page_start	= iova >> inval_grain_shift;
2020 	page_end	= (iova + size - 1) >> inval_grain_shift;
2021 
2022 	/*
2023 	 * In an ATS Invalidate Request, the address must be aligned on the
2024 	 * range size, which must be a power of two number of page sizes. We
2025 	 * thus have to choose between grossly over-invalidating the region, or
2026 	 * splitting the invalidation into multiple commands. For simplicity
2027 	 * we'll go with the first solution, but should refine it in the future
2028 	 * if multiple commands are shown to be more efficient.
2029 	 *
2030 	 * Find the smallest power of two that covers the range. The most
2031 	 * significant differing bit between the start and end addresses,
2032 	 * fls(start ^ end), indicates the required span. For example:
2033 	 *
2034 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
2035 	 *		x = 0b1000 ^ 0b1011 = 0b11
2036 	 *		span = 1 << fls(x) = 4
2037 	 *
2038 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
2039 	 *		x = 0b0111 ^ 0b1010 = 0b1101
2040 	 *		span = 1 << fls(x) = 16
2041 	 */
2042 	log2_span	= fls_long(page_start ^ page_end);
2043 	span_mask	= (1ULL << log2_span) - 1;
2044 
2045 	page_start	&= ~span_mask;
2046 
2047 	cmd->atc.addr	= page_start << inval_grain_shift;
2048 	cmd->atc.size	= log2_span;
2049 }
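
/*
 * For illustration, a worked trace of the span computation above using the
 * [7; 10] example from the comment (hypothetical values, 4KiB pages):
 *
 *	page_start = 7, page_end = 10
 *	log2_span  = fls_long(0b0111 ^ 0b1010) = fls_long(0b1101) = 4
 *	span_mask  = (1 << 4) - 1 = 0xf
 *	page_start &= ~0xf  ->  0
 *
 * so cmd->atc.addr ends up as 0x0 and cmd->atc.size as 4, i.e. the command
 * invalidates the 16 pages [0; 15] that cover the requested [7; 10] range.
 */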
2050 
2051 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
2052 				   ioasid_t ssid)
2053 {
2054 	int i;
2055 	struct arm_smmu_cmdq_ent cmd;
2056 	struct arm_smmu_cmdq_batch cmds;
2057 
2058 	arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
2059 
2060 	arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
2061 	for (i = 0; i < master->num_streams; i++) {
2062 		cmd.atc.sid = master->streams[i].id;
2063 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
2064 	}
2065 
2066 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
2067 }
2068 
2069 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
2070 			    unsigned long iova, size_t size)
2071 {
2072 	struct arm_smmu_master_domain *master_domain;
2073 	int i;
2074 	unsigned long flags;
2075 	struct arm_smmu_cmdq_ent cmd = {
2076 		.opcode = CMDQ_OP_ATC_INV,
2077 	};
2078 	struct arm_smmu_cmdq_batch cmds;
2079 
2080 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
2081 		return 0;
2082 
2083 	/*
2084 	 * Ensure that we've completed prior invalidation of the main TLBs
2085 	 * before we read 'nr_ats_masters' in case of a concurrent call to
2086 	 * arm_smmu_enable_ats():
2087 	 *
2088 	 *	// unmap()			// arm_smmu_enable_ats()
2089 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
2090 	 *	smp_mb();			[...]
2091 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
2092 	 *
2093 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
2094 	 * ATS was enabled at the PCI device before completion of the TLBI.
2095 	 */
2096 	smp_mb();
2097 	if (!atomic_read(&smmu_domain->nr_ats_masters))
2098 		return 0;
2099 
2100 	arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
2101 
2102 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2103 	list_for_each_entry(master_domain, &smmu_domain->devices,
2104 			    devices_elm) {
2105 		struct arm_smmu_master *master = master_domain->master;
2106 
2107 		if (!master->ats_enabled)
2108 			continue;
2109 
2110 		if (master_domain->nested_ats_flush) {
2111 			/*
2112 			 * If an S2 used as a nesting parent is changed, we have
2113 			 * no option but to completely flush the ATC.
2114 			 */
2115 			arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
2116 		} else {
2117 			arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
2118 						&cmd);
2119 		}
2120 
2121 		for (i = 0; i < master->num_streams; i++) {
2122 			cmd.atc.sid = master->streams[i].id;
2123 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
2124 		}
2125 	}
2126 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2127 
2128 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
2129 }
2130 
2131 /* IO_PGTABLE API */
2132 static void arm_smmu_tlb_inv_context(void *cookie)
2133 {
2134 	struct arm_smmu_domain *smmu_domain = cookie;
2135 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2136 	struct arm_smmu_cmdq_ent cmd;
2137 
2138 	/*
2139 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2140 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
2141 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2142 	 * insertion to guarantee those are observed before the TLBI. Do be
2143 	 * careful, 007.
2144 	 */
2145 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2146 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
2147 	} else {
2148 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
2149 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2150 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
2151 	}
2152 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
2153 }
2154 
2155 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
2156 				     unsigned long iova, size_t size,
2157 				     size_t granule,
2158 				     struct arm_smmu_domain *smmu_domain)
2159 {
2160 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2161 	unsigned long end = iova + size, num_pages = 0, tg = 0;
2162 	size_t inv_range = granule;
2163 	struct arm_smmu_cmdq_batch cmds;
2164 
2165 	if (!size)
2166 		return;
2167 
2168 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2169 		/* Get the leaf page size */
2170 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2171 
2172 		num_pages = size >> tg;
2173 
2174 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
2175 		cmd->tlbi.tg = (tg - 10) / 2;
2176 
2177 		/*
2178 		 * Determine what level the granule is at. For non-leaf, both
2179 		 * io-pgtable and SVA pass a nominal last-level granule because
2180 		 * they don't know what level(s) actually apply, so ignore that
2181 		 * and leave TTL=0. However for various errata reasons we still
2182 		 * want to use a range command, so avoid the SVA corner case
2183 		 * where both scale and num could be 0 as well.
2184 		 */
2185 		if (cmd->tlbi.leaf)
2186 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
2187 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
2188 			num_pages++;
2189 	}
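
	/*
	 * For illustration (hypothetical values): with 4KiB leaf pages
	 * (tg = 12), a 2MiB leaf granule gives
	 * ttl = 4 - ((21 - 3) / (12 - 3)) = 2, while a 4KiB leaf granule
	 * gives ttl = 4 - ((12 - 3) / (12 - 3)) = 3, matching the block and
	 * page levels for that granule size.
	 */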
2190 
2191 	arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
2192 
2193 	while (iova < end) {
2194 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2195 			/*
2196 			 * On each iteration of the loop, the range is 5 bits
2197 			 * worth of the aligned size remaining.
2198 			 * The range in pages is:
2199 			 *
2200 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
2201 			 */
2202 			unsigned long scale, num;
2203 
2204 			/* Determine the power of 2 multiple number of pages */
2205 			scale = __ffs(num_pages);
2206 			cmd->tlbi.scale = scale;
2207 
2208 			/* Determine how many chunks of 2^scale size we have */
2209 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2210 			cmd->tlbi.num = num - 1;
2211 
2212 			/* range is num * 2^scale * pgsize */
2213 			inv_range = num << (scale + tg);
2214 
2215 			/* Clear out the lower order bits for the next iteration */
2216 			num_pages -= num << scale;
2217 		}
2218 
2219 		cmd->tlbi.addr = iova;
2220 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
2221 		iova += inv_range;
2222 	}
2223 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
2224 }
2225 
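
/*
 * For illustration, a hypothetical trace of the range loop above with 4KiB
 * leaf pages (tg = 12) and a 35-page invalidation (num_pages = 0b100011),
 * assuming range commands are supported:
 *
 *	pass 1: scale = __ffs(35) = 0, num = (35 >> 0) & 0x1f = 3
 *		-> one command covering 3 pages, num_pages becomes 32
 *	pass 2: scale = __ffs(32) = 5, num = (32 >> 5) & 0x1f = 1
 *		-> one command covering 1 * 2^5 pages, num_pages becomes 0
 *
 * Two commands cover the 3 + 32 = 35 requested pages.
 */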
2226 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
2227 					  size_t granule, bool leaf,
2228 					  struct arm_smmu_domain *smmu_domain)
2229 {
2230 	struct arm_smmu_cmdq_ent cmd = {
2231 		.tlbi = {
2232 			.leaf	= leaf,
2233 		},
2234 	};
2235 
2236 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2237 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2238 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
2239 		cmd.tlbi.asid	= smmu_domain->cd.asid;
2240 	} else {
2241 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
2242 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2243 	}
2244 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2245 
2246 	if (smmu_domain->nest_parent) {
2247 		/*
2248 		 * When the S2 domain changes all the nested S1 ASIDs have to be
2249 		 * flushed too.
2250 		 */
2251 		cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
2252 		arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
2253 	}
2254 
2255 	/*
2256 	 * Unfortunately, this can't be leaf-only since we may have
2257 	 * zapped an entire table.
2258 	 */
2259 	arm_smmu_atc_inv_domain(smmu_domain, iova, size);
2260 }
2261 
2262 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
2263 				 size_t granule, bool leaf,
2264 				 struct arm_smmu_domain *smmu_domain)
2265 {
2266 	struct arm_smmu_cmdq_ent cmd = {
2267 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2268 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
2269 		.tlbi = {
2270 			.asid	= asid,
2271 			.leaf	= leaf,
2272 		},
2273 	};
2274 
2275 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2276 }
2277 
2278 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2279 					 unsigned long iova, size_t granule,
2280 					 void *cookie)
2281 {
2282 	struct arm_smmu_domain *smmu_domain = cookie;
2283 	struct iommu_domain *domain = &smmu_domain->domain;
2284 
2285 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2286 }
2287 
2288 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2289 				  size_t granule, void *cookie)
2290 {
2291 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2292 }
2293 
2294 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2295 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2296 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2297 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2298 };
2299 
2300 static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
2301 {
2302 	u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
2303 
2304 	return (smmu->features & features) == features;
2305 }
2306 
2307 /* IOMMU API */
2308 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2309 {
2310 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2311 
2312 	switch (cap) {
2313 	case IOMMU_CAP_CACHE_COHERENCY:
2314 		/* Assume that a coherent TCU implies coherent TBUs */
2315 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2316 	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
2317 		return arm_smmu_master_canwbs(master);
2318 	case IOMMU_CAP_NOEXEC:
2319 	case IOMMU_CAP_DEFERRED_FLUSH:
2320 		return true;
2321 	case IOMMU_CAP_DIRTY_TRACKING:
2322 		return arm_smmu_dbm_capable(master->smmu);
2323 	default:
2324 		return false;
2325 	}
2326 }
2327 
2328 static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
2329 {
2330 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2331 	struct arm_smmu_master_domain *master_domain;
2332 	unsigned long flags;
2333 	bool ret = true;
2334 
2335 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2336 	list_for_each_entry(master_domain, &smmu_domain->devices,
2337 			    devices_elm) {
2338 		if (!arm_smmu_master_canwbs(master_domain->master)) {
2339 			ret = false;
2340 			break;
2341 		}
2342 	}
2343 	smmu_domain->enforce_cache_coherency = ret;
2344 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2345 	return ret;
2346 }
2347 
2348 struct arm_smmu_domain *arm_smmu_domain_alloc(void)
2349 {
2350 	struct arm_smmu_domain *smmu_domain;
2351 
2352 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2353 	if (!smmu_domain)
2354 		return ERR_PTR(-ENOMEM);
2355 
2356 	mutex_init(&smmu_domain->init_mutex);
2357 	INIT_LIST_HEAD(&smmu_domain->devices);
2358 	spin_lock_init(&smmu_domain->devices_lock);
2359 
2360 	return smmu_domain;
2361 }
2362 
2363 static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
2364 {
2365 	struct arm_smmu_domain *smmu_domain;
2366 
2367 	/*
2368 	 * Allocate the domain and initialise some of its data structures.
2369 	 * We can't really do anything meaningful until we've added a
2370 	 * master.
2371 	 */
2372 	smmu_domain = arm_smmu_domain_alloc();
2373 	if (IS_ERR(smmu_domain))
2374 		return ERR_CAST(smmu_domain);
2375 
2376 	if (dev) {
2377 		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2378 		int ret;
2379 
2380 		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0);
2381 		if (ret) {
2382 			kfree(smmu_domain);
2383 			return ERR_PTR(ret);
2384 		}
2385 	}
2386 	return &smmu_domain->domain;
2387 }
2388 
2389 static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
2390 {
2391 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2392 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2393 
2394 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2395 
2396 	/* Free the ASID or VMID */
2397 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2398 		/* Prevent SVA from touching the CD while we're freeing it */
2399 		mutex_lock(&arm_smmu_asid_lock);
2400 		xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
2401 		mutex_unlock(&arm_smmu_asid_lock);
2402 	} else {
2403 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2404 		if (cfg->vmid)
2405 			ida_free(&smmu->vmid_map, cfg->vmid);
2406 	}
2407 
2408 	kfree(smmu_domain);
2409 }
2410 
2411 static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
2412 				       struct arm_smmu_domain *smmu_domain)
2413 {
2414 	int ret;
2415 	u32 asid = 0;
2416 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2417 
2418 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2419 	mutex_lock(&arm_smmu_asid_lock);
2420 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
2421 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2422 	cd->asid	= (u16)asid;
2423 	mutex_unlock(&arm_smmu_asid_lock);
2424 	return ret;
2425 }
2426 
2427 static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
2428 				       struct arm_smmu_domain *smmu_domain)
2429 {
2430 	int vmid;
2431 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2432 
2433 	/* Reserve VMID 0 for stage-2 bypass STEs */
2434 	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2435 			       GFP_KERNEL);
2436 	if (vmid < 0)
2437 		return vmid;
2438 
2439 	cfg->vmid	= (u16)vmid;
2440 	return 0;
2441 }
2442 
2443 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
2444 				    struct arm_smmu_device *smmu, u32 flags)
2445 {
2446 	int ret;
2447 	enum io_pgtable_fmt fmt;
2448 	struct io_pgtable_cfg pgtbl_cfg;
2449 	struct io_pgtable_ops *pgtbl_ops;
2450 	int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
2451 				 struct arm_smmu_domain *smmu_domain);
2452 	bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
2453 
2454 	/* Restrict the stage to what we can actually support */
2455 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2456 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2457 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2458 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2459 
2460 	pgtbl_cfg = (struct io_pgtable_cfg) {
2461 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2462 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2463 		.tlb		= &arm_smmu_flush_ops,
2464 		.iommu_dev	= smmu->dev,
2465 	};
2466 
2467 	switch (smmu_domain->stage) {
2468 	case ARM_SMMU_DOMAIN_S1: {
2469 		unsigned long ias = (smmu->features &
2470 				     ARM_SMMU_FEAT_VAX) ? 52 : 48;
2471 
2472 		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
2473 		pgtbl_cfg.oas = smmu->ias;
2474 		if (enable_dirty)
2475 			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
2476 		fmt = ARM_64_LPAE_S1;
2477 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2478 		break;
2479 	}
2480 	case ARM_SMMU_DOMAIN_S2:
2481 		if (enable_dirty)
2482 			return -EOPNOTSUPP;
2483 		pgtbl_cfg.ias = smmu->ias;
2484 		pgtbl_cfg.oas = smmu->oas;
2485 		fmt = ARM_64_LPAE_S2;
2486 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2487 		if ((smmu->features & ARM_SMMU_FEAT_S2FWB) &&
2488 		    (flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
2489 			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB;
2490 		break;
2491 	default:
2492 		return -EINVAL;
2493 	}
2494 
2495 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2496 	if (!pgtbl_ops)
2497 		return -ENOMEM;
2498 
2499 	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2500 	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2501 	smmu_domain->domain.geometry.force_aperture = true;
2502 	if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
2503 		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
2504 
2505 	ret = finalise_stage_fn(smmu, smmu_domain);
2506 	if (ret < 0) {
2507 		free_io_pgtable_ops(pgtbl_ops);
2508 		return ret;
2509 	}
2510 
2511 	smmu_domain->pgtbl_ops = pgtbl_ops;
2512 	smmu_domain->smmu = smmu;
2513 	return 0;
2514 }
2515 
2516 static struct arm_smmu_ste *
2517 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2518 {
2519 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2520 
2521 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2522 		/* Two-level walk */
2523 		return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]
2524 				->stes[arm_smmu_strtab_l2_idx(sid)];
2525 	} else {
2526 		/* Simple linear lookup */
2527 		return &cfg->linear.table[sid];
2528 	}
2529 }
2530 
2531 void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
2532 				  const struct arm_smmu_ste *target)
2533 {
2534 	int i, j;
2535 	struct arm_smmu_device *smmu = master->smmu;
2536 
2537 	master->cd_table.in_ste =
2538 		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) ==
2539 		STRTAB_STE_0_CFG_S1_TRANS;
2540 	master->ste_ats_enabled =
2541 		FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) ==
2542 		STRTAB_STE_1_EATS_TRANS;
2543 
2544 	for (i = 0; i < master->num_streams; ++i) {
2545 		u32 sid = master->streams[i].id;
2546 		struct arm_smmu_ste *step =
2547 			arm_smmu_get_step_for_sid(smmu, sid);
2548 
2549 		/* Bridged PCI devices may end up with duplicated IDs */
2550 		for (j = 0; j < i; j++)
2551 			if (master->streams[j].id == sid)
2552 				break;
2553 		if (j < i)
2554 			continue;
2555 
2556 		arm_smmu_write_ste(master, sid, step, target);
2557 	}
2558 }
2559 
2560 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2561 {
2562 	struct device *dev = master->dev;
2563 	struct arm_smmu_device *smmu = master->smmu;
2564 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2565 
2566 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2567 		return false;
2568 
2569 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2570 		return false;
2571 
2572 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2573 }
2574 
2575 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2576 {
2577 	size_t stu;
2578 	struct pci_dev *pdev;
2579 	struct arm_smmu_device *smmu = master->smmu;
2580 
2581 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2582 	stu = __ffs(smmu->pgsize_bitmap);
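	/*
	 * e.g. (hypothetical) a pgsize_bitmap of 4K | 2M | 1G has its lowest
	 * set bit at position 12, so the STU passed to pci_enable_ats() below
	 * describes a 4KiB minimum translation size.
	 */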
2583 	pdev = to_pci_dev(master->dev);
2584 
2585 	/*
2586 	 * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
2587 	 */
2588 	arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2589 	if (pci_enable_ats(pdev, stu))
2590 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2591 }
2592 
2593 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2594 {
2595 	int ret;
2596 	int features;
2597 	int num_pasids;
2598 	struct pci_dev *pdev;
2599 
2600 	if (!dev_is_pci(master->dev))
2601 		return -ENODEV;
2602 
2603 	pdev = to_pci_dev(master->dev);
2604 
2605 	features = pci_pasid_features(pdev);
2606 	if (features < 0)
2607 		return features;
2608 
2609 	num_pasids = pci_max_pasids(pdev);
2610 	if (num_pasids <= 0)
2611 		return num_pasids;
2612 
2613 	ret = pci_enable_pasid(pdev, features);
2614 	if (ret) {
2615 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2616 		return ret;
2617 	}
2618 
2619 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2620 				  master->smmu->ssid_bits);
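	/*
	 * For example (hypothetical values): a device advertising 0x10000
	 * PASIDs behind an SMMU with 20-bit SSIDs ends up with
	 * master->ssid_bits = min(16, 20) = 16.
	 */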
2621 	return 0;
2622 }
2623 
2624 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2625 {
2626 	struct pci_dev *pdev;
2627 
2628 	if (!dev_is_pci(master->dev))
2629 		return;
2630 
2631 	pdev = to_pci_dev(master->dev);
2632 
2633 	if (!pdev->pasid_enabled)
2634 		return;
2635 
2636 	master->ssid_bits = 0;
2637 	pci_disable_pasid(pdev);
2638 }
2639 
2640 static struct arm_smmu_master_domain *
2641 arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
2642 			    struct arm_smmu_master *master,
2643 			    ioasid_t ssid, bool nested_ats_flush)
2644 {
2645 	struct arm_smmu_master_domain *master_domain;
2646 
2647 	lockdep_assert_held(&smmu_domain->devices_lock);
2648 
2649 	list_for_each_entry(master_domain, &smmu_domain->devices,
2650 			    devices_elm) {
2651 		if (master_domain->master == master &&
2652 		    master_domain->ssid == ssid &&
2653 		    master_domain->nested_ats_flush == nested_ats_flush)
2654 			return master_domain;
2655 	}
2656 	return NULL;
2657 }
2658 
2659 /*
2660  * If the domain uses the smmu_domain->devices list, return the arm_smmu_domain
2661  * structure; otherwise return NULL. These domains track attached devices so
2662  * they can issue invalidations.
2663  */
2664 static struct arm_smmu_domain *
2665 to_smmu_domain_devices(struct iommu_domain *domain)
2666 {
2667 	/* The domain can be NULL only when processing the first attach */
2668 	if (!domain)
2669 		return NULL;
2670 	if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
2671 	    domain->type == IOMMU_DOMAIN_SVA)
2672 		return to_smmu_domain(domain);
2673 	if (domain->type == IOMMU_DOMAIN_NESTED)
2674 		return to_smmu_nested_domain(domain)->vsmmu->s2_parent;
2675 	return NULL;
2676 }
2677 
2678 static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
2679 					  struct iommu_domain *domain,
2680 					  ioasid_t ssid)
2681 {
2682 	struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain);
2683 	struct arm_smmu_master_domain *master_domain;
2684 	bool nested_ats_flush = false;
2685 	unsigned long flags;
2686 
2687 	if (!smmu_domain)
2688 		return;
2689 
2690 	if (domain->type == IOMMU_DOMAIN_NESTED)
2691 		nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats;
2692 
2693 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2694 	master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid,
2695 						    nested_ats_flush);
2696 	if (master_domain) {
2697 		list_del(&master_domain->devices_elm);
2698 		kfree(master_domain);
2699 		if (master->ats_enabled)
2700 			atomic_dec(&smmu_domain->nr_ats_masters);
2701 	}
2702 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2703 }
2704 
2705 /*
2706  * Start the sequence to attach a domain to a master. The sequence contains three
2707  * steps:
2708  *  arm_smmu_attach_prepare()
2709  *  arm_smmu_install_ste_for_dev()
2710  *  arm_smmu_attach_commit()
2711  *
2712  * If prepare succeeds then the sequence must be completed. The STE installed
2713  * must set the STE.EATS field according to state.ats_enabled.
2714  *
2715  * If the device supports ATS then this determines if EATS should be enabled
2716  * in the STE, and starts sequencing EATS disable if required.
2717  *
2718  * The changes to the EATS field in the STE and to the PCI ATS config space are
2719  * managed by this sequence to happen in the right order, so that if PCI ATS is
2720  * enabled then STE.EATS is enabled.
2721  *
2722  * new_domain can be a non-paging domain. In this case ATS will not be enabled,
2723  * and invalidations won't be tracked.
2724  */
2725 int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
2726 			    struct iommu_domain *new_domain)
2727 {
2728 	struct arm_smmu_master *master = state->master;
2729 	struct arm_smmu_master_domain *master_domain;
2730 	struct arm_smmu_domain *smmu_domain =
2731 		to_smmu_domain_devices(new_domain);
2732 	unsigned long flags;
2733 
2734 	/*
2735 	 * arm_smmu_share_asid() must not see two domains pointing to the same
2736 	 * arm_smmu_master_domain contents, otherwise it could randomly write one
2737 	 * or the other to the CD.
2738 	 */
2739 	lockdep_assert_held(&arm_smmu_asid_lock);
2740 
2741 	if (smmu_domain || state->cd_needs_ats) {
2742 		/*
2743 		 * The SMMU does not support enabling ATS with bypass/abort.
2744 		 * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
2745 		 * Translation Requests and Translated transactions are denied
2746 		 * as though ATS is disabled for the stream (STE.EATS == 0b00),
2747 		 * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
2748 		 * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be
2749 		 * enabled if we have an arm_smmu_domain, which always has
2750 		 * page tables.
2751 		 */
2752 		state->ats_enabled = !state->disable_ats &&
2753 				     arm_smmu_ats_supported(master);
2754 	}
2755 
2756 	if (smmu_domain) {
2757 		master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
2758 		if (!master_domain)
2759 			return -ENOMEM;
2760 		master_domain->master = master;
2761 		master_domain->ssid = state->ssid;
2762 		if (new_domain->type == IOMMU_DOMAIN_NESTED)
2763 			master_domain->nested_ats_flush =
2764 				to_smmu_nested_domain(new_domain)->enable_ats;
2765 
2766 		/*
2767 		 * During prepare we want the current smmu_domain and new
2768 		 * smmu_domain to be in the devices list before we change any
2769 		 * HW. This ensures that both domains will send ATS
2770 		 * invalidations to the master until we are done.
2771 		 *
2772 		 * It is tempting to make this list only track masters that are
2773 		 * using ATS, but arm_smmu_share_asid() also uses this to change
2774 		 * the ASID of a domain, unrelated to ATS.
2775 		 *
2776 		 * Notice if we are re-attaching the same domain then the list
2777 		 * will have two identical entries and commit will remove only
2778 		 * one of them.
2779 		 */
2780 		spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2781 		if (smmu_domain->enforce_cache_coherency &&
2782 		    !arm_smmu_master_canwbs(master)) {
2783 			spin_unlock_irqrestore(&smmu_domain->devices_lock,
2784 					       flags);
2785 			kfree(master_domain);
2786 			return -EINVAL;
2787 		}
2788 
2789 		if (state->ats_enabled)
2790 			atomic_inc(&smmu_domain->nr_ats_masters);
2791 		list_add(&master_domain->devices_elm, &smmu_domain->devices);
2792 		spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2793 	}
2794 
2795 	if (!state->ats_enabled && master->ats_enabled) {
2796 		pci_disable_ats(to_pci_dev(master->dev));
2797 		/*
2798 		 * This is probably overkill, but the config write for disabling
2799 		 * ATS should complete before the STE is configured to generate
2800 		 * UR to avoid AER noise.
2801 		 */
2802 		wmb();
2803 	}
2804 	return 0;
2805 }
2806 
2807 /*
2808  * Commit is done after the STE/CD are configured with the EATS setting. It
2809  * completes synchronizing the PCI device's ATC and finishes manipulating the
2810  * smmu_domain->devices list.
2811  */
2812 void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
2813 {
2814 	struct arm_smmu_master *master = state->master;
2815 
2816 	lockdep_assert_held(&arm_smmu_asid_lock);
2817 
2818 	if (state->ats_enabled && !master->ats_enabled) {
2819 		arm_smmu_enable_ats(master);
2820 	} else if (state->ats_enabled && master->ats_enabled) {
2821 		/*
2822 		 * The translation has changed, flush the ATC. At this point the
2823 		 * SMMU is translating for the new domain and both the old and
2824 		 * new domains will issue invalidations.
2825 		 */
2826 		arm_smmu_atc_inv_master(master, state->ssid);
2827 	} else if (!state->ats_enabled && master->ats_enabled) {
2828 		/* ATS is being switched off, invalidate the entire ATC */
2829 		arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2830 	}
2831 	master->ats_enabled = state->ats_enabled;
2832 
2833 	arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);
2834 }
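
/*
 * An illustrative sketch of the whole sequence, mirroring what
 * arm_smmu_attach_dev() below does for a stage-1 RID attach:
 *
 *	mutex_lock(&arm_smmu_asid_lock);
 *	arm_smmu_attach_prepare(&state, domain);
 *	... build the STE/CD using state.ats_enabled ...
 *	arm_smmu_install_ste_for_dev(master, &target);
 *	arm_smmu_attach_commit(&state);
 *	mutex_unlock(&arm_smmu_asid_lock);
 */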
2835 
2836 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2837 {
2838 	int ret = 0;
2839 	struct arm_smmu_ste target;
2840 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2841 	struct arm_smmu_device *smmu;
2842 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2843 	struct arm_smmu_attach_state state = {
2844 		.old_domain = iommu_get_domain_for_dev(dev),
2845 		.ssid = IOMMU_NO_PASID,
2846 	};
2847 	struct arm_smmu_master *master;
2848 	struct arm_smmu_cd *cdptr;
2849 
2850 	if (!fwspec)
2851 		return -ENOENT;
2852 
2853 	state.master = master = dev_iommu_priv_get(dev);
2854 	smmu = master->smmu;
2855 
2856 	mutex_lock(&smmu_domain->init_mutex);
2857 
2858 	if (!smmu_domain->smmu) {
2859 		ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
2860 	} else if (smmu_domain->smmu != smmu)
2861 		ret = -EINVAL;
2862 
2863 	mutex_unlock(&smmu_domain->init_mutex);
2864 	if (ret)
2865 		return ret;
2866 
2867 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2868 		cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
2869 		if (!cdptr)
2870 			return -ENOMEM;
2871 	} else if (arm_smmu_ssids_in_use(&master->cd_table))
2872 		return -EBUSY;
2873 
2874 	/*
2875 	 * Prevent arm_smmu_share_asid() from trying to change the ASID
2876 	 * of either the old or new domain while we are working on it.
2877 	 * This allows the STE and the smmu_domain->devices list to
2878 	 * be inconsistent during this routine.
2879 	 */
2880 	mutex_lock(&arm_smmu_asid_lock);
2881 
2882 	ret = arm_smmu_attach_prepare(&state, domain);
2883 	if (ret) {
2884 		mutex_unlock(&arm_smmu_asid_lock);
2885 		return ret;
2886 	}
2887 
2888 	switch (smmu_domain->stage) {
2889 	case ARM_SMMU_DOMAIN_S1: {
2890 		struct arm_smmu_cd target_cd;
2891 
2892 		arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
2893 		arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
2894 					&target_cd);
2895 		arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled,
2896 					  STRTAB_STE_1_S1DSS_SSID0);
2897 		arm_smmu_install_ste_for_dev(master, &target);
2898 		break;
2899 	}
2900 	case ARM_SMMU_DOMAIN_S2:
2901 		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain,
2902 					    state.ats_enabled);
2903 		arm_smmu_install_ste_for_dev(master, &target);
2904 		arm_smmu_clear_cd(master, IOMMU_NO_PASID);
2905 		break;
2906 	}
2907 
2908 	arm_smmu_attach_commit(&state);
2909 	mutex_unlock(&arm_smmu_asid_lock);
2910 	return 0;
2911 }
2912 
2913 static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
2914 				     struct device *dev, ioasid_t id,
2915 				     struct iommu_domain *old)
2916 {
2917 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2918 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2919 	struct arm_smmu_device *smmu = master->smmu;
2920 	struct arm_smmu_cd target_cd;
2921 	int ret = 0;
2922 
2923 	mutex_lock(&smmu_domain->init_mutex);
2924 	if (!smmu_domain->smmu)
2925 		ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
2926 	else if (smmu_domain->smmu != smmu)
2927 		ret = -EINVAL;
2928 	mutex_unlock(&smmu_domain->init_mutex);
2929 	if (ret)
2930 		return ret;
2931 
2932 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
2933 		return -EINVAL;
2934 
2935 	/*
2936 	 * We can read cd.asid outside the lock because arm_smmu_set_pasid()
2937 	 * will fix it.
2938 	 */
2939 	arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
2940 	return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
2941 				  &target_cd, old);
2942 }
2943 
2944 static void arm_smmu_update_ste(struct arm_smmu_master *master,
2945 				struct iommu_domain *sid_domain,
2946 				bool ats_enabled)
2947 {
2948 	unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE;
2949 	struct arm_smmu_ste ste;
2950 
2951 	if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled)
2952 		return;
2953 
2954 	if (sid_domain->type == IOMMU_DOMAIN_IDENTITY)
2955 		s1dss = STRTAB_STE_1_S1DSS_BYPASS;
2956 	else
2957 		WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED);
2958 
2959 	/*
2960 	 * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior
2961 	 * using s1dss if necessary. If the cd_table is already installed then
2962 	 * the S1DSS is correct and this will just update the EATS. Otherwise it
2963 	 * installs the entire thing. This will be hitless.
2964 	 */
2965 	arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
2966 	arm_smmu_install_ste_for_dev(master, &ste);
2967 }
2968 
2969 int arm_smmu_set_pasid(struct arm_smmu_master *master,
2970 		       struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
2971 		       struct arm_smmu_cd *cd, struct iommu_domain *old)
2972 {
2973 	struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
2974 	struct arm_smmu_attach_state state = {
2975 		.master = master,
2976 		.ssid = pasid,
2977 		.old_domain = old,
2978 	};
2979 	struct arm_smmu_cd *cdptr;
2980 	int ret;
2981 
2982 	/* The core code validates pasid */
2983 
2984 	if (smmu_domain->smmu != master->smmu)
2985 		return -EINVAL;
2986 
2987 	if (!master->cd_table.in_ste &&
2988 	    sid_domain->type != IOMMU_DOMAIN_IDENTITY &&
2989 	    sid_domain->type != IOMMU_DOMAIN_BLOCKED)
2990 		return -EINVAL;
2991 
2992 	cdptr = arm_smmu_alloc_cd_ptr(master, pasid);
2993 	if (!cdptr)
2994 		return -ENOMEM;
2995 
2996 	mutex_lock(&arm_smmu_asid_lock);
2997 	ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain);
2998 	if (ret)
2999 		goto out_unlock;
3000 
3001 	/*
3002 	 * We don't want to take the asid_lock too early, so fix up the
3003 	 * caller-set ASID under the lock in case it changed.
3004 	 */
3005 	cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
3006 	cd->data[0] |= cpu_to_le64(
3007 		FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));
3008 
3009 	arm_smmu_write_cd_entry(master, pasid, cdptr, cd);
3010 	arm_smmu_update_ste(master, sid_domain, state.ats_enabled);
3011 
3012 	arm_smmu_attach_commit(&state);
3013 
3014 out_unlock:
3015 	mutex_unlock(&arm_smmu_asid_lock);
3016 	return ret;
3017 }
3018 
3019 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
3020 				      struct iommu_domain *domain)
3021 {
3022 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3023 	struct arm_smmu_domain *smmu_domain;
3024 
3025 	smmu_domain = to_smmu_domain(domain);
3026 
3027 	mutex_lock(&arm_smmu_asid_lock);
3028 	arm_smmu_clear_cd(master, pasid);
3029 	if (master->ats_enabled)
3030 		arm_smmu_atc_inv_master(master, pasid);
3031 	arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);
3032 	mutex_unlock(&arm_smmu_asid_lock);
3033 
3034 	/*
3035 	 * When the last user of the CD table goes away, downgrade the STE back
3036 	 * to a non-cd_table one.
3037 	 */
3038 	if (!arm_smmu_ssids_in_use(&master->cd_table)) {
3039 		struct iommu_domain *sid_domain =
3040 			iommu_get_domain_for_dev(master->dev);
3041 
3042 		if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
3043 		    sid_domain->type == IOMMU_DOMAIN_BLOCKED)
3044 			sid_domain->ops->attach_dev(sid_domain, dev);
3045 	}
3046 }
3047 
3048 static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
3049 				    struct device *dev,
3050 				    struct arm_smmu_ste *ste,
3051 				    unsigned int s1dss)
3052 {
3053 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3054 	struct arm_smmu_attach_state state = {
3055 		.master = master,
3056 		.old_domain = iommu_get_domain_for_dev(dev),
3057 		.ssid = IOMMU_NO_PASID,
3058 	};
3059 
3060 	/*
3061 	 * Do not allow any ASID to be changed while we are working on the STE,
3062 	 * otherwise we could miss invalidations.
3063 	 */
3064 	mutex_lock(&arm_smmu_asid_lock);
3065 
3066 	/*
3067 	 * If the CD table is not in use we can use the provided STE, otherwise
3068 	 * we use a cdtable STE with the provided S1DSS.
3069 	 */
3070 	if (arm_smmu_ssids_in_use(&master->cd_table)) {
3071 		/*
3072 		 * If a CD table has to be present then we need to run with ATS
3073 		 * on even though the RID will fail ATS queries with UR. This is
3074 		 * because we have no idea what the PASIDs need.
3075 		 */
3076 		state.cd_needs_ats = true;
3077 		arm_smmu_attach_prepare(&state, domain);
3078 		arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss);
3079 	} else {
3080 		arm_smmu_attach_prepare(&state, domain);
3081 	}
3082 	arm_smmu_install_ste_for_dev(master, ste);
3083 	arm_smmu_attach_commit(&state);
3084 	mutex_unlock(&arm_smmu_asid_lock);
3085 
3086 	/*
3087 	 * This has to be done after removing the master from the
3088 	 * arm_smmu_domain->devices to avoid races updating the same context
3089 	 * descriptor from arm_smmu_share_asid().
3090 	 */
3091 	arm_smmu_clear_cd(master, IOMMU_NO_PASID);
3092 }
3093 
3094 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
3095 					struct device *dev)
3096 {
3097 	struct arm_smmu_ste ste;
3098 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3099 
3100 	arm_smmu_make_bypass_ste(master->smmu, &ste);
3101 	arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
3102 	return 0;
3103 }
3104 
3105 static const struct iommu_domain_ops arm_smmu_identity_ops = {
3106 	.attach_dev = arm_smmu_attach_dev_identity,
3107 };
3108 
3109 static struct iommu_domain arm_smmu_identity_domain = {
3110 	.type = IOMMU_DOMAIN_IDENTITY,
3111 	.ops = &arm_smmu_identity_ops,
3112 };
3113 
3114 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
3115 					struct device *dev)
3116 {
3117 	struct arm_smmu_ste ste;
3118 
3119 	arm_smmu_make_abort_ste(&ste);
3120 	arm_smmu_attach_dev_ste(domain, dev, &ste,
3121 				STRTAB_STE_1_S1DSS_TERMINATE);
3122 	return 0;
3123 }
3124 
3125 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
3126 	.attach_dev = arm_smmu_attach_dev_blocked,
3127 };
3128 
3129 static struct iommu_domain arm_smmu_blocked_domain = {
3130 	.type = IOMMU_DOMAIN_BLOCKED,
3131 	.ops = &arm_smmu_blocked_ops,
3132 };
3133 
3134 static struct iommu_domain *
3135 arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
3136 			   struct iommu_domain *parent,
3137 			   const struct iommu_user_data *user_data)
3138 {
3139 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3140 	const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
3141 				 IOMMU_HWPT_ALLOC_PASID |
3142 				 IOMMU_HWPT_ALLOC_NEST_PARENT;
3143 	struct arm_smmu_domain *smmu_domain;
3144 	int ret;
3145 
3146 	if (flags & ~PAGING_FLAGS)
3147 		return ERR_PTR(-EOPNOTSUPP);
3148 	if (parent || user_data)
3149 		return ERR_PTR(-EOPNOTSUPP);
3150 
3151 	if (flags & IOMMU_HWPT_ALLOC_PASID)
3152 		return arm_smmu_domain_alloc_paging(dev);
3153 
3154 	smmu_domain = arm_smmu_domain_alloc();
3155 	if (IS_ERR(smmu_domain))
3156 		return ERR_CAST(smmu_domain);
3157 
3158 	if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) {
3159 		if (!(master->smmu->features & ARM_SMMU_FEAT_NESTING)) {
3160 			ret = -EOPNOTSUPP;
3161 			goto err_free;
3162 		}
3163 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
3164 		smmu_domain->nest_parent = true;
3165 	}
3166 
3167 	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
3168 	smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
3169 	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags);
3170 	if (ret)
3171 		goto err_free;
3172 	return &smmu_domain->domain;
3173 
3174 err_free:
3175 	kfree(smmu_domain);
3176 	return ERR_PTR(ret);
3177 }
3178 
3179 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
3180 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
3181 			      int prot, gfp_t gfp, size_t *mapped)
3182 {
3183 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
3184 
3185 	if (!ops)
3186 		return -ENODEV;
3187 
3188 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
3189 }
3190 
3191 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
3192 				   size_t pgsize, size_t pgcount,
3193 				   struct iommu_iotlb_gather *gather)
3194 {
3195 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3196 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
3197 
3198 	if (!ops)
3199 		return 0;
3200 
3201 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
3202 }
3203 
3204 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
3205 {
3206 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3207 
3208 	if (smmu_domain->smmu)
3209 		arm_smmu_tlb_inv_context(smmu_domain);
3210 }
3211 
3212 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
3213 				struct iommu_iotlb_gather *gather)
3214 {
3215 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3216 
3217 	if (!gather->pgsize)
3218 		return;
3219 
3220 	arm_smmu_tlb_inv_range_domain(gather->start,
3221 				      gather->end - gather->start + 1,
3222 				      gather->pgsize, true, smmu_domain);
3223 }
3224 
3225 static phys_addr_t
3226 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
3227 {
3228 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
3229 
3230 	if (!ops)
3231 		return 0;
3232 
3233 	return ops->iova_to_phys(ops, iova);
3234 }
3235 
3236 static struct platform_driver arm_smmu_driver;
3237 
3238 static
3239 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
3240 {
3241 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
3242 							  fwnode);
3243 	put_device(dev);
3244 	return dev ? dev_get_drvdata(dev) : NULL;
3245 }
3246 
3247 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
3248 {
3249 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3250 		return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents;
3251 	return sid < smmu->strtab_cfg.linear.num_ents;
3252 }
3253 
3254 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
3255 {
3256 	/* Check the SIDs are in range of the SMMU and our stream table */
3257 	if (!arm_smmu_sid_in_range(smmu, sid))
3258 		return -ERANGE;
3259 
3260 	/* Ensure l2 strtab is initialised */
3261 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3262 		return arm_smmu_init_l2_strtab(smmu, sid);
3263 
3264 	return 0;
3265 }
3266 
3267 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
3268 				  struct arm_smmu_master *master)
3269 {
3270 	int i;
3271 	int ret = 0;
3272 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
3273 
3274 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
3275 				  GFP_KERNEL);
3276 	if (!master->streams)
3277 		return -ENOMEM;
3278 	master->num_streams = fwspec->num_ids;
3279 
3280 	mutex_lock(&smmu->streams_mutex);
3281 	for (i = 0; i < fwspec->num_ids; i++) {
3282 		struct arm_smmu_stream *new_stream = &master->streams[i];
3283 		u32 sid = fwspec->ids[i];
3284 
3285 		new_stream->id = sid;
3286 		new_stream->master = master;
3287 
3288 		ret = arm_smmu_init_sid_strtab(smmu, sid);
3289 		if (ret)
3290 			break;
3291 
3292 		/* Insert into SID tree */
3293 		if (rb_find_add(&new_stream->node, &smmu->streams,
3294 				arm_smmu_streams_cmp_node)) {
3295 			dev_warn(master->dev, "stream %u already in tree\n",
3296 				 sid);
3297 			ret = -EINVAL;
3298 			break;
3299 		}
3300 	}
3301 
3302 	if (ret) {
3303 		for (i--; i >= 0; i--)
3304 			rb_erase(&master->streams[i].node, &smmu->streams);
3305 		kfree(master->streams);
3306 	}
3307 	mutex_unlock(&smmu->streams_mutex);
3308 
3309 	return ret;
3310 }
3311 
3312 static void arm_smmu_remove_master(struct arm_smmu_master *master)
3313 {
3314 	int i;
3315 	struct arm_smmu_device *smmu = master->smmu;
3316 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
3317 
3318 	if (!smmu || !master->streams)
3319 		return;
3320 
3321 	mutex_lock(&smmu->streams_mutex);
3322 	for (i = 0; i < fwspec->num_ids; i++)
3323 		rb_erase(&master->streams[i].node, &smmu->streams);
3324 	mutex_unlock(&smmu->streams_mutex);
3325 
3326 	kfree(master->streams);
3327 }
3328 
3329 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
3330 {
3331 	int ret;
3332 	struct arm_smmu_device *smmu;
3333 	struct arm_smmu_master *master;
3334 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
3335 
3336 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
3337 		return ERR_PTR(-EBUSY);
3338 
3339 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
3340 	if (!smmu)
3341 		return ERR_PTR(-ENODEV);
3342 
3343 	master = kzalloc(sizeof(*master), GFP_KERNEL);
3344 	if (!master)
3345 		return ERR_PTR(-ENOMEM);
3346 
3347 	master->dev = dev;
3348 	master->smmu = smmu;
3349 	dev_iommu_priv_set(dev, master);
3350 
3351 	ret = arm_smmu_insert_master(smmu, master);
3352 	if (ret)
3353 		goto err_free_master;
3354 
3355 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
3356 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
3357 
3358 	/*
3359 	 * Note that PASID must be enabled before, and disabled after ATS:
3360 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
3361 	 *
3362 	 *   Behavior is undefined if this bit is Set and the value of the PASID
3363 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
3364 	 *   are changed.
3365 	 */
3366 	arm_smmu_enable_pasid(master);
3367 
3368 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
3369 		master->ssid_bits = min_t(u8, master->ssid_bits,
3370 					  CTXDESC_LINEAR_CDMAX);
3371 
3372 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
3373 	     device_property_read_bool(dev, "dma-can-stall")) ||
3374 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
3375 		master->stall_enabled = true;
3376 
3377 	if (dev_is_pci(dev)) {
3378 		unsigned int stu = __ffs(smmu->pgsize_bitmap);
3379 
3380 		pci_prepare_ats(to_pci_dev(dev), stu);
3381 	}
3382 
3383 	return &smmu->iommu;
3384 
3385 err_free_master:
3386 	kfree(master);
3387 	return ERR_PTR(ret);
3388 }
3389 
3390 static void arm_smmu_release_device(struct device *dev)
3391 {
3392 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3393 
3394 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
3395 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
3396 
3397 	/* Put the STE back to what arm_smmu_init_strtab() sets */
3398 	if (dev->iommu->require_direct)
3399 		arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
3400 	else
3401 		arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
3402 
3403 	arm_smmu_disable_pasid(master);
3404 	arm_smmu_remove_master(master);
3405 	if (arm_smmu_cdtab_allocated(&master->cd_table))
3406 		arm_smmu_free_cd_tables(master);
3407 	kfree(master);
3408 }
3409 
3410 static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
3411 					 unsigned long iova, size_t size,
3412 					 unsigned long flags,
3413 					 struct iommu_dirty_bitmap *dirty)
3414 {
3415 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3416 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
3417 
3418 	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
3419 }
3420 
3421 static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
3422 				       bool enabled)
3423 {
3424 	/*
3425 	 * Dirty tracking is always enabled for these domains, and the dirty
3426 	 * bitmap is cleared by the caller before set_dirty_tracking().
3427 	 */
3428 	return 0;
3429 }
3430 
3431 static struct iommu_group *arm_smmu_device_group(struct device *dev)
3432 {
3433 	struct iommu_group *group;
3434 
3435 	/*
3436 	 * We don't support devices sharing stream IDs other than PCI RID
3437 	 * aliases, since the necessary ID-to-device lookup becomes rather
3438 	 * impractical given a potential sparse 32-bit stream ID space.
3439 	 */
3440 	if (dev_is_pci(dev))
3441 		group = pci_device_group(dev);
3442 	else
3443 		group = generic_device_group(dev);
3444 
3445 	return group;
3446 }
3447 
3448 static int arm_smmu_of_xlate(struct device *dev,
3449 			     const struct of_phandle_args *args)
3450 {
3451 	return iommu_fwspec_add_ids(dev, args->args, 1);
3452 }
3453 
3454 static void arm_smmu_get_resv_regions(struct device *dev,
3455 				      struct list_head *head)
3456 {
3457 	struct iommu_resv_region *region;
3458 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
3459 
3460 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
3461 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
3462 	if (!region)
3463 		return;
3464 
3465 	list_add_tail(&region->list, head);
3466 
3467 	iommu_dma_get_resv_regions(dev, head);
3468 }
3469 
3470 static int arm_smmu_dev_enable_feature(struct device *dev,
3471 				       enum iommu_dev_features feat)
3472 {
3473 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3474 
3475 	if (!master)
3476 		return -ENODEV;
3477 
3478 	switch (feat) {
3479 	case IOMMU_DEV_FEAT_IOPF:
3480 		if (!arm_smmu_master_iopf_supported(master))
3481 			return -EINVAL;
3482 		if (master->iopf_enabled)
3483 			return -EBUSY;
3484 		master->iopf_enabled = true;
3485 		return 0;
3486 	case IOMMU_DEV_FEAT_SVA:
3487 		if (!arm_smmu_master_sva_supported(master))
3488 			return -EINVAL;
3489 		if (arm_smmu_master_sva_enabled(master))
3490 			return -EBUSY;
3491 		return arm_smmu_master_enable_sva(master);
3492 	default:
3493 		return -EINVAL;
3494 	}
3495 }
3496 
3497 static int arm_smmu_dev_disable_feature(struct device *dev,
3498 					enum iommu_dev_features feat)
3499 {
3500 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3501 
3502 	if (!master)
3503 		return -EINVAL;
3504 
3505 	switch (feat) {
3506 	case IOMMU_DEV_FEAT_IOPF:
3507 		if (!master->iopf_enabled)
3508 			return -EINVAL;
3509 		if (master->sva_enabled)
3510 			return -EBUSY;
3511 		master->iopf_enabled = false;
3512 		return 0;
3513 	case IOMMU_DEV_FEAT_SVA:
3514 		if (!arm_smmu_master_sva_enabled(master))
3515 			return -EINVAL;
3516 		return arm_smmu_master_disable_sva(master);
3517 	default:
3518 		return -EINVAL;
3519 	}
3520 }
3521 
3522 /*
3523  * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
3524  * PCIe link and save the data to memory by DMA. The hardware is restricted to
3525  * use identity mapping only.
3526  */
3527 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
3528 					 (pdev)->device == 0xa12e)
3529 
3530 static int arm_smmu_def_domain_type(struct device *dev)
3531 {
3532 	if (dev_is_pci(dev)) {
3533 		struct pci_dev *pdev = to_pci_dev(dev);
3534 
3535 		if (IS_HISI_PTT_DEVICE(pdev))
3536 			return IOMMU_DOMAIN_IDENTITY;
3537 	}
3538 
3539 	return 0;
3540 }
3541 
3542 static struct iommu_ops arm_smmu_ops = {
3543 	.identity_domain	= &arm_smmu_identity_domain,
3544 	.blocked_domain		= &arm_smmu_blocked_domain,
3545 	.capable		= arm_smmu_capable,
3546 	.hw_info		= arm_smmu_hw_info,
3547 	.domain_alloc_paging    = arm_smmu_domain_alloc_paging,
3548 	.domain_alloc_sva       = arm_smmu_sva_domain_alloc,
3549 	.domain_alloc_user	= arm_smmu_domain_alloc_user,
3550 	.probe_device		= arm_smmu_probe_device,
3551 	.release_device		= arm_smmu_release_device,
3552 	.device_group		= arm_smmu_device_group,
3553 	.of_xlate		= arm_smmu_of_xlate,
3554 	.get_resv_regions	= arm_smmu_get_resv_regions,
3555 	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
3556 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
3557 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
3558 	.page_response		= arm_smmu_page_response,
3559 	.def_domain_type	= arm_smmu_def_domain_type,
3560 	.viommu_alloc		= arm_vsmmu_alloc,
3561 	.user_pasid_table	= 1,
3562 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
3563 	.owner			= THIS_MODULE,
3564 	.default_domain_ops = &(const struct iommu_domain_ops) {
3565 		.attach_dev		= arm_smmu_attach_dev,
3566 		.enforce_cache_coherency = arm_smmu_enforce_cache_coherency,
3567 		.set_dev_pasid		= arm_smmu_s1_set_dev_pasid,
3568 		.map_pages		= arm_smmu_map_pages,
3569 		.unmap_pages		= arm_smmu_unmap_pages,
3570 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
3571 		.iotlb_sync		= arm_smmu_iotlb_sync,
3572 		.iova_to_phys		= arm_smmu_iova_to_phys,
3573 		.free			= arm_smmu_domain_free_paging,
3574 	}
3575 };
3576 
3577 static struct iommu_dirty_ops arm_smmu_dirty_ops = {
3578 	.read_and_clear_dirty	= arm_smmu_read_and_clear_dirty,
3579 	.set_dirty_tracking     = arm_smmu_set_dirty_tracking,
3580 };
3581 
3582 /* Probing and initialisation functions */
3583 int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3584 			    struct arm_smmu_queue *q, void __iomem *page,
3585 			    unsigned long prod_off, unsigned long cons_off,
3586 			    size_t dwords, const char *name)
3587 {
3588 	size_t qsz;
3589 
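	/*
	 * Allocate the queue, halving its size on failure until the
	 * allocation succeeds or even a sub-page allocation has failed.
	 */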
3590 	do {
3591 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3592 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3593 					      GFP_KERNEL);
3594 		if (q->base || qsz < PAGE_SIZE)
3595 			break;
3596 
3597 		q->llq.max_n_shift--;
3598 	} while (1);
3599 
3600 	if (!q->base) {
3601 		dev_err(smmu->dev,
3602 			"failed to allocate queue (0x%zx bytes) for %s\n",
3603 			qsz, name);
3604 		return -ENOMEM;
3605 	}
3606 
3607 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
3608 		dev_info(smmu->dev, "allocated %u entries for %s\n",
3609 			 1 << q->llq.max_n_shift, name);
3610 	}
3611 
3612 	q->prod_reg	= page + prod_off;
3613 	q->cons_reg	= page + cons_off;
3614 	q->ent_dwords	= dwords;
3615 
3616 	q->q_base  = Q_BASE_RWA;
3617 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3618 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3619 
3620 	q->llq.prod = q->llq.cons = 0;
3621 	return 0;
3622 }
3623 
3624 int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
3625 		       struct arm_smmu_cmdq *cmdq)
3626 {
3627 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3628 
3629 	atomic_set(&cmdq->owner_prod, 0);
3630 	atomic_set(&cmdq->lock, 0);
3631 
3632 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
3633 							      GFP_KERNEL);
3634 	if (!cmdq->valid_map)
3635 		return -ENOMEM;
3636 
3637 	return 0;
3638 }
3639 
3640 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3641 {
3642 	int ret;
3643 
3644 	/* cmdq */
3645 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
3646 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
3647 				      CMDQ_ENT_DWORDS, "cmdq");
3648 	if (ret)
3649 		return ret;
3650 
3651 	ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
3652 	if (ret)
3653 		return ret;
3654 
3655 	/* evtq */
3656 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
3657 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
3658 				      EVTQ_ENT_DWORDS, "evtq");
3659 	if (ret)
3660 		return ret;
3661 
3662 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
3663 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
3664 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
3665 		if (!smmu->evtq.iopf)
3666 			return -ENOMEM;
3667 	}
3668 
3669 	/* priq */
3670 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3671 		return 0;
3672 
3673 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
3674 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
3675 				       PRIQ_ENT_DWORDS, "priq");
3676 }
3677 
3678 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3679 {
3680 	u32 l1size;
3681 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3682 	unsigned int last_sid_idx =
3683 		arm_smmu_strtab_l1_idx((1ULL << smmu->sid_bits) - 1);
3684 
3685 	/* Calculate the L1 size, capped to the SIDSIZE. */
3686 	cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES);
3687 	if (cfg->l2.num_l1_ents <= last_sid_idx)
3688 		dev_warn(smmu->dev,
3689 			 "2-level strtab only covers %u/%u bits of SID\n",
3690 			 ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES),
3691 			 smmu->sid_bits);
3692 
3693 	l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1);
3694 	cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma,
3695 					    GFP_KERNEL);
3696 	if (!cfg->l2.l1tab) {
3697 		dev_err(smmu->dev,
3698 			"failed to allocate l1 stream table (%u bytes)\n",
3699 			l1size);
3700 		return -ENOMEM;
3701 	}
3702 
3703 	cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents,
3704 				      sizeof(*cfg->l2.l2ptrs), GFP_KERNEL);
3705 	if (!cfg->l2.l2ptrs)
3706 		return -ENOMEM;
3707 
3708 	return 0;
3709 }
3710 
3711 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3712 {
3713 	u32 size;
3714 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3715 
3716 	size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste);
3717 	cfg->linear.table = dmam_alloc_coherent(smmu->dev, size,
3718 						&cfg->linear.ste_dma,
3719 						GFP_KERNEL);
3720 	if (!cfg->linear.table) {
3721 		dev_err(smmu->dev,
3722 			"failed to allocate linear stream table (%u bytes)\n",
3723 			size);
3724 		return -ENOMEM;
3725 	}
3726 	cfg->linear.num_ents = 1 << smmu->sid_bits;
3727 
3728 	arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents);
3729 	return 0;
3730 }
3731 
3732 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3733 {
3734 	int ret;
3735 
3736 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3737 		ret = arm_smmu_init_strtab_2lvl(smmu);
3738 	else
3739 		ret = arm_smmu_init_strtab_linear(smmu);
3740 	if (ret)
3741 		return ret;
3742 
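	/* VMIDs for stage-2 domains are allocated on demand from this IDA */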
3743 	ida_init(&smmu->vmid_map);
3744 
3745 	return 0;
3746 }
3747 
3748 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3749 {
3750 	int ret;
3751 
3752 	mutex_init(&smmu->streams_mutex);
3753 	smmu->streams = RB_ROOT;
3754 
3755 	ret = arm_smmu_init_queues(smmu);
3756 	if (ret)
3757 		return ret;
3758 
3759 	ret = arm_smmu_init_strtab(smmu);
3760 	if (ret)
3761 		return ret;
3762 
3763 	if (smmu->impl_ops && smmu->impl_ops->init_structures)
3764 		return smmu->impl_ops->init_structures(smmu);
3765 
3766 	return 0;
3767 }
3768 
3769 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3770 				   unsigned int reg_off, unsigned int ack_off)
3771 {
3772 	u32 reg;
3773 
3774 	writel_relaxed(val, smmu->base + reg_off);
3775 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3776 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3777 }
3778 
3779 /* GBPA is "special" */
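/*
 * Updates are gated by GBPA.UPDATE: wait for it to clear, write the new
 * value with UPDATE set, then poll until the hardware clears it again.
 */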
3780 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3781 {
3782 	int ret;
3783 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3784 
3785 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3786 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3787 	if (ret)
3788 		return ret;
3789 
3790 	reg &= ~clr;
3791 	reg |= set;
3792 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3793 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3794 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3795 
3796 	if (ret)
3797 		dev_err(smmu->dev, "GBPA not responding to update\n");
3798 	return ret;
3799 }
3800 
3801 static void arm_smmu_free_msis(void *data)
3802 {
3803 	struct device *dev = data;
3804 
3805 	platform_device_msi_free_irqs_all(dev);
3806 }
3807 
3808 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3809 {
3810 	phys_addr_t doorbell;
3811 	struct device *dev = msi_desc_to_dev(desc);
3812 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3813 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3814 
3815 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3816 	doorbell &= MSI_CFG0_ADDR_MASK;
3817 
3818 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3819 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3820 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3821 }
3822 
3823 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3824 {
3825 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3826 	struct device *dev = smmu->dev;
3827 
3828 	/* Clear the MSI address regs */
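	/* A zero doorbell address leaves that interrupt source on its wired IRQ */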
3829 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3830 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3831 
3832 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3833 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3834 	else
3835 		nvec--;
3836 
3837 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3838 		return;
3839 
3840 	if (!dev->msi.domain) {
3841 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3842 		return;
3843 	}
3844 
3845 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3846 	ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3847 	if (ret) {
3848 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3849 		return;
3850 	}
3851 
3852 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3853 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3854 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3855 
3856 	/* Add callback to free MSIs on teardown */
3857 	devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
3858 }
3859 
3860 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3861 {
3862 	int irq, ret;
3863 
3864 	arm_smmu_setup_msis(smmu);
3865 
3866 	/* Request interrupt lines */
3867 	irq = smmu->evtq.q.irq;
3868 	if (irq) {
3869 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3870 						arm_smmu_evtq_thread,
3871 						IRQF_ONESHOT,
3872 						"arm-smmu-v3-evtq", smmu);
3873 		if (ret < 0)
3874 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3875 	} else {
3876 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3877 	}
3878 
3879 	irq = smmu->gerr_irq;
3880 	if (irq) {
3881 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3882 				       0, "arm-smmu-v3-gerror", smmu);
3883 		if (ret < 0)
3884 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3885 	} else {
3886 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3887 	}
3888 
3889 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3890 		irq = smmu->priq.q.irq;
3891 		if (irq) {
3892 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3893 							arm_smmu_priq_thread,
3894 							IRQF_ONESHOT,
3895 							"arm-smmu-v3-priq",
3896 							smmu);
3897 			if (ret < 0)
3898 				dev_warn(smmu->dev,
3899 					 "failed to enable priq irq\n");
3900 		} else {
3901 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3902 		}
3903 	}
3904 }
3905 
3906 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3907 {
3908 	int ret, irq;
3909 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3910 
3911 	/* Disable IRQs first */
3912 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3913 				      ARM_SMMU_IRQ_CTRLACK);
3914 	if (ret) {
3915 		dev_err(smmu->dev, "failed to disable irqs\n");
3916 		return ret;
3917 	}
3918 
3919 	irq = smmu->combined_irq;
3920 	if (irq) {
3921 		/*
3922 		 * Cavium ThunderX2 implementation doesn't support unique irq
3923 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3924 		 */
3925 		ret = devm_request_threaded_irq(smmu->dev, irq,
3926 					arm_smmu_combined_irq_handler,
3927 					arm_smmu_combined_irq_thread,
3928 					IRQF_ONESHOT,
3929 					"arm-smmu-v3-combined-irq", smmu);
3930 		if (ret < 0)
3931 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3932 	} else
3933 		arm_smmu_setup_unique_irqs(smmu);
3934 
3935 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3936 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3937 
3938 	/* Enable interrupt generation on the SMMU */
3939 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3940 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3941 	if (ret)
3942 		dev_warn(smmu->dev, "failed to enable irqs\n");
3943 
3944 	return 0;
3945 }
3946 
3947 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3948 {
3949 	int ret;
3950 
3951 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3952 	if (ret)
3953 		dev_err(smmu->dev, "failed to clear cr0\n");
3954 
3955 	return ret;
3956 }
3957 
3958 static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
3959 {
3960 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3961 	dma_addr_t dma;
3962 	u32 reg;
3963 
3964 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
3965 		reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
3966 				 STRTAB_BASE_CFG_FMT_2LVL) |
3967 		      FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE,
3968 				 ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) |
3969 		      FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3970 		dma = cfg->l2.l1_dma;
3971 	} else {
3972 		reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
3973 				 STRTAB_BASE_CFG_FMT_LINEAR) |
3974 		      FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3975 		dma = cfg->linear.ste_dma;
3976 	}
3977 	writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
3978 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3979 	writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3980 }
3981 
3982 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
3983 {
3984 	int ret;
3985 	u32 reg, enables;
3986 	struct arm_smmu_cmdq_ent cmd;
3987 
3988 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3989 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3990 	if (reg & CR0_SMMUEN) {
3991 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3992 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3993 	}
3994 
3995 	ret = arm_smmu_device_disable(smmu);
3996 	if (ret)
3997 		return ret;
3998 
3999 	/* CR1 (table and queue memory attributes) */
4000 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
4001 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
4002 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
4003 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
4004 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
4005 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
4006 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
4007 
4008 	/* CR2 (random crap) */
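	/* Private TLB maintenance via the command queue; record events for invalid StreamIDs */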
4009 	reg = CR2_PTM | CR2_RECINVSID;
4010 
4011 	if (smmu->features & ARM_SMMU_FEAT_E2H)
4012 		reg |= CR2_E2H;
4013 
4014 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
4015 
4016 	/* Stream table */
4017 	arm_smmu_write_strtab(smmu);
4018 
4019 	/* Command queue */
4020 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
4021 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
4022 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
4023 
4024 	enables = CR0_CMDQEN;
4025 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4026 				      ARM_SMMU_CR0ACK);
4027 	if (ret) {
4028 		dev_err(smmu->dev, "failed to enable command queue\n");
4029 		return ret;
4030 	}
4031 
4032 	/* Invalidate any cached configuration */
4033 	cmd.opcode = CMDQ_OP_CFGI_ALL;
4034 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
4035 
4036 	/* Invalidate any stale TLB entries */
4037 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
4038 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
4039 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
4040 	}
4041 
4042 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
4043 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
4044 
4045 	/* Event queue */
4046 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
4047 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
4048 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
4049 
4050 	enables |= CR0_EVTQEN;
4051 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4052 				      ARM_SMMU_CR0ACK);
4053 	if (ret) {
4054 		dev_err(smmu->dev, "failed to enable event queue\n");
4055 		return ret;
4056 	}
4057 
4058 	/* PRI queue */
4059 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
4060 		writeq_relaxed(smmu->priq.q.q_base,
4061 			       smmu->base + ARM_SMMU_PRIQ_BASE);
4062 		writel_relaxed(smmu->priq.q.llq.prod,
4063 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
4064 		writel_relaxed(smmu->priq.q.llq.cons,
4065 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
4066 
4067 		enables |= CR0_PRIQEN;
4068 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4069 					      ARM_SMMU_CR0ACK);
4070 		if (ret) {
4071 			dev_err(smmu->dev, "failed to enable PRI queue\n");
4072 			return ret;
4073 		}
4074 	}
4075 
4076 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
4077 		enables |= CR0_ATSCHK;
4078 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4079 					      ARM_SMMU_CR0ACK);
4080 		if (ret) {
4081 			dev_err(smmu->dev, "failed to enable ATS check\n");
4082 			return ret;
4083 		}
4084 	}
4085 
4086 	ret = arm_smmu_setup_irqs(smmu);
4087 	if (ret) {
4088 		dev_err(smmu->dev, "failed to setup irqs\n");
4089 		return ret;
4090 	}
4091 
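	/*
	 * In a kdump kernel, leave the event and PRI queues disabled so that
	 * faults from DMA left in flight by the crashed kernel cannot flood
	 * them.
	 */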
4092 	if (is_kdump_kernel())
4093 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
4094 
4095 	/* Enable the SMMU interface */
4096 	enables |= CR0_SMMUEN;
4097 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
4098 				      ARM_SMMU_CR0ACK);
4099 	if (ret) {
4100 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
4101 		return ret;
4102 	}
4103 
4104 	if (smmu->impl_ops && smmu->impl_ops->device_reset) {
4105 		ret = smmu->impl_ops->device_reset(smmu);
4106 		if (ret) {
4107 			dev_err(smmu->dev, "failed to reset impl\n");
4108 			return ret;
4109 		}
4110 	}
4111 
4112 	return 0;
4113 }
4114 
4115 #define IIDR_IMPLEMENTER_ARM		0x43b
4116 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
4117 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
4118 
4119 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
4120 {
4121 	u32 reg;
4122 	unsigned int implementer, productid, variant, revision;
4123 
4124 	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
4125 	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
4126 	productid = FIELD_GET(IIDR_PRODUCTID, reg);
4127 	variant = FIELD_GET(IIDR_VARIANT, reg);
4128 	revision = FIELD_GET(IIDR_REVISION, reg);
4129 
4130 	switch (implementer) {
4131 	case IIDR_IMPLEMENTER_ARM:
4132 		switch (productid) {
4133 		case IIDR_PRODUCTID_ARM_MMU_600:
4134 			/* Arm erratum 1076982 */
4135 			if (variant == 0 && revision <= 2)
4136 				smmu->features &= ~ARM_SMMU_FEAT_SEV;
4137 			/* Arm erratum 1209401 */
4138 			if (variant < 2)
4139 				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
4140 			break;
4141 		case IIDR_PRODUCTID_ARM_MMU_700:
4142 			/* Arm erratum 2812531 */
4143 			smmu->features &= ~ARM_SMMU_FEAT_BTM;
4144 			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
4145 			/* Arm errata 2268618, 2812531 */
4146 			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
4147 			break;
4148 		}
4149 		break;
4150 	}
4151 }
4152 
4153 static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg)
4154 {
4155 	u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD);
4156 	u32 hw_features = 0;
4157 
4158 	switch (FIELD_GET(IDR0_HTTU, reg)) {
4159 	case IDR0_HTTU_ACCESS_DIRTY:
4160 		hw_features |= ARM_SMMU_FEAT_HD;
4161 		fallthrough;
4162 	case IDR0_HTTU_ACCESS:
4163 		hw_features |= ARM_SMMU_FEAT_HA;
4164 	}
4165 
4166 	if (smmu->dev->of_node)
4167 		smmu->features |= hw_features;
4168 	else if (hw_features != fw_features)
4169 		/* ACPI IORT sets the HTTU bits */
4170 		dev_warn(smmu->dev,
4171 			 "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n",
4172 			  hw_features, fw_features);
4173 }
4174 
4175 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
4176 {
4177 	u32 reg;
4178 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
4179 
4180 	/* IDR0 */
4181 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
4182 
4183 	/* 2-level structures */
4184 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
4185 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
4186 
4187 	if (reg & IDR0_CD2L)
4188 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
4189 
4190 	/*
4191 	 * Translation table endianness.
4192 	 * We currently require the same endianness as the CPU, but this
4193 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
4194 	 */
4195 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
4196 	case IDR0_TTENDIAN_MIXED:
4197 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
4198 		break;
4199 #ifdef __BIG_ENDIAN
4200 	case IDR0_TTENDIAN_BE:
4201 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
4202 		break;
4203 #else
4204 	case IDR0_TTENDIAN_LE:
4205 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
4206 		break;
4207 #endif
4208 	default:
4209 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
4210 		return -ENXIO;
4211 	}
4212 
4213 	/* Boolean feature flags */
4214 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
4215 		smmu->features |= ARM_SMMU_FEAT_PRI;
4216 
4217 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
4218 		smmu->features |= ARM_SMMU_FEAT_ATS;
4219 
4220 	if (reg & IDR0_SEV)
4221 		smmu->features |= ARM_SMMU_FEAT_SEV;
4222 
4223 	if (reg & IDR0_MSI) {
4224 		smmu->features |= ARM_SMMU_FEAT_MSI;
4225 		if (coherent && !disable_msipolling)
4226 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
4227 	}
4228 
4229 	if (reg & IDR0_HYP) {
4230 		smmu->features |= ARM_SMMU_FEAT_HYP;
4231 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
4232 			smmu->features |= ARM_SMMU_FEAT_E2H;
4233 	}
4234 
4235 	arm_smmu_get_httu(smmu, reg);
4236 
4237 	/*
4238 	 * The coherency feature as set by FW is used in preference to the ID
4239 	 * register, but warn on mismatch.
4240 	 */
4241 	if (!!(reg & IDR0_COHACC) != coherent)
4242 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
4243 			 coherent ? "true" : "false");
4244 
4245 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
4246 	case IDR0_STALL_MODEL_FORCE:
4247 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
4248 		fallthrough;
4249 	case IDR0_STALL_MODEL_STALL:
4250 		smmu->features |= ARM_SMMU_FEAT_STALLS;
4251 	}
4252 
4253 	if (reg & IDR0_S1P)
4254 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
4255 
4256 	if (reg & IDR0_S2P)
4257 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
4258 
4259 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
4260 		dev_err(smmu->dev, "no translation support!\n");
4261 		return -ENXIO;
4262 	}
4263 
4264 	/* We only support the AArch64 table format at present */
4265 	switch (FIELD_GET(IDR0_TTF, reg)) {
4266 	case IDR0_TTF_AARCH32_64:
4267 		smmu->ias = 40;
4268 		fallthrough;
4269 	case IDR0_TTF_AARCH64:
4270 		break;
4271 	default:
4272 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
4273 		return -ENXIO;
4274 	}
4275 
4276 	/* ASID/VMID sizes */
4277 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
4278 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
4279 
4280 	/* IDR1 */
4281 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
4282 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
4283 		dev_err(smmu->dev, "embedded implementation not supported\n");
4284 		return -ENXIO;
4285 	}
4286 
4287 	if (reg & IDR1_ATTR_TYPES_OVR)
4288 		smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;
4289 
4290 	/* Queue sizes, capped to ensure natural alignment */
4291 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
4292 					     FIELD_GET(IDR1_CMDQS, reg));
4293 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
4294 		/*
4295 		 * We don't support splitting up batches, so one batch of
4296 		 * commands plus an extra sync needs to fit inside the command
4297 		 * queue. There's also no way we can handle the weird alignment
4298 		 * restrictions on the base pointer for a unit-length queue.
4299 		 */
4300 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
4301 			CMDQ_BATCH_ENTRIES);
4302 		return -ENXIO;
4303 	}
4304 
4305 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
4306 					     FIELD_GET(IDR1_EVTQS, reg));
4307 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
4308 					     FIELD_GET(IDR1_PRIQS, reg));
4309 
4310 	/* SID/SSID sizes */
4311 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
4312 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
4313 	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
4314 
4315 	/*
4316 	 * If the SMMU supports fewer bits than would fill a single L2 stream
4317 	 * table, use a linear table instead.
4318 	 */
4319 	if (smmu->sid_bits <= STRTAB_SPLIT)
4320 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
4321 
4322 	/* IDR3 */
4323 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
4324 	if (FIELD_GET(IDR3_RIL, reg))
4325 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
4326 
4327 	/* IDR5 */
4328 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
4329 
4330 	/* Maximum number of outstanding stalls */
4331 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
4332 
4333 	/* Page sizes */
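	/* Each supported granule also enables its block (huge-page) sizes */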
4334 	if (reg & IDR5_GRAN64K)
4335 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
4336 	if (reg & IDR5_GRAN16K)
4337 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
4338 	if (reg & IDR5_GRAN4K)
4339 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
4340 
4341 	/* Input address size */
4342 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
4343 		smmu->features |= ARM_SMMU_FEAT_VAX;
4344 
4345 	/* Output address size */
4346 	switch (FIELD_GET(IDR5_OAS, reg)) {
4347 	case IDR5_OAS_32_BIT:
4348 		smmu->oas = 32;
4349 		break;
4350 	case IDR5_OAS_36_BIT:
4351 		smmu->oas = 36;
4352 		break;
4353 	case IDR5_OAS_40_BIT:
4354 		smmu->oas = 40;
4355 		break;
4356 	case IDR5_OAS_42_BIT:
4357 		smmu->oas = 42;
4358 		break;
4359 	case IDR5_OAS_44_BIT:
4360 		smmu->oas = 44;
4361 		break;
4362 	case IDR5_OAS_52_BIT:
4363 		smmu->oas = 52;
4364 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
4365 		break;
4366 	default:
4367 		dev_info(smmu->dev,
4368 			"unknown output address size. Truncating to 48-bit\n");
4369 		fallthrough;
4370 	case IDR5_OAS_48_BIT:
4371 		smmu->oas = 48;
4372 	}
4373 
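	/* The first SMMU sets the global page-size bitmap; later ones extend it */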
4374 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
4375 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
4376 	else
4377 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
4378 
4379 	/* Set the DMA mask for our table walker */
4380 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
4381 		dev_warn(smmu->dev,
4382 			 "failed to set DMA mask for table walker\n");
4383 
4384 	smmu->ias = max(smmu->ias, smmu->oas);
4385 
4386 	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
4387 	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
4388 		smmu->features |= ARM_SMMU_FEAT_NESTING;
4389 
4390 	arm_smmu_device_iidr_probe(smmu);
4391 
4392 	if (arm_smmu_sva_supported(smmu))
4393 		smmu->features |= ARM_SMMU_FEAT_SVA;
4394 
4395 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
4396 		 smmu->ias, smmu->oas, smmu->features);
4397 	return 0;
4398 }
4399 
4400 #ifdef CONFIG_ACPI
4401 #ifdef CONFIG_TEGRA241_CMDQV
4402 static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
4403 						struct arm_smmu_device *smmu)
4404 {
4405 	const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
4406 	struct acpi_device *adev;
4407 
4408 	/* Look for an NVDA200C node whose _UID matches the SMMU node ID */
4409 	adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
4410 	if (adev) {
4411 		/* Tegra241 CMDQV driver is responsible for put_device() */
4412 		smmu->impl_dev = &adev->dev;
4413 		smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
4414 		dev_info(smmu->dev, "found companion CMDQV device: %s\n",
4415 			 dev_name(smmu->impl_dev));
4416 	}
4417 	kfree(uid);
4418 }
4419 #else
4420 static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
4421 						struct arm_smmu_device *smmu)
4422 {
4423 }
4424 #endif
4425 
4426 static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
4427 				      struct arm_smmu_device *smmu)
4428 {
4429 	struct acpi_iort_smmu_v3 *iort_smmu =
4430 		(struct acpi_iort_smmu_v3 *)node->node_data;
4431 
4432 	switch (iort_smmu->model) {
4433 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
4434 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
4435 		break;
4436 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
4437 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
4438 		break;
4439 	case ACPI_IORT_SMMU_V3_GENERIC:
4440 		/*
4441 		 * Tegra241 implementation stores its SMMU options and impl_dev
4442 		 * in DSDT. Thus, go through the ACPI tables unconditionally.
4443 		 */
4444 		acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
4445 		break;
4446 	}
4447 
4448 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
4449 	return 0;
4450 }
4451 
4452 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
4453 				      struct arm_smmu_device *smmu)
4454 {
4455 	struct acpi_iort_smmu_v3 *iort_smmu;
4456 	struct device *dev = smmu->dev;
4457 	struct acpi_iort_node *node;
4458 
4459 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
4460 
4461 	/* Retrieve SMMUv3 specific data */
4462 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
4463 
4464 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
4465 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
4466 
4467 	switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) {
4468 	case IDR0_HTTU_ACCESS_DIRTY:
4469 		smmu->features |= ARM_SMMU_FEAT_HD;
4470 		fallthrough;
4471 	case IDR0_HTTU_ACCESS:
4472 		smmu->features |= ARM_SMMU_FEAT_HA;
4473 	}
4474 
4475 	return acpi_smmu_iort_probe_model(node, smmu);
4476 }
4477 #else
4478 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
4479 					     struct arm_smmu_device *smmu)
4480 {
4481 	return -ENODEV;
4482 }
4483 #endif
4484 
4485 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
4486 				    struct arm_smmu_device *smmu)
4487 {
4488 	struct device *dev = &pdev->dev;
4489 	u32 cells;
4490 	int ret = -EINVAL;
4491 
4492 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
4493 		dev_err(dev, "missing #iommu-cells property\n");
4494 	else if (cells != 1)
4495 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
4496 	else
4497 		ret = 0;
4498 
4499 	parse_driver_options(smmu);
4500 
4501 	if (of_dma_is_coherent(dev->of_node))
4502 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
4503 
4504 	return ret;
4505 }
4506 
4507 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
4508 {
4509 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
4510 		return SZ_64K;
4511 	else
4512 		return SZ_128K;
4513 }
4514 
4515 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
4516 				      resource_size_t size)
4517 {
4518 	struct resource res = DEFINE_RES_MEM(start, size);
4519 
4520 	return devm_ioremap_resource(dev, &res);
4521 }
4522 
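/*
 * IORT RMR (Reserved Memory Range) nodes describe StreamIDs with ongoing,
 * firmware-configured DMA; install bypass STEs for them so that traffic
 * keeps working until a driver takes over.
 */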
4523 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
4524 {
4525 	struct list_head rmr_list;
4526 	struct iommu_resv_region *e;
4527 
4528 	INIT_LIST_HEAD(&rmr_list);
4529 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4530 
4531 	list_for_each_entry(e, &rmr_list, list) {
4532 		struct iommu_iort_rmr_data *rmr;
4533 		int ret, i;
4534 
4535 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
4536 		for (i = 0; i < rmr->num_sids; i++) {
4537 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
4538 			if (ret) {
4539 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
4540 					rmr->sids[i]);
4541 				continue;
4542 			}
4543 
4544 			/*
4545 			 * STE table is not programmed to HW, see
4546 			 * arm_smmu_init_initial_stes()
4547 			 */
4548 			arm_smmu_make_bypass_ste(smmu,
4549 				arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
4550 		}
4551 	}
4552 
4553 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4554 }
4555 
4556 static void arm_smmu_impl_remove(void *data)
4557 {
4558 	struct arm_smmu_device *smmu = data;
4559 
4560 	if (smmu->impl_ops && smmu->impl_ops->device_remove)
4561 		smmu->impl_ops->device_remove(smmu);
4562 }
4563 
4564 /*
4565  * Probe all the compiled-in implementations. Each one checks whether it
4566  * matches this HW and, if so, returns a devm_krealloc'd arm_smmu_device that
4567  * replaces the caller's. Otherwise the original is returned, or an ERR_PTR.
4568  */
4569 static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
4570 {
4571 	struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
4572 	int ret;
4573 
4574 	if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
4575 		new_smmu = tegra241_cmdqv_probe(smmu);
4576 
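	/* -ENODEV means no implementation matched; carry on with the generic SMMU */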
4577 	if (new_smmu == ERR_PTR(-ENODEV))
4578 		return smmu;
4579 	if (IS_ERR(new_smmu))
4580 		return new_smmu;
4581 
4582 	ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
4583 				       new_smmu);
4584 	if (ret)
4585 		return ERR_PTR(ret);
4586 	return new_smmu;
4587 }
4588 
4589 static int arm_smmu_device_probe(struct platform_device *pdev)
4590 {
4591 	int irq, ret;
4592 	struct resource *res;
4593 	resource_size_t ioaddr;
4594 	struct arm_smmu_device *smmu;
4595 	struct device *dev = &pdev->dev;
4596 
4597 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4598 	if (!smmu)
4599 		return -ENOMEM;
4600 	smmu->dev = dev;
4601 
4602 	if (dev->of_node) {
4603 		ret = arm_smmu_device_dt_probe(pdev, smmu);
4604 	} else {
4605 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
4606 	}
4607 	if (ret)
4608 		return ret;
4609 
4610 	smmu = arm_smmu_impl_probe(smmu);
4611 	if (IS_ERR(smmu))
4612 		return PTR_ERR(smmu);
4613 
4614 	/* Base address */
4615 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
4616 	if (!res)
4617 		return -EINVAL;
4618 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4619 		dev_err(dev, "MMIO region too small (%pr)\n", res);
4620 		return -EINVAL;
4621 	}
4622 	ioaddr = res->start;
4623 
4624 	/*
4625 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4626 	 * the PMCG registers which are reserved by the PMU driver.
4627 	 */
4628 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4629 	if (IS_ERR(smmu->base))
4630 		return PTR_ERR(smmu->base);
4631 
4632 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
4633 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4634 					       ARM_SMMU_REG_SZ);
4635 		if (IS_ERR(smmu->page1))
4636 			return PTR_ERR(smmu->page1);
4637 	} else {
4638 		smmu->page1 = smmu->base;
4639 	}
4640 
4641 	/* Interrupt lines */
4642 
4643 	irq = platform_get_irq_byname_optional(pdev, "combined");
4644 	if (irq > 0)
4645 		smmu->combined_irq = irq;
4646 	else {
4647 		irq = platform_get_irq_byname_optional(pdev, "eventq");
4648 		if (irq > 0)
4649 			smmu->evtq.q.irq = irq;
4650 
4651 		irq = platform_get_irq_byname_optional(pdev, "priq");
4652 		if (irq > 0)
4653 			smmu->priq.q.irq = irq;
4654 
4655 		irq = platform_get_irq_byname_optional(pdev, "gerror");
4656 		if (irq > 0)
4657 			smmu->gerr_irq = irq;
4658 	}
4659 	/* Probe the h/w */
4660 	ret = arm_smmu_device_hw_probe(smmu);
4661 	if (ret)
4662 		return ret;
4663 
4664 	/* Initialise in-memory data structures */
4665 	ret = arm_smmu_init_structures(smmu);
4666 	if (ret)
4667 		return ret;
4668 
4669 	/* Record our private device structure */
4670 	platform_set_drvdata(pdev, smmu);
4671 
4672 	/* Check for RMRs and install bypass STEs if any */
4673 	arm_smmu_rmr_install_bypass_ste(smmu);
4674 
4675 	/* Reset the device */
4676 	ret = arm_smmu_device_reset(smmu);
4677 	if (ret)
4678 		return ret;
4679 
4680 	/* And we're up. Go go go! */
4681 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4682 				     "smmu3.%pa", &ioaddr);
4683 	if (ret)
4684 		return ret;
4685 
4686 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
4687 	if (ret) {
4688 		dev_err(dev, "Failed to register iommu\n");
4689 		iommu_device_sysfs_remove(&smmu->iommu);
4690 		return ret;
4691 	}
4692 
4693 	return 0;
4694 }
4695 
4696 static void arm_smmu_device_remove(struct platform_device *pdev)
4697 {
4698 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4699 
4700 	iommu_device_unregister(&smmu->iommu);
4701 	iommu_device_sysfs_remove(&smmu->iommu);
4702 	arm_smmu_device_disable(smmu);
4703 	iopf_queue_free(smmu->evtq.iopf);
4704 	ida_destroy(&smmu->vmid_map);
4705 }
4706 
4707 static void arm_smmu_device_shutdown(struct platform_device *pdev)
4708 {
4709 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4710 
4711 	arm_smmu_device_disable(smmu);
4712 }
4713 
4714 static const struct of_device_id arm_smmu_of_match[] = {
4715 	{ .compatible = "arm,smmu-v3", },
4716 	{ },
4717 };
4718 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4719 
4720 static void arm_smmu_driver_unregister(struct platform_driver *drv)
4721 {
4722 	arm_smmu_sva_notifier_synchronize();
4723 	platform_driver_unregister(drv);
4724 }
4725 
4726 static struct platform_driver arm_smmu_driver = {
4727 	.driver	= {
4728 		.name			= "arm-smmu-v3",
4729 		.of_match_table		= arm_smmu_of_match,
4730 		.suppress_bind_attrs	= true,
4731 	},
4732 	.probe	= arm_smmu_device_probe,
4733 	.remove_new = arm_smmu_device_remove,
4734 	.shutdown = arm_smmu_device_shutdown,
4735 };
4736 module_driver(arm_smmu_driver, platform_driver_register,
4737 	      arm_smmu_driver_unregister);
4738 
4739 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4740 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4741 MODULE_ALIAS("platform:arm-smmu-v3");
4742 MODULE_LICENSE("GPL v2");
4743