xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision 335bbdf01d25517ae832ac1807fd8323c1f4f3b9)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 
30 #include "arm-smmu-v3.h"
31 #include "../../dma-iommu.h"
32 #include "../../iommu-sva.h"
33 
34 static bool disable_bypass = true;
35 module_param(disable_bypass, bool, 0444);
36 MODULE_PARM_DESC(disable_bypass,
37 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
38 
39 static bool disable_msipolling;
40 module_param(disable_msipolling, bool, 0444);
41 MODULE_PARM_DESC(disable_msipolling,
42 	"Disable MSI-based polling for CMD_SYNC completion.");
43 
44 enum arm_smmu_msi_index {
45 	EVTQ_MSI_INDEX,
46 	GERROR_MSI_INDEX,
47 	PRIQ_MSI_INDEX,
48 	ARM_SMMU_MAX_MSIS,
49 };
50 
51 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
52 	[EVTQ_MSI_INDEX] = {
53 		ARM_SMMU_EVTQ_IRQ_CFG0,
54 		ARM_SMMU_EVTQ_IRQ_CFG1,
55 		ARM_SMMU_EVTQ_IRQ_CFG2,
56 	},
57 	[GERROR_MSI_INDEX] = {
58 		ARM_SMMU_GERROR_IRQ_CFG0,
59 		ARM_SMMU_GERROR_IRQ_CFG1,
60 		ARM_SMMU_GERROR_IRQ_CFG2,
61 	},
62 	[PRIQ_MSI_INDEX] = {
63 		ARM_SMMU_PRIQ_IRQ_CFG0,
64 		ARM_SMMU_PRIQ_IRQ_CFG1,
65 		ARM_SMMU_PRIQ_IRQ_CFG2,
66 	},
67 };
68 
69 struct arm_smmu_option_prop {
70 	u32 opt;
71 	const char *prop;
72 };
73 
74 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
75 DEFINE_MUTEX(arm_smmu_asid_lock);
76 
77 /*
78  * Special value used by SVA when a process dies, to quiesce a CD without
79  * disabling it.
80  */
81 struct arm_smmu_ctx_desc quiet_cd = { 0 };
82 
83 static struct arm_smmu_option_prop arm_smmu_options[] = {
84 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
85 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
86 	{ 0, NULL},
87 };
88 
89 static void parse_driver_options(struct arm_smmu_device *smmu)
90 {
91 	int i = 0;
92 
93 	do {
94 		if (of_property_read_bool(smmu->dev->of_node,
95 						arm_smmu_options[i].prop)) {
96 			smmu->options |= arm_smmu_options[i].opt;
97 			dev_notice(smmu->dev, "option %s\n",
98 				arm_smmu_options[i].prop);
99 		}
100 	} while (arm_smmu_options[++i].opt);
101 }
102 
103 /* Low-level queue manipulation functions */
104 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
105 {
106 	u32 space, prod, cons;
107 
108 	prod = Q_IDX(q, q->prod);
109 	cons = Q_IDX(q, q->cons);
110 
111 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
112 		space = (1 << q->max_n_shift) - (prod - cons);
113 	else
114 		space = cons - prod;
115 
116 	return space >= n;
117 }
118 
119 static bool queue_full(struct arm_smmu_ll_queue *q)
120 {
121 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
122 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
123 }
124 
125 static bool queue_empty(struct arm_smmu_ll_queue *q)
126 {
127 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
128 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
129 }
130 
131 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
132 {
133 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
134 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
135 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
137 }
138 
139 static void queue_sync_cons_out(struct arm_smmu_queue *q)
140 {
141 	/*
142 	 * Ensure that all CPU accesses (reads and writes) to the queue
143 	 * are complete before we update the cons pointer.
144 	 */
145 	__iomb();
146 	writel_relaxed(q->llq.cons, q->cons_reg);
147 }
148 
149 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
150 {
151 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
152 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
153 }
154 
155 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
156 {
157 	struct arm_smmu_ll_queue *llq = &q->llq;
158 
159 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
160 		return;
161 
162 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
163 		      Q_IDX(llq, llq->cons);
164 	queue_sync_cons_out(q);
165 }
166 
167 static int queue_sync_prod_in(struct arm_smmu_queue *q)
168 {
169 	u32 prod;
170 	int ret = 0;
171 
172 	/*
173 	 * We can't use the _relaxed() variant here, as we must prevent
174 	 * speculative reads of the queue before we have determined that
175 	 * prod has indeed moved.
176 	 */
177 	prod = readl(q->prod_reg);
178 
179 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
180 		ret = -EOVERFLOW;
181 
182 	q->llq.prod = prod;
183 	return ret;
184 }
185 
186 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
187 {
188 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
189 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
190 }
191 
192 static void queue_poll_init(struct arm_smmu_device *smmu,
193 			    struct arm_smmu_queue_poll *qp)
194 {
195 	qp->delay = 1;
196 	qp->spin_cnt = 0;
197 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
198 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
199 }
200 
201 static int queue_poll(struct arm_smmu_queue_poll *qp)
202 {
203 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
204 		return -ETIMEDOUT;
205 
206 	if (qp->wfe) {
207 		wfe();
208 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
209 		cpu_relax();
210 	} else {
211 		udelay(qp->delay);
212 		qp->delay *= 2;
213 		qp->spin_cnt = 0;
214 	}
215 
216 	return 0;
217 }
218 
219 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
220 {
221 	int i;
222 
223 	for (i = 0; i < n_dwords; ++i)
224 		*dst++ = cpu_to_le64(*src++);
225 }
226 
227 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
228 {
229 	int i;
230 
231 	for (i = 0; i < n_dwords; ++i)
232 		*dst++ = le64_to_cpu(*src++);
233 }
234 
235 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
236 {
237 	if (queue_empty(&q->llq))
238 		return -EAGAIN;
239 
240 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
241 	queue_inc_cons(&q->llq);
242 	queue_sync_cons_out(q);
243 	return 0;
244 }
245 
246 /* High-level queue accessors */
247 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
248 {
249 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
250 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
251 
252 	switch (ent->opcode) {
253 	case CMDQ_OP_TLBI_EL2_ALL:
254 	case CMDQ_OP_TLBI_NSNH_ALL:
255 		break;
256 	case CMDQ_OP_PREFETCH_CFG:
257 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
258 		break;
259 	case CMDQ_OP_CFGI_CD:
260 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
261 		fallthrough;
262 	case CMDQ_OP_CFGI_STE:
263 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
264 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
265 		break;
266 	case CMDQ_OP_CFGI_CD_ALL:
267 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
268 		break;
269 	case CMDQ_OP_CFGI_ALL:
270 		/* Cover the entire SID range */
271 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
272 		break;
273 	case CMDQ_OP_TLBI_NH_VA:
274 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
275 		fallthrough;
276 	case CMDQ_OP_TLBI_EL2_VA:
277 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
279 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
280 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
282 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
283 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
284 		break;
285 	case CMDQ_OP_TLBI_S2_IPA:
286 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
287 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
290 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
291 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
292 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
293 		break;
294 	case CMDQ_OP_TLBI_NH_ASID:
295 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
296 		fallthrough;
297 	case CMDQ_OP_TLBI_S12_VMALL:
298 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
299 		break;
300 	case CMDQ_OP_TLBI_EL2_ASID:
301 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
302 		break;
303 	case CMDQ_OP_ATC_INV:
304 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
305 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
306 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
307 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
308 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
309 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
310 		break;
311 	case CMDQ_OP_PRI_RESP:
312 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
313 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
314 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
315 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
316 		switch (ent->pri.resp) {
317 		case PRI_RESP_DENY:
318 		case PRI_RESP_FAIL:
319 		case PRI_RESP_SUCC:
320 			break;
321 		default:
322 			return -EINVAL;
323 		}
324 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
325 		break;
326 	case CMDQ_OP_RESUME:
327 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
328 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
329 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
330 		break;
331 	case CMDQ_OP_CMD_SYNC:
332 		if (ent->sync.msiaddr) {
333 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
334 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
335 		} else {
336 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
337 		}
338 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
339 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
340 		break;
341 	default:
342 		return -ENOENT;
343 	}
344 
345 	return 0;
346 }
347 
348 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
349 {
350 	return &smmu->cmdq;
351 }
352 
353 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
354 					 struct arm_smmu_queue *q, u32 prod)
355 {
356 	struct arm_smmu_cmdq_ent ent = {
357 		.opcode = CMDQ_OP_CMD_SYNC,
358 	};
359 
360 	/*
361 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
362 	 * payload, so the write will zero the entire command on that platform.
363 	 */
364 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
365 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
366 				   q->ent_dwords * 8;
367 	}
368 
369 	arm_smmu_cmdq_build_cmd(cmd, &ent);
370 }
371 
372 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
373 				     struct arm_smmu_queue *q)
374 {
375 	static const char * const cerror_str[] = {
376 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
377 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
378 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
379 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
380 	};
381 
382 	int i;
383 	u64 cmd[CMDQ_ENT_DWORDS];
384 	u32 cons = readl_relaxed(q->cons_reg);
385 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
386 	struct arm_smmu_cmdq_ent cmd_sync = {
387 		.opcode = CMDQ_OP_CMD_SYNC,
388 	};
389 
390 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
391 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
392 
393 	switch (idx) {
394 	case CMDQ_ERR_CERROR_ABT_IDX:
395 		dev_err(smmu->dev, "retrying command fetch\n");
396 		return;
397 	case CMDQ_ERR_CERROR_NONE_IDX:
398 		return;
399 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
400 		/*
401 		 * ATC Invalidation Completion timeout. CONS is still pointing
402 		 * at the CMD_SYNC. Attempt to complete other pending commands
403 		 * by repeating the CMD_SYNC, though we might well end up back
404 		 * here since the ATC invalidation may still be pending.
405 		 */
406 		return;
407 	case CMDQ_ERR_CERROR_ILL_IDX:
408 	default:
409 		break;
410 	}
411 
412 	/*
413 	 * We may have concurrent producers, so we need to be careful
414 	 * not to touch any of the shadow cmdq state.
415 	 */
416 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
417 	dev_err(smmu->dev, "skipping command in error state:\n");
418 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
419 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
420 
421 	/* Convert the erroneous command into a CMD_SYNC */
422 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
423 
424 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
425 }
426 
427 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
428 {
429 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
430 }
431 
432 /*
433  * Command queue locking.
434  * This is a form of bastardised rwlock with the following major changes:
435  *
436  * - The only LOCK routines are exclusive_trylock() and shared_lock().
437  *   Neither have barrier semantics, and instead provide only a control
438  *   dependency.
439  *
440  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
441  *   fails if the caller appears to be the last lock holder (yes, this is
442  *   racy). All successful UNLOCK routines have RELEASE semantics.
443  */
444 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
445 {
446 	int val;
447 
448 	/*
449 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
450 	 * lock counter. When held in exclusive state, the lock counter is set
451 	 * to INT_MIN so these increments won't hurt as the value will remain
452 	 * negative.
453 	 */
454 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
455 		return;
456 
457 	do {
458 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
459 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
460 }
461 
462 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
463 {
464 	(void)atomic_dec_return_release(&cmdq->lock);
465 }
466 
467 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
468 {
469 	if (atomic_read(&cmdq->lock) == 1)
470 		return false;
471 
472 	arm_smmu_cmdq_shared_unlock(cmdq);
473 	return true;
474 }
475 
476 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
477 ({									\
478 	bool __ret;							\
479 	local_irq_save(flags);						\
480 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
481 	if (!__ret)							\
482 		local_irq_restore(flags);				\
483 	__ret;								\
484 })
485 
486 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
487 ({									\
488 	atomic_set_release(&cmdq->lock, 0);				\
489 	local_irq_restore(flags);					\
490 })
491 
492 
493 /*
494  * Command queue insertion.
495  * This is made fiddly by our attempts to achieve some sort of scalability
496  * since there is one queue shared amongst all of the CPUs in the system.  If
497  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
498  * then you'll *love* this monstrosity.
499  *
500  * The basic idea is to split the queue up into ranges of commands that are
501  * owned by a given CPU; the owner may not have written all of the commands
502  * itself, but is responsible for advancing the hardware prod pointer when
503  * the time comes. The algorithm is roughly:
504  *
505  * 	1. Allocate some space in the queue. At this point we also discover
506  *	   whether the head of the queue is currently owned by another CPU,
507  *	   or whether we are the owner.
508  *
509  *	2. Write our commands into our allocated slots in the queue.
510  *
511  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
512  *
513  *	4. If we are an owner:
514  *		a. Wait for the previous owner to finish.
515  *		b. Mark the queue head as unowned, which tells us the range
516  *		   that we are responsible for publishing.
517  *		c. Wait for all commands in our owned range to become valid.
518  *		d. Advance the hardware prod pointer.
519  *		e. Tell the next owner we've finished.
520  *
521  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
522  *	   owner), then we need to stick around until it has completed:
523  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
524  *		   to clear the first 4 bytes.
525  *		b. Otherwise, we spin waiting for the hardware cons pointer to
526  *		   advance past our command.
527  *
528  * The devil is in the details, particularly the use of locking for handling
529  * SYNC completion and freeing up space in the queue before we think that it is
530  * full.
531  */
532 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
533 					       u32 sprod, u32 eprod, bool set)
534 {
535 	u32 swidx, sbidx, ewidx, ebidx;
536 	struct arm_smmu_ll_queue llq = {
537 		.max_n_shift	= cmdq->q.llq.max_n_shift,
538 		.prod		= sprod,
539 	};
540 
541 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
542 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
543 
544 	while (llq.prod != eprod) {
545 		unsigned long mask;
546 		atomic_long_t *ptr;
547 		u32 limit = BITS_PER_LONG;
548 
549 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
550 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
551 
552 		ptr = &cmdq->valid_map[swidx];
553 
554 		if ((swidx == ewidx) && (sbidx < ebidx))
555 			limit = ebidx;
556 
557 		mask = GENMASK(limit - 1, sbidx);
558 
559 		/*
560 		 * The valid bit is the inverse of the wrap bit. This means
561 		 * that a zero-initialised queue is invalid and, after marking
562 		 * all entries as valid, they become invalid again when we
563 		 * wrap.
564 		 */
565 		if (set) {
566 			atomic_long_xor(mask, ptr);
567 		} else { /* Poll */
568 			unsigned long valid;
569 
570 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
571 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
572 		}
573 
574 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
575 	}
576 }
577 
578 /* Mark all entries in the range [sprod, eprod) as valid */
579 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
580 					u32 sprod, u32 eprod)
581 {
582 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
583 }
584 
585 /* Wait for all entries in the range [sprod, eprod) to become valid */
586 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
587 					 u32 sprod, u32 eprod)
588 {
589 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
590 }
591 
592 /* Wait for the command queue to become non-full */
593 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
594 					     struct arm_smmu_ll_queue *llq)
595 {
596 	unsigned long flags;
597 	struct arm_smmu_queue_poll qp;
598 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
599 	int ret = 0;
600 
601 	/*
602 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
603 	 * that fails, spin until somebody else updates it for us.
604 	 */
605 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
606 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
607 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
608 		llq->val = READ_ONCE(cmdq->q.llq.val);
609 		return 0;
610 	}
611 
612 	queue_poll_init(smmu, &qp);
613 	do {
614 		llq->val = READ_ONCE(cmdq->q.llq.val);
615 		if (!queue_full(llq))
616 			break;
617 
618 		ret = queue_poll(&qp);
619 	} while (!ret);
620 
621 	return ret;
622 }
623 
624 /*
625  * Wait until the SMMU signals a CMD_SYNC completion MSI.
626  * Must be called with the cmdq lock held in some capacity.
627  */
628 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
629 					  struct arm_smmu_ll_queue *llq)
630 {
631 	int ret = 0;
632 	struct arm_smmu_queue_poll qp;
633 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
634 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
635 
636 	queue_poll_init(smmu, &qp);
637 
638 	/*
639 	 * The MSI won't generate an event, since it's being written back
640 	 * into the command queue.
641 	 */
642 	qp.wfe = false;
643 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
644 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
645 	return ret;
646 }
647 
648 /*
649  * Wait until the SMMU cons index passes llq->prod.
650  * Must be called with the cmdq lock held in some capacity.
651  */
652 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
653 					       struct arm_smmu_ll_queue *llq)
654 {
655 	struct arm_smmu_queue_poll qp;
656 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
657 	u32 prod = llq->prod;
658 	int ret = 0;
659 
660 	queue_poll_init(smmu, &qp);
661 	llq->val = READ_ONCE(cmdq->q.llq.val);
662 	do {
663 		if (queue_consumed(llq, prod))
664 			break;
665 
666 		ret = queue_poll(&qp);
667 
668 		/*
669 		 * This needs to be a readl() so that our subsequent call
670 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
671 		 *
672 		 * Specifically, we need to ensure that we observe all
673 		 * shared_lock()s by other CMD_SYNCs that share our owner,
674 		 * so that a failing call to tryunlock() means that we're
675 		 * the last one out and therefore we can safely advance
676 		 * cmdq->q.llq.cons. Roughly speaking:
677 		 *
678 		 * CPU 0		CPU1			CPU2 (us)
679 		 *
680 		 * if (sync)
681 		 * 	shared_lock();
682 		 *
683 		 * dma_wmb();
684 		 * set_valid_map();
685 		 *
686 		 * 			if (owner) {
687 		 *				poll_valid_map();
688 		 *				<control dependency>
689 		 *				writel(prod_reg);
690 		 *
691 		 *						readl(cons_reg);
692 		 *						tryunlock();
693 		 *
694 		 * Requires us to see CPU 0's shared_lock() acquisition.
695 		 */
696 		llq->cons = readl(cmdq->q.cons_reg);
697 	} while (!ret);
698 
699 	return ret;
700 }
701 
702 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
703 					 struct arm_smmu_ll_queue *llq)
704 {
705 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
706 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
707 
708 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
709 }
710 
711 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
712 					u32 prod, int n)
713 {
714 	int i;
715 	struct arm_smmu_ll_queue llq = {
716 		.max_n_shift	= cmdq->q.llq.max_n_shift,
717 		.prod		= prod,
718 	};
719 
720 	for (i = 0; i < n; ++i) {
721 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
722 
723 		prod = queue_inc_prod_n(&llq, i);
724 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
725 	}
726 }
727 
728 /*
729  * This is the actual insertion function, and provides the following
730  * ordering guarantees to callers:
731  *
732  * - There is a dma_wmb() before publishing any commands to the queue.
733  *   This can be relied upon to order prior writes to data structures
734  *   in memory (such as a CD or an STE) before the command.
735  *
736  * - On completion of a CMD_SYNC, there is a control dependency.
737  *   This can be relied upon to order subsequent writes to memory (e.g.
738  *   freeing an IOVA) after completion of the CMD_SYNC.
739  *
740  * - Command insertion is totally ordered, so if two CPUs each race to
741  *   insert their own list of commands then all of the commands from one
742  *   CPU will appear before any of the commands from the other CPU.
743  */
744 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
745 				       u64 *cmds, int n, bool sync)
746 {
747 	u64 cmd_sync[CMDQ_ENT_DWORDS];
748 	u32 prod;
749 	unsigned long flags;
750 	bool owner;
751 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
752 	struct arm_smmu_ll_queue llq, head;
753 	int ret = 0;
754 
755 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
756 
757 	/* 1. Allocate some space in the queue */
758 	local_irq_save(flags);
759 	llq.val = READ_ONCE(cmdq->q.llq.val);
760 	do {
761 		u64 old;
762 
763 		while (!queue_has_space(&llq, n + sync)) {
764 			local_irq_restore(flags);
765 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
766 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
767 			local_irq_save(flags);
768 		}
769 
770 		head.cons = llq.cons;
771 		head.prod = queue_inc_prod_n(&llq, n + sync) |
772 					     CMDQ_PROD_OWNED_FLAG;
773 
774 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
775 		if (old == llq.val)
776 			break;
777 
778 		llq.val = old;
779 	} while (1);
780 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
781 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
782 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
783 
784 	/*
785 	 * 2. Write our commands into the queue
786 	 * Dependency ordering from the cmpxchg() loop above.
787 	 */
788 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
789 	if (sync) {
790 		prod = queue_inc_prod_n(&llq, n);
791 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
792 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
793 
794 		/*
795 		 * In order to determine completion of our CMD_SYNC, we must
796 		 * ensure that the queue can't wrap twice without us noticing.
797 		 * We achieve that by taking the cmdq lock as shared before
798 		 * marking our slot as valid.
799 		 */
800 		arm_smmu_cmdq_shared_lock(cmdq);
801 	}
802 
803 	/* 3. Mark our slots as valid, ensuring commands are visible first */
804 	dma_wmb();
805 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
806 
807 	/* 4. If we are the owner, take control of the SMMU hardware */
808 	if (owner) {
809 		/* a. Wait for previous owner to finish */
810 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
811 
812 		/* b. Stop gathering work by clearing the owned flag */
813 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
814 						   &cmdq->q.llq.atomic.prod);
815 		prod &= ~CMDQ_PROD_OWNED_FLAG;
816 
817 		/*
818 		 * c. Wait for any gathered work to be written to the queue.
819 		 * Note that we read our own entries so that we have the control
820 		 * dependency required by (d).
821 		 */
822 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
823 
824 		/*
825 		 * d. Advance the hardware prod pointer
826 		 * Control dependency ordering from the entries becoming valid.
827 		 */
828 		writel_relaxed(prod, cmdq->q.prod_reg);
829 
830 		/*
831 		 * e. Tell the next owner we're done
832 		 * Make sure we've updated the hardware first, so that we don't
833 		 * race to update prod and potentially move it backwards.
834 		 */
835 		atomic_set_release(&cmdq->owner_prod, prod);
836 	}
837 
838 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
839 	if (sync) {
840 		llq.prod = queue_inc_prod_n(&llq, n);
841 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
842 		if (ret) {
843 			dev_err_ratelimited(smmu->dev,
844 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
845 					    llq.prod,
846 					    readl_relaxed(cmdq->q.prod_reg),
847 					    readl_relaxed(cmdq->q.cons_reg));
848 		}
849 
850 		/*
851 		 * Try to unlock the cmdq lock. This will fail if we're the last
852 		 * reader, in which case we can safely update cmdq->q.llq.cons
853 		 */
854 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
855 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
856 			arm_smmu_cmdq_shared_unlock(cmdq);
857 		}
858 	}
859 
860 	local_irq_restore(flags);
861 	return ret;
862 }
863 
864 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
865 				     struct arm_smmu_cmdq_ent *ent,
866 				     bool sync)
867 {
868 	u64 cmd[CMDQ_ENT_DWORDS];
869 
870 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
871 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
872 			 ent->opcode);
873 		return -EINVAL;
874 	}
875 
876 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
877 }
878 
879 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
880 				   struct arm_smmu_cmdq_ent *ent)
881 {
882 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
883 }
884 
885 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
886 					     struct arm_smmu_cmdq_ent *ent)
887 {
888 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
889 }
890 
891 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
892 				    struct arm_smmu_cmdq_batch *cmds,
893 				    struct arm_smmu_cmdq_ent *cmd)
894 {
895 	int index;
896 
897 	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
898 	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
899 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
900 		cmds->num = 0;
901 	}
902 
903 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
904 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
905 		cmds->num = 0;
906 	}
907 
908 	index = cmds->num * CMDQ_ENT_DWORDS;
909 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
910 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
911 			 cmd->opcode);
912 		return;
913 	}
914 
915 	cmds->num++;
916 }
917 
918 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
919 				      struct arm_smmu_cmdq_batch *cmds)
920 {
921 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
922 }
923 
924 static int arm_smmu_page_response(struct device *dev,
925 				  struct iommu_fault_event *unused,
926 				  struct iommu_page_response *resp)
927 {
928 	struct arm_smmu_cmdq_ent cmd = {0};
929 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
930 	int sid = master->streams[0].id;
931 
932 	if (master->stall_enabled) {
933 		cmd.opcode		= CMDQ_OP_RESUME;
934 		cmd.resume.sid		= sid;
935 		cmd.resume.stag		= resp->grpid;
936 		switch (resp->code) {
937 		case IOMMU_PAGE_RESP_INVALID:
938 		case IOMMU_PAGE_RESP_FAILURE:
939 			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
940 			break;
941 		case IOMMU_PAGE_RESP_SUCCESS:
942 			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
943 			break;
944 		default:
945 			return -EINVAL;
946 		}
947 	} else {
948 		return -ENODEV;
949 	}
950 
951 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
952 	/*
953 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
954 	 * RESUME consumption guarantees that the stalled transaction will be
955 	 * terminated... at some point in the future. PRI_RESP is fire and
956 	 * forget.
957 	 */
958 
959 	return 0;
960 }
961 
962 /* Context descriptor manipulation functions */
963 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
964 {
965 	struct arm_smmu_cmdq_ent cmd = {
966 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
967 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
968 		.tlbi.asid = asid,
969 	};
970 
971 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
972 }
973 
974 static void arm_smmu_sync_cd(struct arm_smmu_master *master,
975 			     int ssid, bool leaf)
976 {
977 	size_t i;
978 	struct arm_smmu_cmdq_batch cmds;
979 	struct arm_smmu_device *smmu = master->smmu;
980 	struct arm_smmu_cmdq_ent cmd = {
981 		.opcode	= CMDQ_OP_CFGI_CD,
982 		.cfgi	= {
983 			.ssid	= ssid,
984 			.leaf	= leaf,
985 		},
986 	};
987 
988 	cmds.num = 0;
989 	for (i = 0; i < master->num_streams; i++) {
990 		cmd.cfgi.sid = master->streams[i].id;
991 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
992 	}
993 
994 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
995 }
996 
997 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
998 					struct arm_smmu_l1_ctx_desc *l1_desc)
999 {
1000 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1001 
1002 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1003 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1004 	if (!l1_desc->l2ptr) {
1005 		dev_warn(smmu->dev,
1006 			 "failed to allocate context descriptor table\n");
1007 		return -ENOMEM;
1008 	}
1009 	return 0;
1010 }
1011 
1012 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1013 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1014 {
1015 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1016 		  CTXDESC_L1_DESC_V;
1017 
1018 	/* See comment in arm_smmu_write_ctx_desc() */
1019 	WRITE_ONCE(*dst, cpu_to_le64(val));
1020 }
1021 
1022 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
1023 {
1024 	__le64 *l1ptr;
1025 	unsigned int idx;
1026 	struct arm_smmu_l1_ctx_desc *l1_desc;
1027 	struct arm_smmu_device *smmu = master->smmu;
1028 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1029 
1030 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1031 		return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
1032 
1033 	idx = ssid >> CTXDESC_SPLIT;
1034 	l1_desc = &cd_table->l1_desc[idx];
1035 	if (!l1_desc->l2ptr) {
1036 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1037 			return NULL;
1038 
1039 		l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1040 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1041 		/* An invalid L1CD can be cached */
1042 		arm_smmu_sync_cd(master, ssid, false);
1043 	}
1044 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1045 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1046 }
1047 
1048 int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
1049 			    struct arm_smmu_ctx_desc *cd)
1050 {
1051 	/*
1052 	 * This function handles the following cases:
1053 	 *
1054 	 * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
1055 	 * (2) Install a secondary CD, for SID+SSID traffic.
1056 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1057 	 *     CD, then invalidate the old entry and mappings.
1058 	 * (4) Quiesce the context without clearing the valid bit. Disable
1059 	 *     translation, and ignore any translation fault.
1060 	 * (5) Remove a secondary CD.
1061 	 */
1062 	u64 val;
1063 	bool cd_live;
1064 	__le64 *cdptr;
1065 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1066 
1067 	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
1068 		return -E2BIG;
1069 
1070 	cdptr = arm_smmu_get_cd_ptr(master, ssid);
1071 	if (!cdptr)
1072 		return -ENOMEM;
1073 
1074 	val = le64_to_cpu(cdptr[0]);
1075 	cd_live = !!(val & CTXDESC_CD_0_V);
1076 
1077 	if (!cd) { /* (5) */
1078 		val = 0;
1079 	} else if (cd == &quiet_cd) { /* (4) */
1080 		val |= CTXDESC_CD_0_TCR_EPD0;
1081 	} else if (cd_live) { /* (3) */
1082 		val &= ~CTXDESC_CD_0_ASID;
1083 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1084 		/*
1085 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1086 		 * this substream's traffic
1087 		 */
1088 	} else { /* (1) and (2) */
1089 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1090 		cdptr[2] = 0;
1091 		cdptr[3] = cpu_to_le64(cd->mair);
1092 
1093 		/*
1094 		 * STE may be live, and the SMMU might read dwords of this CD in any
1095 		 * order. Ensure that it observes valid values before reading
1096 		 * V=1.
1097 		 */
1098 		arm_smmu_sync_cd(master, ssid, true);
1099 
1100 		val = cd->tcr |
1101 #ifdef __BIG_ENDIAN
1102 			CTXDESC_CD_0_ENDI |
1103 #endif
1104 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1105 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1106 			CTXDESC_CD_0_AA64 |
1107 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1108 			CTXDESC_CD_0_V;
1109 
1110 		if (cd_table->stall_enabled)
1111 			val |= CTXDESC_CD_0_S;
1112 	}
1113 
1114 	/*
1115 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1116 	 * "Configuration structures and configuration invalidation completion"
1117 	 *
1118 	 *   The size of single-copy atomic reads made by the SMMU is
1119 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1120 	 *   field within an aligned 64-bit span of a structure can be altered
1121 	 *   without first making the structure invalid.
1122 	 */
1123 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1124 	arm_smmu_sync_cd(master, ssid, true);
1125 	return 0;
1126 }
1127 
1128 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1129 {
1130 	int ret;
1131 	size_t l1size;
1132 	size_t max_contexts;
1133 	struct arm_smmu_device *smmu = master->smmu;
1134 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1135 
1136 	cd_table->stall_enabled = master->stall_enabled;
1137 	cd_table->s1cdmax = master->ssid_bits;
1138 	max_contexts = 1 << cd_table->s1cdmax;
1139 
1140 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1141 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1142 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1143 		cd_table->num_l1_ents = max_contexts;
1144 
1145 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1146 	} else {
1147 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1148 		cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
1149 						  CTXDESC_L2_ENTRIES);
1150 
1151 		cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
1152 					      sizeof(*cd_table->l1_desc),
1153 					      GFP_KERNEL);
1154 		if (!cd_table->l1_desc)
1155 			return -ENOMEM;
1156 
1157 		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1158 	}
1159 
1160 	cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
1161 					   GFP_KERNEL);
1162 	if (!cd_table->cdtab) {
1163 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1164 		ret = -ENOMEM;
1165 		goto err_free_l1;
1166 	}
1167 
1168 	return 0;
1169 
1170 err_free_l1:
1171 	if (cd_table->l1_desc) {
1172 		devm_kfree(smmu->dev, cd_table->l1_desc);
1173 		cd_table->l1_desc = NULL;
1174 	}
1175 	return ret;
1176 }
1177 
1178 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1179 {
1180 	int i;
1181 	size_t size, l1size;
1182 	struct arm_smmu_device *smmu = master->smmu;
1183 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1184 
1185 	if (cd_table->l1_desc) {
1186 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1187 
1188 		for (i = 0; i < cd_table->num_l1_ents; i++) {
1189 			if (!cd_table->l1_desc[i].l2ptr)
1190 				continue;
1191 
1192 			dmam_free_coherent(smmu->dev, size,
1193 					   cd_table->l1_desc[i].l2ptr,
1194 					   cd_table->l1_desc[i].l2ptr_dma);
1195 		}
1196 		devm_kfree(smmu->dev, cd_table->l1_desc);
1197 		cd_table->l1_desc = NULL;
1198 
1199 		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1200 	} else {
1201 		l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1202 	}
1203 
1204 	dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
1205 	cd_table->cdtab_dma = 0;
1206 	cd_table->cdtab = NULL;
1207 }
1208 
1209 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1210 {
1211 	bool free;
1212 	struct arm_smmu_ctx_desc *old_cd;
1213 
1214 	if (!cd->asid)
1215 		return false;
1216 
1217 	free = refcount_dec_and_test(&cd->refs);
1218 	if (free) {
1219 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1220 		WARN_ON(old_cd != cd);
1221 	}
1222 	return free;
1223 }
1224 
1225 /* Stream table manipulation functions */
1226 static void
1227 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1228 {
1229 	u64 val = 0;
1230 
1231 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1232 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1233 
1234 	/* See comment in arm_smmu_write_ctx_desc() */
1235 	WRITE_ONCE(*dst, cpu_to_le64(val));
1236 }
1237 
1238 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1239 {
1240 	struct arm_smmu_cmdq_ent cmd = {
1241 		.opcode	= CMDQ_OP_CFGI_STE,
1242 		.cfgi	= {
1243 			.sid	= sid,
1244 			.leaf	= true,
1245 		},
1246 	};
1247 
1248 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1249 }
1250 
1251 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1252 				      __le64 *dst)
1253 {
1254 	/*
1255 	 * This is hideously complicated, but we only really care about
1256 	 * three cases at the moment:
1257 	 *
1258 	 * 1. Invalid (all zero) -> bypass/fault (init)
1259 	 * 2. Bypass/fault -> translation/bypass (attach)
1260 	 * 3. Translation/bypass -> bypass/fault (detach)
1261 	 *
1262 	 * Given that we can't update the STE atomically and the SMMU
1263 	 * doesn't read the thing in a defined order, that leaves us
1264 	 * with the following maintenance requirements:
1265 	 *
1266 	 * 1. Update Config, return (init time STEs aren't live)
1267 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1268 	 * 3. Update Config, sync
1269 	 */
1270 	u64 val = le64_to_cpu(dst[0]);
1271 	bool ste_live = false;
1272 	struct arm_smmu_device *smmu = NULL;
1273 	struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
1274 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1275 	struct arm_smmu_domain *smmu_domain = NULL;
1276 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1277 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1278 		.prefetch	= {
1279 			.sid	= sid,
1280 		},
1281 	};
1282 
1283 	if (master) {
1284 		smmu_domain = master->domain;
1285 		smmu = master->smmu;
1286 	}
1287 
1288 	if (smmu_domain) {
1289 		switch (smmu_domain->stage) {
1290 		case ARM_SMMU_DOMAIN_S1:
1291 			cd_table = &master->cd_table;
1292 			break;
1293 		case ARM_SMMU_DOMAIN_S2:
1294 		case ARM_SMMU_DOMAIN_NESTED:
1295 			s2_cfg = &smmu_domain->s2_cfg;
1296 			break;
1297 		default:
1298 			break;
1299 		}
1300 	}
1301 
1302 	if (val & STRTAB_STE_0_V) {
1303 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1304 		case STRTAB_STE_0_CFG_BYPASS:
1305 			break;
1306 		case STRTAB_STE_0_CFG_S1_TRANS:
1307 		case STRTAB_STE_0_CFG_S2_TRANS:
1308 			ste_live = true;
1309 			break;
1310 		case STRTAB_STE_0_CFG_ABORT:
1311 			BUG_ON(!disable_bypass);
1312 			break;
1313 		default:
1314 			BUG(); /* STE corruption */
1315 		}
1316 	}
1317 
1318 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1319 	val = STRTAB_STE_0_V;
1320 
1321 	/* Bypass/fault */
1322 	if (!smmu_domain || !(cd_table || s2_cfg)) {
1323 		if (!smmu_domain && disable_bypass)
1324 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1325 		else
1326 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1327 
1328 		dst[0] = cpu_to_le64(val);
1329 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1330 						STRTAB_STE_1_SHCFG_INCOMING));
1331 		dst[2] = 0; /* Nuke the VMID */
1332 		/*
1333 		 * The SMMU can perform negative caching, so we must sync
1334 		 * the STE regardless of whether the old value was live.
1335 		 */
1336 		if (smmu)
1337 			arm_smmu_sync_ste_for_sid(smmu, sid);
1338 		return;
1339 	}
1340 
1341 	if (cd_table) {
1342 		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1343 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1344 
1345 		BUG_ON(ste_live);
1346 		dst[1] = cpu_to_le64(
1347 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1348 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1349 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1350 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1351 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1352 
1353 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1354 		    !master->stall_enabled)
1355 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1356 
1357 		val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1358 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1359 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
1360 			FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
1361 	}
1362 
1363 	if (s2_cfg) {
1364 		BUG_ON(ste_live);
1365 		dst[2] = cpu_to_le64(
1366 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1367 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1368 #ifdef __BIG_ENDIAN
1369 			 STRTAB_STE_2_S2ENDI |
1370 #endif
1371 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1372 			 STRTAB_STE_2_S2R);
1373 
1374 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1375 
1376 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1377 	}
1378 
1379 	if (master->ats_enabled)
1380 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1381 						 STRTAB_STE_1_EATS_TRANS));
1382 
1383 	arm_smmu_sync_ste_for_sid(smmu, sid);
1384 	/* See comment in arm_smmu_write_ctx_desc() */
1385 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1386 	arm_smmu_sync_ste_for_sid(smmu, sid);
1387 
1388 	/* It's likely that we'll want to use the new STE soon */
1389 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1390 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1391 }
1392 
1393 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
1394 {
1395 	unsigned int i;
1396 	u64 val = STRTAB_STE_0_V;
1397 
1398 	if (disable_bypass && !force)
1399 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1400 	else
1401 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1402 
1403 	for (i = 0; i < nent; ++i) {
1404 		strtab[0] = cpu_to_le64(val);
1405 		strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1406 						   STRTAB_STE_1_SHCFG_INCOMING));
1407 		strtab[2] = 0;
1408 		strtab += STRTAB_STE_DWORDS;
1409 	}
1410 }
1411 
1412 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1413 {
1414 	size_t size;
1415 	void *strtab;
1416 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1417 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1418 
1419 	if (desc->l2ptr)
1420 		return 0;
1421 
1422 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1423 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1424 
1425 	desc->span = STRTAB_SPLIT + 1;
1426 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1427 					  GFP_KERNEL);
1428 	if (!desc->l2ptr) {
1429 		dev_err(smmu->dev,
1430 			"failed to allocate l2 stream table for SID %u\n",
1431 			sid);
1432 		return -ENOMEM;
1433 	}
1434 
1435 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1436 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1437 	return 0;
1438 }
1439 
1440 static struct arm_smmu_master *
1441 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1442 {
1443 	struct rb_node *node;
1444 	struct arm_smmu_stream *stream;
1445 
1446 	lockdep_assert_held(&smmu->streams_mutex);
1447 
1448 	node = smmu->streams.rb_node;
1449 	while (node) {
1450 		stream = rb_entry(node, struct arm_smmu_stream, node);
1451 		if (stream->id < sid)
1452 			node = node->rb_right;
1453 		else if (stream->id > sid)
1454 			node = node->rb_left;
1455 		else
1456 			return stream->master;
1457 	}
1458 
1459 	return NULL;
1460 }
1461 
1462 /* IRQ and event handlers */
1463 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1464 {
1465 	int ret;
1466 	u32 reason;
1467 	u32 perm = 0;
1468 	struct arm_smmu_master *master;
1469 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1470 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1471 	struct iommu_fault_event fault_evt = { };
1472 	struct iommu_fault *flt = &fault_evt.fault;
1473 
1474 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1475 	case EVT_ID_TRANSLATION_FAULT:
1476 		reason = IOMMU_FAULT_REASON_PTE_FETCH;
1477 		break;
1478 	case EVT_ID_ADDR_SIZE_FAULT:
1479 		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1480 		break;
1481 	case EVT_ID_ACCESS_FAULT:
1482 		reason = IOMMU_FAULT_REASON_ACCESS;
1483 		break;
1484 	case EVT_ID_PERMISSION_FAULT:
1485 		reason = IOMMU_FAULT_REASON_PERMISSION;
1486 		break;
1487 	default:
1488 		return -EOPNOTSUPP;
1489 	}
1490 
1491 	/* Stage-2 is always pinned at the moment */
1492 	if (evt[1] & EVTQ_1_S2)
1493 		return -EFAULT;
1494 
1495 	if (evt[1] & EVTQ_1_RnW)
1496 		perm |= IOMMU_FAULT_PERM_READ;
1497 	else
1498 		perm |= IOMMU_FAULT_PERM_WRITE;
1499 
1500 	if (evt[1] & EVTQ_1_InD)
1501 		perm |= IOMMU_FAULT_PERM_EXEC;
1502 
1503 	if (evt[1] & EVTQ_1_PnU)
1504 		perm |= IOMMU_FAULT_PERM_PRIV;
1505 
1506 	if (evt[1] & EVTQ_1_STALL) {
1507 		flt->type = IOMMU_FAULT_PAGE_REQ;
1508 		flt->prm = (struct iommu_fault_page_request) {
1509 			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1510 			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1511 			.perm = perm,
1512 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1513 		};
1514 
1515 		if (ssid_valid) {
1516 			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1517 			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1518 		}
1519 	} else {
1520 		flt->type = IOMMU_FAULT_DMA_UNRECOV;
1521 		flt->event = (struct iommu_fault_unrecoverable) {
1522 			.reason = reason,
1523 			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1524 			.perm = perm,
1525 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1526 		};
1527 
1528 		if (ssid_valid) {
1529 			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1530 			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1531 		}
1532 	}
1533 
1534 	mutex_lock(&smmu->streams_mutex);
1535 	master = arm_smmu_find_master(smmu, sid);
1536 	if (!master) {
1537 		ret = -EINVAL;
1538 		goto out_unlock;
1539 	}
1540 
1541 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1542 	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1543 		/* Nobody cared, abort the access */
1544 		struct iommu_page_response resp = {
1545 			.pasid		= flt->prm.pasid,
1546 			.grpid		= flt->prm.grpid,
1547 			.code		= IOMMU_PAGE_RESP_FAILURE,
1548 		};
1549 		arm_smmu_page_response(master->dev, &fault_evt, &resp);
1550 	}
1551 
1552 out_unlock:
1553 	mutex_unlock(&smmu->streams_mutex);
1554 	return ret;
1555 }
1556 
1557 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1558 {
1559 	int i, ret;
1560 	struct arm_smmu_device *smmu = dev;
1561 	struct arm_smmu_queue *q = &smmu->evtq.q;
1562 	struct arm_smmu_ll_queue *llq = &q->llq;
1563 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1564 				      DEFAULT_RATELIMIT_BURST);
1565 	u64 evt[EVTQ_ENT_DWORDS];
1566 
1567 	do {
1568 		while (!queue_remove_raw(q, evt)) {
1569 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1570 
1571 			ret = arm_smmu_handle_evt(smmu, evt);
1572 			if (!ret || !__ratelimit(&rs))
1573 				continue;
1574 
1575 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1576 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1577 				dev_info(smmu->dev, "\t0x%016llx\n",
1578 					 (unsigned long long)evt[i]);
1579 
1580 			cond_resched();
1581 		}
1582 
1583 		/*
1584 		 * Not much we can do on overflow, so scream and pretend we're
1585 		 * trying harder.
1586 		 */
1587 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1588 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1589 	} while (!queue_empty(llq));
1590 
1591 	/* Sync our overflow flag, as we believe we're up to speed */
1592 	queue_sync_cons_ovf(q);
1593 	return IRQ_HANDLED;
1594 }
1595 
1596 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1597 {
1598 	u32 sid, ssid;
1599 	u16 grpid;
1600 	bool ssv, last;
1601 
1602 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1603 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1604 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1605 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1606 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1607 
1608 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1609 	dev_info(smmu->dev,
1610 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1611 		 sid, ssid, grpid, last ? "L" : "",
1612 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1613 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1614 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1615 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1616 		 evt[1] & PRIQ_1_ADDR_MASK);
1617 
1618 	if (last) {
1619 		struct arm_smmu_cmdq_ent cmd = {
1620 			.opcode			= CMDQ_OP_PRI_RESP,
1621 			.substream_valid	= ssv,
1622 			.pri			= {
1623 				.sid	= sid,
1624 				.ssid	= ssid,
1625 				.grpid	= grpid,
1626 				.resp	= PRI_RESP_DENY,
1627 			},
1628 		};
1629 
1630 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1631 	}
1632 }
1633 
1634 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1635 {
1636 	struct arm_smmu_device *smmu = dev;
1637 	struct arm_smmu_queue *q = &smmu->priq.q;
1638 	struct arm_smmu_ll_queue *llq = &q->llq;
1639 	u64 evt[PRIQ_ENT_DWORDS];
1640 
1641 	do {
1642 		while (!queue_remove_raw(q, evt))
1643 			arm_smmu_handle_ppr(smmu, evt);
1644 
1645 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1646 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1647 	} while (!queue_empty(llq));
1648 
1649 	/* Sync our overflow flag, as we believe we're up to speed */
1650 	queue_sync_cons_ovf(q);
1651 	return IRQ_HANDLED;
1652 }
1653 
1654 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1655 
1656 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1657 {
1658 	u32 gerror, gerrorn, active;
1659 	struct arm_smmu_device *smmu = dev;
1660 
1661 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1662 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1663 
1664 	active = gerror ^ gerrorn;
1665 	if (!(active & GERROR_ERR_MASK))
1666 		return IRQ_NONE; /* No errors pending */
1667 
1668 	dev_warn(smmu->dev,
1669 		 "unexpected global error reported (0x%08x), this could be serious\n",
1670 		 active);
1671 
1672 	if (active & GERROR_SFM_ERR) {
1673 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1674 		arm_smmu_device_disable(smmu);
1675 	}
1676 
1677 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1678 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1679 
1680 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1681 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1682 
1683 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1684 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1685 
1686 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1687 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1688 
1689 	if (active & GERROR_PRIQ_ABT_ERR)
1690 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1691 
1692 	if (active & GERROR_EVTQ_ABT_ERR)
1693 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1694 
1695 	if (active & GERROR_CMDQ_ERR)
1696 		arm_smmu_cmdq_skip_err(smmu);
1697 
1698 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1699 	return IRQ_HANDLED;
1700 }
1701 
1702 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1703 {
1704 	struct arm_smmu_device *smmu = dev;
1705 
1706 	arm_smmu_evtq_thread(irq, dev);
1707 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1708 		arm_smmu_priq_thread(irq, dev);
1709 
1710 	return IRQ_HANDLED;
1711 }
1712 
1713 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1714 {
1715 	arm_smmu_gerror_handler(irq, dev);
1716 	return IRQ_WAKE_THREAD;
1717 }
1718 
1719 static void
1720 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1721 			struct arm_smmu_cmdq_ent *cmd)
1722 {
1723 	size_t log2_span;
1724 	size_t span_mask;
1725 	/* ATC invalidates are always on 4096-bytes pages */
1726 	size_t inval_grain_shift = 12;
1727 	unsigned long page_start, page_end;
1728 
1729 	/*
1730 	 * ATS and PASID:
1731 	 *
1732 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1733 	 * prefix. In that case all ATC entries within the address range are
1734 	 * invalidated, including those that were requested with a PASID! There
1735 	 * is no way to invalidate only entries without PASID.
1736 	 *
1737 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1738 	 * traffic), translation requests without PASID create ATC entries
1739 	 * without PASID, which must be invalidated with substream_valid clear.
1740 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1741 	 * ATC entries within the address range.
1742 	 */
1743 	*cmd = (struct arm_smmu_cmdq_ent) {
1744 		.opcode			= CMDQ_OP_ATC_INV,
1745 		.substream_valid	= (ssid != IOMMU_NO_PASID),
1746 		.atc.ssid		= ssid,
1747 	};
1748 
1749 	if (!size) {
1750 		cmd->atc.size = ATC_INV_SIZE_ALL;
1751 		return;
1752 	}
1753 
1754 	page_start	= iova >> inval_grain_shift;
1755 	page_end	= (iova + size - 1) >> inval_grain_shift;
1756 
1757 	/*
1758 	 * In an ATS Invalidate Request, the address must be aligned on the
1759 	 * range size, which must be a power of two number of page sizes. We
1760 	 * thus have to choose between grossly over-invalidating the region, or
1761 	 * splitting the invalidation into multiple commands. For simplicity
1762 	 * we'll go with the first solution, but should refine it in the future
1763 	 * if multiple commands are shown to be more efficient.
1764 	 *
1765 	 * Find the smallest power of two that covers the range. The most
1766 	 * significant differing bit between the start and end addresses,
1767 	 * fls(start ^ end), indicates the required span. For example:
1768 	 *
1769 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1770 	 *		x = 0b1000 ^ 0b1011 = 0b11
1771 	 *		span = 1 << fls(x) = 4
1772 	 *
1773 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1774 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1775 	 *		span = 1 << fls(x) = 16
1776 	 */
1777 	log2_span	= fls_long(page_start ^ page_end);
1778 	span_mask	= (1ULL << log2_span) - 1;
1779 
1780 	page_start	&= ~span_mask;
1781 
1782 	cmd->atc.addr	= page_start << inval_grain_shift;
1783 	cmd->atc.size	= log2_span;
1784 }
1785 
1786 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1787 {
1788 	int i;
1789 	struct arm_smmu_cmdq_ent cmd;
1790 	struct arm_smmu_cmdq_batch cmds;
1791 
1792 	arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
1793 
1794 	cmds.num = 0;
1795 	for (i = 0; i < master->num_streams; i++) {
1796 		cmd.atc.sid = master->streams[i].id;
1797 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1798 	}
1799 
1800 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1801 }
1802 
1803 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1804 			    unsigned long iova, size_t size)
1805 {
1806 	int i;
1807 	unsigned long flags;
1808 	struct arm_smmu_cmdq_ent cmd;
1809 	struct arm_smmu_master *master;
1810 	struct arm_smmu_cmdq_batch cmds;
1811 
1812 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1813 		return 0;
1814 
1815 	/*
1816 	 * Ensure that we've completed prior invalidation of the main TLBs
1817 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1818 	 * arm_smmu_enable_ats():
1819 	 *
1820 	 *	// unmap()			// arm_smmu_enable_ats()
1821 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1822 	 *	smp_mb();			[...]
1823 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1824 	 *
1825 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1826 	 * ATS was enabled at the PCI device before completion of the TLBI.
1827 	 */
1828 	smp_mb();
1829 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1830 		return 0;
1831 
1832 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1833 
1834 	cmds.num = 0;
1835 
1836 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1837 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1838 		if (!master->ats_enabled)
1839 			continue;
1840 
1841 		for (i = 0; i < master->num_streams; i++) {
1842 			cmd.atc.sid = master->streams[i].id;
1843 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1844 		}
1845 	}
1846 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1847 
1848 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1849 }
1850 
1851 /* IO_PGTABLE API */
1852 static void arm_smmu_tlb_inv_context(void *cookie)
1853 {
1854 	struct arm_smmu_domain *smmu_domain = cookie;
1855 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1856 	struct arm_smmu_cmdq_ent cmd;
1857 
1858 	/*
1859 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1860 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1861 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1862 	 * insertion to guarantee those are observed before the TLBI. Do be
1863 	 * careful, 007.
1864 	 */
1865 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1866 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
1867 	} else {
1868 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1869 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1870 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1871 	}
1872 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
1873 }
1874 
1875 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1876 				     unsigned long iova, size_t size,
1877 				     size_t granule,
1878 				     struct arm_smmu_domain *smmu_domain)
1879 {
1880 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1881 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1882 	size_t inv_range = granule;
1883 	struct arm_smmu_cmdq_batch cmds;
1884 
1885 	if (!size)
1886 		return;
1887 
1888 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1889 		/* Get the leaf page size */
1890 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1891 
1892 		num_pages = size >> tg;
1893 
1894 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1895 		cmd->tlbi.tg = (tg - 10) / 2;
1896 
1897 		/*
1898 		 * Determine what level the granule is at. For non-leaf, both
1899 		 * io-pgtable and SVA pass a nominal last-level granule because
1900 		 * they don't know what level(s) actually apply, so ignore that
1901 		 * and leave TTL=0. However for various errata reasons we still
1902 		 * want to use a range command, so avoid the SVA corner case
1903 		 * where both scale and num could be 0 as well.
1904 		 */
1905 		if (cmd->tlbi.leaf)
1906 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1907 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
1908 			num_pages++;
1909 	}
1910 
1911 	cmds.num = 0;
1912 
1913 	while (iova < end) {
1914 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1915 			/*
1916 			 * On each iteration of the loop, the range is 5 bits
1917 			 * worth of the aligned size remaining.
1918 			 * The range in pages is:
1919 			 *
1920 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1921 			 */
1922 			unsigned long scale, num;
1923 
1924 			/* Determine the power of 2 multiple number of pages */
1925 			scale = __ffs(num_pages);
1926 			cmd->tlbi.scale = scale;
1927 
1928 			/* Determine how many chunks of 2^scale size we have */
1929 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1930 			cmd->tlbi.num = num - 1;
1931 
1932 			/* range is num * 2^scale * pgsize */
1933 			inv_range = num << (scale + tg);
1934 
1935 			/* Clear out the lower order bits for the next iteration */
1936 			num_pages -= num << scale;
1937 		}
1938 
1939 		cmd->tlbi.addr = iova;
1940 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1941 		iova += inv_range;
1942 	}
1943 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1944 }
1945 
1946 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1947 					  size_t granule, bool leaf,
1948 					  struct arm_smmu_domain *smmu_domain)
1949 {
1950 	struct arm_smmu_cmdq_ent cmd = {
1951 		.tlbi = {
1952 			.leaf	= leaf,
1953 		},
1954 	};
1955 
1956 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1957 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1958 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1959 		cmd.tlbi.asid	= smmu_domain->cd.asid;
1960 	} else {
1961 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1962 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1963 	}
1964 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1965 
1966 	/*
1967 	 * Unfortunately, this can't be leaf-only since we may have
1968 	 * zapped an entire table.
1969 	 */
1970 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
1971 }
1972 
1973 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1974 				 size_t granule, bool leaf,
1975 				 struct arm_smmu_domain *smmu_domain)
1976 {
1977 	struct arm_smmu_cmdq_ent cmd = {
1978 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1979 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1980 		.tlbi = {
1981 			.asid	= asid,
1982 			.leaf	= leaf,
1983 		},
1984 	};
1985 
1986 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1987 }
1988 
1989 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1990 					 unsigned long iova, size_t granule,
1991 					 void *cookie)
1992 {
1993 	struct arm_smmu_domain *smmu_domain = cookie;
1994 	struct iommu_domain *domain = &smmu_domain->domain;
1995 
1996 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1997 }
1998 
1999 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2000 				  size_t granule, void *cookie)
2001 {
2002 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2003 }
2004 
2005 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2006 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2007 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2008 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2009 };
2010 
2011 /* IOMMU API */
2012 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2013 {
2014 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2015 
2016 	switch (cap) {
2017 	case IOMMU_CAP_CACHE_COHERENCY:
2018 		/* Assume that a coherent TCU implies coherent TBUs */
2019 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2020 	case IOMMU_CAP_NOEXEC:
2021 	case IOMMU_CAP_DEFERRED_FLUSH:
2022 		return true;
2023 	default:
2024 		return false;
2025 	}
2026 }
2027 
2028 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2029 {
2030 	struct arm_smmu_domain *smmu_domain;
2031 
2032 	if (type == IOMMU_DOMAIN_SVA)
2033 		return arm_smmu_sva_domain_alloc();
2034 
2035 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2036 	    type != IOMMU_DOMAIN_DMA &&
2037 	    type != IOMMU_DOMAIN_IDENTITY)
2038 		return NULL;
2039 
2040 	/*
2041 	 * Allocate the domain and initialise some of its data structures.
2042 	 * We can't really do anything meaningful until we've added a
2043 	 * master.
2044 	 */
2045 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2046 	if (!smmu_domain)
2047 		return NULL;
2048 
2049 	mutex_init(&smmu_domain->init_mutex);
2050 	INIT_LIST_HEAD(&smmu_domain->devices);
2051 	spin_lock_init(&smmu_domain->devices_lock);
2052 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2053 
2054 	return &smmu_domain->domain;
2055 }
2056 
2057 static void arm_smmu_domain_free(struct iommu_domain *domain)
2058 {
2059 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2060 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2061 
2062 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2063 
2064 	/* Free the ASID or VMID */
2065 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2066 		/* Prevent SVA from touching the CD while we're freeing it */
2067 		mutex_lock(&arm_smmu_asid_lock);
2068 		arm_smmu_free_asid(&smmu_domain->cd);
2069 		mutex_unlock(&arm_smmu_asid_lock);
2070 	} else {
2071 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2072 		if (cfg->vmid)
2073 			ida_free(&smmu->vmid_map, cfg->vmid);
2074 	}
2075 
2076 	kfree(smmu_domain);
2077 }
2078 
2079 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2080 				       struct io_pgtable_cfg *pgtbl_cfg)
2081 {
2082 	int ret;
2083 	u32 asid;
2084 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2085 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2086 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2087 
2088 	refcount_set(&cd->refs, 1);
2089 
2090 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2091 	mutex_lock(&arm_smmu_asid_lock);
2092 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
2093 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2094 	if (ret)
2095 		goto out_unlock;
2096 
2097 	cd->asid	= (u16)asid;
2098 	cd->ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2099 	cd->tcr		= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2100 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2101 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2102 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2103 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2104 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2105 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2106 	cd->mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2107 
2108 	mutex_unlock(&arm_smmu_asid_lock);
2109 	return 0;
2110 
2111 out_unlock:
2112 	mutex_unlock(&arm_smmu_asid_lock);
2113 	return ret;
2114 }
2115 
2116 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2117 				       struct io_pgtable_cfg *pgtbl_cfg)
2118 {
2119 	int vmid;
2120 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2121 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2122 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2123 
2124 	/* Reserve VMID 0 for stage-2 bypass STEs */
2125 	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2126 			       GFP_KERNEL);
2127 	if (vmid < 0)
2128 		return vmid;
2129 
2130 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2131 	cfg->vmid	= (u16)vmid;
2132 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2133 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2134 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2135 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2136 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2137 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2138 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2139 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2140 	return 0;
2141 }
2142 
2143 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
2144 {
2145 	int ret;
2146 	unsigned long ias, oas;
2147 	enum io_pgtable_fmt fmt;
2148 	struct io_pgtable_cfg pgtbl_cfg;
2149 	struct io_pgtable_ops *pgtbl_ops;
2150 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2151 				 struct io_pgtable_cfg *);
2152 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2153 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2154 
2155 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2156 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2157 		return 0;
2158 	}
2159 
2160 	/* Restrict the stage to what we can actually support */
2161 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2162 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2163 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2164 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2165 
2166 	switch (smmu_domain->stage) {
2167 	case ARM_SMMU_DOMAIN_S1:
2168 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2169 		ias = min_t(unsigned long, ias, VA_BITS);
2170 		oas = smmu->ias;
2171 		fmt = ARM_64_LPAE_S1;
2172 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2173 		break;
2174 	case ARM_SMMU_DOMAIN_NESTED:
2175 	case ARM_SMMU_DOMAIN_S2:
2176 		ias = smmu->ias;
2177 		oas = smmu->oas;
2178 		fmt = ARM_64_LPAE_S2;
2179 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2180 		break;
2181 	default:
2182 		return -EINVAL;
2183 	}
2184 
2185 	pgtbl_cfg = (struct io_pgtable_cfg) {
2186 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2187 		.ias		= ias,
2188 		.oas		= oas,
2189 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2190 		.tlb		= &arm_smmu_flush_ops,
2191 		.iommu_dev	= smmu->dev,
2192 	};
2193 
2194 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2195 	if (!pgtbl_ops)
2196 		return -ENOMEM;
2197 
2198 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2199 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2200 	domain->geometry.force_aperture = true;
2201 
2202 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
2203 	if (ret < 0) {
2204 		free_io_pgtable_ops(pgtbl_ops);
2205 		return ret;
2206 	}
2207 
2208 	smmu_domain->pgtbl_ops = pgtbl_ops;
2209 	return 0;
2210 }
2211 
2212 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2213 {
2214 	__le64 *step;
2215 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2216 
2217 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2218 		struct arm_smmu_strtab_l1_desc *l1_desc;
2219 		int idx;
2220 
2221 		/* Two-level walk */
2222 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2223 		l1_desc = &cfg->l1_desc[idx];
2224 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2225 		step = &l1_desc->l2ptr[idx];
2226 	} else {
2227 		/* Simple linear lookup */
2228 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2229 	}
2230 
2231 	return step;
2232 }
2233 
2234 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2235 {
2236 	int i, j;
2237 	struct arm_smmu_device *smmu = master->smmu;
2238 
2239 	for (i = 0; i < master->num_streams; ++i) {
2240 		u32 sid = master->streams[i].id;
2241 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2242 
2243 		/* Bridged PCI devices may end up with duplicated IDs */
2244 		for (j = 0; j < i; j++)
2245 			if (master->streams[j].id == sid)
2246 				break;
2247 		if (j < i)
2248 			continue;
2249 
2250 		arm_smmu_write_strtab_ent(master, sid, step);
2251 	}
2252 }
2253 
2254 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2255 {
2256 	struct device *dev = master->dev;
2257 	struct arm_smmu_device *smmu = master->smmu;
2258 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2259 
2260 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2261 		return false;
2262 
2263 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2264 		return false;
2265 
2266 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2267 }
2268 
2269 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2270 {
2271 	size_t stu;
2272 	struct pci_dev *pdev;
2273 	struct arm_smmu_device *smmu = master->smmu;
2274 	struct arm_smmu_domain *smmu_domain = master->domain;
2275 
2276 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2277 	if (!master->ats_enabled)
2278 		return;
2279 
2280 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2281 	stu = __ffs(smmu->pgsize_bitmap);
2282 	pdev = to_pci_dev(master->dev);
2283 
2284 	atomic_inc(&smmu_domain->nr_ats_masters);
2285 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2286 	if (pci_enable_ats(pdev, stu))
2287 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2288 }
2289 
2290 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2291 {
2292 	struct arm_smmu_domain *smmu_domain = master->domain;
2293 
2294 	if (!master->ats_enabled)
2295 		return;
2296 
2297 	pci_disable_ats(to_pci_dev(master->dev));
2298 	/*
2299 	 * Ensure ATS is disabled at the endpoint before we issue the
2300 	 * ATC invalidation via the SMMU.
2301 	 */
2302 	wmb();
2303 	arm_smmu_atc_inv_master(master);
2304 	atomic_dec(&smmu_domain->nr_ats_masters);
2305 }
2306 
2307 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2308 {
2309 	int ret;
2310 	int features;
2311 	int num_pasids;
2312 	struct pci_dev *pdev;
2313 
2314 	if (!dev_is_pci(master->dev))
2315 		return -ENODEV;
2316 
2317 	pdev = to_pci_dev(master->dev);
2318 
2319 	features = pci_pasid_features(pdev);
2320 	if (features < 0)
2321 		return features;
2322 
2323 	num_pasids = pci_max_pasids(pdev);
2324 	if (num_pasids <= 0)
2325 		return num_pasids;
2326 
2327 	ret = pci_enable_pasid(pdev, features);
2328 	if (ret) {
2329 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2330 		return ret;
2331 	}
2332 
2333 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2334 				  master->smmu->ssid_bits);
2335 	return 0;
2336 }
2337 
2338 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2339 {
2340 	struct pci_dev *pdev;
2341 
2342 	if (!dev_is_pci(master->dev))
2343 		return;
2344 
2345 	pdev = to_pci_dev(master->dev);
2346 
2347 	if (!pdev->pasid_enabled)
2348 		return;
2349 
2350 	master->ssid_bits = 0;
2351 	pci_disable_pasid(pdev);
2352 }
2353 
2354 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2355 {
2356 	unsigned long flags;
2357 	struct arm_smmu_domain *smmu_domain = master->domain;
2358 
2359 	if (!smmu_domain)
2360 		return;
2361 
2362 	arm_smmu_disable_ats(master);
2363 
2364 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2365 	list_del(&master->domain_head);
2366 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2367 
2368 	master->domain = NULL;
2369 	master->ats_enabled = false;
2370 	arm_smmu_install_ste_for_dev(master);
2371 	/*
2372 	 * Clearing the CD entry isn't strictly required to detach the domain
2373 	 * since the table is uninstalled anyway, but it helps avoid confusion
2374 	 * in the call to arm_smmu_write_ctx_desc on the next attach (which
2375 	 * expects the entry to be empty).
2376 	 */
2377 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab)
2378 		arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
2379 }
2380 
2381 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2382 {
2383 	int ret = 0;
2384 	unsigned long flags;
2385 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2386 	struct arm_smmu_device *smmu;
2387 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2388 	struct arm_smmu_master *master;
2389 
2390 	if (!fwspec)
2391 		return -ENOENT;
2392 
2393 	master = dev_iommu_priv_get(dev);
2394 	smmu = master->smmu;
2395 
2396 	/*
2397 	 * Checking that SVA is disabled ensures that this device isn't bound to
2398 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2399 	 * be removed concurrently since we're holding the group mutex.
2400 	 */
2401 	if (arm_smmu_master_sva_enabled(master)) {
2402 		dev_err(dev, "cannot attach - SVA enabled\n");
2403 		return -EBUSY;
2404 	}
2405 
2406 	arm_smmu_detach_dev(master);
2407 
2408 	mutex_lock(&smmu_domain->init_mutex);
2409 
2410 	if (!smmu_domain->smmu) {
2411 		smmu_domain->smmu = smmu;
2412 		ret = arm_smmu_domain_finalise(domain);
2413 		if (ret)
2414 			smmu_domain->smmu = NULL;
2415 	} else if (smmu_domain->smmu != smmu)
2416 		ret = -EINVAL;
2417 
2418 	mutex_unlock(&smmu_domain->init_mutex);
2419 	if (ret)
2420 		return ret;
2421 
2422 	master->domain = smmu_domain;
2423 
2424 	/*
2425 	 * The SMMU does not support enabling ATS with bypass. When the STE is
2426 	 * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
2427 	 * Translated transactions are denied as though ATS is disabled for the
2428 	 * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2429 	 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2430 	 */
2431 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2432 		master->ats_enabled = arm_smmu_ats_supported(master);
2433 
2434 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2435 	list_add(&master->domain_head, &smmu_domain->devices);
2436 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2437 
2438 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2439 		if (!master->cd_table.cdtab) {
2440 			ret = arm_smmu_alloc_cd_tables(master);
2441 			if (ret) {
2442 				master->domain = NULL;
2443 				goto out_list_del;
2444 			}
2445 		}
2446 
2447 		/*
2448 		 * Prevent SVA from concurrently modifying the CD or writing to
2449 		 * the CD entry
2450 		 */
2451 		mutex_lock(&arm_smmu_asid_lock);
2452 		ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
2453 		mutex_unlock(&arm_smmu_asid_lock);
2454 		if (ret) {
2455 			master->domain = NULL;
2456 			goto out_list_del;
2457 		}
2458 	}
2459 
2460 	arm_smmu_install_ste_for_dev(master);
2461 
2462 	arm_smmu_enable_ats(master);
2463 	return 0;
2464 
2465 out_list_del:
2466 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2467 	list_del(&master->domain_head);
2468 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2469 
2470 	return ret;
2471 }
2472 
2473 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2474 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2475 			      int prot, gfp_t gfp, size_t *mapped)
2476 {
2477 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2478 
2479 	if (!ops)
2480 		return -ENODEV;
2481 
2482 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2483 }
2484 
2485 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2486 				   size_t pgsize, size_t pgcount,
2487 				   struct iommu_iotlb_gather *gather)
2488 {
2489 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2490 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2491 
2492 	if (!ops)
2493 		return 0;
2494 
2495 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2496 }
2497 
2498 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2499 {
2500 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2501 
2502 	if (smmu_domain->smmu)
2503 		arm_smmu_tlb_inv_context(smmu_domain);
2504 }
2505 
2506 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2507 				struct iommu_iotlb_gather *gather)
2508 {
2509 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2510 
2511 	if (!gather->pgsize)
2512 		return;
2513 
2514 	arm_smmu_tlb_inv_range_domain(gather->start,
2515 				      gather->end - gather->start + 1,
2516 				      gather->pgsize, true, smmu_domain);
2517 }
2518 
2519 static phys_addr_t
2520 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2521 {
2522 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2523 
2524 	if (!ops)
2525 		return 0;
2526 
2527 	return ops->iova_to_phys(ops, iova);
2528 }
2529 
2530 static struct platform_driver arm_smmu_driver;
2531 
2532 static
2533 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2534 {
2535 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2536 							  fwnode);
2537 	put_device(dev);
2538 	return dev ? dev_get_drvdata(dev) : NULL;
2539 }
2540 
2541 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2542 {
2543 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2544 
2545 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2546 		limit *= 1UL << STRTAB_SPLIT;
2547 
2548 	return sid < limit;
2549 }
2550 
2551 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2552 {
2553 	/* Check the SIDs are in range of the SMMU and our stream table */
2554 	if (!arm_smmu_sid_in_range(smmu, sid))
2555 		return -ERANGE;
2556 
2557 	/* Ensure l2 strtab is initialised */
2558 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2559 		return arm_smmu_init_l2_strtab(smmu, sid);
2560 
2561 	return 0;
2562 }
2563 
2564 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2565 				  struct arm_smmu_master *master)
2566 {
2567 	int i;
2568 	int ret = 0;
2569 	struct arm_smmu_stream *new_stream, *cur_stream;
2570 	struct rb_node **new_node, *parent_node = NULL;
2571 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2572 
2573 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2574 				  GFP_KERNEL);
2575 	if (!master->streams)
2576 		return -ENOMEM;
2577 	master->num_streams = fwspec->num_ids;
2578 
2579 	mutex_lock(&smmu->streams_mutex);
2580 	for (i = 0; i < fwspec->num_ids; i++) {
2581 		u32 sid = fwspec->ids[i];
2582 
2583 		new_stream = &master->streams[i];
2584 		new_stream->id = sid;
2585 		new_stream->master = master;
2586 
2587 		ret = arm_smmu_init_sid_strtab(smmu, sid);
2588 		if (ret)
2589 			break;
2590 
2591 		/* Insert into SID tree */
2592 		new_node = &(smmu->streams.rb_node);
2593 		while (*new_node) {
2594 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2595 					      node);
2596 			parent_node = *new_node;
2597 			if (cur_stream->id > new_stream->id) {
2598 				new_node = &((*new_node)->rb_left);
2599 			} else if (cur_stream->id < new_stream->id) {
2600 				new_node = &((*new_node)->rb_right);
2601 			} else {
2602 				dev_warn(master->dev,
2603 					 "stream %u already in tree\n",
2604 					 cur_stream->id);
2605 				ret = -EINVAL;
2606 				break;
2607 			}
2608 		}
2609 		if (ret)
2610 			break;
2611 
2612 		rb_link_node(&new_stream->node, parent_node, new_node);
2613 		rb_insert_color(&new_stream->node, &smmu->streams);
2614 	}
2615 
2616 	if (ret) {
2617 		for (i--; i >= 0; i--)
2618 			rb_erase(&master->streams[i].node, &smmu->streams);
2619 		kfree(master->streams);
2620 	}
2621 	mutex_unlock(&smmu->streams_mutex);
2622 
2623 	return ret;
2624 }
2625 
2626 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2627 {
2628 	int i;
2629 	struct arm_smmu_device *smmu = master->smmu;
2630 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2631 
2632 	if (!smmu || !master->streams)
2633 		return;
2634 
2635 	mutex_lock(&smmu->streams_mutex);
2636 	for (i = 0; i < fwspec->num_ids; i++)
2637 		rb_erase(&master->streams[i].node, &smmu->streams);
2638 	mutex_unlock(&smmu->streams_mutex);
2639 
2640 	kfree(master->streams);
2641 }
2642 
2643 static struct iommu_ops arm_smmu_ops;
2644 
2645 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2646 {
2647 	int ret;
2648 	struct arm_smmu_device *smmu;
2649 	struct arm_smmu_master *master;
2650 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2651 
2652 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2653 		return ERR_PTR(-ENODEV);
2654 
2655 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2656 		return ERR_PTR(-EBUSY);
2657 
2658 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2659 	if (!smmu)
2660 		return ERR_PTR(-ENODEV);
2661 
2662 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2663 	if (!master)
2664 		return ERR_PTR(-ENOMEM);
2665 
2666 	master->dev = dev;
2667 	master->smmu = smmu;
2668 	INIT_LIST_HEAD(&master->bonds);
2669 	dev_iommu_priv_set(dev, master);
2670 
2671 	ret = arm_smmu_insert_master(smmu, master);
2672 	if (ret)
2673 		goto err_free_master;
2674 
2675 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2676 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2677 
2678 	/*
2679 	 * Note that PASID must be enabled before, and disabled after ATS:
2680 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2681 	 *
2682 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2683 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2684 	 *   are changed.
2685 	 */
2686 	arm_smmu_enable_pasid(master);
2687 
2688 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2689 		master->ssid_bits = min_t(u8, master->ssid_bits,
2690 					  CTXDESC_LINEAR_CDMAX);
2691 
2692 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2693 	     device_property_read_bool(dev, "dma-can-stall")) ||
2694 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2695 		master->stall_enabled = true;
2696 
2697 	return &smmu->iommu;
2698 
2699 err_free_master:
2700 	kfree(master);
2701 	dev_iommu_priv_set(dev, NULL);
2702 	return ERR_PTR(ret);
2703 }
2704 
2705 static void arm_smmu_release_device(struct device *dev)
2706 {
2707 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2708 
2709 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2710 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2711 	arm_smmu_detach_dev(master);
2712 	arm_smmu_disable_pasid(master);
2713 	arm_smmu_remove_master(master);
2714 	if (master->cd_table.cdtab)
2715 		arm_smmu_free_cd_tables(master);
2716 	kfree(master);
2717 }
2718 
2719 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2720 {
2721 	struct iommu_group *group;
2722 
2723 	/*
2724 	 * We don't support devices sharing stream IDs other than PCI RID
2725 	 * aliases, since the necessary ID-to-device lookup becomes rather
2726 	 * impractical given a potential sparse 32-bit stream ID space.
2727 	 */
2728 	if (dev_is_pci(dev))
2729 		group = pci_device_group(dev);
2730 	else
2731 		group = generic_device_group(dev);
2732 
2733 	return group;
2734 }
2735 
2736 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2737 {
2738 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2739 	int ret = 0;
2740 
2741 	mutex_lock(&smmu_domain->init_mutex);
2742 	if (smmu_domain->smmu)
2743 		ret = -EPERM;
2744 	else
2745 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2746 	mutex_unlock(&smmu_domain->init_mutex);
2747 
2748 	return ret;
2749 }
2750 
2751 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2752 {
2753 	return iommu_fwspec_add_ids(dev, args->args, 1);
2754 }
2755 
2756 static void arm_smmu_get_resv_regions(struct device *dev,
2757 				      struct list_head *head)
2758 {
2759 	struct iommu_resv_region *region;
2760 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2761 
2762 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2763 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2764 	if (!region)
2765 		return;
2766 
2767 	list_add_tail(&region->list, head);
2768 
2769 	iommu_dma_get_resv_regions(dev, head);
2770 }
2771 
2772 static int arm_smmu_dev_enable_feature(struct device *dev,
2773 				       enum iommu_dev_features feat)
2774 {
2775 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2776 
2777 	if (!master)
2778 		return -ENODEV;
2779 
2780 	switch (feat) {
2781 	case IOMMU_DEV_FEAT_IOPF:
2782 		if (!arm_smmu_master_iopf_supported(master))
2783 			return -EINVAL;
2784 		if (master->iopf_enabled)
2785 			return -EBUSY;
2786 		master->iopf_enabled = true;
2787 		return 0;
2788 	case IOMMU_DEV_FEAT_SVA:
2789 		if (!arm_smmu_master_sva_supported(master))
2790 			return -EINVAL;
2791 		if (arm_smmu_master_sva_enabled(master))
2792 			return -EBUSY;
2793 		return arm_smmu_master_enable_sva(master);
2794 	default:
2795 		return -EINVAL;
2796 	}
2797 }
2798 
2799 static int arm_smmu_dev_disable_feature(struct device *dev,
2800 					enum iommu_dev_features feat)
2801 {
2802 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2803 
2804 	if (!master)
2805 		return -EINVAL;
2806 
2807 	switch (feat) {
2808 	case IOMMU_DEV_FEAT_IOPF:
2809 		if (!master->iopf_enabled)
2810 			return -EINVAL;
2811 		if (master->sva_enabled)
2812 			return -EBUSY;
2813 		master->iopf_enabled = false;
2814 		return 0;
2815 	case IOMMU_DEV_FEAT_SVA:
2816 		if (!arm_smmu_master_sva_enabled(master))
2817 			return -EINVAL;
2818 		return arm_smmu_master_disable_sva(master);
2819 	default:
2820 		return -EINVAL;
2821 	}
2822 }
2823 
2824 /*
2825  * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
2826  * PCIe link and save the data to memory by DMA. The hardware is restricted to
2827  * use identity mapping only.
2828  */
2829 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
2830 					 (pdev)->device == 0xa12e)
2831 
2832 static int arm_smmu_def_domain_type(struct device *dev)
2833 {
2834 	if (dev_is_pci(dev)) {
2835 		struct pci_dev *pdev = to_pci_dev(dev);
2836 
2837 		if (IS_HISI_PTT_DEVICE(pdev))
2838 			return IOMMU_DOMAIN_IDENTITY;
2839 	}
2840 
2841 	return 0;
2842 }
2843 
2844 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
2845 {
2846 	struct iommu_domain *domain;
2847 
2848 	domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
2849 	if (WARN_ON(IS_ERR(domain)) || !domain)
2850 		return;
2851 
2852 	arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
2853 }
2854 
2855 static struct iommu_ops arm_smmu_ops = {
2856 	.capable		= arm_smmu_capable,
2857 	.domain_alloc		= arm_smmu_domain_alloc,
2858 	.probe_device		= arm_smmu_probe_device,
2859 	.release_device		= arm_smmu_release_device,
2860 	.device_group		= arm_smmu_device_group,
2861 	.of_xlate		= arm_smmu_of_xlate,
2862 	.get_resv_regions	= arm_smmu_get_resv_regions,
2863 	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
2864 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2865 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2866 	.page_response		= arm_smmu_page_response,
2867 	.def_domain_type	= arm_smmu_def_domain_type,
2868 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2869 	.owner			= THIS_MODULE,
2870 	.default_domain_ops = &(const struct iommu_domain_ops) {
2871 		.attach_dev		= arm_smmu_attach_dev,
2872 		.map_pages		= arm_smmu_map_pages,
2873 		.unmap_pages		= arm_smmu_unmap_pages,
2874 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2875 		.iotlb_sync		= arm_smmu_iotlb_sync,
2876 		.iova_to_phys		= arm_smmu_iova_to_phys,
2877 		.enable_nesting		= arm_smmu_enable_nesting,
2878 		.free			= arm_smmu_domain_free,
2879 	}
2880 };
2881 
2882 /* Probing and initialisation functions */
2883 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2884 				   struct arm_smmu_queue *q,
2885 				   void __iomem *page,
2886 				   unsigned long prod_off,
2887 				   unsigned long cons_off,
2888 				   size_t dwords, const char *name)
2889 {
2890 	size_t qsz;
2891 
2892 	do {
2893 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2894 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2895 					      GFP_KERNEL);
2896 		if (q->base || qsz < PAGE_SIZE)
2897 			break;
2898 
2899 		q->llq.max_n_shift--;
2900 	} while (1);
2901 
2902 	if (!q->base) {
2903 		dev_err(smmu->dev,
2904 			"failed to allocate queue (0x%zx bytes) for %s\n",
2905 			qsz, name);
2906 		return -ENOMEM;
2907 	}
2908 
2909 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2910 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2911 			 1 << q->llq.max_n_shift, name);
2912 	}
2913 
2914 	q->prod_reg	= page + prod_off;
2915 	q->cons_reg	= page + cons_off;
2916 	q->ent_dwords	= dwords;
2917 
2918 	q->q_base  = Q_BASE_RWA;
2919 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2920 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2921 
2922 	q->llq.prod = q->llq.cons = 0;
2923 	return 0;
2924 }
2925 
2926 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2927 {
2928 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2929 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2930 
2931 	atomic_set(&cmdq->owner_prod, 0);
2932 	atomic_set(&cmdq->lock, 0);
2933 
2934 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2935 							      GFP_KERNEL);
2936 	if (!cmdq->valid_map)
2937 		return -ENOMEM;
2938 
2939 	return 0;
2940 }
2941 
2942 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2943 {
2944 	int ret;
2945 
2946 	/* cmdq */
2947 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2948 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2949 				      CMDQ_ENT_DWORDS, "cmdq");
2950 	if (ret)
2951 		return ret;
2952 
2953 	ret = arm_smmu_cmdq_init(smmu);
2954 	if (ret)
2955 		return ret;
2956 
2957 	/* evtq */
2958 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2959 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2960 				      EVTQ_ENT_DWORDS, "evtq");
2961 	if (ret)
2962 		return ret;
2963 
2964 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2965 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2966 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2967 		if (!smmu->evtq.iopf)
2968 			return -ENOMEM;
2969 	}
2970 
2971 	/* priq */
2972 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2973 		return 0;
2974 
2975 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2976 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2977 				       PRIQ_ENT_DWORDS, "priq");
2978 }
2979 
2980 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2981 {
2982 	unsigned int i;
2983 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2984 	void *strtab = smmu->strtab_cfg.strtab;
2985 
2986 	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
2987 				    sizeof(*cfg->l1_desc), GFP_KERNEL);
2988 	if (!cfg->l1_desc)
2989 		return -ENOMEM;
2990 
2991 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2992 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2993 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2994 	}
2995 
2996 	return 0;
2997 }
2998 
2999 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3000 {
3001 	void *strtab;
3002 	u64 reg;
3003 	u32 size, l1size;
3004 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3005 
3006 	/* Calculate the L1 size, capped to the SIDSIZE. */
3007 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3008 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3009 	cfg->num_l1_ents = 1 << size;
3010 
3011 	size += STRTAB_SPLIT;
3012 	if (size < smmu->sid_bits)
3013 		dev_warn(smmu->dev,
3014 			 "2-level strtab only covers %u/%u bits of SID\n",
3015 			 size, smmu->sid_bits);
3016 
3017 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3018 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3019 				     GFP_KERNEL);
3020 	if (!strtab) {
3021 		dev_err(smmu->dev,
3022 			"failed to allocate l1 stream table (%u bytes)\n",
3023 			l1size);
3024 		return -ENOMEM;
3025 	}
3026 	cfg->strtab = strtab;
3027 
3028 	/* Configure strtab_base_cfg for 2 levels */
3029 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3030 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3031 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3032 	cfg->strtab_base_cfg = reg;
3033 
3034 	return arm_smmu_init_l1_strtab(smmu);
3035 }
3036 
3037 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3038 {
3039 	void *strtab;
3040 	u64 reg;
3041 	u32 size;
3042 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3043 
3044 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3045 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3046 				     GFP_KERNEL);
3047 	if (!strtab) {
3048 		dev_err(smmu->dev,
3049 			"failed to allocate linear stream table (%u bytes)\n",
3050 			size);
3051 		return -ENOMEM;
3052 	}
3053 	cfg->strtab = strtab;
3054 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3055 
3056 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3057 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3058 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3059 	cfg->strtab_base_cfg = reg;
3060 
3061 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
3062 	return 0;
3063 }
3064 
3065 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3066 {
3067 	u64 reg;
3068 	int ret;
3069 
3070 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3071 		ret = arm_smmu_init_strtab_2lvl(smmu);
3072 	else
3073 		ret = arm_smmu_init_strtab_linear(smmu);
3074 
3075 	if (ret)
3076 		return ret;
3077 
3078 	/* Set the strtab base address */
3079 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3080 	reg |= STRTAB_BASE_RA;
3081 	smmu->strtab_cfg.strtab_base = reg;
3082 
3083 	ida_init(&smmu->vmid_map);
3084 
3085 	return 0;
3086 }
3087 
3088 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3089 {
3090 	int ret;
3091 
3092 	mutex_init(&smmu->streams_mutex);
3093 	smmu->streams = RB_ROOT;
3094 
3095 	ret = arm_smmu_init_queues(smmu);
3096 	if (ret)
3097 		return ret;
3098 
3099 	return arm_smmu_init_strtab(smmu);
3100 }
3101 
3102 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3103 				   unsigned int reg_off, unsigned int ack_off)
3104 {
3105 	u32 reg;
3106 
3107 	writel_relaxed(val, smmu->base + reg_off);
3108 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3109 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3110 }
3111 
3112 /* GBPA is "special" */
3113 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3114 {
3115 	int ret;
3116 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3117 
3118 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3119 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3120 	if (ret)
3121 		return ret;
3122 
3123 	reg &= ~clr;
3124 	reg |= set;
3125 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3126 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3127 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3128 
3129 	if (ret)
3130 		dev_err(smmu->dev, "GBPA not responding to update\n");
3131 	return ret;
3132 }
3133 
3134 static void arm_smmu_free_msis(void *data)
3135 {
3136 	struct device *dev = data;
3137 	platform_msi_domain_free_irqs(dev);
3138 }
3139 
3140 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3141 {
3142 	phys_addr_t doorbell;
3143 	struct device *dev = msi_desc_to_dev(desc);
3144 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3145 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3146 
3147 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3148 	doorbell &= MSI_CFG0_ADDR_MASK;
3149 
3150 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3151 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3152 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3153 }
3154 
3155 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3156 {
3157 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3158 	struct device *dev = smmu->dev;
3159 
3160 	/* Clear the MSI address regs */
3161 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3162 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3163 
3164 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3165 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3166 	else
3167 		nvec--;
3168 
3169 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3170 		return;
3171 
3172 	if (!dev->msi.domain) {
3173 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3174 		return;
3175 	}
3176 
3177 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3178 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3179 	if (ret) {
3180 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3181 		return;
3182 	}
3183 
3184 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3185 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3186 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3187 
3188 	/* Add callback to free MSIs on teardown */
3189 	devm_add_action(dev, arm_smmu_free_msis, dev);
3190 }
3191 
3192 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3193 {
3194 	int irq, ret;
3195 
3196 	arm_smmu_setup_msis(smmu);
3197 
3198 	/* Request interrupt lines */
3199 	irq = smmu->evtq.q.irq;
3200 	if (irq) {
3201 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3202 						arm_smmu_evtq_thread,
3203 						IRQF_ONESHOT,
3204 						"arm-smmu-v3-evtq", smmu);
3205 		if (ret < 0)
3206 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3207 	} else {
3208 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3209 	}
3210 
3211 	irq = smmu->gerr_irq;
3212 	if (irq) {
3213 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3214 				       0, "arm-smmu-v3-gerror", smmu);
3215 		if (ret < 0)
3216 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3217 	} else {
3218 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3219 	}
3220 
3221 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3222 		irq = smmu->priq.q.irq;
3223 		if (irq) {
3224 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3225 							arm_smmu_priq_thread,
3226 							IRQF_ONESHOT,
3227 							"arm-smmu-v3-priq",
3228 							smmu);
3229 			if (ret < 0)
3230 				dev_warn(smmu->dev,
3231 					 "failed to enable priq irq\n");
3232 		} else {
3233 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3234 		}
3235 	}
3236 }
3237 
3238 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3239 {
3240 	int ret, irq;
3241 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3242 
3243 	/* Disable IRQs first */
3244 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3245 				      ARM_SMMU_IRQ_CTRLACK);
3246 	if (ret) {
3247 		dev_err(smmu->dev, "failed to disable irqs\n");
3248 		return ret;
3249 	}
3250 
3251 	irq = smmu->combined_irq;
3252 	if (irq) {
3253 		/*
3254 		 * Cavium ThunderX2 implementation doesn't support unique irq
3255 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3256 		 */
3257 		ret = devm_request_threaded_irq(smmu->dev, irq,
3258 					arm_smmu_combined_irq_handler,
3259 					arm_smmu_combined_irq_thread,
3260 					IRQF_ONESHOT,
3261 					"arm-smmu-v3-combined-irq", smmu);
3262 		if (ret < 0)
3263 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3264 	} else
3265 		arm_smmu_setup_unique_irqs(smmu);
3266 
3267 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3268 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3269 
3270 	/* Enable interrupt generation on the SMMU */
3271 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3272 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3273 	if (ret)
3274 		dev_warn(smmu->dev, "failed to enable irqs\n");
3275 
3276 	return 0;
3277 }
3278 
3279 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3280 {
3281 	int ret;
3282 
3283 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3284 	if (ret)
3285 		dev_err(smmu->dev, "failed to clear cr0\n");
3286 
3287 	return ret;
3288 }
3289 
3290 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3291 {
3292 	int ret;
3293 	u32 reg, enables;
3294 	struct arm_smmu_cmdq_ent cmd;
3295 
3296 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3297 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3298 	if (reg & CR0_SMMUEN) {
3299 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3300 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3301 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3302 	}
3303 
3304 	ret = arm_smmu_device_disable(smmu);
3305 	if (ret)
3306 		return ret;
3307 
3308 	/* CR1 (table and queue memory attributes) */
3309 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3310 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3311 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3312 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3313 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3314 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3315 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3316 
3317 	/* CR2 (random crap) */
3318 	reg = CR2_PTM | CR2_RECINVSID;
3319 
3320 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3321 		reg |= CR2_E2H;
3322 
3323 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3324 
3325 	/* Stream table */
3326 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3327 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3328 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3329 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3330 
3331 	/* Command queue */
3332 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3333 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3334 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3335 
3336 	enables = CR0_CMDQEN;
3337 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3338 				      ARM_SMMU_CR0ACK);
3339 	if (ret) {
3340 		dev_err(smmu->dev, "failed to enable command queue\n");
3341 		return ret;
3342 	}
3343 
3344 	/* Invalidate any cached configuration */
3345 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3346 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3347 
3348 	/* Invalidate any stale TLB entries */
3349 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3350 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3351 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3352 	}
3353 
3354 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3355 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3356 
3357 	/* Event queue */
3358 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3359 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3360 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3361 
3362 	enables |= CR0_EVTQEN;
3363 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3364 				      ARM_SMMU_CR0ACK);
3365 	if (ret) {
3366 		dev_err(smmu->dev, "failed to enable event queue\n");
3367 		return ret;
3368 	}
3369 
3370 	/* PRI queue */
3371 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3372 		writeq_relaxed(smmu->priq.q.q_base,
3373 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3374 		writel_relaxed(smmu->priq.q.llq.prod,
3375 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3376 		writel_relaxed(smmu->priq.q.llq.cons,
3377 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3378 
3379 		enables |= CR0_PRIQEN;
3380 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3381 					      ARM_SMMU_CR0ACK);
3382 		if (ret) {
3383 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3384 			return ret;
3385 		}
3386 	}
3387 
3388 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3389 		enables |= CR0_ATSCHK;
3390 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3391 					      ARM_SMMU_CR0ACK);
3392 		if (ret) {
3393 			dev_err(smmu->dev, "failed to enable ATS check\n");
3394 			return ret;
3395 		}
3396 	}
3397 
3398 	ret = arm_smmu_setup_irqs(smmu);
3399 	if (ret) {
3400 		dev_err(smmu->dev, "failed to setup irqs\n");
3401 		return ret;
3402 	}
3403 
3404 	if (is_kdump_kernel())
3405 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3406 
3407 	/* Enable the SMMU interface, or ensure bypass */
3408 	if (!bypass || disable_bypass) {
3409 		enables |= CR0_SMMUEN;
3410 	} else {
3411 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3412 		if (ret)
3413 			return ret;
3414 	}
3415 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3416 				      ARM_SMMU_CR0ACK);
3417 	if (ret) {
3418 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3419 		return ret;
3420 	}
3421 
3422 	return 0;
3423 }
3424 
3425 #define IIDR_IMPLEMENTER_ARM		0x43b
3426 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
3427 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
3428 
3429 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3430 {
3431 	u32 reg;
3432 	unsigned int implementer, productid, variant, revision;
3433 
3434 	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3435 	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3436 	productid = FIELD_GET(IIDR_PRODUCTID, reg);
3437 	variant = FIELD_GET(IIDR_VARIANT, reg);
3438 	revision = FIELD_GET(IIDR_REVISION, reg);
3439 
3440 	switch (implementer) {
3441 	case IIDR_IMPLEMENTER_ARM:
3442 		switch (productid) {
3443 		case IIDR_PRODUCTID_ARM_MMU_600:
3444 			/* Arm erratum 1076982 */
3445 			if (variant == 0 && revision <= 2)
3446 				smmu->features &= ~ARM_SMMU_FEAT_SEV;
3447 			/* Arm erratum 1209401 */
3448 			if (variant < 2)
3449 				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3450 			break;
3451 		case IIDR_PRODUCTID_ARM_MMU_700:
3452 			/* Arm erratum 2812531 */
3453 			smmu->features &= ~ARM_SMMU_FEAT_BTM;
3454 			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3455 			/* Arm errata 2268618, 2812531 */
3456 			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3457 			break;
3458 		}
3459 		break;
3460 	}
3461 }
3462 
3463 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3464 {
3465 	u32 reg;
3466 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3467 
3468 	/* IDR0 */
3469 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3470 
3471 	/* 2-level structures */
3472 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3473 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3474 
3475 	if (reg & IDR0_CD2L)
3476 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3477 
3478 	/*
3479 	 * Translation table endianness.
3480 	 * We currently require the same endianness as the CPU, but this
3481 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3482 	 */
3483 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3484 	case IDR0_TTENDIAN_MIXED:
3485 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3486 		break;
3487 #ifdef __BIG_ENDIAN
3488 	case IDR0_TTENDIAN_BE:
3489 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3490 		break;
3491 #else
3492 	case IDR0_TTENDIAN_LE:
3493 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3494 		break;
3495 #endif
3496 	default:
3497 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3498 		return -ENXIO;
3499 	}
3500 
3501 	/* Boolean feature flags */
3502 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3503 		smmu->features |= ARM_SMMU_FEAT_PRI;
3504 
3505 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3506 		smmu->features |= ARM_SMMU_FEAT_ATS;
3507 
3508 	if (reg & IDR0_SEV)
3509 		smmu->features |= ARM_SMMU_FEAT_SEV;
3510 
3511 	if (reg & IDR0_MSI) {
3512 		smmu->features |= ARM_SMMU_FEAT_MSI;
3513 		if (coherent && !disable_msipolling)
3514 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3515 	}
3516 
3517 	if (reg & IDR0_HYP) {
3518 		smmu->features |= ARM_SMMU_FEAT_HYP;
3519 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3520 			smmu->features |= ARM_SMMU_FEAT_E2H;
3521 	}
3522 
3523 	/*
3524 	 * The coherency feature as set by FW is used in preference to the ID
3525 	 * register, but warn on mismatch.
3526 	 */
3527 	if (!!(reg & IDR0_COHACC) != coherent)
3528 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3529 			 coherent ? "true" : "false");
3530 
3531 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3532 	case IDR0_STALL_MODEL_FORCE:
3533 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3534 		fallthrough;
3535 	case IDR0_STALL_MODEL_STALL:
3536 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3537 	}
3538 
3539 	if (reg & IDR0_S1P)
3540 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3541 
3542 	if (reg & IDR0_S2P)
3543 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3544 
3545 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3546 		dev_err(smmu->dev, "no translation support!\n");
3547 		return -ENXIO;
3548 	}
3549 
3550 	/* We only support the AArch64 table format at present */
3551 	switch (FIELD_GET(IDR0_TTF, reg)) {
3552 	case IDR0_TTF_AARCH32_64:
3553 		smmu->ias = 40;
3554 		fallthrough;
3555 	case IDR0_TTF_AARCH64:
3556 		break;
3557 	default:
3558 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3559 		return -ENXIO;
3560 	}
3561 
3562 	/* ASID/VMID sizes */
3563 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3564 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3565 
3566 	/* IDR1 */
3567 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3568 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3569 		dev_err(smmu->dev, "embedded implementation not supported\n");
3570 		return -ENXIO;
3571 	}
3572 
3573 	/* Queue sizes, capped to ensure natural alignment */
3574 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3575 					     FIELD_GET(IDR1_CMDQS, reg));
3576 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3577 		/*
3578 		 * We don't support splitting up batches, so one batch of
3579 		 * commands plus an extra sync needs to fit inside the command
3580 		 * queue. There's also no way we can handle the weird alignment
3581 		 * restrictions on the base pointer for a unit-length queue.
3582 		 */
3583 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3584 			CMDQ_BATCH_ENTRIES);
3585 		return -ENXIO;
3586 	}
3587 
3588 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3589 					     FIELD_GET(IDR1_EVTQS, reg));
3590 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3591 					     FIELD_GET(IDR1_PRIQS, reg));
3592 
3593 	/* SID/SSID sizes */
3594 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3595 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3596 	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3597 
3598 	/*
3599 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3600 	 * table, use a linear table instead.
3601 	 */
3602 	if (smmu->sid_bits <= STRTAB_SPLIT)
3603 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3604 
3605 	/* IDR3 */
3606 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3607 	if (FIELD_GET(IDR3_RIL, reg))
3608 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3609 
3610 	/* IDR5 */
3611 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3612 
3613 	/* Maximum number of outstanding stalls */
3614 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3615 
3616 	/* Page sizes */
3617 	if (reg & IDR5_GRAN64K)
3618 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3619 	if (reg & IDR5_GRAN16K)
3620 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3621 	if (reg & IDR5_GRAN4K)
3622 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3623 
3624 	/* Input address size */
3625 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3626 		smmu->features |= ARM_SMMU_FEAT_VAX;
3627 
3628 	/* Output address size */
3629 	switch (FIELD_GET(IDR5_OAS, reg)) {
3630 	case IDR5_OAS_32_BIT:
3631 		smmu->oas = 32;
3632 		break;
3633 	case IDR5_OAS_36_BIT:
3634 		smmu->oas = 36;
3635 		break;
3636 	case IDR5_OAS_40_BIT:
3637 		smmu->oas = 40;
3638 		break;
3639 	case IDR5_OAS_42_BIT:
3640 		smmu->oas = 42;
3641 		break;
3642 	case IDR5_OAS_44_BIT:
3643 		smmu->oas = 44;
3644 		break;
3645 	case IDR5_OAS_52_BIT:
3646 		smmu->oas = 52;
3647 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3648 		break;
3649 	default:
3650 		dev_info(smmu->dev,
3651 			"unknown output address size. Truncating to 48-bit\n");
3652 		fallthrough;
3653 	case IDR5_OAS_48_BIT:
3654 		smmu->oas = 48;
3655 	}
3656 
3657 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3658 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3659 	else
3660 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3661 
3662 	/* Set the DMA mask for our table walker */
3663 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3664 		dev_warn(smmu->dev,
3665 			 "failed to set DMA mask for table walker\n");
3666 
3667 	smmu->ias = max(smmu->ias, smmu->oas);
3668 
3669 	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3670 	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3671 		smmu->features |= ARM_SMMU_FEAT_NESTING;
3672 
3673 	arm_smmu_device_iidr_probe(smmu);
3674 
3675 	if (arm_smmu_sva_supported(smmu))
3676 		smmu->features |= ARM_SMMU_FEAT_SVA;
3677 
3678 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3679 		 smmu->ias, smmu->oas, smmu->features);
3680 	return 0;
3681 }
3682 
3683 #ifdef CONFIG_ACPI
3684 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3685 {
3686 	switch (model) {
3687 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3688 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3689 		break;
3690 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3691 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3692 		break;
3693 	}
3694 
3695 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3696 }
3697 
3698 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3699 				      struct arm_smmu_device *smmu)
3700 {
3701 	struct acpi_iort_smmu_v3 *iort_smmu;
3702 	struct device *dev = smmu->dev;
3703 	struct acpi_iort_node *node;
3704 
3705 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3706 
3707 	/* Retrieve SMMUv3 specific data */
3708 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3709 
3710 	acpi_smmu_get_options(iort_smmu->model, smmu);
3711 
3712 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3713 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3714 
3715 	return 0;
3716 }
3717 #else
3718 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3719 					     struct arm_smmu_device *smmu)
3720 {
3721 	return -ENODEV;
3722 }
3723 #endif
3724 
3725 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3726 				    struct arm_smmu_device *smmu)
3727 {
3728 	struct device *dev = &pdev->dev;
3729 	u32 cells;
3730 	int ret = -EINVAL;
3731 
3732 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3733 		dev_err(dev, "missing #iommu-cells property\n");
3734 	else if (cells != 1)
3735 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3736 	else
3737 		ret = 0;
3738 
3739 	parse_driver_options(smmu);
3740 
3741 	if (of_dma_is_coherent(dev->of_node))
3742 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3743 
3744 	return ret;
3745 }
3746 
3747 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3748 {
3749 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3750 		return SZ_64K;
3751 	else
3752 		return SZ_128K;
3753 }
3754 
3755 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3756 				      resource_size_t size)
3757 {
3758 	struct resource res = DEFINE_RES_MEM(start, size);
3759 
3760 	return devm_ioremap_resource(dev, &res);
3761 }
3762 
3763 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3764 {
3765 	struct list_head rmr_list;
3766 	struct iommu_resv_region *e;
3767 
3768 	INIT_LIST_HEAD(&rmr_list);
3769 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3770 
3771 	list_for_each_entry(e, &rmr_list, list) {
3772 		__le64 *step;
3773 		struct iommu_iort_rmr_data *rmr;
3774 		int ret, i;
3775 
3776 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3777 		for (i = 0; i < rmr->num_sids; i++) {
3778 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3779 			if (ret) {
3780 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3781 					rmr->sids[i]);
3782 				continue;
3783 			}
3784 
3785 			step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3786 			arm_smmu_init_bypass_stes(step, 1, true);
3787 		}
3788 	}
3789 
3790 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3791 }
3792 
3793 static int arm_smmu_device_probe(struct platform_device *pdev)
3794 {
3795 	int irq, ret;
3796 	struct resource *res;
3797 	resource_size_t ioaddr;
3798 	struct arm_smmu_device *smmu;
3799 	struct device *dev = &pdev->dev;
3800 	bool bypass;
3801 
3802 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3803 	if (!smmu)
3804 		return -ENOMEM;
3805 	smmu->dev = dev;
3806 
3807 	if (dev->of_node) {
3808 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3809 	} else {
3810 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3811 		if (ret == -ENODEV)
3812 			return ret;
3813 	}
3814 
3815 	/* Set bypass mode according to firmware probing result */
3816 	bypass = !!ret;
3817 
3818 	/* Base address */
3819 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3820 	if (!res)
3821 		return -EINVAL;
3822 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3823 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3824 		return -EINVAL;
3825 	}
3826 	ioaddr = res->start;
3827 
3828 	/*
3829 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3830 	 * the PMCG registers which are reserved by the PMU driver.
3831 	 */
3832 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3833 	if (IS_ERR(smmu->base))
3834 		return PTR_ERR(smmu->base);
3835 
3836 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3837 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3838 					       ARM_SMMU_REG_SZ);
3839 		if (IS_ERR(smmu->page1))
3840 			return PTR_ERR(smmu->page1);
3841 	} else {
3842 		smmu->page1 = smmu->base;
3843 	}
3844 
3845 	/* Interrupt lines */
3846 
3847 	irq = platform_get_irq_byname_optional(pdev, "combined");
3848 	if (irq > 0)
3849 		smmu->combined_irq = irq;
3850 	else {
3851 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3852 		if (irq > 0)
3853 			smmu->evtq.q.irq = irq;
3854 
3855 		irq = platform_get_irq_byname_optional(pdev, "priq");
3856 		if (irq > 0)
3857 			smmu->priq.q.irq = irq;
3858 
3859 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3860 		if (irq > 0)
3861 			smmu->gerr_irq = irq;
3862 	}
3863 	/* Probe the h/w */
3864 	ret = arm_smmu_device_hw_probe(smmu);
3865 	if (ret)
3866 		return ret;
3867 
3868 	/* Initialise in-memory data structures */
3869 	ret = arm_smmu_init_structures(smmu);
3870 	if (ret)
3871 		return ret;
3872 
3873 	/* Record our private device structure */
3874 	platform_set_drvdata(pdev, smmu);
3875 
3876 	/* Check for RMRs and install bypass STEs if any */
3877 	arm_smmu_rmr_install_bypass_ste(smmu);
3878 
3879 	/* Reset the device */
3880 	ret = arm_smmu_device_reset(smmu, bypass);
3881 	if (ret)
3882 		return ret;
3883 
3884 	/* And we're up. Go go go! */
3885 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3886 				     "smmu3.%pa", &ioaddr);
3887 	if (ret)
3888 		return ret;
3889 
3890 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3891 	if (ret) {
3892 		dev_err(dev, "Failed to register iommu\n");
3893 		iommu_device_sysfs_remove(&smmu->iommu);
3894 		return ret;
3895 	}
3896 
3897 	return 0;
3898 }
3899 
3900 static void arm_smmu_device_remove(struct platform_device *pdev)
3901 {
3902 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3903 
3904 	iommu_device_unregister(&smmu->iommu);
3905 	iommu_device_sysfs_remove(&smmu->iommu);
3906 	arm_smmu_device_disable(smmu);
3907 	iopf_queue_free(smmu->evtq.iopf);
3908 	ida_destroy(&smmu->vmid_map);
3909 }
3910 
3911 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3912 {
3913 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3914 
3915 	arm_smmu_device_disable(smmu);
3916 }
3917 
3918 static const struct of_device_id arm_smmu_of_match[] = {
3919 	{ .compatible = "arm,smmu-v3", },
3920 	{ },
3921 };
3922 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3923 
3924 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3925 {
3926 	arm_smmu_sva_notifier_synchronize();
3927 	platform_driver_unregister(drv);
3928 }
3929 
3930 static struct platform_driver arm_smmu_driver = {
3931 	.driver	= {
3932 		.name			= "arm-smmu-v3",
3933 		.of_match_table		= arm_smmu_of_match,
3934 		.suppress_bind_attrs	= true,
3935 	},
3936 	.probe	= arm_smmu_device_probe,
3937 	.remove_new = arm_smmu_device_remove,
3938 	.shutdown = arm_smmu_device_shutdown,
3939 };
3940 module_driver(arm_smmu_driver, platform_driver_register,
3941 	      arm_smmu_driver_unregister);
3942 
3943 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3944 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3945 MODULE_ALIAS("platform:arm-smmu-v3");
3946 MODULE_LICENSE("GPL v2");
3947