xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision e881e7839fba8a8452459a656eca90340cc34a2e)
1e86d1aa8SWill Deacon // SPDX-License-Identifier: GPL-2.0
2e86d1aa8SWill Deacon /*
3e86d1aa8SWill Deacon  * IOMMU API for ARM architected SMMUv3 implementations.
4e86d1aa8SWill Deacon  *
5e86d1aa8SWill Deacon  * Copyright (C) 2015 ARM Limited
6e86d1aa8SWill Deacon  *
7e86d1aa8SWill Deacon  * Author: Will Deacon <will.deacon@arm.com>
8e86d1aa8SWill Deacon  *
9e86d1aa8SWill Deacon  * This driver is powered by bad coffee and bombay mix.
10e86d1aa8SWill Deacon  */
11e86d1aa8SWill Deacon 
12e86d1aa8SWill Deacon #include <linux/acpi.h>
13e86d1aa8SWill Deacon #include <linux/acpi_iort.h>
14e86d1aa8SWill Deacon #include <linux/bitops.h>
15e86d1aa8SWill Deacon #include <linux/crash_dump.h>
16e86d1aa8SWill Deacon #include <linux/delay.h>
17e86d1aa8SWill Deacon #include <linux/dma-iommu.h>
18e86d1aa8SWill Deacon #include <linux/err.h>
19e86d1aa8SWill Deacon #include <linux/interrupt.h>
20e86d1aa8SWill Deacon #include <linux/io-pgtable.h>
21e86d1aa8SWill Deacon #include <linux/iopoll.h>
22e86d1aa8SWill Deacon #include <linux/module.h>
23e86d1aa8SWill Deacon #include <linux/msi.h>
24e86d1aa8SWill Deacon #include <linux/of.h>
25e86d1aa8SWill Deacon #include <linux/of_address.h>
26e86d1aa8SWill Deacon #include <linux/of_iommu.h>
27e86d1aa8SWill Deacon #include <linux/of_platform.h>
28e86d1aa8SWill Deacon #include <linux/pci.h>
29e86d1aa8SWill Deacon #include <linux/pci-ats.h>
30e86d1aa8SWill Deacon #include <linux/platform_device.h>
31e86d1aa8SWill Deacon 
32e86d1aa8SWill Deacon #include <linux/amba/bus.h>
33e86d1aa8SWill Deacon 
34*e881e783SJean-Philippe Brucker #include "arm-smmu-v3.h"
35e86d1aa8SWill Deacon 
36e86d1aa8SWill Deacon static bool disable_bypass = 1;
379305d02aSBarry Song module_param(disable_bypass, bool, 0444);
38e86d1aa8SWill Deacon MODULE_PARM_DESC(disable_bypass,
39e86d1aa8SWill Deacon 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40e86d1aa8SWill Deacon 
41bd07a20aSBarry Song static bool disable_msipolling;
42bd07a20aSBarry Song module_param(disable_msipolling, bool, 0444);
43bd07a20aSBarry Song MODULE_PARM_DESC(disable_msipolling,
44bd07a20aSBarry Song 	"Disable MSI-based polling for CMD_SYNC completion.");
45bd07a20aSBarry Song 
/*
 * Row index into arm_smmu_msi_cfg[] below: one entry per interrupt source
 * (event queue, global error, PRI queue) that can be signalled via MSI.
 * ARM_SMMU_MAX_MSIS is the table size, not a real index.
 */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};
52e86d1aa8SWill Deacon 
/*
 * Per-interrupt-source register offsets for MSI configuration: each row
 * holds the three IRQ_CFG{0,1,2} register offsets for that source.
 */
static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};
70e86d1aa8SWill Deacon 
/* Maps a device-tree property name to an ARM_SMMU_OPT_* quirk flag. */
struct arm_smmu_option_prop {
	u32 opt;	/* ARM_SMMU_OPT_* bit to set in smmu->options */
	const char *prop;	/* DT property that enables it */
};
75e86d1aa8SWill Deacon 
/* Global ASID allocator; ALLOC1 reserves index 0 (allocation starts at 1). */
static DEFINE_XARRAY_ALLOC1(asid_xa);

/*
 * Known hardware quirks, matched against the SMMU's DT node by
 * parse_driver_options(). The table is terminated by a zero opt.
 */
static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
	{ 0, NULL},
};
83e86d1aa8SWill Deacon 
84e86d1aa8SWill Deacon static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
85e86d1aa8SWill Deacon 						 struct arm_smmu_device *smmu)
86e86d1aa8SWill Deacon {
87e86d1aa8SWill Deacon 	if (offset > SZ_64K)
88e86d1aa8SWill Deacon 		return smmu->page1 + offset - SZ_64K;
89e86d1aa8SWill Deacon 
90e86d1aa8SWill Deacon 	return smmu->base + offset;
91e86d1aa8SWill Deacon }
92e86d1aa8SWill Deacon 
93e86d1aa8SWill Deacon static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
94e86d1aa8SWill Deacon {
95e86d1aa8SWill Deacon 	return container_of(dom, struct arm_smmu_domain, domain);
96e86d1aa8SWill Deacon }
97e86d1aa8SWill Deacon 
98e86d1aa8SWill Deacon static void parse_driver_options(struct arm_smmu_device *smmu)
99e86d1aa8SWill Deacon {
100e86d1aa8SWill Deacon 	int i = 0;
101e86d1aa8SWill Deacon 
102e86d1aa8SWill Deacon 	do {
103e86d1aa8SWill Deacon 		if (of_property_read_bool(smmu->dev->of_node,
104e86d1aa8SWill Deacon 						arm_smmu_options[i].prop)) {
105e86d1aa8SWill Deacon 			smmu->options |= arm_smmu_options[i].opt;
106e86d1aa8SWill Deacon 			dev_notice(smmu->dev, "option %s\n",
107e86d1aa8SWill Deacon 				arm_smmu_options[i].prop);
108e86d1aa8SWill Deacon 		}
109e86d1aa8SWill Deacon 	} while (arm_smmu_options[++i].opt);
110e86d1aa8SWill Deacon }
111e86d1aa8SWill Deacon 
112e86d1aa8SWill Deacon /* Low-level queue manipulation functions */
113e86d1aa8SWill Deacon static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
114e86d1aa8SWill Deacon {
115e86d1aa8SWill Deacon 	u32 space, prod, cons;
116e86d1aa8SWill Deacon 
117e86d1aa8SWill Deacon 	prod = Q_IDX(q, q->prod);
118e86d1aa8SWill Deacon 	cons = Q_IDX(q, q->cons);
119e86d1aa8SWill Deacon 
120e86d1aa8SWill Deacon 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
121e86d1aa8SWill Deacon 		space = (1 << q->max_n_shift) - (prod - cons);
122e86d1aa8SWill Deacon 	else
123e86d1aa8SWill Deacon 		space = cons - prod;
124e86d1aa8SWill Deacon 
125e86d1aa8SWill Deacon 	return space >= n;
126e86d1aa8SWill Deacon }
127e86d1aa8SWill Deacon 
128e86d1aa8SWill Deacon static bool queue_full(struct arm_smmu_ll_queue *q)
129e86d1aa8SWill Deacon {
130e86d1aa8SWill Deacon 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
131e86d1aa8SWill Deacon 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
132e86d1aa8SWill Deacon }
133e86d1aa8SWill Deacon 
134e86d1aa8SWill Deacon static bool queue_empty(struct arm_smmu_ll_queue *q)
135e86d1aa8SWill Deacon {
136e86d1aa8SWill Deacon 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
137e86d1aa8SWill Deacon 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
138e86d1aa8SWill Deacon }
139e86d1aa8SWill Deacon 
140e86d1aa8SWill Deacon static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
141e86d1aa8SWill Deacon {
142e86d1aa8SWill Deacon 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
143e86d1aa8SWill Deacon 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
144e86d1aa8SWill Deacon 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
145e86d1aa8SWill Deacon 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
146e86d1aa8SWill Deacon }
147e86d1aa8SWill Deacon 
/*
 * Publish the shadow cons pointer to the hardware CONS register. The
 * barrier/store order here is deliberate; do not reorder.
 */
static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}
157e86d1aa8SWill Deacon 
158e86d1aa8SWill Deacon static void queue_inc_cons(struct arm_smmu_ll_queue *q)
159e86d1aa8SWill Deacon {
160e86d1aa8SWill Deacon 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
161e86d1aa8SWill Deacon 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
162e86d1aa8SWill Deacon }
163e86d1aa8SWill Deacon 
164e86d1aa8SWill Deacon static int queue_sync_prod_in(struct arm_smmu_queue *q)
165e86d1aa8SWill Deacon {
166a76a3777SZhou Wang 	u32 prod;
167e86d1aa8SWill Deacon 	int ret = 0;
168a76a3777SZhou Wang 
169a76a3777SZhou Wang 	/*
170a76a3777SZhou Wang 	 * We can't use the _relaxed() variant here, as we must prevent
171a76a3777SZhou Wang 	 * speculative reads of the queue before we have determined that
172a76a3777SZhou Wang 	 * prod has indeed moved.
173a76a3777SZhou Wang 	 */
174a76a3777SZhou Wang 	prod = readl(q->prod_reg);
175e86d1aa8SWill Deacon 
176e86d1aa8SWill Deacon 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
177e86d1aa8SWill Deacon 		ret = -EOVERFLOW;
178e86d1aa8SWill Deacon 
179e86d1aa8SWill Deacon 	q->llq.prod = prod;
180e86d1aa8SWill Deacon 	return ret;
181e86d1aa8SWill Deacon }
182e86d1aa8SWill Deacon 
183e86d1aa8SWill Deacon static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
184e86d1aa8SWill Deacon {
185e86d1aa8SWill Deacon 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
186e86d1aa8SWill Deacon 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
187e86d1aa8SWill Deacon }
188e86d1aa8SWill Deacon 
189e86d1aa8SWill Deacon static void queue_poll_init(struct arm_smmu_device *smmu,
190e86d1aa8SWill Deacon 			    struct arm_smmu_queue_poll *qp)
191e86d1aa8SWill Deacon {
192e86d1aa8SWill Deacon 	qp->delay = 1;
193e86d1aa8SWill Deacon 	qp->spin_cnt = 0;
194e86d1aa8SWill Deacon 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
195e86d1aa8SWill Deacon 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
196e86d1aa8SWill Deacon }
197e86d1aa8SWill Deacon 
198e86d1aa8SWill Deacon static int queue_poll(struct arm_smmu_queue_poll *qp)
199e86d1aa8SWill Deacon {
200e86d1aa8SWill Deacon 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
201e86d1aa8SWill Deacon 		return -ETIMEDOUT;
202e86d1aa8SWill Deacon 
203e86d1aa8SWill Deacon 	if (qp->wfe) {
204e86d1aa8SWill Deacon 		wfe();
205e86d1aa8SWill Deacon 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
206e86d1aa8SWill Deacon 		cpu_relax();
207e86d1aa8SWill Deacon 	} else {
208e86d1aa8SWill Deacon 		udelay(qp->delay);
209e86d1aa8SWill Deacon 		qp->delay *= 2;
210e86d1aa8SWill Deacon 		qp->spin_cnt = 0;
211e86d1aa8SWill Deacon 	}
212e86d1aa8SWill Deacon 
213e86d1aa8SWill Deacon 	return 0;
214e86d1aa8SWill Deacon }
215e86d1aa8SWill Deacon 
216e86d1aa8SWill Deacon static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
217e86d1aa8SWill Deacon {
218e86d1aa8SWill Deacon 	int i;
219e86d1aa8SWill Deacon 
220e86d1aa8SWill Deacon 	for (i = 0; i < n_dwords; ++i)
221e86d1aa8SWill Deacon 		*dst++ = cpu_to_le64(*src++);
222e86d1aa8SWill Deacon }
223e86d1aa8SWill Deacon 
224376cdf66SJean-Philippe Brucker static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
225e86d1aa8SWill Deacon {
226e86d1aa8SWill Deacon 	int i;
227e86d1aa8SWill Deacon 
228e86d1aa8SWill Deacon 	for (i = 0; i < n_dwords; ++i)
229e86d1aa8SWill Deacon 		*dst++ = le64_to_cpu(*src++);
230e86d1aa8SWill Deacon }
231e86d1aa8SWill Deacon 
/*
 * Pop one entry from the queue into @ent (converted to CPU endianness),
 * then advance and publish the cons pointer. The read must complete before
 * cons is published (queue_sync_cons_out provides the barrier).
 * Returns -EAGAIN if the queue is empty.
 */
static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
	if (queue_empty(&q->llq))
		return -EAGAIN;

	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
	queue_inc_cons(&q->llq);
	queue_sync_cons_out(q);
	return 0;
}
242e86d1aa8SWill Deacon 
243e86d1aa8SWill Deacon /* High-level queue accessors */
/* High-level queue accessors */
/*
 * Encode @ent into the command-queue entry @cmd (CMDQ_ENT_SZ_SHIFT bytes,
 * zeroed first). Returns -EINVAL for an unrecognised PRI response code and
 * -ENOENT for an unknown opcode; 0 otherwise.
 */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		/* Opcode-only commands: no operands to encode. */
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
		break;
	case CMDQ_OP_CFGI_CD:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;	/* CFGI_CD shares the SID/leaf fields of CFGI_STE */
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;	/* both variants carry the VMID */
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		/* Reject anything that isn't a known PRI response code. */
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_CMD_SYNC:
		/*
		 * Completion is signalled either by an MSI write (when a
		 * payload address was supplied) or by SEV.
		 */
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
336e86d1aa8SWill Deacon 
337e86d1aa8SWill Deacon static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
338e86d1aa8SWill Deacon 					 u32 prod)
339e86d1aa8SWill Deacon {
340e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->cmdq.q;
341e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent ent = {
342e86d1aa8SWill Deacon 		.opcode = CMDQ_OP_CMD_SYNC,
343e86d1aa8SWill Deacon 	};
344e86d1aa8SWill Deacon 
345e86d1aa8SWill Deacon 	/*
346e86d1aa8SWill Deacon 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
347e86d1aa8SWill Deacon 	 * payload, so the write will zero the entire command on that platform.
348e86d1aa8SWill Deacon 	 */
349bd07a20aSBarry Song 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
350e86d1aa8SWill Deacon 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
351e86d1aa8SWill Deacon 				   q->ent_dwords * 8;
352e86d1aa8SWill Deacon 	}
353e86d1aa8SWill Deacon 
354e86d1aa8SWill Deacon 	arm_smmu_cmdq_build_cmd(cmd, &ent);
355e86d1aa8SWill Deacon }
356e86d1aa8SWill Deacon 
357e86d1aa8SWill Deacon static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
358e86d1aa8SWill Deacon {
359e86d1aa8SWill Deacon 	static const char *cerror_str[] = {
360e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
361e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
362e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
363e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
364e86d1aa8SWill Deacon 	};
365e86d1aa8SWill Deacon 
366e86d1aa8SWill Deacon 	int i;
367e86d1aa8SWill Deacon 	u64 cmd[CMDQ_ENT_DWORDS];
368e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->cmdq.q;
369e86d1aa8SWill Deacon 	u32 cons = readl_relaxed(q->cons_reg);
370e86d1aa8SWill Deacon 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
371e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd_sync = {
372e86d1aa8SWill Deacon 		.opcode = CMDQ_OP_CMD_SYNC,
373e86d1aa8SWill Deacon 	};
374e86d1aa8SWill Deacon 
375e86d1aa8SWill Deacon 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
376e86d1aa8SWill Deacon 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
377e86d1aa8SWill Deacon 
378e86d1aa8SWill Deacon 	switch (idx) {
379e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_ABT_IDX:
380e86d1aa8SWill Deacon 		dev_err(smmu->dev, "retrying command fetch\n");
381e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_NONE_IDX:
382e86d1aa8SWill Deacon 		return;
383e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
384e86d1aa8SWill Deacon 		/*
385e86d1aa8SWill Deacon 		 * ATC Invalidation Completion timeout. CONS is still pointing
386e86d1aa8SWill Deacon 		 * at the CMD_SYNC. Attempt to complete other pending commands
387e86d1aa8SWill Deacon 		 * by repeating the CMD_SYNC, though we might well end up back
388e86d1aa8SWill Deacon 		 * here since the ATC invalidation may still be pending.
389e86d1aa8SWill Deacon 		 */
390e86d1aa8SWill Deacon 		return;
391e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_ILL_IDX:
392e86d1aa8SWill Deacon 	default:
393e86d1aa8SWill Deacon 		break;
394e86d1aa8SWill Deacon 	}
395e86d1aa8SWill Deacon 
396e86d1aa8SWill Deacon 	/*
397e86d1aa8SWill Deacon 	 * We may have concurrent producers, so we need to be careful
398e86d1aa8SWill Deacon 	 * not to touch any of the shadow cmdq state.
399e86d1aa8SWill Deacon 	 */
400e86d1aa8SWill Deacon 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
401e86d1aa8SWill Deacon 	dev_err(smmu->dev, "skipping command in error state:\n");
402e86d1aa8SWill Deacon 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
403e86d1aa8SWill Deacon 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
404e86d1aa8SWill Deacon 
405e86d1aa8SWill Deacon 	/* Convert the erroneous command into a CMD_SYNC */
406e86d1aa8SWill Deacon 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
407e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
408e86d1aa8SWill Deacon 		return;
409e86d1aa8SWill Deacon 	}
410e86d1aa8SWill Deacon 
411e86d1aa8SWill Deacon 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
412e86d1aa8SWill Deacon }
413e86d1aa8SWill Deacon 
414e86d1aa8SWill Deacon /*
415e86d1aa8SWill Deacon  * Command queue locking.
416e86d1aa8SWill Deacon  * This is a form of bastardised rwlock with the following major changes:
417e86d1aa8SWill Deacon  *
418e86d1aa8SWill Deacon  * - The only LOCK routines are exclusive_trylock() and shared_lock().
419e86d1aa8SWill Deacon  *   Neither have barrier semantics, and instead provide only a control
420e86d1aa8SWill Deacon  *   dependency.
421e86d1aa8SWill Deacon  *
422e86d1aa8SWill Deacon  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
423e86d1aa8SWill Deacon  *   fails if the caller appears to be the last lock holder (yes, this is
424e86d1aa8SWill Deacon  *   racy). All successful UNLOCK routines have RELEASE semantics.
425e86d1aa8SWill Deacon  */
/*
 * Take the cmdq lock in shared mode. Provides only a control dependency
 * (no acquire barrier) -- see the "Command queue locking" comment above.
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	/* Exclusive holder present: wait for release, then race to re-add. */
	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}
443e86d1aa8SWill Deacon 
/* Drop a shared hold on the cmdq lock, with RELEASE semantics. */
static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}
448e86d1aa8SWill Deacon 
/*
 * Drop a shared hold unless we appear to be the last holder, in which case
 * return false and keep the lock. The "last holder" check is deliberately
 * racy (see the locking comment above) -- callers must tolerate that.
 */
static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
	if (atomic_read(&cmdq->lock) == 1)
		return false;

	arm_smmu_cmdq_shared_unlock(cmdq);
	return true;
}
457e86d1aa8SWill Deacon 
/*
 * Try to take the cmdq lock exclusively (counter 0 -> INT_MIN) with IRQs
 * disabled. On failure IRQs are restored and the macro evaluates to false.
 * Like shared_lock(), success provides only a control dependency.
 */
#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})

/* Release an exclusive hold (RELEASE semantics) and restore IRQs. */
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	local_irq_restore(flags);					\
})
473e86d1aa8SWill Deacon 
474e86d1aa8SWill Deacon 
475e86d1aa8SWill Deacon /*
476e86d1aa8SWill Deacon  * Command queue insertion.
477e86d1aa8SWill Deacon  * This is made fiddly by our attempts to achieve some sort of scalability
478e86d1aa8SWill Deacon  * since there is one queue shared amongst all of the CPUs in the system.  If
479e86d1aa8SWill Deacon  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
480e86d1aa8SWill Deacon  * then you'll *love* this monstrosity.
481e86d1aa8SWill Deacon  *
482e86d1aa8SWill Deacon  * The basic idea is to split the queue up into ranges of commands that are
483e86d1aa8SWill Deacon  * owned by a given CPU; the owner may not have written all of the commands
484e86d1aa8SWill Deacon  * itself, but is responsible for advancing the hardware prod pointer when
485e86d1aa8SWill Deacon  * the time comes. The algorithm is roughly:
486e86d1aa8SWill Deacon  *
487e86d1aa8SWill Deacon  * 	1. Allocate some space in the queue. At this point we also discover
488e86d1aa8SWill Deacon  *	   whether the head of the queue is currently owned by another CPU,
489e86d1aa8SWill Deacon  *	   or whether we are the owner.
490e86d1aa8SWill Deacon  *
491e86d1aa8SWill Deacon  *	2. Write our commands into our allocated slots in the queue.
492e86d1aa8SWill Deacon  *
493e86d1aa8SWill Deacon  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
494e86d1aa8SWill Deacon  *
495e86d1aa8SWill Deacon  *	4. If we are an owner:
496e86d1aa8SWill Deacon  *		a. Wait for the previous owner to finish.
497e86d1aa8SWill Deacon  *		b. Mark the queue head as unowned, which tells us the range
498e86d1aa8SWill Deacon  *		   that we are responsible for publishing.
499e86d1aa8SWill Deacon  *		c. Wait for all commands in our owned range to become valid.
500e86d1aa8SWill Deacon  *		d. Advance the hardware prod pointer.
501e86d1aa8SWill Deacon  *		e. Tell the next owner we've finished.
502e86d1aa8SWill Deacon  *
503e86d1aa8SWill Deacon  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
504e86d1aa8SWill Deacon  *	   owner), then we need to stick around until it has completed:
505e86d1aa8SWill Deacon  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
506e86d1aa8SWill Deacon  *		   to clear the first 4 bytes.
507e86d1aa8SWill Deacon  *		b. Otherwise, we spin waiting for the hardware cons pointer to
508e86d1aa8SWill Deacon  *		   advance past our command.
509e86d1aa8SWill Deacon  *
510e86d1aa8SWill Deacon  * The devil is in the details, particularly the use of locking for handling
511e86d1aa8SWill Deacon  * SYNC completion and freeing up space in the queue before we think that it is
512e86d1aa8SWill Deacon  * full.
513e86d1aa8SWill Deacon  */
/*
 * Walk the valid bitmap for queue entries [sprod, eprod), one bitmap word
 * at a time. If @set, toggle the covered bits (marking entries valid for
 * the current wrap -- see the inline comment); otherwise poll until every
 * covered bit reaches the value expected for @sprod's wrap phase.
 */
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	/* Scratch queue used purely for Q_IDX/Q_WRP arithmetic on prod. */
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	/* Bitmap word/bit coordinates of the (exclusive) end position. */
	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		/* Final word of the range: stop at ebidx, not the word end. */
		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			/* Expected bit pattern: all-ones iff wrap bit clear. */
			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}
559e86d1aa8SWill Deacon 
/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}
566e86d1aa8SWill Deacon 
/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}
573e86d1aa8SWill Deacon 
/*
 * Wait for the command queue to become non-full, refreshing @llq with the
 * latest shadow state on success. Returns 0 or -ETIMEDOUT.
 */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	/* Somebody else holds the lock: poll the shadow state they update. */
	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}
605e86d1aa8SWill Deacon 
/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	/*
	 * The CMD_SYNC's MSI payload targets the command's own slot (see
	 * arm_smmu_cmdq_build_sync_cmd), so completion zeroes this word.
	 */
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	/* On success, report cons as one past our CMD_SYNC. */
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}
629e86d1aa8SWill Deacon 
630e86d1aa8SWill Deacon /*
631e86d1aa8SWill Deacon  * Wait until the SMMU cons index passes llq->prod.
632e86d1aa8SWill Deacon  * Must be called with the cmdq lock held in some capacity.
633e86d1aa8SWill Deacon  */
634e86d1aa8SWill Deacon static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
635e86d1aa8SWill Deacon 					       struct arm_smmu_ll_queue *llq)
636e86d1aa8SWill Deacon {
637e86d1aa8SWill Deacon 	struct arm_smmu_queue_poll qp;
638e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
639e86d1aa8SWill Deacon 	u32 prod = llq->prod;
640e86d1aa8SWill Deacon 	int ret = 0;
641e86d1aa8SWill Deacon 
642e86d1aa8SWill Deacon 	queue_poll_init(smmu, &qp);
643e86d1aa8SWill Deacon 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
644e86d1aa8SWill Deacon 	do {
645e86d1aa8SWill Deacon 		if (queue_consumed(llq, prod))
646e86d1aa8SWill Deacon 			break;
647e86d1aa8SWill Deacon 
648e86d1aa8SWill Deacon 		ret = queue_poll(&qp);
649e86d1aa8SWill Deacon 
650e86d1aa8SWill Deacon 		/*
651e86d1aa8SWill Deacon 		 * This needs to be a readl() so that our subsequent call
652e86d1aa8SWill Deacon 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
653e86d1aa8SWill Deacon 		 *
654e86d1aa8SWill Deacon 		 * Specifically, we need to ensure that we observe all
655e86d1aa8SWill Deacon 		 * shared_lock()s by other CMD_SYNCs that share our owner,
656e86d1aa8SWill Deacon 		 * so that a failing call to tryunlock() means that we're
657e86d1aa8SWill Deacon 		 * the last one out and therefore we can safely advance
658e86d1aa8SWill Deacon 		 * cmdq->q.llq.cons. Roughly speaking:
659e86d1aa8SWill Deacon 		 *
660e86d1aa8SWill Deacon 		 * CPU 0		CPU1			CPU2 (us)
661e86d1aa8SWill Deacon 		 *
662e86d1aa8SWill Deacon 		 * if (sync)
663e86d1aa8SWill Deacon 		 * 	shared_lock();
664e86d1aa8SWill Deacon 		 *
665e86d1aa8SWill Deacon 		 * dma_wmb();
666e86d1aa8SWill Deacon 		 * set_valid_map();
667e86d1aa8SWill Deacon 		 *
668e86d1aa8SWill Deacon 		 * 			if (owner) {
669e86d1aa8SWill Deacon 		 *				poll_valid_map();
670e86d1aa8SWill Deacon 		 *				<control dependency>
671e86d1aa8SWill Deacon 		 *				writel(prod_reg);
672e86d1aa8SWill Deacon 		 *
673e86d1aa8SWill Deacon 		 *						readl(cons_reg);
674e86d1aa8SWill Deacon 		 *						tryunlock();
675e86d1aa8SWill Deacon 		 *
676e86d1aa8SWill Deacon 		 * Requires us to see CPU 0's shared_lock() acquisition.
677e86d1aa8SWill Deacon 		 */
678e86d1aa8SWill Deacon 		llq->cons = readl(cmdq->q.cons_reg);
679e86d1aa8SWill Deacon 	} while (!ret);
680e86d1aa8SWill Deacon 
681e86d1aa8SWill Deacon 	return ret;
682e86d1aa8SWill Deacon }
683e86d1aa8SWill Deacon 
684e86d1aa8SWill Deacon static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
685e86d1aa8SWill Deacon 					 struct arm_smmu_ll_queue *llq)
686e86d1aa8SWill Deacon {
687bd07a20aSBarry Song 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
688e86d1aa8SWill Deacon 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
689e86d1aa8SWill Deacon 
690e86d1aa8SWill Deacon 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
691e86d1aa8SWill Deacon }
692e86d1aa8SWill Deacon 
693e86d1aa8SWill Deacon static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
694e86d1aa8SWill Deacon 					u32 prod, int n)
695e86d1aa8SWill Deacon {
696e86d1aa8SWill Deacon 	int i;
697e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue llq = {
698e86d1aa8SWill Deacon 		.max_n_shift	= cmdq->q.llq.max_n_shift,
699e86d1aa8SWill Deacon 		.prod		= prod,
700e86d1aa8SWill Deacon 	};
701e86d1aa8SWill Deacon 
702e86d1aa8SWill Deacon 	for (i = 0; i < n; ++i) {
703e86d1aa8SWill Deacon 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
704e86d1aa8SWill Deacon 
705e86d1aa8SWill Deacon 		prod = queue_inc_prod_n(&llq, i);
706e86d1aa8SWill Deacon 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
707e86d1aa8SWill Deacon 	}
708e86d1aa8SWill Deacon }
709e86d1aa8SWill Deacon 
710e86d1aa8SWill Deacon /*
711e86d1aa8SWill Deacon  * This is the actual insertion function, and provides the following
712e86d1aa8SWill Deacon  * ordering guarantees to callers:
713e86d1aa8SWill Deacon  *
714e86d1aa8SWill Deacon  * - There is a dma_wmb() before publishing any commands to the queue.
715e86d1aa8SWill Deacon  *   This can be relied upon to order prior writes to data structures
716e86d1aa8SWill Deacon  *   in memory (such as a CD or an STE) before the command.
717e86d1aa8SWill Deacon  *
718e86d1aa8SWill Deacon  * - On completion of a CMD_SYNC, there is a control dependency.
719e86d1aa8SWill Deacon  *   This can be relied upon to order subsequent writes to memory (e.g.
720e86d1aa8SWill Deacon  *   freeing an IOVA) after completion of the CMD_SYNC.
721e86d1aa8SWill Deacon  *
722e86d1aa8SWill Deacon  * - Command insertion is totally ordered, so if two CPUs each race to
723e86d1aa8SWill Deacon  *   insert their own list of commands then all of the commands from one
724e86d1aa8SWill Deacon  *   CPU will appear before any of the commands from the other CPU.
725e86d1aa8SWill Deacon  */
726e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
727e86d1aa8SWill Deacon 				       u64 *cmds, int n, bool sync)
728e86d1aa8SWill Deacon {
729e86d1aa8SWill Deacon 	u64 cmd_sync[CMDQ_ENT_DWORDS];
730e86d1aa8SWill Deacon 	u32 prod;
731e86d1aa8SWill Deacon 	unsigned long flags;
732e86d1aa8SWill Deacon 	bool owner;
733e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
734e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue llq = {
735e86d1aa8SWill Deacon 		.max_n_shift = cmdq->q.llq.max_n_shift,
736e86d1aa8SWill Deacon 	}, head = llq;
737e86d1aa8SWill Deacon 	int ret = 0;
738e86d1aa8SWill Deacon 
739e86d1aa8SWill Deacon 	/* 1. Allocate some space in the queue */
740e86d1aa8SWill Deacon 	local_irq_save(flags);
741e86d1aa8SWill Deacon 	llq.val = READ_ONCE(cmdq->q.llq.val);
742e86d1aa8SWill Deacon 	do {
743e86d1aa8SWill Deacon 		u64 old;
744e86d1aa8SWill Deacon 
745e86d1aa8SWill Deacon 		while (!queue_has_space(&llq, n + sync)) {
746e86d1aa8SWill Deacon 			local_irq_restore(flags);
747e86d1aa8SWill Deacon 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
748e86d1aa8SWill Deacon 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
749e86d1aa8SWill Deacon 			local_irq_save(flags);
750e86d1aa8SWill Deacon 		}
751e86d1aa8SWill Deacon 
752e86d1aa8SWill Deacon 		head.cons = llq.cons;
753e86d1aa8SWill Deacon 		head.prod = queue_inc_prod_n(&llq, n + sync) |
754e86d1aa8SWill Deacon 					     CMDQ_PROD_OWNED_FLAG;
755e86d1aa8SWill Deacon 
756e86d1aa8SWill Deacon 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
757e86d1aa8SWill Deacon 		if (old == llq.val)
758e86d1aa8SWill Deacon 			break;
759e86d1aa8SWill Deacon 
760e86d1aa8SWill Deacon 		llq.val = old;
761e86d1aa8SWill Deacon 	} while (1);
762e86d1aa8SWill Deacon 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
763e86d1aa8SWill Deacon 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
764e86d1aa8SWill Deacon 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
765e86d1aa8SWill Deacon 
766e86d1aa8SWill Deacon 	/*
767e86d1aa8SWill Deacon 	 * 2. Write our commands into the queue
768e86d1aa8SWill Deacon 	 * Dependency ordering from the cmpxchg() loop above.
769e86d1aa8SWill Deacon 	 */
770e86d1aa8SWill Deacon 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
771e86d1aa8SWill Deacon 	if (sync) {
772e86d1aa8SWill Deacon 		prod = queue_inc_prod_n(&llq, n);
773e86d1aa8SWill Deacon 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
774e86d1aa8SWill Deacon 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
775e86d1aa8SWill Deacon 
776e86d1aa8SWill Deacon 		/*
777e86d1aa8SWill Deacon 		 * In order to determine completion of our CMD_SYNC, we must
778e86d1aa8SWill Deacon 		 * ensure that the queue can't wrap twice without us noticing.
779e86d1aa8SWill Deacon 		 * We achieve that by taking the cmdq lock as shared before
780e86d1aa8SWill Deacon 		 * marking our slot as valid.
781e86d1aa8SWill Deacon 		 */
782e86d1aa8SWill Deacon 		arm_smmu_cmdq_shared_lock(cmdq);
783e86d1aa8SWill Deacon 	}
784e86d1aa8SWill Deacon 
785e86d1aa8SWill Deacon 	/* 3. Mark our slots as valid, ensuring commands are visible first */
786e86d1aa8SWill Deacon 	dma_wmb();
787e86d1aa8SWill Deacon 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
788e86d1aa8SWill Deacon 
789e86d1aa8SWill Deacon 	/* 4. If we are the owner, take control of the SMMU hardware */
790e86d1aa8SWill Deacon 	if (owner) {
791e86d1aa8SWill Deacon 		/* a. Wait for previous owner to finish */
792e86d1aa8SWill Deacon 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
793e86d1aa8SWill Deacon 
794e86d1aa8SWill Deacon 		/* b. Stop gathering work by clearing the owned flag */
795e86d1aa8SWill Deacon 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
796e86d1aa8SWill Deacon 						   &cmdq->q.llq.atomic.prod);
797e86d1aa8SWill Deacon 		prod &= ~CMDQ_PROD_OWNED_FLAG;
798e86d1aa8SWill Deacon 
799e86d1aa8SWill Deacon 		/*
800e86d1aa8SWill Deacon 		 * c. Wait for any gathered work to be written to the queue.
801e86d1aa8SWill Deacon 		 * Note that we read our own entries so that we have the control
802e86d1aa8SWill Deacon 		 * dependency required by (d).
803e86d1aa8SWill Deacon 		 */
804e86d1aa8SWill Deacon 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
805e86d1aa8SWill Deacon 
806e86d1aa8SWill Deacon 		/*
807e86d1aa8SWill Deacon 		 * d. Advance the hardware prod pointer
808e86d1aa8SWill Deacon 		 * Control dependency ordering from the entries becoming valid.
809e86d1aa8SWill Deacon 		 */
810e86d1aa8SWill Deacon 		writel_relaxed(prod, cmdq->q.prod_reg);
811e86d1aa8SWill Deacon 
812e86d1aa8SWill Deacon 		/*
813e86d1aa8SWill Deacon 		 * e. Tell the next owner we're done
814e86d1aa8SWill Deacon 		 * Make sure we've updated the hardware first, so that we don't
815e86d1aa8SWill Deacon 		 * race to update prod and potentially move it backwards.
816e86d1aa8SWill Deacon 		 */
817e86d1aa8SWill Deacon 		atomic_set_release(&cmdq->owner_prod, prod);
818e86d1aa8SWill Deacon 	}
819e86d1aa8SWill Deacon 
820e86d1aa8SWill Deacon 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
821e86d1aa8SWill Deacon 	if (sync) {
822e86d1aa8SWill Deacon 		llq.prod = queue_inc_prod_n(&llq, n);
823e86d1aa8SWill Deacon 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
824e86d1aa8SWill Deacon 		if (ret) {
825e86d1aa8SWill Deacon 			dev_err_ratelimited(smmu->dev,
826e86d1aa8SWill Deacon 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
827e86d1aa8SWill Deacon 					    llq.prod,
828e86d1aa8SWill Deacon 					    readl_relaxed(cmdq->q.prod_reg),
829e86d1aa8SWill Deacon 					    readl_relaxed(cmdq->q.cons_reg));
830e86d1aa8SWill Deacon 		}
831e86d1aa8SWill Deacon 
832e86d1aa8SWill Deacon 		/*
833e86d1aa8SWill Deacon 		 * Try to unlock the cmdq lock. This will fail if we're the last
834e86d1aa8SWill Deacon 		 * reader, in which case we can safely update cmdq->q.llq.cons
835e86d1aa8SWill Deacon 		 */
836e86d1aa8SWill Deacon 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
837e86d1aa8SWill Deacon 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
838e86d1aa8SWill Deacon 			arm_smmu_cmdq_shared_unlock(cmdq);
839e86d1aa8SWill Deacon 		}
840e86d1aa8SWill Deacon 	}
841e86d1aa8SWill Deacon 
842e86d1aa8SWill Deacon 	local_irq_restore(flags);
843e86d1aa8SWill Deacon 	return ret;
844e86d1aa8SWill Deacon }
845e86d1aa8SWill Deacon 
846e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
847e86d1aa8SWill Deacon 				   struct arm_smmu_cmdq_ent *ent)
848e86d1aa8SWill Deacon {
849e86d1aa8SWill Deacon 	u64 cmd[CMDQ_ENT_DWORDS];
850e86d1aa8SWill Deacon 
851e86d1aa8SWill Deacon 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
852e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
853e86d1aa8SWill Deacon 			 ent->opcode);
854e86d1aa8SWill Deacon 		return -EINVAL;
855e86d1aa8SWill Deacon 	}
856e86d1aa8SWill Deacon 
857e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
858e86d1aa8SWill Deacon }
859e86d1aa8SWill Deacon 
860e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
861e86d1aa8SWill Deacon {
862e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
863e86d1aa8SWill Deacon }
864e86d1aa8SWill Deacon 
865e86d1aa8SWill Deacon static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
866e86d1aa8SWill Deacon 				    struct arm_smmu_cmdq_batch *cmds,
867e86d1aa8SWill Deacon 				    struct arm_smmu_cmdq_ent *cmd)
868e86d1aa8SWill Deacon {
869e86d1aa8SWill Deacon 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
870e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
871e86d1aa8SWill Deacon 		cmds->num = 0;
872e86d1aa8SWill Deacon 	}
873e86d1aa8SWill Deacon 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
874e86d1aa8SWill Deacon 	cmds->num++;
875e86d1aa8SWill Deacon }
876e86d1aa8SWill Deacon 
877e86d1aa8SWill Deacon static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
878e86d1aa8SWill Deacon 				      struct arm_smmu_cmdq_batch *cmds)
879e86d1aa8SWill Deacon {
880e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
881e86d1aa8SWill Deacon }
882e86d1aa8SWill Deacon 
883e86d1aa8SWill Deacon /* Context descriptor manipulation functions */
884e86d1aa8SWill Deacon static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
885e86d1aa8SWill Deacon 			     int ssid, bool leaf)
886e86d1aa8SWill Deacon {
887e86d1aa8SWill Deacon 	size_t i;
888e86d1aa8SWill Deacon 	unsigned long flags;
889e86d1aa8SWill Deacon 	struct arm_smmu_master *master;
890e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_batch cmds = {};
891e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
892e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd = {
893e86d1aa8SWill Deacon 		.opcode	= CMDQ_OP_CFGI_CD,
894e86d1aa8SWill Deacon 		.cfgi	= {
895e86d1aa8SWill Deacon 			.ssid	= ssid,
896e86d1aa8SWill Deacon 			.leaf	= leaf,
897e86d1aa8SWill Deacon 		},
898e86d1aa8SWill Deacon 	};
899e86d1aa8SWill Deacon 
900e86d1aa8SWill Deacon 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
901e86d1aa8SWill Deacon 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
902e86d1aa8SWill Deacon 		for (i = 0; i < master->num_sids; i++) {
903e86d1aa8SWill Deacon 			cmd.cfgi.sid = master->sids[i];
904e86d1aa8SWill Deacon 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
905e86d1aa8SWill Deacon 		}
906e86d1aa8SWill Deacon 	}
907e86d1aa8SWill Deacon 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
908e86d1aa8SWill Deacon 
909e86d1aa8SWill Deacon 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
910e86d1aa8SWill Deacon }
911e86d1aa8SWill Deacon 
912e86d1aa8SWill Deacon static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
913e86d1aa8SWill Deacon 					struct arm_smmu_l1_ctx_desc *l1_desc)
914e86d1aa8SWill Deacon {
915e86d1aa8SWill Deacon 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
916e86d1aa8SWill Deacon 
917e86d1aa8SWill Deacon 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
918e86d1aa8SWill Deacon 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
919e86d1aa8SWill Deacon 	if (!l1_desc->l2ptr) {
920e86d1aa8SWill Deacon 		dev_warn(smmu->dev,
921e86d1aa8SWill Deacon 			 "failed to allocate context descriptor table\n");
922e86d1aa8SWill Deacon 		return -ENOMEM;
923e86d1aa8SWill Deacon 	}
924e86d1aa8SWill Deacon 	return 0;
925e86d1aa8SWill Deacon }
926e86d1aa8SWill Deacon 
927e86d1aa8SWill Deacon static void arm_smmu_write_cd_l1_desc(__le64 *dst,
928e86d1aa8SWill Deacon 				      struct arm_smmu_l1_ctx_desc *l1_desc)
929e86d1aa8SWill Deacon {
930e86d1aa8SWill Deacon 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
931e86d1aa8SWill Deacon 		  CTXDESC_L1_DESC_V;
932e86d1aa8SWill Deacon 
933e86d1aa8SWill Deacon 	/* See comment in arm_smmu_write_ctx_desc() */
934e86d1aa8SWill Deacon 	WRITE_ONCE(*dst, cpu_to_le64(val));
935e86d1aa8SWill Deacon }
936e86d1aa8SWill Deacon 
937e86d1aa8SWill Deacon static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
938e86d1aa8SWill Deacon 				   u32 ssid)
939e86d1aa8SWill Deacon {
940e86d1aa8SWill Deacon 	__le64 *l1ptr;
941e86d1aa8SWill Deacon 	unsigned int idx;
942e86d1aa8SWill Deacon 	struct arm_smmu_l1_ctx_desc *l1_desc;
943e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
944e86d1aa8SWill Deacon 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
945e86d1aa8SWill Deacon 
946e86d1aa8SWill Deacon 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
947e86d1aa8SWill Deacon 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
948e86d1aa8SWill Deacon 
949e86d1aa8SWill Deacon 	idx = ssid >> CTXDESC_SPLIT;
950e86d1aa8SWill Deacon 	l1_desc = &cdcfg->l1_desc[idx];
951e86d1aa8SWill Deacon 	if (!l1_desc->l2ptr) {
952e86d1aa8SWill Deacon 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
953e86d1aa8SWill Deacon 			return NULL;
954e86d1aa8SWill Deacon 
955e86d1aa8SWill Deacon 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
956e86d1aa8SWill Deacon 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
957e86d1aa8SWill Deacon 		/* An invalid L1CD can be cached */
958e86d1aa8SWill Deacon 		arm_smmu_sync_cd(smmu_domain, ssid, false);
959e86d1aa8SWill Deacon 	}
960e86d1aa8SWill Deacon 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
961e86d1aa8SWill Deacon 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
962e86d1aa8SWill Deacon }
963e86d1aa8SWill Deacon 
964e86d1aa8SWill Deacon static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
965e86d1aa8SWill Deacon 				   int ssid, struct arm_smmu_ctx_desc *cd)
966e86d1aa8SWill Deacon {
967e86d1aa8SWill Deacon 	/*
968e86d1aa8SWill Deacon 	 * This function handles the following cases:
969e86d1aa8SWill Deacon 	 *
970e86d1aa8SWill Deacon 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
971e86d1aa8SWill Deacon 	 * (2) Install a secondary CD, for SID+SSID traffic.
972e86d1aa8SWill Deacon 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
973e86d1aa8SWill Deacon 	 *     CD, then invalidate the old entry and mappings.
974e86d1aa8SWill Deacon 	 * (4) Remove a secondary CD.
975e86d1aa8SWill Deacon 	 */
976e86d1aa8SWill Deacon 	u64 val;
977e86d1aa8SWill Deacon 	bool cd_live;
978e86d1aa8SWill Deacon 	__le64 *cdptr;
979e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
980e86d1aa8SWill Deacon 
981e86d1aa8SWill Deacon 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
982e86d1aa8SWill Deacon 		return -E2BIG;
983e86d1aa8SWill Deacon 
984e86d1aa8SWill Deacon 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
985e86d1aa8SWill Deacon 	if (!cdptr)
986e86d1aa8SWill Deacon 		return -ENOMEM;
987e86d1aa8SWill Deacon 
988e86d1aa8SWill Deacon 	val = le64_to_cpu(cdptr[0]);
989e86d1aa8SWill Deacon 	cd_live = !!(val & CTXDESC_CD_0_V);
990e86d1aa8SWill Deacon 
991e86d1aa8SWill Deacon 	if (!cd) { /* (4) */
992e86d1aa8SWill Deacon 		val = 0;
993e86d1aa8SWill Deacon 	} else if (cd_live) { /* (3) */
994e86d1aa8SWill Deacon 		val &= ~CTXDESC_CD_0_ASID;
995e86d1aa8SWill Deacon 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
996e86d1aa8SWill Deacon 		/*
997e86d1aa8SWill Deacon 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
998e86d1aa8SWill Deacon 		 * this substream's traffic
999e86d1aa8SWill Deacon 		 */
1000e86d1aa8SWill Deacon 	} else { /* (1) and (2) */
1001e86d1aa8SWill Deacon 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1002e86d1aa8SWill Deacon 		cdptr[2] = 0;
1003e86d1aa8SWill Deacon 		cdptr[3] = cpu_to_le64(cd->mair);
1004e86d1aa8SWill Deacon 
1005e86d1aa8SWill Deacon 		/*
1006e86d1aa8SWill Deacon 		 * STE is live, and the SMMU might read dwords of this CD in any
1007e86d1aa8SWill Deacon 		 * order. Ensure that it observes valid values before reading
1008e86d1aa8SWill Deacon 		 * V=1.
1009e86d1aa8SWill Deacon 		 */
1010e86d1aa8SWill Deacon 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1011e86d1aa8SWill Deacon 
1012e86d1aa8SWill Deacon 		val = cd->tcr |
1013e86d1aa8SWill Deacon #ifdef __BIG_ENDIAN
1014e86d1aa8SWill Deacon 			CTXDESC_CD_0_ENDI |
1015e86d1aa8SWill Deacon #endif
1016e86d1aa8SWill Deacon 			CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1017e86d1aa8SWill Deacon 			CTXDESC_CD_0_AA64 |
1018e86d1aa8SWill Deacon 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1019e86d1aa8SWill Deacon 			CTXDESC_CD_0_V;
1020e86d1aa8SWill Deacon 
1021e86d1aa8SWill Deacon 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1022e86d1aa8SWill Deacon 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1023e86d1aa8SWill Deacon 			val |= CTXDESC_CD_0_S;
1024e86d1aa8SWill Deacon 	}
1025e86d1aa8SWill Deacon 
1026e86d1aa8SWill Deacon 	/*
1027e86d1aa8SWill Deacon 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1028e86d1aa8SWill Deacon 	 * "Configuration structures and configuration invalidation completion"
1029e86d1aa8SWill Deacon 	 *
1030e86d1aa8SWill Deacon 	 *   The size of single-copy atomic reads made by the SMMU is
1031e86d1aa8SWill Deacon 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1032e86d1aa8SWill Deacon 	 *   field within an aligned 64-bit span of a structure can be altered
1033e86d1aa8SWill Deacon 	 *   without first making the structure invalid.
1034e86d1aa8SWill Deacon 	 */
1035e86d1aa8SWill Deacon 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1036e86d1aa8SWill Deacon 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1037e86d1aa8SWill Deacon 	return 0;
1038e86d1aa8SWill Deacon }
1039e86d1aa8SWill Deacon 
1040e86d1aa8SWill Deacon static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1041e86d1aa8SWill Deacon {
1042e86d1aa8SWill Deacon 	int ret;
1043e86d1aa8SWill Deacon 	size_t l1size;
1044e86d1aa8SWill Deacon 	size_t max_contexts;
1045e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1046e86d1aa8SWill Deacon 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1047e86d1aa8SWill Deacon 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1048e86d1aa8SWill Deacon 
1049e86d1aa8SWill Deacon 	max_contexts = 1 << cfg->s1cdmax;
1050e86d1aa8SWill Deacon 
1051e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1052e86d1aa8SWill Deacon 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1053e86d1aa8SWill Deacon 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1054e86d1aa8SWill Deacon 		cdcfg->num_l1_ents = max_contexts;
1055e86d1aa8SWill Deacon 
1056e86d1aa8SWill Deacon 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1057e86d1aa8SWill Deacon 	} else {
1058e86d1aa8SWill Deacon 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1059e86d1aa8SWill Deacon 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1060e86d1aa8SWill Deacon 						  CTXDESC_L2_ENTRIES);
1061e86d1aa8SWill Deacon 
1062e86d1aa8SWill Deacon 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1063e86d1aa8SWill Deacon 					      sizeof(*cdcfg->l1_desc),
1064e86d1aa8SWill Deacon 					      GFP_KERNEL);
1065e86d1aa8SWill Deacon 		if (!cdcfg->l1_desc)
1066e86d1aa8SWill Deacon 			return -ENOMEM;
1067e86d1aa8SWill Deacon 
1068e86d1aa8SWill Deacon 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1069e86d1aa8SWill Deacon 	}
1070e86d1aa8SWill Deacon 
1071e86d1aa8SWill Deacon 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1072e86d1aa8SWill Deacon 					   GFP_KERNEL);
1073e86d1aa8SWill Deacon 	if (!cdcfg->cdtab) {
1074e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1075e86d1aa8SWill Deacon 		ret = -ENOMEM;
1076e86d1aa8SWill Deacon 		goto err_free_l1;
1077e86d1aa8SWill Deacon 	}
1078e86d1aa8SWill Deacon 
1079e86d1aa8SWill Deacon 	return 0;
1080e86d1aa8SWill Deacon 
1081e86d1aa8SWill Deacon err_free_l1:
1082e86d1aa8SWill Deacon 	if (cdcfg->l1_desc) {
1083e86d1aa8SWill Deacon 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1084e86d1aa8SWill Deacon 		cdcfg->l1_desc = NULL;
1085e86d1aa8SWill Deacon 	}
1086e86d1aa8SWill Deacon 	return ret;
1087e86d1aa8SWill Deacon }
1088e86d1aa8SWill Deacon 
1089e86d1aa8SWill Deacon static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1090e86d1aa8SWill Deacon {
1091e86d1aa8SWill Deacon 	int i;
1092e86d1aa8SWill Deacon 	size_t size, l1size;
1093e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1094e86d1aa8SWill Deacon 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1095e86d1aa8SWill Deacon 
1096e86d1aa8SWill Deacon 	if (cdcfg->l1_desc) {
1097e86d1aa8SWill Deacon 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1098e86d1aa8SWill Deacon 
1099e86d1aa8SWill Deacon 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1100e86d1aa8SWill Deacon 			if (!cdcfg->l1_desc[i].l2ptr)
1101e86d1aa8SWill Deacon 				continue;
1102e86d1aa8SWill Deacon 
1103e86d1aa8SWill Deacon 			dmam_free_coherent(smmu->dev, size,
1104e86d1aa8SWill Deacon 					   cdcfg->l1_desc[i].l2ptr,
1105e86d1aa8SWill Deacon 					   cdcfg->l1_desc[i].l2ptr_dma);
1106e86d1aa8SWill Deacon 		}
1107e86d1aa8SWill Deacon 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1108e86d1aa8SWill Deacon 		cdcfg->l1_desc = NULL;
1109e86d1aa8SWill Deacon 
1110e86d1aa8SWill Deacon 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1111e86d1aa8SWill Deacon 	} else {
1112e86d1aa8SWill Deacon 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1113e86d1aa8SWill Deacon 	}
1114e86d1aa8SWill Deacon 
1115e86d1aa8SWill Deacon 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1116e86d1aa8SWill Deacon 	cdcfg->cdtab_dma = 0;
1117e86d1aa8SWill Deacon 	cdcfg->cdtab = NULL;
1118e86d1aa8SWill Deacon }
1119e86d1aa8SWill Deacon 
1120e86d1aa8SWill Deacon static void arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1121e86d1aa8SWill Deacon {
1122e86d1aa8SWill Deacon 	if (!cd->asid)
1123e86d1aa8SWill Deacon 		return;
1124e86d1aa8SWill Deacon 
1125e86d1aa8SWill Deacon 	xa_erase(&asid_xa, cd->asid);
1126e86d1aa8SWill Deacon }
1127e86d1aa8SWill Deacon 
1128e86d1aa8SWill Deacon /* Stream table manipulation functions */
1129e86d1aa8SWill Deacon static void
1130e86d1aa8SWill Deacon arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1131e86d1aa8SWill Deacon {
1132e86d1aa8SWill Deacon 	u64 val = 0;
1133e86d1aa8SWill Deacon 
1134e86d1aa8SWill Deacon 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1135e86d1aa8SWill Deacon 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1136e86d1aa8SWill Deacon 
1137e86d1aa8SWill Deacon 	/* See comment in arm_smmu_write_ctx_desc() */
1138e86d1aa8SWill Deacon 	WRITE_ONCE(*dst, cpu_to_le64(val));
1139e86d1aa8SWill Deacon }
1140e86d1aa8SWill Deacon 
1141e86d1aa8SWill Deacon static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1142e86d1aa8SWill Deacon {
1143e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd = {
1144e86d1aa8SWill Deacon 		.opcode	= CMDQ_OP_CFGI_STE,
1145e86d1aa8SWill Deacon 		.cfgi	= {
1146e86d1aa8SWill Deacon 			.sid	= sid,
1147e86d1aa8SWill Deacon 			.leaf	= true,
1148e86d1aa8SWill Deacon 		},
1149e86d1aa8SWill Deacon 	};
1150e86d1aa8SWill Deacon 
1151e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1152e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_sync(smmu);
1153e86d1aa8SWill Deacon }
1154e86d1aa8SWill Deacon 
1155e86d1aa8SWill Deacon static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1156e86d1aa8SWill Deacon 				      __le64 *dst)
1157e86d1aa8SWill Deacon {
1158e86d1aa8SWill Deacon 	/*
1159e86d1aa8SWill Deacon 	 * This is hideously complicated, but we only really care about
1160e86d1aa8SWill Deacon 	 * three cases at the moment:
1161e86d1aa8SWill Deacon 	 *
1162e86d1aa8SWill Deacon 	 * 1. Invalid (all zero) -> bypass/fault (init)
1163e86d1aa8SWill Deacon 	 * 2. Bypass/fault -> translation/bypass (attach)
1164e86d1aa8SWill Deacon 	 * 3. Translation/bypass -> bypass/fault (detach)
1165e86d1aa8SWill Deacon 	 *
1166e86d1aa8SWill Deacon 	 * Given that we can't update the STE atomically and the SMMU
1167e86d1aa8SWill Deacon 	 * doesn't read the thing in a defined order, that leaves us
1168e86d1aa8SWill Deacon 	 * with the following maintenance requirements:
1169e86d1aa8SWill Deacon 	 *
1170e86d1aa8SWill Deacon 	 * 1. Update Config, return (init time STEs aren't live)
1171e86d1aa8SWill Deacon 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1172e86d1aa8SWill Deacon 	 * 3. Update Config, sync
1173e86d1aa8SWill Deacon 	 */
1174e86d1aa8SWill Deacon 	u64 val = le64_to_cpu(dst[0]);
1175e86d1aa8SWill Deacon 	bool ste_live = false;
1176e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = NULL;
1177e86d1aa8SWill Deacon 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1178e86d1aa8SWill Deacon 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1179e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = NULL;
1180e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1181e86d1aa8SWill Deacon 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1182e86d1aa8SWill Deacon 		.prefetch	= {
1183e86d1aa8SWill Deacon 			.sid	= sid,
1184e86d1aa8SWill Deacon 		},
1185e86d1aa8SWill Deacon 	};
1186e86d1aa8SWill Deacon 
1187e86d1aa8SWill Deacon 	if (master) {
1188e86d1aa8SWill Deacon 		smmu_domain = master->domain;
1189e86d1aa8SWill Deacon 		smmu = master->smmu;
1190e86d1aa8SWill Deacon 	}
1191e86d1aa8SWill Deacon 
1192e86d1aa8SWill Deacon 	if (smmu_domain) {
1193e86d1aa8SWill Deacon 		switch (smmu_domain->stage) {
1194e86d1aa8SWill Deacon 		case ARM_SMMU_DOMAIN_S1:
1195e86d1aa8SWill Deacon 			s1_cfg = &smmu_domain->s1_cfg;
1196e86d1aa8SWill Deacon 			break;
1197e86d1aa8SWill Deacon 		case ARM_SMMU_DOMAIN_S2:
1198e86d1aa8SWill Deacon 		case ARM_SMMU_DOMAIN_NESTED:
1199e86d1aa8SWill Deacon 			s2_cfg = &smmu_domain->s2_cfg;
1200e86d1aa8SWill Deacon 			break;
1201e86d1aa8SWill Deacon 		default:
1202e86d1aa8SWill Deacon 			break;
1203e86d1aa8SWill Deacon 		}
1204e86d1aa8SWill Deacon 	}
1205e86d1aa8SWill Deacon 
1206e86d1aa8SWill Deacon 	if (val & STRTAB_STE_0_V) {
1207e86d1aa8SWill Deacon 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1208e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_BYPASS:
1209e86d1aa8SWill Deacon 			break;
1210e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_S1_TRANS:
1211e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_S2_TRANS:
1212e86d1aa8SWill Deacon 			ste_live = true;
1213e86d1aa8SWill Deacon 			break;
1214e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_ABORT:
1215e86d1aa8SWill Deacon 			BUG_ON(!disable_bypass);
1216e86d1aa8SWill Deacon 			break;
1217e86d1aa8SWill Deacon 		default:
1218e86d1aa8SWill Deacon 			BUG(); /* STE corruption */
1219e86d1aa8SWill Deacon 		}
1220e86d1aa8SWill Deacon 	}
1221e86d1aa8SWill Deacon 
1222e86d1aa8SWill Deacon 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1223e86d1aa8SWill Deacon 	val = STRTAB_STE_0_V;
1224e86d1aa8SWill Deacon 
1225e86d1aa8SWill Deacon 	/* Bypass/fault */
1226e86d1aa8SWill Deacon 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1227e86d1aa8SWill Deacon 		if (!smmu_domain && disable_bypass)
1228e86d1aa8SWill Deacon 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1229e86d1aa8SWill Deacon 		else
1230e86d1aa8SWill Deacon 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1231e86d1aa8SWill Deacon 
1232e86d1aa8SWill Deacon 		dst[0] = cpu_to_le64(val);
1233e86d1aa8SWill Deacon 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1234e86d1aa8SWill Deacon 						STRTAB_STE_1_SHCFG_INCOMING));
1235e86d1aa8SWill Deacon 		dst[2] = 0; /* Nuke the VMID */
1236e86d1aa8SWill Deacon 		/*
1237e86d1aa8SWill Deacon 		 * The SMMU can perform negative caching, so we must sync
1238e86d1aa8SWill Deacon 		 * the STE regardless of whether the old value was live.
1239e86d1aa8SWill Deacon 		 */
1240e86d1aa8SWill Deacon 		if (smmu)
1241e86d1aa8SWill Deacon 			arm_smmu_sync_ste_for_sid(smmu, sid);
1242e86d1aa8SWill Deacon 		return;
1243e86d1aa8SWill Deacon 	}
1244e86d1aa8SWill Deacon 
1245e86d1aa8SWill Deacon 	if (s1_cfg) {
1246e86d1aa8SWill Deacon 		BUG_ON(ste_live);
1247e86d1aa8SWill Deacon 		dst[1] = cpu_to_le64(
1248e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1249e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1250e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1251e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1252e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1253e86d1aa8SWill Deacon 
1254e86d1aa8SWill Deacon 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1255e86d1aa8SWill Deacon 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1256e86d1aa8SWill Deacon 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1257e86d1aa8SWill Deacon 
1258e86d1aa8SWill Deacon 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1259e86d1aa8SWill Deacon 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1260e86d1aa8SWill Deacon 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1261e86d1aa8SWill Deacon 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1262e86d1aa8SWill Deacon 	}
1263e86d1aa8SWill Deacon 
1264e86d1aa8SWill Deacon 	if (s2_cfg) {
1265e86d1aa8SWill Deacon 		BUG_ON(ste_live);
1266e86d1aa8SWill Deacon 		dst[2] = cpu_to_le64(
1267e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1268e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1269e86d1aa8SWill Deacon #ifdef __BIG_ENDIAN
1270e86d1aa8SWill Deacon 			 STRTAB_STE_2_S2ENDI |
1271e86d1aa8SWill Deacon #endif
1272e86d1aa8SWill Deacon 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1273e86d1aa8SWill Deacon 			 STRTAB_STE_2_S2R);
1274e86d1aa8SWill Deacon 
1275e86d1aa8SWill Deacon 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1276e86d1aa8SWill Deacon 
1277e86d1aa8SWill Deacon 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1278e86d1aa8SWill Deacon 	}
1279e86d1aa8SWill Deacon 
1280e86d1aa8SWill Deacon 	if (master->ats_enabled)
1281e86d1aa8SWill Deacon 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1282e86d1aa8SWill Deacon 						 STRTAB_STE_1_EATS_TRANS));
1283e86d1aa8SWill Deacon 
1284e86d1aa8SWill Deacon 	arm_smmu_sync_ste_for_sid(smmu, sid);
1285e86d1aa8SWill Deacon 	/* See comment in arm_smmu_write_ctx_desc() */
1286e86d1aa8SWill Deacon 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1287e86d1aa8SWill Deacon 	arm_smmu_sync_ste_for_sid(smmu, sid);
1288e86d1aa8SWill Deacon 
1289e86d1aa8SWill Deacon 	/* It's likely that we'll want to use the new STE soon */
1290e86d1aa8SWill Deacon 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1291e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1292e86d1aa8SWill Deacon }
1293e86d1aa8SWill Deacon 
1294376cdf66SJean-Philippe Brucker static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1295e86d1aa8SWill Deacon {
1296e86d1aa8SWill Deacon 	unsigned int i;
1297e86d1aa8SWill Deacon 
1298e86d1aa8SWill Deacon 	for (i = 0; i < nent; ++i) {
1299e86d1aa8SWill Deacon 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1300e86d1aa8SWill Deacon 		strtab += STRTAB_STE_DWORDS;
1301e86d1aa8SWill Deacon 	}
1302e86d1aa8SWill Deacon }
1303e86d1aa8SWill Deacon 
1304e86d1aa8SWill Deacon static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1305e86d1aa8SWill Deacon {
1306e86d1aa8SWill Deacon 	size_t size;
1307e86d1aa8SWill Deacon 	void *strtab;
1308e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1309e86d1aa8SWill Deacon 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1310e86d1aa8SWill Deacon 
1311e86d1aa8SWill Deacon 	if (desc->l2ptr)
1312e86d1aa8SWill Deacon 		return 0;
1313e86d1aa8SWill Deacon 
1314e86d1aa8SWill Deacon 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1315e86d1aa8SWill Deacon 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1316e86d1aa8SWill Deacon 
1317e86d1aa8SWill Deacon 	desc->span = STRTAB_SPLIT + 1;
1318e86d1aa8SWill Deacon 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1319e86d1aa8SWill Deacon 					  GFP_KERNEL);
1320e86d1aa8SWill Deacon 	if (!desc->l2ptr) {
1321e86d1aa8SWill Deacon 		dev_err(smmu->dev,
1322e86d1aa8SWill Deacon 			"failed to allocate l2 stream table for SID %u\n",
1323e86d1aa8SWill Deacon 			sid);
1324e86d1aa8SWill Deacon 		return -ENOMEM;
1325e86d1aa8SWill Deacon 	}
1326e86d1aa8SWill Deacon 
1327e86d1aa8SWill Deacon 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1328e86d1aa8SWill Deacon 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1329e86d1aa8SWill Deacon 	return 0;
1330e86d1aa8SWill Deacon }
1331e86d1aa8SWill Deacon 
1332e86d1aa8SWill Deacon /* IRQ and event handlers */
1333e86d1aa8SWill Deacon static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1334e86d1aa8SWill Deacon {
1335e86d1aa8SWill Deacon 	int i;
1336e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = dev;
1337e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->evtq.q;
1338e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue *llq = &q->llq;
1339e86d1aa8SWill Deacon 	u64 evt[EVTQ_ENT_DWORDS];
1340e86d1aa8SWill Deacon 
1341e86d1aa8SWill Deacon 	do {
1342e86d1aa8SWill Deacon 		while (!queue_remove_raw(q, evt)) {
1343e86d1aa8SWill Deacon 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1344e86d1aa8SWill Deacon 
1345e86d1aa8SWill Deacon 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1346e86d1aa8SWill Deacon 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1347e86d1aa8SWill Deacon 				dev_info(smmu->dev, "\t0x%016llx\n",
1348e86d1aa8SWill Deacon 					 (unsigned long long)evt[i]);
1349e86d1aa8SWill Deacon 
1350e86d1aa8SWill Deacon 		}
1351e86d1aa8SWill Deacon 
1352e86d1aa8SWill Deacon 		/*
1353e86d1aa8SWill Deacon 		 * Not much we can do on overflow, so scream and pretend we're
1354e86d1aa8SWill Deacon 		 * trying harder.
1355e86d1aa8SWill Deacon 		 */
1356e86d1aa8SWill Deacon 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1357e86d1aa8SWill Deacon 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1358e86d1aa8SWill Deacon 	} while (!queue_empty(llq));
1359e86d1aa8SWill Deacon 
1360e86d1aa8SWill Deacon 	/* Sync our overflow flag, as we believe we're up to speed */
1361e86d1aa8SWill Deacon 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1362e86d1aa8SWill Deacon 		    Q_IDX(llq, llq->cons);
1363e86d1aa8SWill Deacon 	return IRQ_HANDLED;
1364e86d1aa8SWill Deacon }
1365e86d1aa8SWill Deacon 
1366e86d1aa8SWill Deacon static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1367e86d1aa8SWill Deacon {
1368e86d1aa8SWill Deacon 	u32 sid, ssid;
1369e86d1aa8SWill Deacon 	u16 grpid;
1370e86d1aa8SWill Deacon 	bool ssv, last;
1371e86d1aa8SWill Deacon 
1372e86d1aa8SWill Deacon 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1373e86d1aa8SWill Deacon 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1374e86d1aa8SWill Deacon 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1375e86d1aa8SWill Deacon 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1376e86d1aa8SWill Deacon 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1377e86d1aa8SWill Deacon 
1378e86d1aa8SWill Deacon 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1379e86d1aa8SWill Deacon 	dev_info(smmu->dev,
1380e86d1aa8SWill Deacon 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1381e86d1aa8SWill Deacon 		 sid, ssid, grpid, last ? "L" : "",
1382e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1383e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1384e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1385e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1386e86d1aa8SWill Deacon 		 evt[1] & PRIQ_1_ADDR_MASK);
1387e86d1aa8SWill Deacon 
1388e86d1aa8SWill Deacon 	if (last) {
1389e86d1aa8SWill Deacon 		struct arm_smmu_cmdq_ent cmd = {
1390e86d1aa8SWill Deacon 			.opcode			= CMDQ_OP_PRI_RESP,
1391e86d1aa8SWill Deacon 			.substream_valid	= ssv,
1392e86d1aa8SWill Deacon 			.pri			= {
1393e86d1aa8SWill Deacon 				.sid	= sid,
1394e86d1aa8SWill Deacon 				.ssid	= ssid,
1395e86d1aa8SWill Deacon 				.grpid	= grpid,
1396e86d1aa8SWill Deacon 				.resp	= PRI_RESP_DENY,
1397e86d1aa8SWill Deacon 			},
1398e86d1aa8SWill Deacon 		};
1399e86d1aa8SWill Deacon 
1400e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1401e86d1aa8SWill Deacon 	}
1402e86d1aa8SWill Deacon }
1403e86d1aa8SWill Deacon 
1404e86d1aa8SWill Deacon static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1405e86d1aa8SWill Deacon {
1406e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = dev;
1407e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->priq.q;
1408e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue *llq = &q->llq;
1409e86d1aa8SWill Deacon 	u64 evt[PRIQ_ENT_DWORDS];
1410e86d1aa8SWill Deacon 
1411e86d1aa8SWill Deacon 	do {
1412e86d1aa8SWill Deacon 		while (!queue_remove_raw(q, evt))
1413e86d1aa8SWill Deacon 			arm_smmu_handle_ppr(smmu, evt);
1414e86d1aa8SWill Deacon 
1415e86d1aa8SWill Deacon 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1416e86d1aa8SWill Deacon 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1417e86d1aa8SWill Deacon 	} while (!queue_empty(llq));
1418e86d1aa8SWill Deacon 
1419e86d1aa8SWill Deacon 	/* Sync our overflow flag, as we believe we're up to speed */
1420e86d1aa8SWill Deacon 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1421e86d1aa8SWill Deacon 		      Q_IDX(llq, llq->cons);
1422e86d1aa8SWill Deacon 	queue_sync_cons_out(q);
1423e86d1aa8SWill Deacon 	return IRQ_HANDLED;
1424e86d1aa8SWill Deacon }
1425e86d1aa8SWill Deacon 
1426e86d1aa8SWill Deacon static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1427e86d1aa8SWill Deacon 
1428e86d1aa8SWill Deacon static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1429e86d1aa8SWill Deacon {
1430e86d1aa8SWill Deacon 	u32 gerror, gerrorn, active;
1431e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = dev;
1432e86d1aa8SWill Deacon 
1433e86d1aa8SWill Deacon 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1434e86d1aa8SWill Deacon 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1435e86d1aa8SWill Deacon 
1436e86d1aa8SWill Deacon 	active = gerror ^ gerrorn;
1437e86d1aa8SWill Deacon 	if (!(active & GERROR_ERR_MASK))
1438e86d1aa8SWill Deacon 		return IRQ_NONE; /* No errors pending */
1439e86d1aa8SWill Deacon 
1440e86d1aa8SWill Deacon 	dev_warn(smmu->dev,
1441e86d1aa8SWill Deacon 		 "unexpected global error reported (0x%08x), this could be serious\n",
1442e86d1aa8SWill Deacon 		 active);
1443e86d1aa8SWill Deacon 
1444e86d1aa8SWill Deacon 	if (active & GERROR_SFM_ERR) {
1445e86d1aa8SWill Deacon 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1446e86d1aa8SWill Deacon 		arm_smmu_device_disable(smmu);
1447e86d1aa8SWill Deacon 	}
1448e86d1aa8SWill Deacon 
1449e86d1aa8SWill Deacon 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1450e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1451e86d1aa8SWill Deacon 
1452e86d1aa8SWill Deacon 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1453e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1454e86d1aa8SWill Deacon 
1455e86d1aa8SWill Deacon 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1456e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1457e86d1aa8SWill Deacon 
1458e86d1aa8SWill Deacon 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1459e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1460e86d1aa8SWill Deacon 
1461e86d1aa8SWill Deacon 	if (active & GERROR_PRIQ_ABT_ERR)
1462e86d1aa8SWill Deacon 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1463e86d1aa8SWill Deacon 
1464e86d1aa8SWill Deacon 	if (active & GERROR_EVTQ_ABT_ERR)
1465e86d1aa8SWill Deacon 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1466e86d1aa8SWill Deacon 
1467e86d1aa8SWill Deacon 	if (active & GERROR_CMDQ_ERR)
1468e86d1aa8SWill Deacon 		arm_smmu_cmdq_skip_err(smmu);
1469e86d1aa8SWill Deacon 
1470e86d1aa8SWill Deacon 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1471e86d1aa8SWill Deacon 	return IRQ_HANDLED;
1472e86d1aa8SWill Deacon }
1473e86d1aa8SWill Deacon 
1474e86d1aa8SWill Deacon static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1475e86d1aa8SWill Deacon {
1476e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = dev;
1477e86d1aa8SWill Deacon 
1478e86d1aa8SWill Deacon 	arm_smmu_evtq_thread(irq, dev);
1479e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1480e86d1aa8SWill Deacon 		arm_smmu_priq_thread(irq, dev);
1481e86d1aa8SWill Deacon 
1482e86d1aa8SWill Deacon 	return IRQ_HANDLED;
1483e86d1aa8SWill Deacon }
1484e86d1aa8SWill Deacon 
1485e86d1aa8SWill Deacon static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1486e86d1aa8SWill Deacon {
1487e86d1aa8SWill Deacon 	arm_smmu_gerror_handler(irq, dev);
1488e86d1aa8SWill Deacon 	return IRQ_WAKE_THREAD;
1489e86d1aa8SWill Deacon }
1490e86d1aa8SWill Deacon 
1491e86d1aa8SWill Deacon static void
1492e86d1aa8SWill Deacon arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1493e86d1aa8SWill Deacon 			struct arm_smmu_cmdq_ent *cmd)
1494e86d1aa8SWill Deacon {
1495e86d1aa8SWill Deacon 	size_t log2_span;
1496e86d1aa8SWill Deacon 	size_t span_mask;
1497e86d1aa8SWill Deacon 	/* ATC invalidates are always on 4096-bytes pages */
1498e86d1aa8SWill Deacon 	size_t inval_grain_shift = 12;
1499e86d1aa8SWill Deacon 	unsigned long page_start, page_end;
1500e86d1aa8SWill Deacon 
1501e86d1aa8SWill Deacon 	*cmd = (struct arm_smmu_cmdq_ent) {
1502e86d1aa8SWill Deacon 		.opcode			= CMDQ_OP_ATC_INV,
1503e86d1aa8SWill Deacon 		.substream_valid	= !!ssid,
1504e86d1aa8SWill Deacon 		.atc.ssid		= ssid,
1505e86d1aa8SWill Deacon 	};
1506e86d1aa8SWill Deacon 
1507e86d1aa8SWill Deacon 	if (!size) {
1508e86d1aa8SWill Deacon 		cmd->atc.size = ATC_INV_SIZE_ALL;
1509e86d1aa8SWill Deacon 		return;
1510e86d1aa8SWill Deacon 	}
1511e86d1aa8SWill Deacon 
1512e86d1aa8SWill Deacon 	page_start	= iova >> inval_grain_shift;
1513e86d1aa8SWill Deacon 	page_end	= (iova + size - 1) >> inval_grain_shift;
1514e86d1aa8SWill Deacon 
1515e86d1aa8SWill Deacon 	/*
1516e86d1aa8SWill Deacon 	 * In an ATS Invalidate Request, the address must be aligned on the
1517e86d1aa8SWill Deacon 	 * range size, which must be a power of two number of page sizes. We
1518e86d1aa8SWill Deacon 	 * thus have to choose between grossly over-invalidating the region, or
1519e86d1aa8SWill Deacon 	 * splitting the invalidation into multiple commands. For simplicity
1520e86d1aa8SWill Deacon 	 * we'll go with the first solution, but should refine it in the future
1521e86d1aa8SWill Deacon 	 * if multiple commands are shown to be more efficient.
1522e86d1aa8SWill Deacon 	 *
1523e86d1aa8SWill Deacon 	 * Find the smallest power of two that covers the range. The most
1524e86d1aa8SWill Deacon 	 * significant differing bit between the start and end addresses,
1525e86d1aa8SWill Deacon 	 * fls(start ^ end), indicates the required span. For example:
1526e86d1aa8SWill Deacon 	 *
1527e86d1aa8SWill Deacon 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1528e86d1aa8SWill Deacon 	 *		x = 0b1000 ^ 0b1011 = 0b11
1529e86d1aa8SWill Deacon 	 *		span = 1 << fls(x) = 4
1530e86d1aa8SWill Deacon 	 *
1531e86d1aa8SWill Deacon 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1532e86d1aa8SWill Deacon 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1533e86d1aa8SWill Deacon 	 *		span = 1 << fls(x) = 16
1534e86d1aa8SWill Deacon 	 */
1535e86d1aa8SWill Deacon 	log2_span	= fls_long(page_start ^ page_end);
1536e86d1aa8SWill Deacon 	span_mask	= (1ULL << log2_span) - 1;
1537e86d1aa8SWill Deacon 
1538e86d1aa8SWill Deacon 	page_start	&= ~span_mask;
1539e86d1aa8SWill Deacon 
1540e86d1aa8SWill Deacon 	cmd->atc.addr	= page_start << inval_grain_shift;
1541e86d1aa8SWill Deacon 	cmd->atc.size	= log2_span;
1542e86d1aa8SWill Deacon }
1543e86d1aa8SWill Deacon 
1544e86d1aa8SWill Deacon static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1545e86d1aa8SWill Deacon {
1546e86d1aa8SWill Deacon 	int i;
1547e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd;
1548e86d1aa8SWill Deacon 
1549e86d1aa8SWill Deacon 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1550e86d1aa8SWill Deacon 
1551e86d1aa8SWill Deacon 	for (i = 0; i < master->num_sids; i++) {
1552e86d1aa8SWill Deacon 		cmd.atc.sid = master->sids[i];
1553e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1554e86d1aa8SWill Deacon 	}
1555e86d1aa8SWill Deacon 
1556e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_sync(master->smmu);
1557e86d1aa8SWill Deacon }
1558e86d1aa8SWill Deacon 
1559e86d1aa8SWill Deacon static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1560e86d1aa8SWill Deacon 				   int ssid, unsigned long iova, size_t size)
1561e86d1aa8SWill Deacon {
1562e86d1aa8SWill Deacon 	int i;
1563e86d1aa8SWill Deacon 	unsigned long flags;
1564e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd;
1565e86d1aa8SWill Deacon 	struct arm_smmu_master *master;
1566e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_batch cmds = {};
1567e86d1aa8SWill Deacon 
1568e86d1aa8SWill Deacon 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1569e86d1aa8SWill Deacon 		return 0;
1570e86d1aa8SWill Deacon 
1571e86d1aa8SWill Deacon 	/*
1572e86d1aa8SWill Deacon 	 * Ensure that we've completed prior invalidation of the main TLBs
1573e86d1aa8SWill Deacon 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1574e86d1aa8SWill Deacon 	 * arm_smmu_enable_ats():
1575e86d1aa8SWill Deacon 	 *
1576e86d1aa8SWill Deacon 	 *	// unmap()			// arm_smmu_enable_ats()
1577e86d1aa8SWill Deacon 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1578e86d1aa8SWill Deacon 	 *	smp_mb();			[...]
1579e86d1aa8SWill Deacon 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1580e86d1aa8SWill Deacon 	 *
1581e86d1aa8SWill Deacon 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1582e86d1aa8SWill Deacon 	 * ATS was enabled at the PCI device before completion of the TLBI.
1583e86d1aa8SWill Deacon 	 */
1584e86d1aa8SWill Deacon 	smp_mb();
1585e86d1aa8SWill Deacon 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1586e86d1aa8SWill Deacon 		return 0;
1587e86d1aa8SWill Deacon 
1588e86d1aa8SWill Deacon 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1589e86d1aa8SWill Deacon 
1590e86d1aa8SWill Deacon 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1591e86d1aa8SWill Deacon 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1592e86d1aa8SWill Deacon 		if (!master->ats_enabled)
1593e86d1aa8SWill Deacon 			continue;
1594e86d1aa8SWill Deacon 
1595e86d1aa8SWill Deacon 		for (i = 0; i < master->num_sids; i++) {
1596e86d1aa8SWill Deacon 			cmd.atc.sid = master->sids[i];
1597e86d1aa8SWill Deacon 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1598e86d1aa8SWill Deacon 		}
1599e86d1aa8SWill Deacon 	}
1600e86d1aa8SWill Deacon 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1601e86d1aa8SWill Deacon 
1602e86d1aa8SWill Deacon 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1603e86d1aa8SWill Deacon }
1604e86d1aa8SWill Deacon 
1605e86d1aa8SWill Deacon /* IO_PGTABLE API */
1606e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_context(void *cookie)
1607e86d1aa8SWill Deacon {
1608e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = cookie;
1609e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1610e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd;
1611e86d1aa8SWill Deacon 
1612e86d1aa8SWill Deacon 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1613e86d1aa8SWill Deacon 		cmd.opcode	= CMDQ_OP_TLBI_NH_ASID;
1614e86d1aa8SWill Deacon 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1615e86d1aa8SWill Deacon 		cmd.tlbi.vmid	= 0;
1616e86d1aa8SWill Deacon 	} else {
1617e86d1aa8SWill Deacon 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1618e86d1aa8SWill Deacon 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1619e86d1aa8SWill Deacon 	}
1620e86d1aa8SWill Deacon 
1621e86d1aa8SWill Deacon 	/*
1622e86d1aa8SWill Deacon 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1623e86d1aa8SWill Deacon 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1624e86d1aa8SWill Deacon 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1625e86d1aa8SWill Deacon 	 * insertion to guarantee those are observed before the TLBI. Do be
1626e86d1aa8SWill Deacon 	 * careful, 007.
1627e86d1aa8SWill Deacon 	 */
1628e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1629e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_sync(smmu);
1630e86d1aa8SWill Deacon 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1631e86d1aa8SWill Deacon }
1632e86d1aa8SWill Deacon 
1633e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1634e86d1aa8SWill Deacon 				   size_t granule, bool leaf,
1635e86d1aa8SWill Deacon 				   struct arm_smmu_domain *smmu_domain)
1636e86d1aa8SWill Deacon {
1637e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1638e86d1aa8SWill Deacon 	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1639e86d1aa8SWill Deacon 	size_t inv_range = granule;
1640e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_batch cmds = {};
1641e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd = {
1642e86d1aa8SWill Deacon 		.tlbi = {
1643e86d1aa8SWill Deacon 			.leaf	= leaf,
1644e86d1aa8SWill Deacon 		},
1645e86d1aa8SWill Deacon 	};
1646e86d1aa8SWill Deacon 
1647e86d1aa8SWill Deacon 	if (!size)
1648e86d1aa8SWill Deacon 		return;
1649e86d1aa8SWill Deacon 
1650e86d1aa8SWill Deacon 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1651e86d1aa8SWill Deacon 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1652e86d1aa8SWill Deacon 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1653e86d1aa8SWill Deacon 	} else {
1654e86d1aa8SWill Deacon 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1655e86d1aa8SWill Deacon 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1656e86d1aa8SWill Deacon 	}
1657e86d1aa8SWill Deacon 
1658e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1659e86d1aa8SWill Deacon 		/* Get the leaf page size */
1660e86d1aa8SWill Deacon 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1661e86d1aa8SWill Deacon 
1662e86d1aa8SWill Deacon 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1663e86d1aa8SWill Deacon 		cmd.tlbi.tg = (tg - 10) / 2;
1664e86d1aa8SWill Deacon 
1665e86d1aa8SWill Deacon 		/* Determine what level the granule is at */
1666e86d1aa8SWill Deacon 		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1667e86d1aa8SWill Deacon 
1668e86d1aa8SWill Deacon 		num_pages = size >> tg;
1669e86d1aa8SWill Deacon 	}
1670e86d1aa8SWill Deacon 
1671e86d1aa8SWill Deacon 	while (iova < end) {
1672e86d1aa8SWill Deacon 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1673e86d1aa8SWill Deacon 			/*
1674e86d1aa8SWill Deacon 			 * On each iteration of the loop, the range is 5 bits
1675e86d1aa8SWill Deacon 			 * worth of the aligned size remaining.
1676e86d1aa8SWill Deacon 			 * The range in pages is:
1677e86d1aa8SWill Deacon 			 *
1678e86d1aa8SWill Deacon 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1679e86d1aa8SWill Deacon 			 */
1680e86d1aa8SWill Deacon 			unsigned long scale, num;
1681e86d1aa8SWill Deacon 
1682e86d1aa8SWill Deacon 			/* Determine the power of 2 multiple number of pages */
1683e86d1aa8SWill Deacon 			scale = __ffs(num_pages);
1684e86d1aa8SWill Deacon 			cmd.tlbi.scale = scale;
1685e86d1aa8SWill Deacon 
1686e86d1aa8SWill Deacon 			/* Determine how many chunks of 2^scale size we have */
1687e86d1aa8SWill Deacon 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1688e86d1aa8SWill Deacon 			cmd.tlbi.num = num - 1;
1689e86d1aa8SWill Deacon 
1690e86d1aa8SWill Deacon 			/* range is num * 2^scale * pgsize */
1691e86d1aa8SWill Deacon 			inv_range = num << (scale + tg);
1692e86d1aa8SWill Deacon 
1693e86d1aa8SWill Deacon 			/* Clear out the lower order bits for the next iteration */
1694e86d1aa8SWill Deacon 			num_pages -= num << scale;
1695e86d1aa8SWill Deacon 		}
1696e86d1aa8SWill Deacon 
1697e86d1aa8SWill Deacon 		cmd.tlbi.addr = iova;
1698e86d1aa8SWill Deacon 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1699e86d1aa8SWill Deacon 		iova += inv_range;
1700e86d1aa8SWill Deacon 	}
1701e86d1aa8SWill Deacon 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1702e86d1aa8SWill Deacon 
1703e86d1aa8SWill Deacon 	/*
1704e86d1aa8SWill Deacon 	 * Unfortunately, this can't be leaf-only since we may have
1705e86d1aa8SWill Deacon 	 * zapped an entire table.
1706e86d1aa8SWill Deacon 	 */
1707e86d1aa8SWill Deacon 	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1708e86d1aa8SWill Deacon }
1709e86d1aa8SWill Deacon 
1710e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1711e86d1aa8SWill Deacon 					 unsigned long iova, size_t granule,
1712e86d1aa8SWill Deacon 					 void *cookie)
1713e86d1aa8SWill Deacon {
1714e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = cookie;
1715e86d1aa8SWill Deacon 	struct iommu_domain *domain = &smmu_domain->domain;
1716e86d1aa8SWill Deacon 
1717e86d1aa8SWill Deacon 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1718e86d1aa8SWill Deacon }
1719e86d1aa8SWill Deacon 
1720e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1721e86d1aa8SWill Deacon 				  size_t granule, void *cookie)
1722e86d1aa8SWill Deacon {
1723e86d1aa8SWill Deacon 	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1724e86d1aa8SWill Deacon }
1725e86d1aa8SWill Deacon 
1726e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
1727e86d1aa8SWill Deacon 				  size_t granule, void *cookie)
1728e86d1aa8SWill Deacon {
1729e86d1aa8SWill Deacon 	arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
1730e86d1aa8SWill Deacon }
1731e86d1aa8SWill Deacon 
1732e86d1aa8SWill Deacon static const struct iommu_flush_ops arm_smmu_flush_ops = {
1733e86d1aa8SWill Deacon 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1734e86d1aa8SWill Deacon 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1735e86d1aa8SWill Deacon 	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
1736e86d1aa8SWill Deacon 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1737e86d1aa8SWill Deacon };
1738e86d1aa8SWill Deacon 
1739e86d1aa8SWill Deacon /* IOMMU API */
1740e86d1aa8SWill Deacon static bool arm_smmu_capable(enum iommu_cap cap)
1741e86d1aa8SWill Deacon {
1742e86d1aa8SWill Deacon 	switch (cap) {
1743e86d1aa8SWill Deacon 	case IOMMU_CAP_CACHE_COHERENCY:
1744e86d1aa8SWill Deacon 		return true;
1745e86d1aa8SWill Deacon 	case IOMMU_CAP_NOEXEC:
1746e86d1aa8SWill Deacon 		return true;
1747e86d1aa8SWill Deacon 	default:
1748e86d1aa8SWill Deacon 		return false;
1749e86d1aa8SWill Deacon 	}
1750e86d1aa8SWill Deacon }
1751e86d1aa8SWill Deacon 
1752e86d1aa8SWill Deacon static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1753e86d1aa8SWill Deacon {
1754e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain;
1755e86d1aa8SWill Deacon 
1756e86d1aa8SWill Deacon 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1757e86d1aa8SWill Deacon 	    type != IOMMU_DOMAIN_DMA &&
1758e86d1aa8SWill Deacon 	    type != IOMMU_DOMAIN_IDENTITY)
1759e86d1aa8SWill Deacon 		return NULL;
1760e86d1aa8SWill Deacon 
1761e86d1aa8SWill Deacon 	/*
1762e86d1aa8SWill Deacon 	 * Allocate the domain and initialise some of its data structures.
1763e86d1aa8SWill Deacon 	 * We can't really do anything meaningful until we've added a
1764e86d1aa8SWill Deacon 	 * master.
1765e86d1aa8SWill Deacon 	 */
1766e86d1aa8SWill Deacon 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1767e86d1aa8SWill Deacon 	if (!smmu_domain)
1768e86d1aa8SWill Deacon 		return NULL;
1769e86d1aa8SWill Deacon 
1770e86d1aa8SWill Deacon 	if (type == IOMMU_DOMAIN_DMA &&
1771e86d1aa8SWill Deacon 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1772e86d1aa8SWill Deacon 		kfree(smmu_domain);
1773e86d1aa8SWill Deacon 		return NULL;
1774e86d1aa8SWill Deacon 	}
1775e86d1aa8SWill Deacon 
1776e86d1aa8SWill Deacon 	mutex_init(&smmu_domain->init_mutex);
1777e86d1aa8SWill Deacon 	INIT_LIST_HEAD(&smmu_domain->devices);
1778e86d1aa8SWill Deacon 	spin_lock_init(&smmu_domain->devices_lock);
1779e86d1aa8SWill Deacon 
1780e86d1aa8SWill Deacon 	return &smmu_domain->domain;
1781e86d1aa8SWill Deacon }
1782e86d1aa8SWill Deacon 
1783e86d1aa8SWill Deacon static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1784e86d1aa8SWill Deacon {
1785e86d1aa8SWill Deacon 	int idx, size = 1 << span;
1786e86d1aa8SWill Deacon 
1787e86d1aa8SWill Deacon 	do {
1788e86d1aa8SWill Deacon 		idx = find_first_zero_bit(map, size);
1789e86d1aa8SWill Deacon 		if (idx == size)
1790e86d1aa8SWill Deacon 			return -ENOSPC;
1791e86d1aa8SWill Deacon 	} while (test_and_set_bit(idx, map));
1792e86d1aa8SWill Deacon 
1793e86d1aa8SWill Deacon 	return idx;
1794e86d1aa8SWill Deacon }
1795e86d1aa8SWill Deacon 
1796e86d1aa8SWill Deacon static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1797e86d1aa8SWill Deacon {
1798e86d1aa8SWill Deacon 	clear_bit(idx, map);
1799e86d1aa8SWill Deacon }
1800e86d1aa8SWill Deacon 
1801e86d1aa8SWill Deacon static void arm_smmu_domain_free(struct iommu_domain *domain)
1802e86d1aa8SWill Deacon {
1803e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1804e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1805e86d1aa8SWill Deacon 
1806e86d1aa8SWill Deacon 	iommu_put_dma_cookie(domain);
1807e86d1aa8SWill Deacon 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1808e86d1aa8SWill Deacon 
1809e86d1aa8SWill Deacon 	/* Free the CD and ASID, if we allocated them */
1810e86d1aa8SWill Deacon 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1811e86d1aa8SWill Deacon 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1812e86d1aa8SWill Deacon 
1813e86d1aa8SWill Deacon 		if (cfg->cdcfg.cdtab)
1814e86d1aa8SWill Deacon 			arm_smmu_free_cd_tables(smmu_domain);
1815e86d1aa8SWill Deacon 		arm_smmu_free_asid(&cfg->cd);
1816e86d1aa8SWill Deacon 	} else {
1817e86d1aa8SWill Deacon 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1818e86d1aa8SWill Deacon 		if (cfg->vmid)
1819e86d1aa8SWill Deacon 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1820e86d1aa8SWill Deacon 	}
1821e86d1aa8SWill Deacon 
1822e86d1aa8SWill Deacon 	kfree(smmu_domain);
1823e86d1aa8SWill Deacon }
1824e86d1aa8SWill Deacon 
1825e86d1aa8SWill Deacon static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1826e86d1aa8SWill Deacon 				       struct arm_smmu_master *master,
1827e86d1aa8SWill Deacon 				       struct io_pgtable_cfg *pgtbl_cfg)
1828e86d1aa8SWill Deacon {
1829e86d1aa8SWill Deacon 	int ret;
1830e86d1aa8SWill Deacon 	u32 asid;
1831e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1832e86d1aa8SWill Deacon 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1833e86d1aa8SWill Deacon 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1834e86d1aa8SWill Deacon 
1835e86d1aa8SWill Deacon 	ret = xa_alloc(&asid_xa, &asid, &cfg->cd,
1836e86d1aa8SWill Deacon 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1837e86d1aa8SWill Deacon 	if (ret)
1838e86d1aa8SWill Deacon 		return ret;
1839e86d1aa8SWill Deacon 
1840e86d1aa8SWill Deacon 	cfg->s1cdmax = master->ssid_bits;
1841e86d1aa8SWill Deacon 
1842e86d1aa8SWill Deacon 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
1843e86d1aa8SWill Deacon 	if (ret)
1844e86d1aa8SWill Deacon 		goto out_free_asid;
1845e86d1aa8SWill Deacon 
1846e86d1aa8SWill Deacon 	cfg->cd.asid	= (u16)asid;
1847e86d1aa8SWill Deacon 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1848e86d1aa8SWill Deacon 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1849e86d1aa8SWill Deacon 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1850e86d1aa8SWill Deacon 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1851e86d1aa8SWill Deacon 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1852e86d1aa8SWill Deacon 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1853e86d1aa8SWill Deacon 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1854e86d1aa8SWill Deacon 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1855e86d1aa8SWill Deacon 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
1856e86d1aa8SWill Deacon 
1857e86d1aa8SWill Deacon 	/*
1858e86d1aa8SWill Deacon 	 * Note that this will end up calling arm_smmu_sync_cd() before
1859e86d1aa8SWill Deacon 	 * the master has been added to the devices list for this domain.
1860e86d1aa8SWill Deacon 	 * This isn't an issue because the STE hasn't been installed yet.
1861e86d1aa8SWill Deacon 	 */
1862e86d1aa8SWill Deacon 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1863e86d1aa8SWill Deacon 	if (ret)
1864e86d1aa8SWill Deacon 		goto out_free_cd_tables;
1865e86d1aa8SWill Deacon 
1866e86d1aa8SWill Deacon 	return 0;
1867e86d1aa8SWill Deacon 
1868e86d1aa8SWill Deacon out_free_cd_tables:
1869e86d1aa8SWill Deacon 	arm_smmu_free_cd_tables(smmu_domain);
1870e86d1aa8SWill Deacon out_free_asid:
1871e86d1aa8SWill Deacon 	arm_smmu_free_asid(&cfg->cd);
1872e86d1aa8SWill Deacon 	return ret;
1873e86d1aa8SWill Deacon }
1874e86d1aa8SWill Deacon 
1875e86d1aa8SWill Deacon static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1876e86d1aa8SWill Deacon 				       struct arm_smmu_master *master,
1877e86d1aa8SWill Deacon 				       struct io_pgtable_cfg *pgtbl_cfg)
1878e86d1aa8SWill Deacon {
1879e86d1aa8SWill Deacon 	int vmid;
1880e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1881e86d1aa8SWill Deacon 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1882e86d1aa8SWill Deacon 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1883e86d1aa8SWill Deacon 
1884e86d1aa8SWill Deacon 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1885e86d1aa8SWill Deacon 	if (vmid < 0)
1886e86d1aa8SWill Deacon 		return vmid;
1887e86d1aa8SWill Deacon 
1888e86d1aa8SWill Deacon 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1889e86d1aa8SWill Deacon 	cfg->vmid	= (u16)vmid;
1890e86d1aa8SWill Deacon 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1891e86d1aa8SWill Deacon 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1892e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1893e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1894e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1895e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1896e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1897e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1898e86d1aa8SWill Deacon 	return 0;
1899e86d1aa8SWill Deacon }
1900e86d1aa8SWill Deacon 
1901e86d1aa8SWill Deacon static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1902e86d1aa8SWill Deacon 				    struct arm_smmu_master *master)
1903e86d1aa8SWill Deacon {
1904e86d1aa8SWill Deacon 	int ret;
1905e86d1aa8SWill Deacon 	unsigned long ias, oas;
1906e86d1aa8SWill Deacon 	enum io_pgtable_fmt fmt;
1907e86d1aa8SWill Deacon 	struct io_pgtable_cfg pgtbl_cfg;
1908e86d1aa8SWill Deacon 	struct io_pgtable_ops *pgtbl_ops;
1909e86d1aa8SWill Deacon 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1910e86d1aa8SWill Deacon 				 struct arm_smmu_master *,
1911e86d1aa8SWill Deacon 				 struct io_pgtable_cfg *);
1912e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1913e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1914e86d1aa8SWill Deacon 
1915e86d1aa8SWill Deacon 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1916e86d1aa8SWill Deacon 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1917e86d1aa8SWill Deacon 		return 0;
1918e86d1aa8SWill Deacon 	}
1919e86d1aa8SWill Deacon 
1920e86d1aa8SWill Deacon 	/* Restrict the stage to what we can actually support */
1921e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1922e86d1aa8SWill Deacon 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1923e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1924e86d1aa8SWill Deacon 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1925e86d1aa8SWill Deacon 
1926e86d1aa8SWill Deacon 	switch (smmu_domain->stage) {
1927e86d1aa8SWill Deacon 	case ARM_SMMU_DOMAIN_S1:
1928e86d1aa8SWill Deacon 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1929e86d1aa8SWill Deacon 		ias = min_t(unsigned long, ias, VA_BITS);
1930e86d1aa8SWill Deacon 		oas = smmu->ias;
1931e86d1aa8SWill Deacon 		fmt = ARM_64_LPAE_S1;
1932e86d1aa8SWill Deacon 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1933e86d1aa8SWill Deacon 		break;
1934e86d1aa8SWill Deacon 	case ARM_SMMU_DOMAIN_NESTED:
1935e86d1aa8SWill Deacon 	case ARM_SMMU_DOMAIN_S2:
1936e86d1aa8SWill Deacon 		ias = smmu->ias;
1937e86d1aa8SWill Deacon 		oas = smmu->oas;
1938e86d1aa8SWill Deacon 		fmt = ARM_64_LPAE_S2;
1939e86d1aa8SWill Deacon 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1940e86d1aa8SWill Deacon 		break;
1941e86d1aa8SWill Deacon 	default:
1942e86d1aa8SWill Deacon 		return -EINVAL;
1943e86d1aa8SWill Deacon 	}
1944e86d1aa8SWill Deacon 
1945e86d1aa8SWill Deacon 	pgtbl_cfg = (struct io_pgtable_cfg) {
1946e86d1aa8SWill Deacon 		.pgsize_bitmap	= smmu->pgsize_bitmap,
1947e86d1aa8SWill Deacon 		.ias		= ias,
1948e86d1aa8SWill Deacon 		.oas		= oas,
1949e86d1aa8SWill Deacon 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
1950e86d1aa8SWill Deacon 		.tlb		= &arm_smmu_flush_ops,
1951e86d1aa8SWill Deacon 		.iommu_dev	= smmu->dev,
1952e86d1aa8SWill Deacon 	};
1953e86d1aa8SWill Deacon 
1954e86d1aa8SWill Deacon 	if (smmu_domain->non_strict)
1955e86d1aa8SWill Deacon 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1956e86d1aa8SWill Deacon 
1957e86d1aa8SWill Deacon 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1958e86d1aa8SWill Deacon 	if (!pgtbl_ops)
1959e86d1aa8SWill Deacon 		return -ENOMEM;
1960e86d1aa8SWill Deacon 
1961e86d1aa8SWill Deacon 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1962e86d1aa8SWill Deacon 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1963e86d1aa8SWill Deacon 	domain->geometry.force_aperture = true;
1964e86d1aa8SWill Deacon 
1965e86d1aa8SWill Deacon 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
1966e86d1aa8SWill Deacon 	if (ret < 0) {
1967e86d1aa8SWill Deacon 		free_io_pgtable_ops(pgtbl_ops);
1968e86d1aa8SWill Deacon 		return ret;
1969e86d1aa8SWill Deacon 	}
1970e86d1aa8SWill Deacon 
1971e86d1aa8SWill Deacon 	smmu_domain->pgtbl_ops = pgtbl_ops;
1972e86d1aa8SWill Deacon 	return 0;
1973e86d1aa8SWill Deacon }
1974e86d1aa8SWill Deacon 
1975e86d1aa8SWill Deacon static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1976e86d1aa8SWill Deacon {
1977e86d1aa8SWill Deacon 	__le64 *step;
1978e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1979e86d1aa8SWill Deacon 
1980e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1981e86d1aa8SWill Deacon 		struct arm_smmu_strtab_l1_desc *l1_desc;
1982e86d1aa8SWill Deacon 		int idx;
1983e86d1aa8SWill Deacon 
1984e86d1aa8SWill Deacon 		/* Two-level walk */
1985e86d1aa8SWill Deacon 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1986e86d1aa8SWill Deacon 		l1_desc = &cfg->l1_desc[idx];
1987e86d1aa8SWill Deacon 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1988e86d1aa8SWill Deacon 		step = &l1_desc->l2ptr[idx];
1989e86d1aa8SWill Deacon 	} else {
1990e86d1aa8SWill Deacon 		/* Simple linear lookup */
1991e86d1aa8SWill Deacon 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1992e86d1aa8SWill Deacon 	}
1993e86d1aa8SWill Deacon 
1994e86d1aa8SWill Deacon 	return step;
1995e86d1aa8SWill Deacon }
1996e86d1aa8SWill Deacon 
1997e86d1aa8SWill Deacon static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
1998e86d1aa8SWill Deacon {
1999e86d1aa8SWill Deacon 	int i, j;
2000e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = master->smmu;
2001e86d1aa8SWill Deacon 
2002e86d1aa8SWill Deacon 	for (i = 0; i < master->num_sids; ++i) {
2003e86d1aa8SWill Deacon 		u32 sid = master->sids[i];
2004e86d1aa8SWill Deacon 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2005e86d1aa8SWill Deacon 
2006e86d1aa8SWill Deacon 		/* Bridged PCI devices may end up with duplicated IDs */
2007e86d1aa8SWill Deacon 		for (j = 0; j < i; j++)
2008e86d1aa8SWill Deacon 			if (master->sids[j] == sid)
2009e86d1aa8SWill Deacon 				break;
2010e86d1aa8SWill Deacon 		if (j < i)
2011e86d1aa8SWill Deacon 			continue;
2012e86d1aa8SWill Deacon 
2013e86d1aa8SWill Deacon 		arm_smmu_write_strtab_ent(master, sid, step);
2014e86d1aa8SWill Deacon 	}
2015e86d1aa8SWill Deacon }
2016e86d1aa8SWill Deacon 
2017e86d1aa8SWill Deacon static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2018e86d1aa8SWill Deacon {
2019e86d1aa8SWill Deacon 	struct device *dev = master->dev;
2020e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = master->smmu;
2021e86d1aa8SWill Deacon 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2022e86d1aa8SWill Deacon 
2023e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2024e86d1aa8SWill Deacon 		return false;
2025e86d1aa8SWill Deacon 
2026e86d1aa8SWill Deacon 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2027e86d1aa8SWill Deacon 		return false;
2028e86d1aa8SWill Deacon 
2029e86d1aa8SWill Deacon 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2030e86d1aa8SWill Deacon }
2031e86d1aa8SWill Deacon 
2032e86d1aa8SWill Deacon static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2033e86d1aa8SWill Deacon {
2034e86d1aa8SWill Deacon 	size_t stu;
2035e86d1aa8SWill Deacon 	struct pci_dev *pdev;
2036e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = master->smmu;
2037e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = master->domain;
2038e86d1aa8SWill Deacon 
2039e86d1aa8SWill Deacon 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2040e86d1aa8SWill Deacon 	if (!master->ats_enabled)
2041e86d1aa8SWill Deacon 		return;
2042e86d1aa8SWill Deacon 
2043e86d1aa8SWill Deacon 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2044e86d1aa8SWill Deacon 	stu = __ffs(smmu->pgsize_bitmap);
2045e86d1aa8SWill Deacon 	pdev = to_pci_dev(master->dev);
2046e86d1aa8SWill Deacon 
2047e86d1aa8SWill Deacon 	atomic_inc(&smmu_domain->nr_ats_masters);
2048e86d1aa8SWill Deacon 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2049e86d1aa8SWill Deacon 	if (pci_enable_ats(pdev, stu))
2050e86d1aa8SWill Deacon 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2051e86d1aa8SWill Deacon }
2052e86d1aa8SWill Deacon 
2053e86d1aa8SWill Deacon static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2054e86d1aa8SWill Deacon {
2055e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = master->domain;
2056e86d1aa8SWill Deacon 
2057e86d1aa8SWill Deacon 	if (!master->ats_enabled)
2058e86d1aa8SWill Deacon 		return;
2059e86d1aa8SWill Deacon 
2060e86d1aa8SWill Deacon 	pci_disable_ats(to_pci_dev(master->dev));
2061e86d1aa8SWill Deacon 	/*
2062e86d1aa8SWill Deacon 	 * Ensure ATS is disabled at the endpoint before we issue the
2063e86d1aa8SWill Deacon 	 * ATC invalidation via the SMMU.
2064e86d1aa8SWill Deacon 	 */
2065e86d1aa8SWill Deacon 	wmb();
2066e86d1aa8SWill Deacon 	arm_smmu_atc_inv_master(master);
2067e86d1aa8SWill Deacon 	atomic_dec(&smmu_domain->nr_ats_masters);
2068e86d1aa8SWill Deacon }
2069e86d1aa8SWill Deacon 
2070e86d1aa8SWill Deacon static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2071e86d1aa8SWill Deacon {
2072e86d1aa8SWill Deacon 	int ret;
2073e86d1aa8SWill Deacon 	int features;
2074e86d1aa8SWill Deacon 	int num_pasids;
2075e86d1aa8SWill Deacon 	struct pci_dev *pdev;
2076e86d1aa8SWill Deacon 
2077e86d1aa8SWill Deacon 	if (!dev_is_pci(master->dev))
2078e86d1aa8SWill Deacon 		return -ENODEV;
2079e86d1aa8SWill Deacon 
2080e86d1aa8SWill Deacon 	pdev = to_pci_dev(master->dev);
2081e86d1aa8SWill Deacon 
2082e86d1aa8SWill Deacon 	features = pci_pasid_features(pdev);
2083e86d1aa8SWill Deacon 	if (features < 0)
2084e86d1aa8SWill Deacon 		return features;
2085e86d1aa8SWill Deacon 
2086e86d1aa8SWill Deacon 	num_pasids = pci_max_pasids(pdev);
2087e86d1aa8SWill Deacon 	if (num_pasids <= 0)
2088e86d1aa8SWill Deacon 		return num_pasids;
2089e86d1aa8SWill Deacon 
2090e86d1aa8SWill Deacon 	ret = pci_enable_pasid(pdev, features);
2091e86d1aa8SWill Deacon 	if (ret) {
2092e86d1aa8SWill Deacon 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2093e86d1aa8SWill Deacon 		return ret;
2094e86d1aa8SWill Deacon 	}
2095e86d1aa8SWill Deacon 
2096e86d1aa8SWill Deacon 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2097e86d1aa8SWill Deacon 				  master->smmu->ssid_bits);
2098e86d1aa8SWill Deacon 	return 0;
2099e86d1aa8SWill Deacon }
2100e86d1aa8SWill Deacon 
2101e86d1aa8SWill Deacon static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2102e86d1aa8SWill Deacon {
2103e86d1aa8SWill Deacon 	struct pci_dev *pdev;
2104e86d1aa8SWill Deacon 
2105e86d1aa8SWill Deacon 	if (!dev_is_pci(master->dev))
2106e86d1aa8SWill Deacon 		return;
2107e86d1aa8SWill Deacon 
2108e86d1aa8SWill Deacon 	pdev = to_pci_dev(master->dev);
2109e86d1aa8SWill Deacon 
2110e86d1aa8SWill Deacon 	if (!pdev->pasid_enabled)
2111e86d1aa8SWill Deacon 		return;
2112e86d1aa8SWill Deacon 
2113e86d1aa8SWill Deacon 	master->ssid_bits = 0;
2114e86d1aa8SWill Deacon 	pci_disable_pasid(pdev);
2115e86d1aa8SWill Deacon }
2116e86d1aa8SWill Deacon 
2117e86d1aa8SWill Deacon static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2118e86d1aa8SWill Deacon {
2119e86d1aa8SWill Deacon 	unsigned long flags;
2120e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = master->domain;
2121e86d1aa8SWill Deacon 
2122e86d1aa8SWill Deacon 	if (!smmu_domain)
2123e86d1aa8SWill Deacon 		return;
2124e86d1aa8SWill Deacon 
2125e86d1aa8SWill Deacon 	arm_smmu_disable_ats(master);
2126e86d1aa8SWill Deacon 
2127e86d1aa8SWill Deacon 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2128e86d1aa8SWill Deacon 	list_del(&master->domain_head);
2129e86d1aa8SWill Deacon 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2130e86d1aa8SWill Deacon 
2131e86d1aa8SWill Deacon 	master->domain = NULL;
2132e86d1aa8SWill Deacon 	master->ats_enabled = false;
2133e86d1aa8SWill Deacon 	arm_smmu_install_ste_for_dev(master);
2134e86d1aa8SWill Deacon }
2135e86d1aa8SWill Deacon 
2136e86d1aa8SWill Deacon static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2137e86d1aa8SWill Deacon {
2138e86d1aa8SWill Deacon 	int ret = 0;
2139e86d1aa8SWill Deacon 	unsigned long flags;
2140e86d1aa8SWill Deacon 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2141e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu;
2142e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2143e86d1aa8SWill Deacon 	struct arm_smmu_master *master;
2144e86d1aa8SWill Deacon 
2145e86d1aa8SWill Deacon 	if (!fwspec)
2146e86d1aa8SWill Deacon 		return -ENOENT;
2147e86d1aa8SWill Deacon 
2148e86d1aa8SWill Deacon 	master = dev_iommu_priv_get(dev);
2149e86d1aa8SWill Deacon 	smmu = master->smmu;
2150e86d1aa8SWill Deacon 
2151e86d1aa8SWill Deacon 	arm_smmu_detach_dev(master);
2152e86d1aa8SWill Deacon 
2153e86d1aa8SWill Deacon 	mutex_lock(&smmu_domain->init_mutex);
2154e86d1aa8SWill Deacon 
2155e86d1aa8SWill Deacon 	if (!smmu_domain->smmu) {
2156e86d1aa8SWill Deacon 		smmu_domain->smmu = smmu;
2157e86d1aa8SWill Deacon 		ret = arm_smmu_domain_finalise(domain, master);
2158e86d1aa8SWill Deacon 		if (ret) {
2159e86d1aa8SWill Deacon 			smmu_domain->smmu = NULL;
2160e86d1aa8SWill Deacon 			goto out_unlock;
2161e86d1aa8SWill Deacon 		}
2162e86d1aa8SWill Deacon 	} else if (smmu_domain->smmu != smmu) {
2163e86d1aa8SWill Deacon 		dev_err(dev,
2164e86d1aa8SWill Deacon 			"cannot attach to SMMU %s (upstream of %s)\n",
2165e86d1aa8SWill Deacon 			dev_name(smmu_domain->smmu->dev),
2166e86d1aa8SWill Deacon 			dev_name(smmu->dev));
2167e86d1aa8SWill Deacon 		ret = -ENXIO;
2168e86d1aa8SWill Deacon 		goto out_unlock;
2169e86d1aa8SWill Deacon 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2170e86d1aa8SWill Deacon 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2171e86d1aa8SWill Deacon 		dev_err(dev,
2172e86d1aa8SWill Deacon 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2173e86d1aa8SWill Deacon 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2174e86d1aa8SWill Deacon 		ret = -EINVAL;
2175e86d1aa8SWill Deacon 		goto out_unlock;
2176e86d1aa8SWill Deacon 	}
2177e86d1aa8SWill Deacon 
2178e86d1aa8SWill Deacon 	master->domain = smmu_domain;
2179e86d1aa8SWill Deacon 
2180e86d1aa8SWill Deacon 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2181e86d1aa8SWill Deacon 		master->ats_enabled = arm_smmu_ats_supported(master);
2182e86d1aa8SWill Deacon 
2183e86d1aa8SWill Deacon 	arm_smmu_install_ste_for_dev(master);
2184e86d1aa8SWill Deacon 
2185e86d1aa8SWill Deacon 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2186e86d1aa8SWill Deacon 	list_add(&master->domain_head, &smmu_domain->devices);
2187e86d1aa8SWill Deacon 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2188e86d1aa8SWill Deacon 
2189e86d1aa8SWill Deacon 	arm_smmu_enable_ats(master);
2190e86d1aa8SWill Deacon 
2191e86d1aa8SWill Deacon out_unlock:
2192e86d1aa8SWill Deacon 	mutex_unlock(&smmu_domain->init_mutex);
2193e86d1aa8SWill Deacon 	return ret;
2194e86d1aa8SWill Deacon }
2195e86d1aa8SWill Deacon 
2196e86d1aa8SWill Deacon static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2197e86d1aa8SWill Deacon 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2198e86d1aa8SWill Deacon {
2199e86d1aa8SWill Deacon 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2200e86d1aa8SWill Deacon 
2201e86d1aa8SWill Deacon 	if (!ops)
2202e86d1aa8SWill Deacon 		return -ENODEV;
2203e86d1aa8SWill Deacon 
2204e46b3c0dSJoerg Roedel 	return ops->map(ops, iova, paddr, size, prot, gfp);
2205e86d1aa8SWill Deacon }
2206e86d1aa8SWill Deacon 
2207e86d1aa8SWill Deacon static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2208e86d1aa8SWill Deacon 			     size_t size, struct iommu_iotlb_gather *gather)
2209e86d1aa8SWill Deacon {
2210e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2211e86d1aa8SWill Deacon 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2212e86d1aa8SWill Deacon 
2213e86d1aa8SWill Deacon 	if (!ops)
2214e86d1aa8SWill Deacon 		return 0;
2215e86d1aa8SWill Deacon 
2216e86d1aa8SWill Deacon 	return ops->unmap(ops, iova, size, gather);
2217e86d1aa8SWill Deacon }
2218e86d1aa8SWill Deacon 
2219e86d1aa8SWill Deacon static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2220e86d1aa8SWill Deacon {
2221e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2222e86d1aa8SWill Deacon 
2223e86d1aa8SWill Deacon 	if (smmu_domain->smmu)
2224e86d1aa8SWill Deacon 		arm_smmu_tlb_inv_context(smmu_domain);
2225e86d1aa8SWill Deacon }
2226e86d1aa8SWill Deacon 
2227e86d1aa8SWill Deacon static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2228e86d1aa8SWill Deacon 				struct iommu_iotlb_gather *gather)
2229e86d1aa8SWill Deacon {
2230e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2231e86d1aa8SWill Deacon 
2232e86d1aa8SWill Deacon 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
2233e86d1aa8SWill Deacon 			       gather->pgsize, true, smmu_domain);
2234e86d1aa8SWill Deacon }
2235e86d1aa8SWill Deacon 
2236e86d1aa8SWill Deacon static phys_addr_t
2237e86d1aa8SWill Deacon arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2238e86d1aa8SWill Deacon {
2239e86d1aa8SWill Deacon 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2240e86d1aa8SWill Deacon 
2241e86d1aa8SWill Deacon 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2242e86d1aa8SWill Deacon 		return iova;
2243e86d1aa8SWill Deacon 
2244e86d1aa8SWill Deacon 	if (!ops)
2245e86d1aa8SWill Deacon 		return 0;
2246e86d1aa8SWill Deacon 
2247e86d1aa8SWill Deacon 	return ops->iova_to_phys(ops, iova);
2248e86d1aa8SWill Deacon }
2249e86d1aa8SWill Deacon 
2250e86d1aa8SWill Deacon static struct platform_driver arm_smmu_driver;
2251e86d1aa8SWill Deacon 
2252e86d1aa8SWill Deacon static
2253e86d1aa8SWill Deacon struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2254e86d1aa8SWill Deacon {
2255e86d1aa8SWill Deacon 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2256e86d1aa8SWill Deacon 							  fwnode);
2257e86d1aa8SWill Deacon 	put_device(dev);
2258e86d1aa8SWill Deacon 	return dev ? dev_get_drvdata(dev) : NULL;
2259e86d1aa8SWill Deacon }
2260e86d1aa8SWill Deacon 
2261e86d1aa8SWill Deacon static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2262e86d1aa8SWill Deacon {
2263e86d1aa8SWill Deacon 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2264e86d1aa8SWill Deacon 
2265e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2266e86d1aa8SWill Deacon 		limit *= 1UL << STRTAB_SPLIT;
2267e86d1aa8SWill Deacon 
2268e86d1aa8SWill Deacon 	return sid < limit;
2269e86d1aa8SWill Deacon }
2270e86d1aa8SWill Deacon 
2271e86d1aa8SWill Deacon static struct iommu_ops arm_smmu_ops;
2272e86d1aa8SWill Deacon 
2273e86d1aa8SWill Deacon static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2274e86d1aa8SWill Deacon {
2275e86d1aa8SWill Deacon 	int i, ret;
2276e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu;
2277e86d1aa8SWill Deacon 	struct arm_smmu_master *master;
2278e86d1aa8SWill Deacon 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2279e86d1aa8SWill Deacon 
2280e86d1aa8SWill Deacon 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2281e86d1aa8SWill Deacon 		return ERR_PTR(-ENODEV);
2282e86d1aa8SWill Deacon 
2283e86d1aa8SWill Deacon 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2284e86d1aa8SWill Deacon 		return ERR_PTR(-EBUSY);
2285e86d1aa8SWill Deacon 
2286e86d1aa8SWill Deacon 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2287e86d1aa8SWill Deacon 	if (!smmu)
2288e86d1aa8SWill Deacon 		return ERR_PTR(-ENODEV);
2289e86d1aa8SWill Deacon 
2290e86d1aa8SWill Deacon 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2291e86d1aa8SWill Deacon 	if (!master)
2292e86d1aa8SWill Deacon 		return ERR_PTR(-ENOMEM);
2293e86d1aa8SWill Deacon 
2294e86d1aa8SWill Deacon 	master->dev = dev;
2295e86d1aa8SWill Deacon 	master->smmu = smmu;
2296e86d1aa8SWill Deacon 	master->sids = fwspec->ids;
2297e86d1aa8SWill Deacon 	master->num_sids = fwspec->num_ids;
2298e86d1aa8SWill Deacon 	dev_iommu_priv_set(dev, master);
2299e86d1aa8SWill Deacon 
2300e86d1aa8SWill Deacon 	/* Check the SIDs are in range of the SMMU and our stream table */
2301e86d1aa8SWill Deacon 	for (i = 0; i < master->num_sids; i++) {
2302e86d1aa8SWill Deacon 		u32 sid = master->sids[i];
2303e86d1aa8SWill Deacon 
2304e86d1aa8SWill Deacon 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2305e86d1aa8SWill Deacon 			ret = -ERANGE;
2306e86d1aa8SWill Deacon 			goto err_free_master;
2307e86d1aa8SWill Deacon 		}
2308e86d1aa8SWill Deacon 
2309e86d1aa8SWill Deacon 		/* Ensure l2 strtab is initialised */
2310e86d1aa8SWill Deacon 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2311e86d1aa8SWill Deacon 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2312e86d1aa8SWill Deacon 			if (ret)
2313e86d1aa8SWill Deacon 				goto err_free_master;
2314e86d1aa8SWill Deacon 		}
2315e86d1aa8SWill Deacon 	}
2316e86d1aa8SWill Deacon 
2317e86d1aa8SWill Deacon 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2318e86d1aa8SWill Deacon 
2319e86d1aa8SWill Deacon 	/*
2320e86d1aa8SWill Deacon 	 * Note that PASID must be enabled before, and disabled after ATS:
2321e86d1aa8SWill Deacon 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2322e86d1aa8SWill Deacon 	 *
2323e86d1aa8SWill Deacon 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2324e86d1aa8SWill Deacon 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2325e86d1aa8SWill Deacon 	 *   are changed.
2326e86d1aa8SWill Deacon 	 */
2327e86d1aa8SWill Deacon 	arm_smmu_enable_pasid(master);
2328e86d1aa8SWill Deacon 
2329e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2330e86d1aa8SWill Deacon 		master->ssid_bits = min_t(u8, master->ssid_bits,
2331e86d1aa8SWill Deacon 					  CTXDESC_LINEAR_CDMAX);
2332e86d1aa8SWill Deacon 
2333e86d1aa8SWill Deacon 	return &smmu->iommu;
2334e86d1aa8SWill Deacon 
2335e86d1aa8SWill Deacon err_free_master:
2336e86d1aa8SWill Deacon 	kfree(master);
2337e86d1aa8SWill Deacon 	dev_iommu_priv_set(dev, NULL);
2338e86d1aa8SWill Deacon 	return ERR_PTR(ret);
2339e86d1aa8SWill Deacon }
2340e86d1aa8SWill Deacon 
2341e86d1aa8SWill Deacon static void arm_smmu_release_device(struct device *dev)
2342e86d1aa8SWill Deacon {
2343e86d1aa8SWill Deacon 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2344e86d1aa8SWill Deacon 	struct arm_smmu_master *master;
2345e86d1aa8SWill Deacon 
2346e86d1aa8SWill Deacon 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2347e86d1aa8SWill Deacon 		return;
2348e86d1aa8SWill Deacon 
2349e86d1aa8SWill Deacon 	master = dev_iommu_priv_get(dev);
2350e86d1aa8SWill Deacon 	arm_smmu_detach_dev(master);
2351e86d1aa8SWill Deacon 	arm_smmu_disable_pasid(master);
2352e86d1aa8SWill Deacon 	kfree(master);
2353e86d1aa8SWill Deacon 	iommu_fwspec_free(dev);
2354e86d1aa8SWill Deacon }
2355e86d1aa8SWill Deacon 
2356e86d1aa8SWill Deacon static struct iommu_group *arm_smmu_device_group(struct device *dev)
2357e86d1aa8SWill Deacon {
2358e86d1aa8SWill Deacon 	struct iommu_group *group;
2359e86d1aa8SWill Deacon 
2360e86d1aa8SWill Deacon 	/*
2361e86d1aa8SWill Deacon 	 * We don't support devices sharing stream IDs other than PCI RID
2362e86d1aa8SWill Deacon 	 * aliases, since the necessary ID-to-device lookup becomes rather
2363e86d1aa8SWill Deacon 	 * impractical given a potential sparse 32-bit stream ID space.
2364e86d1aa8SWill Deacon 	 */
2365e86d1aa8SWill Deacon 	if (dev_is_pci(dev))
2366e86d1aa8SWill Deacon 		group = pci_device_group(dev);
2367e86d1aa8SWill Deacon 	else
2368e86d1aa8SWill Deacon 		group = generic_device_group(dev);
2369e86d1aa8SWill Deacon 
2370e86d1aa8SWill Deacon 	return group;
2371e86d1aa8SWill Deacon }
2372e86d1aa8SWill Deacon 
2373e86d1aa8SWill Deacon static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2374e86d1aa8SWill Deacon 				    enum iommu_attr attr, void *data)
2375e86d1aa8SWill Deacon {
2376e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2377e86d1aa8SWill Deacon 
2378e86d1aa8SWill Deacon 	switch (domain->type) {
2379e86d1aa8SWill Deacon 	case IOMMU_DOMAIN_UNMANAGED:
2380e86d1aa8SWill Deacon 		switch (attr) {
2381e86d1aa8SWill Deacon 		case DOMAIN_ATTR_NESTING:
2382e86d1aa8SWill Deacon 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2383e86d1aa8SWill Deacon 			return 0;
2384e86d1aa8SWill Deacon 		default:
2385e86d1aa8SWill Deacon 			return -ENODEV;
2386e86d1aa8SWill Deacon 		}
2387e86d1aa8SWill Deacon 		break;
2388e86d1aa8SWill Deacon 	case IOMMU_DOMAIN_DMA:
2389e86d1aa8SWill Deacon 		switch (attr) {
2390e86d1aa8SWill Deacon 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2391e86d1aa8SWill Deacon 			*(int *)data = smmu_domain->non_strict;
2392e86d1aa8SWill Deacon 			return 0;
2393e86d1aa8SWill Deacon 		default:
2394e86d1aa8SWill Deacon 			return -ENODEV;
2395e86d1aa8SWill Deacon 		}
2396e86d1aa8SWill Deacon 		break;
2397e86d1aa8SWill Deacon 	default:
2398e86d1aa8SWill Deacon 		return -EINVAL;
2399e86d1aa8SWill Deacon 	}
2400e86d1aa8SWill Deacon }
2401e86d1aa8SWill Deacon 
2402e86d1aa8SWill Deacon static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2403e86d1aa8SWill Deacon 				    enum iommu_attr attr, void *data)
2404e86d1aa8SWill Deacon {
2405e86d1aa8SWill Deacon 	int ret = 0;
2406e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2407e86d1aa8SWill Deacon 
2408e86d1aa8SWill Deacon 	mutex_lock(&smmu_domain->init_mutex);
2409e86d1aa8SWill Deacon 
2410e86d1aa8SWill Deacon 	switch (domain->type) {
2411e86d1aa8SWill Deacon 	case IOMMU_DOMAIN_UNMANAGED:
2412e86d1aa8SWill Deacon 		switch (attr) {
2413e86d1aa8SWill Deacon 		case DOMAIN_ATTR_NESTING:
2414e86d1aa8SWill Deacon 			if (smmu_domain->smmu) {
2415e86d1aa8SWill Deacon 				ret = -EPERM;
2416e86d1aa8SWill Deacon 				goto out_unlock;
2417e86d1aa8SWill Deacon 			}
2418e86d1aa8SWill Deacon 
2419e86d1aa8SWill Deacon 			if (*(int *)data)
2420e86d1aa8SWill Deacon 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2421e86d1aa8SWill Deacon 			else
2422e86d1aa8SWill Deacon 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2423e86d1aa8SWill Deacon 			break;
2424e86d1aa8SWill Deacon 		default:
2425e86d1aa8SWill Deacon 			ret = -ENODEV;
2426e86d1aa8SWill Deacon 		}
2427e86d1aa8SWill Deacon 		break;
2428e86d1aa8SWill Deacon 	case IOMMU_DOMAIN_DMA:
2429e86d1aa8SWill Deacon 		switch(attr) {
2430e86d1aa8SWill Deacon 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2431e86d1aa8SWill Deacon 			smmu_domain->non_strict = *(int *)data;
2432e86d1aa8SWill Deacon 			break;
2433e86d1aa8SWill Deacon 		default:
2434e86d1aa8SWill Deacon 			ret = -ENODEV;
2435e86d1aa8SWill Deacon 		}
2436e86d1aa8SWill Deacon 		break;
2437e86d1aa8SWill Deacon 	default:
2438e86d1aa8SWill Deacon 		ret = -EINVAL;
2439e86d1aa8SWill Deacon 	}
2440e86d1aa8SWill Deacon 
2441e86d1aa8SWill Deacon out_unlock:
2442e86d1aa8SWill Deacon 	mutex_unlock(&smmu_domain->init_mutex);
2443e86d1aa8SWill Deacon 	return ret;
2444e86d1aa8SWill Deacon }
2445e86d1aa8SWill Deacon 
2446e86d1aa8SWill Deacon static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2447e86d1aa8SWill Deacon {
2448e86d1aa8SWill Deacon 	return iommu_fwspec_add_ids(dev, args->args, 1);
2449e86d1aa8SWill Deacon }
2450e86d1aa8SWill Deacon 
2451e86d1aa8SWill Deacon static void arm_smmu_get_resv_regions(struct device *dev,
2452e86d1aa8SWill Deacon 				      struct list_head *head)
2453e86d1aa8SWill Deacon {
2454e86d1aa8SWill Deacon 	struct iommu_resv_region *region;
2455e86d1aa8SWill Deacon 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2456e86d1aa8SWill Deacon 
2457e86d1aa8SWill Deacon 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2458e86d1aa8SWill Deacon 					 prot, IOMMU_RESV_SW_MSI);
2459e86d1aa8SWill Deacon 	if (!region)
2460e86d1aa8SWill Deacon 		return;
2461e86d1aa8SWill Deacon 
2462e86d1aa8SWill Deacon 	list_add_tail(&region->list, head);
2463e86d1aa8SWill Deacon 
2464e86d1aa8SWill Deacon 	iommu_dma_get_resv_regions(dev, head);
2465e86d1aa8SWill Deacon }
2466e86d1aa8SWill Deacon 
2467e86d1aa8SWill Deacon static struct iommu_ops arm_smmu_ops = {
2468e86d1aa8SWill Deacon 	.capable		= arm_smmu_capable,
2469e86d1aa8SWill Deacon 	.domain_alloc		= arm_smmu_domain_alloc,
2470e86d1aa8SWill Deacon 	.domain_free		= arm_smmu_domain_free,
2471e86d1aa8SWill Deacon 	.attach_dev		= arm_smmu_attach_dev,
2472e86d1aa8SWill Deacon 	.map			= arm_smmu_map,
2473e86d1aa8SWill Deacon 	.unmap			= arm_smmu_unmap,
2474e86d1aa8SWill Deacon 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2475e86d1aa8SWill Deacon 	.iotlb_sync		= arm_smmu_iotlb_sync,
2476e86d1aa8SWill Deacon 	.iova_to_phys		= arm_smmu_iova_to_phys,
2477e86d1aa8SWill Deacon 	.probe_device		= arm_smmu_probe_device,
2478e86d1aa8SWill Deacon 	.release_device		= arm_smmu_release_device,
2479e86d1aa8SWill Deacon 	.device_group		= arm_smmu_device_group,
2480e86d1aa8SWill Deacon 	.domain_get_attr	= arm_smmu_domain_get_attr,
2481e86d1aa8SWill Deacon 	.domain_set_attr	= arm_smmu_domain_set_attr,
2482e86d1aa8SWill Deacon 	.of_xlate		= arm_smmu_of_xlate,
2483e86d1aa8SWill Deacon 	.get_resv_regions	= arm_smmu_get_resv_regions,
2484e86d1aa8SWill Deacon 	.put_resv_regions	= generic_iommu_put_resv_regions,
2485e86d1aa8SWill Deacon 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2486e86d1aa8SWill Deacon };
2487e86d1aa8SWill Deacon 
2488e86d1aa8SWill Deacon /* Probing and initialisation functions */
2489e86d1aa8SWill Deacon static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2490e86d1aa8SWill Deacon 				   struct arm_smmu_queue *q,
2491e86d1aa8SWill Deacon 				   unsigned long prod_off,
2492e86d1aa8SWill Deacon 				   unsigned long cons_off,
2493e86d1aa8SWill Deacon 				   size_t dwords, const char *name)
2494e86d1aa8SWill Deacon {
2495e86d1aa8SWill Deacon 	size_t qsz;
2496e86d1aa8SWill Deacon 
2497e86d1aa8SWill Deacon 	do {
2498e86d1aa8SWill Deacon 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2499e86d1aa8SWill Deacon 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2500e86d1aa8SWill Deacon 					      GFP_KERNEL);
2501e86d1aa8SWill Deacon 		if (q->base || qsz < PAGE_SIZE)
2502e86d1aa8SWill Deacon 			break;
2503e86d1aa8SWill Deacon 
2504e86d1aa8SWill Deacon 		q->llq.max_n_shift--;
2505e86d1aa8SWill Deacon 	} while (1);
2506e86d1aa8SWill Deacon 
2507e86d1aa8SWill Deacon 	if (!q->base) {
2508e86d1aa8SWill Deacon 		dev_err(smmu->dev,
2509e86d1aa8SWill Deacon 			"failed to allocate queue (0x%zx bytes) for %s\n",
2510e86d1aa8SWill Deacon 			qsz, name);
2511e86d1aa8SWill Deacon 		return -ENOMEM;
2512e86d1aa8SWill Deacon 	}
2513e86d1aa8SWill Deacon 
2514e86d1aa8SWill Deacon 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2515e86d1aa8SWill Deacon 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2516e86d1aa8SWill Deacon 			 1 << q->llq.max_n_shift, name);
2517e86d1aa8SWill Deacon 	}
2518e86d1aa8SWill Deacon 
2519e86d1aa8SWill Deacon 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
2520e86d1aa8SWill Deacon 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
2521e86d1aa8SWill Deacon 	q->ent_dwords	= dwords;
2522e86d1aa8SWill Deacon 
2523e86d1aa8SWill Deacon 	q->q_base  = Q_BASE_RWA;
2524e86d1aa8SWill Deacon 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2525e86d1aa8SWill Deacon 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2526e86d1aa8SWill Deacon 
2527e86d1aa8SWill Deacon 	q->llq.prod = q->llq.cons = 0;
2528e86d1aa8SWill Deacon 	return 0;
2529e86d1aa8SWill Deacon }
2530e86d1aa8SWill Deacon 
/* devm action callback: release the cmdq valid-bit map. */
static void arm_smmu_cmdq_free_bitmap(void *data)
{
	bitmap_free(data);
}
2536e86d1aa8SWill Deacon 
2537e86d1aa8SWill Deacon static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2538e86d1aa8SWill Deacon {
2539e86d1aa8SWill Deacon 	int ret = 0;
2540e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2541e86d1aa8SWill Deacon 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2542e86d1aa8SWill Deacon 	atomic_long_t *bitmap;
2543e86d1aa8SWill Deacon 
2544e86d1aa8SWill Deacon 	atomic_set(&cmdq->owner_prod, 0);
2545e86d1aa8SWill Deacon 	atomic_set(&cmdq->lock, 0);
2546e86d1aa8SWill Deacon 
2547e86d1aa8SWill Deacon 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2548e86d1aa8SWill Deacon 	if (!bitmap) {
2549e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2550e86d1aa8SWill Deacon 		ret = -ENOMEM;
2551e86d1aa8SWill Deacon 	} else {
2552e86d1aa8SWill Deacon 		cmdq->valid_map = bitmap;
2553e86d1aa8SWill Deacon 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2554e86d1aa8SWill Deacon 	}
2555e86d1aa8SWill Deacon 
2556e86d1aa8SWill Deacon 	return ret;
2557e86d1aa8SWill Deacon }
2558e86d1aa8SWill Deacon 
2559e86d1aa8SWill Deacon static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2560e86d1aa8SWill Deacon {
2561e86d1aa8SWill Deacon 	int ret;
2562e86d1aa8SWill Deacon 
2563e86d1aa8SWill Deacon 	/* cmdq */
2564e86d1aa8SWill Deacon 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2565e86d1aa8SWill Deacon 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2566e86d1aa8SWill Deacon 				      "cmdq");
2567e86d1aa8SWill Deacon 	if (ret)
2568e86d1aa8SWill Deacon 		return ret;
2569e86d1aa8SWill Deacon 
2570e86d1aa8SWill Deacon 	ret = arm_smmu_cmdq_init(smmu);
2571e86d1aa8SWill Deacon 	if (ret)
2572e86d1aa8SWill Deacon 		return ret;
2573e86d1aa8SWill Deacon 
2574e86d1aa8SWill Deacon 	/* evtq */
2575e86d1aa8SWill Deacon 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2576e86d1aa8SWill Deacon 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2577e86d1aa8SWill Deacon 				      "evtq");
2578e86d1aa8SWill Deacon 	if (ret)
2579e86d1aa8SWill Deacon 		return ret;
2580e86d1aa8SWill Deacon 
2581e86d1aa8SWill Deacon 	/* priq */
2582e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2583e86d1aa8SWill Deacon 		return 0;
2584e86d1aa8SWill Deacon 
2585e86d1aa8SWill Deacon 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2586e86d1aa8SWill Deacon 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2587e86d1aa8SWill Deacon 				       "priq");
2588e86d1aa8SWill Deacon }
2589e86d1aa8SWill Deacon 
2590e86d1aa8SWill Deacon static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2591e86d1aa8SWill Deacon {
2592e86d1aa8SWill Deacon 	unsigned int i;
2593e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2594e86d1aa8SWill Deacon 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2595e86d1aa8SWill Deacon 	void *strtab = smmu->strtab_cfg.strtab;
2596e86d1aa8SWill Deacon 
2597e86d1aa8SWill Deacon 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2598e86d1aa8SWill Deacon 	if (!cfg->l1_desc) {
2599e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2600e86d1aa8SWill Deacon 		return -ENOMEM;
2601e86d1aa8SWill Deacon 	}
2602e86d1aa8SWill Deacon 
2603e86d1aa8SWill Deacon 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2604e86d1aa8SWill Deacon 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2605e86d1aa8SWill Deacon 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2606e86d1aa8SWill Deacon 	}
2607e86d1aa8SWill Deacon 
2608e86d1aa8SWill Deacon 	return 0;
2609e86d1aa8SWill Deacon }
2610e86d1aa8SWill Deacon 
2611e86d1aa8SWill Deacon static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2612e86d1aa8SWill Deacon {
2613e86d1aa8SWill Deacon 	void *strtab;
2614e86d1aa8SWill Deacon 	u64 reg;
2615e86d1aa8SWill Deacon 	u32 size, l1size;
2616e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2617e86d1aa8SWill Deacon 
2618e86d1aa8SWill Deacon 	/* Calculate the L1 size, capped to the SIDSIZE. */
2619e86d1aa8SWill Deacon 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2620e86d1aa8SWill Deacon 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2621e86d1aa8SWill Deacon 	cfg->num_l1_ents = 1 << size;
2622e86d1aa8SWill Deacon 
2623e86d1aa8SWill Deacon 	size += STRTAB_SPLIT;
2624e86d1aa8SWill Deacon 	if (size < smmu->sid_bits)
2625e86d1aa8SWill Deacon 		dev_warn(smmu->dev,
2626e86d1aa8SWill Deacon 			 "2-level strtab only covers %u/%u bits of SID\n",
2627e86d1aa8SWill Deacon 			 size, smmu->sid_bits);
2628e86d1aa8SWill Deacon 
2629e86d1aa8SWill Deacon 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2630e86d1aa8SWill Deacon 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2631e86d1aa8SWill Deacon 				     GFP_KERNEL);
2632e86d1aa8SWill Deacon 	if (!strtab) {
2633e86d1aa8SWill Deacon 		dev_err(smmu->dev,
2634e86d1aa8SWill Deacon 			"failed to allocate l1 stream table (%u bytes)\n",
2635dc898eb8SZenghui Yu 			l1size);
2636e86d1aa8SWill Deacon 		return -ENOMEM;
2637e86d1aa8SWill Deacon 	}
2638e86d1aa8SWill Deacon 	cfg->strtab = strtab;
2639e86d1aa8SWill Deacon 
2640e86d1aa8SWill Deacon 	/* Configure strtab_base_cfg for 2 levels */
2641e86d1aa8SWill Deacon 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2642e86d1aa8SWill Deacon 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2643e86d1aa8SWill Deacon 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2644e86d1aa8SWill Deacon 	cfg->strtab_base_cfg = reg;
2645e86d1aa8SWill Deacon 
2646e86d1aa8SWill Deacon 	return arm_smmu_init_l1_strtab(smmu);
2647e86d1aa8SWill Deacon }
2648e86d1aa8SWill Deacon 
2649e86d1aa8SWill Deacon static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2650e86d1aa8SWill Deacon {
2651e86d1aa8SWill Deacon 	void *strtab;
2652e86d1aa8SWill Deacon 	u64 reg;
2653e86d1aa8SWill Deacon 	u32 size;
2654e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2655e86d1aa8SWill Deacon 
2656e86d1aa8SWill Deacon 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2657e86d1aa8SWill Deacon 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2658e86d1aa8SWill Deacon 				     GFP_KERNEL);
2659e86d1aa8SWill Deacon 	if (!strtab) {
2660e86d1aa8SWill Deacon 		dev_err(smmu->dev,
2661e86d1aa8SWill Deacon 			"failed to allocate linear stream table (%u bytes)\n",
2662e86d1aa8SWill Deacon 			size);
2663e86d1aa8SWill Deacon 		return -ENOMEM;
2664e86d1aa8SWill Deacon 	}
2665e86d1aa8SWill Deacon 	cfg->strtab = strtab;
2666e86d1aa8SWill Deacon 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2667e86d1aa8SWill Deacon 
2668e86d1aa8SWill Deacon 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2669e86d1aa8SWill Deacon 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2670e86d1aa8SWill Deacon 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2671e86d1aa8SWill Deacon 	cfg->strtab_base_cfg = reg;
2672e86d1aa8SWill Deacon 
2673e86d1aa8SWill Deacon 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2674e86d1aa8SWill Deacon 	return 0;
2675e86d1aa8SWill Deacon }
2676e86d1aa8SWill Deacon 
2677e86d1aa8SWill Deacon static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2678e86d1aa8SWill Deacon {
2679e86d1aa8SWill Deacon 	u64 reg;
2680e86d1aa8SWill Deacon 	int ret;
2681e86d1aa8SWill Deacon 
2682e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2683e86d1aa8SWill Deacon 		ret = arm_smmu_init_strtab_2lvl(smmu);
2684e86d1aa8SWill Deacon 	else
2685e86d1aa8SWill Deacon 		ret = arm_smmu_init_strtab_linear(smmu);
2686e86d1aa8SWill Deacon 
2687e86d1aa8SWill Deacon 	if (ret)
2688e86d1aa8SWill Deacon 		return ret;
2689e86d1aa8SWill Deacon 
2690e86d1aa8SWill Deacon 	/* Set the strtab base address */
2691e86d1aa8SWill Deacon 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2692e86d1aa8SWill Deacon 	reg |= STRTAB_BASE_RA;
2693e86d1aa8SWill Deacon 	smmu->strtab_cfg.strtab_base = reg;
2694e86d1aa8SWill Deacon 
2695e86d1aa8SWill Deacon 	/* Allocate the first VMID for stage-2 bypass STEs */
2696e86d1aa8SWill Deacon 	set_bit(0, smmu->vmid_map);
2697e86d1aa8SWill Deacon 	return 0;
2698e86d1aa8SWill Deacon }
2699e86d1aa8SWill Deacon 
/* Build the in-memory structures the SMMU consumes: queues and strtab. */
static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
	int err = arm_smmu_init_queues(smmu);

	return err ? err : arm_smmu_init_strtab(smmu);
}
2710e86d1aa8SWill Deacon 
/*
 * Write @val to the register at @reg_off, then poll (for at most
 * ARM_SMMU_POLL_TIMEOUT_US) until the acknowledgement register at
 * @ack_off reflects the same value. Returns 0 on success or a negative
 * error if the SMMU never acknowledges the write.
 */
static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
				   unsigned int reg_off, unsigned int ack_off)
{
	u32 reg;

	writel_relaxed(val, smmu->base + reg_off);
	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
					  1, ARM_SMMU_POLL_TIMEOUT_US);
}
2720e86d1aa8SWill Deacon 
/*
 * GBPA is "special": instead of a separate ACK register it is updated by
 * waiting for any in-flight update to finish, writing the new value with
 * GBPA_UPDATE set, and polling until the hardware clears GBPA_UPDATE.
 * @set / @clr are the bits to set / clear in the current value.
 */
static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
{
	int ret;
	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;

	/* Wait for any previous update to complete before modifying */
	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
					 1, ARM_SMMU_POLL_TIMEOUT_US);
	if (ret)
		return ret;

	reg &= ~clr;
	reg |= set;
	writel_relaxed(reg | GBPA_UPDATE, gbpa);
	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
					 1, ARM_SMMU_POLL_TIMEOUT_US);

	if (ret)
		dev_err(smmu->dev, "GBPA not responding to update\n");
	return ret;
}
2742e86d1aa8SWill Deacon 
/* devm action callback: tear down the platform MSIs allocated for @data. */
static void arm_smmu_free_msis(void *data)
{
	platform_msi_domain_free_irqs(data);
}
2748e86d1aa8SWill Deacon 
/*
 * platform-MSI write_msg callback: program the MSI for one queue (or
 * GERROR) into its trio of SMMU registers. For each MSI index, cfg[0]
 * takes the doorbell address, cfg[1] the payload and cfg[2] the memory
 * attributes for the write.
 */
static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
	phys_addr_t doorbell;
	struct device *dev = msi_desc_to_dev(desc);
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];

	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
	doorbell &= MSI_CFG0_ADDR_MASK;

	writeq_relaxed(doorbell, smmu->base + cfg[0]);
	writel_relaxed(msg->data, smmu->base + cfg[1]);
	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
}
2763e86d1aa8SWill Deacon 
/*
 * Allocate platform MSIs for the event queue, GERROR and (when present)
 * the PRI queue, recording the resulting Linux IRQ numbers. If MSIs are
 * unsupported or allocation fails, fall back to wired interrupts with a
 * warning; this is never fatal.
 */
static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
{
	struct msi_desc *desc;
	int ret, nvec = ARM_SMMU_MAX_MSIS;
	struct device *dev = smmu->dev;

	/* Clear the MSI address regs */
	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);

	if (smmu->features & ARM_SMMU_FEAT_PRI)
		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
	else
		/* No PRI queue means one fewer MSI vector is needed */
		nvec--;

	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
		return;

	if (!dev->msi_domain) {
		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
		return;
	}

	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
	if (ret) {
		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
		return;
	}

	/* Stash the IRQ number of each allocated vector by its MSI index */
	for_each_msi_entry(desc, dev) {
		switch (desc->platform.msi_index) {
		case EVTQ_MSI_INDEX:
			smmu->evtq.q.irq = desc->irq;
			break;
		case GERROR_MSI_INDEX:
			smmu->gerr_irq = desc->irq;
			break;
		case PRIQ_MSI_INDEX:
			smmu->priq.q.irq = desc->irq;
			break;
		default:	/* Unknown */
			continue;
		}
	}

	/* Add callback to free MSIs on teardown */
	devm_add_action(dev, arm_smmu_free_msis, dev);
}
2813e86d1aa8SWill Deacon 
/*
 * Request the individual (per-queue) interrupt lines, preferring MSIs
 * set up by arm_smmu_setup_msis() where available. A missing or failed
 * IRQ only produces a warning: the SMMU still functions, but the
 * corresponding events/errors will not be reported.
 */
static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
{
	int irq, ret;

	arm_smmu_setup_msis(smmu);

	/* Request interrupt lines */
	irq = smmu->evtq.q.irq;
	if (irq) {
		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
						arm_smmu_evtq_thread,
						IRQF_ONESHOT,
						"arm-smmu-v3-evtq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable evtq irq\n");
	} else {
		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
	}

	irq = smmu->gerr_irq;
	if (irq) {
		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
				       0, "arm-smmu-v3-gerror", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable gerror irq\n");
	} else {
		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
	}

	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		irq = smmu->priq.q.irq;
		if (irq) {
			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
							arm_smmu_priq_thread,
							IRQF_ONESHOT,
							"arm-smmu-v3-priq",
							smmu);
			if (ret < 0)
				dev_warn(smmu->dev,
					 "failed to enable priq irq\n");
		} else {
			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
		}
	}
}
2859e86d1aa8SWill Deacon 
2860e86d1aa8SWill Deacon static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2861e86d1aa8SWill Deacon {
2862e86d1aa8SWill Deacon 	int ret, irq;
2863e86d1aa8SWill Deacon 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2864e86d1aa8SWill Deacon 
2865e86d1aa8SWill Deacon 	/* Disable IRQs first */
2866e86d1aa8SWill Deacon 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2867e86d1aa8SWill Deacon 				      ARM_SMMU_IRQ_CTRLACK);
2868e86d1aa8SWill Deacon 	if (ret) {
2869e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to disable irqs\n");
2870e86d1aa8SWill Deacon 		return ret;
2871e86d1aa8SWill Deacon 	}
2872e86d1aa8SWill Deacon 
2873e86d1aa8SWill Deacon 	irq = smmu->combined_irq;
2874e86d1aa8SWill Deacon 	if (irq) {
2875e86d1aa8SWill Deacon 		/*
2876e86d1aa8SWill Deacon 		 * Cavium ThunderX2 implementation doesn't support unique irq
2877e86d1aa8SWill Deacon 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
2878e86d1aa8SWill Deacon 		 */
2879e86d1aa8SWill Deacon 		ret = devm_request_threaded_irq(smmu->dev, irq,
2880e86d1aa8SWill Deacon 					arm_smmu_combined_irq_handler,
2881e86d1aa8SWill Deacon 					arm_smmu_combined_irq_thread,
2882e86d1aa8SWill Deacon 					IRQF_ONESHOT,
2883e86d1aa8SWill Deacon 					"arm-smmu-v3-combined-irq", smmu);
2884e86d1aa8SWill Deacon 		if (ret < 0)
2885e86d1aa8SWill Deacon 			dev_warn(smmu->dev, "failed to enable combined irq\n");
2886e86d1aa8SWill Deacon 	} else
2887e86d1aa8SWill Deacon 		arm_smmu_setup_unique_irqs(smmu);
2888e86d1aa8SWill Deacon 
2889e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2890e86d1aa8SWill Deacon 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2891e86d1aa8SWill Deacon 
2892e86d1aa8SWill Deacon 	/* Enable interrupt generation on the SMMU */
2893e86d1aa8SWill Deacon 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2894e86d1aa8SWill Deacon 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2895e86d1aa8SWill Deacon 	if (ret)
2896e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "failed to enable irqs\n");
2897e86d1aa8SWill Deacon 
2898e86d1aa8SWill Deacon 	return 0;
2899e86d1aa8SWill Deacon }
2900e86d1aa8SWill Deacon 
2901e86d1aa8SWill Deacon static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2902e86d1aa8SWill Deacon {
2903e86d1aa8SWill Deacon 	int ret;
2904e86d1aa8SWill Deacon 
2905e86d1aa8SWill Deacon 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2906e86d1aa8SWill Deacon 	if (ret)
2907e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to clear cr0\n");
2908e86d1aa8SWill Deacon 
2909e86d1aa8SWill Deacon 	return ret;
2910e86d1aa8SWill Deacon }
2911e86d1aa8SWill Deacon 
/*
 * Bring the SMMU out of reset: disable it, program the table/queue
 * memory attributes, stream table and queue base registers, invalidate
 * cached configuration and TLBs, enable each queue in turn, set up
 * interrupts and finally enable translation (or configure bypass).
 *
 * @bypass: if true and bypass is permitted, leave the SMMU disabled
 *          with GBPA passing transactions through untranslated.
 *
 * The ordering here is deliberate: queues must be programmed and
 * enabled, and stale state invalidated, before SMMUEN is set.
 */
static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
{
	int ret;
	u32 reg, enables;
	struct arm_smmu_cmdq_ent cmd;

	/* Clear CR0 and sync (disables SMMU and queue processing) */
	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
	if (reg & CR0_SMMUEN) {
		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
		WARN_ON(is_kdump_kernel() && !disable_bypass);
		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
	}

	ret = arm_smmu_device_disable(smmu);
	if (ret)
		return ret;

	/* CR1 (table and queue memory attributes) */
	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);

	/* CR2 (random crap) */
	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);

	/* Stream table */
	writeq_relaxed(smmu->strtab_cfg.strtab_base,
		       smmu->base + ARM_SMMU_STRTAB_BASE);
	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);

	/* Command queue */
	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);

	/* Queues are enabled one at a time, accumulating CR0 enable bits */
	enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable command queue\n");
		return ret;
	}

	/* Invalidate any cached configuration */
	cmd.opcode = CMDQ_OP_CFGI_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Invalidate any stale TLB entries */
	if (smmu->features & ARM_SMMU_FEAT_HYP) {
		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	}

	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Event queue */
	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
	writel_relaxed(smmu->evtq.q.llq.prod,
		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
	writel_relaxed(smmu->evtq.q.llq.cons,
		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));

	enables |= CR0_EVTQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable event queue\n");
		return ret;
	}

	/* PRI queue */
	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		writeq_relaxed(smmu->priq.q.q_base,
			       smmu->base + ARM_SMMU_PRIQ_BASE);
		writel_relaxed(smmu->priq.q.llq.prod,
			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
		writel_relaxed(smmu->priq.q.llq.cons,
			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));

		enables |= CR0_PRIQEN;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable PRI queue\n");
			return ret;
		}
	}

	if (smmu->features & ARM_SMMU_FEAT_ATS) {
		enables |= CR0_ATSCHK;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable ATS check\n");
			return ret;
		}
	}

	ret = arm_smmu_setup_irqs(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to setup irqs\n");
		return ret;
	}

	/* In a kdump kernel, keep the event and PRI queues disabled */
	if (is_kdump_kernel())
		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);

	/* Enable the SMMU interface, or ensure bypass */
	if (!bypass || disable_bypass) {
		enables |= CR0_SMMUEN;
	} else {
		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
		if (ret)
			return ret;
	}
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable SMMU interface\n");
		return ret;
	}

	return 0;
}
3046e86d1aa8SWill Deacon 
3047e86d1aa8SWill Deacon static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3048e86d1aa8SWill Deacon {
3049e86d1aa8SWill Deacon 	u32 reg;
3050e86d1aa8SWill Deacon 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3051e86d1aa8SWill Deacon 
3052e86d1aa8SWill Deacon 	/* IDR0 */
3053e86d1aa8SWill Deacon 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3054e86d1aa8SWill Deacon 
3055e86d1aa8SWill Deacon 	/* 2-level structures */
3056e86d1aa8SWill Deacon 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3057e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3058e86d1aa8SWill Deacon 
3059e86d1aa8SWill Deacon 	if (reg & IDR0_CD2L)
3060e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3061e86d1aa8SWill Deacon 
3062e86d1aa8SWill Deacon 	/*
3063e86d1aa8SWill Deacon 	 * Translation table endianness.
3064e86d1aa8SWill Deacon 	 * We currently require the same endianness as the CPU, but this
3065e86d1aa8SWill Deacon 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3066e86d1aa8SWill Deacon 	 */
3067e86d1aa8SWill Deacon 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3068e86d1aa8SWill Deacon 	case IDR0_TTENDIAN_MIXED:
3069e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3070e86d1aa8SWill Deacon 		break;
3071e86d1aa8SWill Deacon #ifdef __BIG_ENDIAN
3072e86d1aa8SWill Deacon 	case IDR0_TTENDIAN_BE:
3073e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3074e86d1aa8SWill Deacon 		break;
3075e86d1aa8SWill Deacon #else
3076e86d1aa8SWill Deacon 	case IDR0_TTENDIAN_LE:
3077e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3078e86d1aa8SWill Deacon 		break;
3079e86d1aa8SWill Deacon #endif
3080e86d1aa8SWill Deacon 	default:
3081e86d1aa8SWill Deacon 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3082e86d1aa8SWill Deacon 		return -ENXIO;
3083e86d1aa8SWill Deacon 	}
3084e86d1aa8SWill Deacon 
3085e86d1aa8SWill Deacon 	/* Boolean feature flags */
3086e86d1aa8SWill Deacon 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3087e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_PRI;
3088e86d1aa8SWill Deacon 
3089e86d1aa8SWill Deacon 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3090e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_ATS;
3091e86d1aa8SWill Deacon 
3092e86d1aa8SWill Deacon 	if (reg & IDR0_SEV)
3093e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_SEV;
3094e86d1aa8SWill Deacon 
3095bd07a20aSBarry Song 	if (reg & IDR0_MSI) {
3096e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_MSI;
3097bd07a20aSBarry Song 		if (coherent && !disable_msipolling)
3098bd07a20aSBarry Song 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3099bd07a20aSBarry Song 	}
3100e86d1aa8SWill Deacon 
3101e86d1aa8SWill Deacon 	if (reg & IDR0_HYP)
3102e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_HYP;
3103e86d1aa8SWill Deacon 
3104e86d1aa8SWill Deacon 	/*
3105e86d1aa8SWill Deacon 	 * The coherency feature as set by FW is used in preference to the ID
3106e86d1aa8SWill Deacon 	 * register, but warn on mismatch.
3107e86d1aa8SWill Deacon 	 */
3108e86d1aa8SWill Deacon 	if (!!(reg & IDR0_COHACC) != coherent)
3109e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3110e86d1aa8SWill Deacon 			 coherent ? "true" : "false");
3111e86d1aa8SWill Deacon 
3112e86d1aa8SWill Deacon 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3113e86d1aa8SWill Deacon 	case IDR0_STALL_MODEL_FORCE:
3114e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3115df561f66SGustavo A. R. Silva 		fallthrough;
3116e86d1aa8SWill Deacon 	case IDR0_STALL_MODEL_STALL:
3117e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3118e86d1aa8SWill Deacon 	}
3119e86d1aa8SWill Deacon 
3120e86d1aa8SWill Deacon 	if (reg & IDR0_S1P)
3121e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3122e86d1aa8SWill Deacon 
3123e86d1aa8SWill Deacon 	if (reg & IDR0_S2P)
3124e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3125e86d1aa8SWill Deacon 
3126e86d1aa8SWill Deacon 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3127e86d1aa8SWill Deacon 		dev_err(smmu->dev, "no translation support!\n");
3128e86d1aa8SWill Deacon 		return -ENXIO;
3129e86d1aa8SWill Deacon 	}
3130e86d1aa8SWill Deacon 
3131e86d1aa8SWill Deacon 	/* We only support the AArch64 table format at present */
3132e86d1aa8SWill Deacon 	switch (FIELD_GET(IDR0_TTF, reg)) {
3133e86d1aa8SWill Deacon 	case IDR0_TTF_AARCH32_64:
3134e86d1aa8SWill Deacon 		smmu->ias = 40;
3135df561f66SGustavo A. R. Silva 		fallthrough;
3136e86d1aa8SWill Deacon 	case IDR0_TTF_AARCH64:
3137e86d1aa8SWill Deacon 		break;
3138e86d1aa8SWill Deacon 	default:
3139e86d1aa8SWill Deacon 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3140e86d1aa8SWill Deacon 		return -ENXIO;
3141e86d1aa8SWill Deacon 	}
3142e86d1aa8SWill Deacon 
3143e86d1aa8SWill Deacon 	/* ASID/VMID sizes */
3144e86d1aa8SWill Deacon 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3145e86d1aa8SWill Deacon 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3146e86d1aa8SWill Deacon 
3147e86d1aa8SWill Deacon 	/* IDR1 */
3148e86d1aa8SWill Deacon 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3149e86d1aa8SWill Deacon 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3150e86d1aa8SWill Deacon 		dev_err(smmu->dev, "embedded implementation not supported\n");
3151e86d1aa8SWill Deacon 		return -ENXIO;
3152e86d1aa8SWill Deacon 	}
3153e86d1aa8SWill Deacon 
3154e86d1aa8SWill Deacon 	/* Queue sizes, capped to ensure natural alignment */
3155e86d1aa8SWill Deacon 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3156e86d1aa8SWill Deacon 					     FIELD_GET(IDR1_CMDQS, reg));
3157e86d1aa8SWill Deacon 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3158e86d1aa8SWill Deacon 		/*
3159e86d1aa8SWill Deacon 		 * We don't support splitting up batches, so one batch of
3160e86d1aa8SWill Deacon 		 * commands plus an extra sync needs to fit inside the command
3161e86d1aa8SWill Deacon 		 * queue. There's also no way we can handle the weird alignment
3162e86d1aa8SWill Deacon 		 * restrictions on the base pointer for a unit-length queue.
3163e86d1aa8SWill Deacon 		 */
3164e86d1aa8SWill Deacon 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3165e86d1aa8SWill Deacon 			CMDQ_BATCH_ENTRIES);
3166e86d1aa8SWill Deacon 		return -ENXIO;
3167e86d1aa8SWill Deacon 	}
3168e86d1aa8SWill Deacon 
3169e86d1aa8SWill Deacon 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3170e86d1aa8SWill Deacon 					     FIELD_GET(IDR1_EVTQS, reg));
3171e86d1aa8SWill Deacon 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3172e86d1aa8SWill Deacon 					     FIELD_GET(IDR1_PRIQS, reg));
3173e86d1aa8SWill Deacon 
3174e86d1aa8SWill Deacon 	/* SID/SSID sizes */
3175e86d1aa8SWill Deacon 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3176e86d1aa8SWill Deacon 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3177e86d1aa8SWill Deacon 
3178e86d1aa8SWill Deacon 	/*
3179e86d1aa8SWill Deacon 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3180e86d1aa8SWill Deacon 	 * table, use a linear table instead.
3181e86d1aa8SWill Deacon 	 */
3182e86d1aa8SWill Deacon 	if (smmu->sid_bits <= STRTAB_SPLIT)
3183e86d1aa8SWill Deacon 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3184e86d1aa8SWill Deacon 
3185e86d1aa8SWill Deacon 	/* IDR3 */
3186e86d1aa8SWill Deacon 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3187e86d1aa8SWill Deacon 	if (FIELD_GET(IDR3_RIL, reg))
3188e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3189e86d1aa8SWill Deacon 
3190e86d1aa8SWill Deacon 	/* IDR5 */
3191e86d1aa8SWill Deacon 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3192e86d1aa8SWill Deacon 
3193e86d1aa8SWill Deacon 	/* Maximum number of outstanding stalls */
3194e86d1aa8SWill Deacon 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3195e86d1aa8SWill Deacon 
3196e86d1aa8SWill Deacon 	/* Page sizes */
3197e86d1aa8SWill Deacon 	if (reg & IDR5_GRAN64K)
3198e86d1aa8SWill Deacon 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3199e86d1aa8SWill Deacon 	if (reg & IDR5_GRAN16K)
3200e86d1aa8SWill Deacon 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3201e86d1aa8SWill Deacon 	if (reg & IDR5_GRAN4K)
3202e86d1aa8SWill Deacon 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3203e86d1aa8SWill Deacon 
3204e86d1aa8SWill Deacon 	/* Input address size */
3205e86d1aa8SWill Deacon 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3206e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_VAX;
3207e86d1aa8SWill Deacon 
3208e86d1aa8SWill Deacon 	/* Output address size */
3209e86d1aa8SWill Deacon 	switch (FIELD_GET(IDR5_OAS, reg)) {
3210e86d1aa8SWill Deacon 	case IDR5_OAS_32_BIT:
3211e86d1aa8SWill Deacon 		smmu->oas = 32;
3212e86d1aa8SWill Deacon 		break;
3213e86d1aa8SWill Deacon 	case IDR5_OAS_36_BIT:
3214e86d1aa8SWill Deacon 		smmu->oas = 36;
3215e86d1aa8SWill Deacon 		break;
3216e86d1aa8SWill Deacon 	case IDR5_OAS_40_BIT:
3217e86d1aa8SWill Deacon 		smmu->oas = 40;
3218e86d1aa8SWill Deacon 		break;
3219e86d1aa8SWill Deacon 	case IDR5_OAS_42_BIT:
3220e86d1aa8SWill Deacon 		smmu->oas = 42;
3221e86d1aa8SWill Deacon 		break;
3222e86d1aa8SWill Deacon 	case IDR5_OAS_44_BIT:
3223e86d1aa8SWill Deacon 		smmu->oas = 44;
3224e86d1aa8SWill Deacon 		break;
3225e86d1aa8SWill Deacon 	case IDR5_OAS_52_BIT:
3226e86d1aa8SWill Deacon 		smmu->oas = 52;
3227e86d1aa8SWill Deacon 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3228e86d1aa8SWill Deacon 		break;
3229e86d1aa8SWill Deacon 	default:
3230e86d1aa8SWill Deacon 		dev_info(smmu->dev,
3231e86d1aa8SWill Deacon 			"unknown output address size. Truncating to 48-bit\n");
3232df561f66SGustavo A. R. Silva 		fallthrough;
3233e86d1aa8SWill Deacon 	case IDR5_OAS_48_BIT:
3234e86d1aa8SWill Deacon 		smmu->oas = 48;
3235e86d1aa8SWill Deacon 	}
3236e86d1aa8SWill Deacon 
3237e86d1aa8SWill Deacon 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3238e86d1aa8SWill Deacon 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3239e86d1aa8SWill Deacon 	else
3240e86d1aa8SWill Deacon 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3241e86d1aa8SWill Deacon 
3242e86d1aa8SWill Deacon 	/* Set the DMA mask for our table walker */
3243e86d1aa8SWill Deacon 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3244e86d1aa8SWill Deacon 		dev_warn(smmu->dev,
3245e86d1aa8SWill Deacon 			 "failed to set DMA mask for table walker\n");
3246e86d1aa8SWill Deacon 
3247e86d1aa8SWill Deacon 	smmu->ias = max(smmu->ias, smmu->oas);
3248e86d1aa8SWill Deacon 
3249e86d1aa8SWill Deacon 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3250e86d1aa8SWill Deacon 		 smmu->ias, smmu->oas, smmu->features);
3251e86d1aa8SWill Deacon 	return 0;
3252e86d1aa8SWill Deacon }
3253e86d1aa8SWill Deacon 
3254e86d1aa8SWill Deacon #ifdef CONFIG_ACPI
3255e86d1aa8SWill Deacon static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3256e86d1aa8SWill Deacon {
3257e86d1aa8SWill Deacon 	switch (model) {
3258e86d1aa8SWill Deacon 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3259e86d1aa8SWill Deacon 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3260e86d1aa8SWill Deacon 		break;
3261e86d1aa8SWill Deacon 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3262e86d1aa8SWill Deacon 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3263e86d1aa8SWill Deacon 		break;
3264e86d1aa8SWill Deacon 	}
3265e86d1aa8SWill Deacon 
3266e86d1aa8SWill Deacon 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3267e86d1aa8SWill Deacon }
3268e86d1aa8SWill Deacon 
3269e86d1aa8SWill Deacon static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3270e86d1aa8SWill Deacon 				      struct arm_smmu_device *smmu)
3271e86d1aa8SWill Deacon {
3272e86d1aa8SWill Deacon 	struct acpi_iort_smmu_v3 *iort_smmu;
3273e86d1aa8SWill Deacon 	struct device *dev = smmu->dev;
3274e86d1aa8SWill Deacon 	struct acpi_iort_node *node;
3275e86d1aa8SWill Deacon 
3276e86d1aa8SWill Deacon 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3277e86d1aa8SWill Deacon 
3278e86d1aa8SWill Deacon 	/* Retrieve SMMUv3 specific data */
3279e86d1aa8SWill Deacon 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3280e86d1aa8SWill Deacon 
3281e86d1aa8SWill Deacon 	acpi_smmu_get_options(iort_smmu->model, smmu);
3282e86d1aa8SWill Deacon 
3283e86d1aa8SWill Deacon 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3284e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3285e86d1aa8SWill Deacon 
3286e86d1aa8SWill Deacon 	return 0;
3287e86d1aa8SWill Deacon }
3288e86d1aa8SWill Deacon #else
3289e86d1aa8SWill Deacon static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3290e86d1aa8SWill Deacon 					     struct arm_smmu_device *smmu)
3291e86d1aa8SWill Deacon {
3292e86d1aa8SWill Deacon 	return -ENODEV;
3293e86d1aa8SWill Deacon }
3294e86d1aa8SWill Deacon #endif
3295e86d1aa8SWill Deacon 
3296e86d1aa8SWill Deacon static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3297e86d1aa8SWill Deacon 				    struct arm_smmu_device *smmu)
3298e86d1aa8SWill Deacon {
3299e86d1aa8SWill Deacon 	struct device *dev = &pdev->dev;
3300e86d1aa8SWill Deacon 	u32 cells;
3301e86d1aa8SWill Deacon 	int ret = -EINVAL;
3302e86d1aa8SWill Deacon 
3303e86d1aa8SWill Deacon 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3304e86d1aa8SWill Deacon 		dev_err(dev, "missing #iommu-cells property\n");
3305e86d1aa8SWill Deacon 	else if (cells != 1)
3306e86d1aa8SWill Deacon 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3307e86d1aa8SWill Deacon 	else
3308e86d1aa8SWill Deacon 		ret = 0;
3309e86d1aa8SWill Deacon 
3310e86d1aa8SWill Deacon 	parse_driver_options(smmu);
3311e86d1aa8SWill Deacon 
3312e86d1aa8SWill Deacon 	if (of_dma_is_coherent(dev->of_node))
3313e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3314e86d1aa8SWill Deacon 
3315e86d1aa8SWill Deacon 	return ret;
3316e86d1aa8SWill Deacon }
3317e86d1aa8SWill Deacon 
3318e86d1aa8SWill Deacon static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3319e86d1aa8SWill Deacon {
3320e86d1aa8SWill Deacon 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3321e86d1aa8SWill Deacon 		return SZ_64K;
3322e86d1aa8SWill Deacon 	else
3323e86d1aa8SWill Deacon 		return SZ_128K;
3324e86d1aa8SWill Deacon }
3325e86d1aa8SWill Deacon 
3326e86d1aa8SWill Deacon static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3327e86d1aa8SWill Deacon {
3328e86d1aa8SWill Deacon 	int err;
3329e86d1aa8SWill Deacon 
3330e86d1aa8SWill Deacon #ifdef CONFIG_PCI
3331e86d1aa8SWill Deacon 	if (pci_bus_type.iommu_ops != ops) {
3332e86d1aa8SWill Deacon 		err = bus_set_iommu(&pci_bus_type, ops);
3333e86d1aa8SWill Deacon 		if (err)
3334e86d1aa8SWill Deacon 			return err;
3335e86d1aa8SWill Deacon 	}
3336e86d1aa8SWill Deacon #endif
3337e86d1aa8SWill Deacon #ifdef CONFIG_ARM_AMBA
3338e86d1aa8SWill Deacon 	if (amba_bustype.iommu_ops != ops) {
3339e86d1aa8SWill Deacon 		err = bus_set_iommu(&amba_bustype, ops);
3340e86d1aa8SWill Deacon 		if (err)
3341e86d1aa8SWill Deacon 			goto err_reset_pci_ops;
3342e86d1aa8SWill Deacon 	}
3343e86d1aa8SWill Deacon #endif
3344e86d1aa8SWill Deacon 	if (platform_bus_type.iommu_ops != ops) {
3345e86d1aa8SWill Deacon 		err = bus_set_iommu(&platform_bus_type, ops);
3346e86d1aa8SWill Deacon 		if (err)
3347e86d1aa8SWill Deacon 			goto err_reset_amba_ops;
3348e86d1aa8SWill Deacon 	}
3349e86d1aa8SWill Deacon 
3350e86d1aa8SWill Deacon 	return 0;
3351e86d1aa8SWill Deacon 
3352e86d1aa8SWill Deacon err_reset_amba_ops:
3353e86d1aa8SWill Deacon #ifdef CONFIG_ARM_AMBA
3354e86d1aa8SWill Deacon 	bus_set_iommu(&amba_bustype, NULL);
3355e86d1aa8SWill Deacon #endif
3356e86d1aa8SWill Deacon err_reset_pci_ops: __maybe_unused;
3357e86d1aa8SWill Deacon #ifdef CONFIG_PCI
3358e86d1aa8SWill Deacon 	bus_set_iommu(&pci_bus_type, NULL);
3359e86d1aa8SWill Deacon #endif
3360e86d1aa8SWill Deacon 	return err;
3361e86d1aa8SWill Deacon }
3362e86d1aa8SWill Deacon 
3363e86d1aa8SWill Deacon static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3364e86d1aa8SWill Deacon 				      resource_size_t size)
3365e86d1aa8SWill Deacon {
3366e86d1aa8SWill Deacon 	struct resource res = {
3367e86d1aa8SWill Deacon 		.flags = IORESOURCE_MEM,
3368e86d1aa8SWill Deacon 		.start = start,
3369e86d1aa8SWill Deacon 		.end = start + size - 1,
3370e86d1aa8SWill Deacon 	};
3371e86d1aa8SWill Deacon 
3372e86d1aa8SWill Deacon 	return devm_ioremap_resource(dev, &res);
3373e86d1aa8SWill Deacon }
3374e86d1aa8SWill Deacon 
3375e86d1aa8SWill Deacon static int arm_smmu_device_probe(struct platform_device *pdev)
3376e86d1aa8SWill Deacon {
3377e86d1aa8SWill Deacon 	int irq, ret;
3378e86d1aa8SWill Deacon 	struct resource *res;
3379e86d1aa8SWill Deacon 	resource_size_t ioaddr;
3380e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu;
3381e86d1aa8SWill Deacon 	struct device *dev = &pdev->dev;
3382e86d1aa8SWill Deacon 	bool bypass;
3383e86d1aa8SWill Deacon 
3384e86d1aa8SWill Deacon 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3385e86d1aa8SWill Deacon 	if (!smmu) {
3386e86d1aa8SWill Deacon 		dev_err(dev, "failed to allocate arm_smmu_device\n");
3387e86d1aa8SWill Deacon 		return -ENOMEM;
3388e86d1aa8SWill Deacon 	}
3389e86d1aa8SWill Deacon 	smmu->dev = dev;
3390e86d1aa8SWill Deacon 
3391e86d1aa8SWill Deacon 	if (dev->of_node) {
3392e86d1aa8SWill Deacon 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3393e86d1aa8SWill Deacon 	} else {
3394e86d1aa8SWill Deacon 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3395e86d1aa8SWill Deacon 		if (ret == -ENODEV)
3396e86d1aa8SWill Deacon 			return ret;
3397e86d1aa8SWill Deacon 	}
3398e86d1aa8SWill Deacon 
3399e86d1aa8SWill Deacon 	/* Set bypass mode according to firmware probing result */
3400e86d1aa8SWill Deacon 	bypass = !!ret;
3401e86d1aa8SWill Deacon 
3402e86d1aa8SWill Deacon 	/* Base address */
3403e86d1aa8SWill Deacon 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3404e86d1aa8SWill Deacon 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3405e86d1aa8SWill Deacon 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3406e86d1aa8SWill Deacon 		return -EINVAL;
3407e86d1aa8SWill Deacon 	}
3408e86d1aa8SWill Deacon 	ioaddr = res->start;
3409e86d1aa8SWill Deacon 
3410e86d1aa8SWill Deacon 	/*
3411e86d1aa8SWill Deacon 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3412e86d1aa8SWill Deacon 	 * the PMCG registers which are reserved by the PMU driver.
3413e86d1aa8SWill Deacon 	 */
3414e86d1aa8SWill Deacon 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3415e86d1aa8SWill Deacon 	if (IS_ERR(smmu->base))
3416e86d1aa8SWill Deacon 		return PTR_ERR(smmu->base);
3417e86d1aa8SWill Deacon 
3418e86d1aa8SWill Deacon 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3419e86d1aa8SWill Deacon 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3420e86d1aa8SWill Deacon 					       ARM_SMMU_REG_SZ);
3421e86d1aa8SWill Deacon 		if (IS_ERR(smmu->page1))
3422e86d1aa8SWill Deacon 			return PTR_ERR(smmu->page1);
3423e86d1aa8SWill Deacon 	} else {
3424e86d1aa8SWill Deacon 		smmu->page1 = smmu->base;
3425e86d1aa8SWill Deacon 	}
3426e86d1aa8SWill Deacon 
3427e86d1aa8SWill Deacon 	/* Interrupt lines */
3428e86d1aa8SWill Deacon 
3429e86d1aa8SWill Deacon 	irq = platform_get_irq_byname_optional(pdev, "combined");
3430e86d1aa8SWill Deacon 	if (irq > 0)
3431e86d1aa8SWill Deacon 		smmu->combined_irq = irq;
3432e86d1aa8SWill Deacon 	else {
3433e86d1aa8SWill Deacon 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3434e86d1aa8SWill Deacon 		if (irq > 0)
3435e86d1aa8SWill Deacon 			smmu->evtq.q.irq = irq;
3436e86d1aa8SWill Deacon 
3437e86d1aa8SWill Deacon 		irq = platform_get_irq_byname_optional(pdev, "priq");
3438e86d1aa8SWill Deacon 		if (irq > 0)
3439e86d1aa8SWill Deacon 			smmu->priq.q.irq = irq;
3440e86d1aa8SWill Deacon 
3441e86d1aa8SWill Deacon 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3442e86d1aa8SWill Deacon 		if (irq > 0)
3443e86d1aa8SWill Deacon 			smmu->gerr_irq = irq;
3444e86d1aa8SWill Deacon 	}
3445e86d1aa8SWill Deacon 	/* Probe the h/w */
3446e86d1aa8SWill Deacon 	ret = arm_smmu_device_hw_probe(smmu);
3447e86d1aa8SWill Deacon 	if (ret)
3448e86d1aa8SWill Deacon 		return ret;
3449e86d1aa8SWill Deacon 
3450e86d1aa8SWill Deacon 	/* Initialise in-memory data structures */
3451e86d1aa8SWill Deacon 	ret = arm_smmu_init_structures(smmu);
3452e86d1aa8SWill Deacon 	if (ret)
3453e86d1aa8SWill Deacon 		return ret;
3454e86d1aa8SWill Deacon 
3455e86d1aa8SWill Deacon 	/* Record our private device structure */
3456e86d1aa8SWill Deacon 	platform_set_drvdata(pdev, smmu);
3457e86d1aa8SWill Deacon 
3458e86d1aa8SWill Deacon 	/* Reset the device */
3459e86d1aa8SWill Deacon 	ret = arm_smmu_device_reset(smmu, bypass);
3460e86d1aa8SWill Deacon 	if (ret)
3461e86d1aa8SWill Deacon 		return ret;
3462e86d1aa8SWill Deacon 
3463e86d1aa8SWill Deacon 	/* And we're up. Go go go! */
3464e86d1aa8SWill Deacon 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3465e86d1aa8SWill Deacon 				     "smmu3.%pa", &ioaddr);
3466e86d1aa8SWill Deacon 	if (ret)
3467e86d1aa8SWill Deacon 		return ret;
3468e86d1aa8SWill Deacon 
3469e86d1aa8SWill Deacon 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3470e86d1aa8SWill Deacon 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3471e86d1aa8SWill Deacon 
3472e86d1aa8SWill Deacon 	ret = iommu_device_register(&smmu->iommu);
3473e86d1aa8SWill Deacon 	if (ret) {
3474e86d1aa8SWill Deacon 		dev_err(dev, "Failed to register iommu\n");
3475e86d1aa8SWill Deacon 		return ret;
3476e86d1aa8SWill Deacon 	}
3477e86d1aa8SWill Deacon 
3478e86d1aa8SWill Deacon 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
3479e86d1aa8SWill Deacon }
3480e86d1aa8SWill Deacon 
3481e86d1aa8SWill Deacon static int arm_smmu_device_remove(struct platform_device *pdev)
3482e86d1aa8SWill Deacon {
3483e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3484e86d1aa8SWill Deacon 
3485e86d1aa8SWill Deacon 	arm_smmu_set_bus_ops(NULL);
3486e86d1aa8SWill Deacon 	iommu_device_unregister(&smmu->iommu);
3487e86d1aa8SWill Deacon 	iommu_device_sysfs_remove(&smmu->iommu);
3488e86d1aa8SWill Deacon 	arm_smmu_device_disable(smmu);
3489e86d1aa8SWill Deacon 
3490e86d1aa8SWill Deacon 	return 0;
3491e86d1aa8SWill Deacon }
3492e86d1aa8SWill Deacon 
3493e86d1aa8SWill Deacon static void arm_smmu_device_shutdown(struct platform_device *pdev)
3494e86d1aa8SWill Deacon {
3495e86d1aa8SWill Deacon 	arm_smmu_device_remove(pdev);
3496e86d1aa8SWill Deacon }
3497e86d1aa8SWill Deacon 
3498e86d1aa8SWill Deacon static const struct of_device_id arm_smmu_of_match[] = {
3499e86d1aa8SWill Deacon 	{ .compatible = "arm,smmu-v3", },
3500e86d1aa8SWill Deacon 	{ },
3501e86d1aa8SWill Deacon };
3502e86d1aa8SWill Deacon MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3503e86d1aa8SWill Deacon 
3504e86d1aa8SWill Deacon static struct platform_driver arm_smmu_driver = {
3505e86d1aa8SWill Deacon 	.driver	= {
3506e86d1aa8SWill Deacon 		.name			= "arm-smmu-v3",
3507e86d1aa8SWill Deacon 		.of_match_table		= arm_smmu_of_match,
3508e86d1aa8SWill Deacon 		.suppress_bind_attrs	= true,
3509e86d1aa8SWill Deacon 	},
3510e86d1aa8SWill Deacon 	.probe	= arm_smmu_device_probe,
3511e86d1aa8SWill Deacon 	.remove	= arm_smmu_device_remove,
3512e86d1aa8SWill Deacon 	.shutdown = arm_smmu_device_shutdown,
3513e86d1aa8SWill Deacon };
3514e86d1aa8SWill Deacon module_platform_driver(arm_smmu_driver);
3515e86d1aa8SWill Deacon 
3516e86d1aa8SWill Deacon MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3517e86d1aa8SWill Deacon MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3518e86d1aa8SWill Deacon MODULE_ALIAS("platform:arm-smmu-v3");
3519e86d1aa8SWill Deacon MODULE_LICENSE("GPL v2");
3520