xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision 376cdf66f62444a1b4ebb393bab9fc44966f9b49)
1e86d1aa8SWill Deacon // SPDX-License-Identifier: GPL-2.0
2e86d1aa8SWill Deacon /*
3e86d1aa8SWill Deacon  * IOMMU API for ARM architected SMMUv3 implementations.
4e86d1aa8SWill Deacon  *
5e86d1aa8SWill Deacon  * Copyright (C) 2015 ARM Limited
6e86d1aa8SWill Deacon  *
7e86d1aa8SWill Deacon  * Author: Will Deacon <will.deacon@arm.com>
8e86d1aa8SWill Deacon  *
9e86d1aa8SWill Deacon  * This driver is powered by bad coffee and bombay mix.
10e86d1aa8SWill Deacon  */
11e86d1aa8SWill Deacon 
12e86d1aa8SWill Deacon #include <linux/acpi.h>
13e86d1aa8SWill Deacon #include <linux/acpi_iort.h>
14e86d1aa8SWill Deacon #include <linux/bitfield.h>
15e86d1aa8SWill Deacon #include <linux/bitops.h>
16e86d1aa8SWill Deacon #include <linux/crash_dump.h>
17e86d1aa8SWill Deacon #include <linux/delay.h>
18e86d1aa8SWill Deacon #include <linux/dma-iommu.h>
19e86d1aa8SWill Deacon #include <linux/err.h>
20e86d1aa8SWill Deacon #include <linux/interrupt.h>
21e86d1aa8SWill Deacon #include <linux/io-pgtable.h>
22e86d1aa8SWill Deacon #include <linux/iommu.h>
23e86d1aa8SWill Deacon #include <linux/iopoll.h>
24e86d1aa8SWill Deacon #include <linux/module.h>
25e86d1aa8SWill Deacon #include <linux/msi.h>
26e86d1aa8SWill Deacon #include <linux/of.h>
27e86d1aa8SWill Deacon #include <linux/of_address.h>
28e86d1aa8SWill Deacon #include <linux/of_iommu.h>
29e86d1aa8SWill Deacon #include <linux/of_platform.h>
30e86d1aa8SWill Deacon #include <linux/pci.h>
31e86d1aa8SWill Deacon #include <linux/pci-ats.h>
32e86d1aa8SWill Deacon #include <linux/platform_device.h>
33e86d1aa8SWill Deacon 
34e86d1aa8SWill Deacon #include <linux/amba/bus.h>
35e86d1aa8SWill Deacon 
/* MMIO registers (page 0 of the SMMU register frame) */

/* ID registers: read-only capability discovery */
#define ARM_SMMU_IDR0			0x0
#define IDR0_ST_LVL			GENMASK(28, 27)
#define IDR0_ST_LVL_2LVL		1
#define IDR0_STALL_MODEL		GENMASK(25, 24)
#define IDR0_STALL_MODEL_STALL		0
#define IDR0_STALL_MODEL_FORCE		2
#define IDR0_TTENDIAN			GENMASK(22, 21)
#define IDR0_TTENDIAN_MIXED		0
#define IDR0_TTENDIAN_LE		2
#define IDR0_TTENDIAN_BE		3
#define IDR0_CD2L			(1 << 19)
#define IDR0_VMID16			(1 << 18)
#define IDR0_PRI			(1 << 16)
#define IDR0_SEV			(1 << 14)
#define IDR0_MSI			(1 << 13)
#define IDR0_ASID16			(1 << 12)
#define IDR0_ATS			(1 << 10)
#define IDR0_HYP			(1 << 9)
#define IDR0_COHACC			(1 << 4)
#define IDR0_TTF			GENMASK(3, 2)
#define IDR0_TTF_AARCH64		2
#define IDR0_TTF_AARCH32_64		3
#define IDR0_S1P			(1 << 1)
#define IDR0_S2P			(1 << 0)

#define ARM_SMMU_IDR1			0x4
#define IDR1_TABLES_PRESET		(1 << 30)
#define IDR1_QUEUES_PRESET		(1 << 29)
#define IDR1_REL			(1 << 28)
#define IDR1_CMDQS			GENMASK(25, 21)
#define IDR1_EVTQS			GENMASK(20, 16)
#define IDR1_PRIQS			GENMASK(15, 11)
#define IDR1_SSIDSIZE			GENMASK(10, 6)
#define IDR1_SIDSIZE			GENMASK(5, 0)

#define ARM_SMMU_IDR3			0xc
#define IDR3_RIL			(1 << 10)

#define ARM_SMMU_IDR5			0x14
#define IDR5_STALL_MAX			GENMASK(31, 16)
#define IDR5_GRAN64K			(1 << 6)
#define IDR5_GRAN16K			(1 << 5)
#define IDR5_GRAN4K			(1 << 4)
#define IDR5_OAS			GENMASK(2, 0)
#define IDR5_OAS_32_BIT			0
#define IDR5_OAS_36_BIT			1
#define IDR5_OAS_40_BIT			2
#define IDR5_OAS_42_BIT			3
#define IDR5_OAS_44_BIT			4
#define IDR5_OAS_48_BIT			5
#define IDR5_OAS_52_BIT			6
#define IDR5_VAX			GENMASK(11, 10)
#define IDR5_VAX_52_BIT			1

/* Global control/enable registers */
#define ARM_SMMU_CR0			0x20
#define CR0_ATSCHK			(1 << 4)
#define CR0_CMDQEN			(1 << 3)
#define CR0_EVTQEN			(1 << 2)
#define CR0_PRIQEN			(1 << 1)
#define CR0_SMMUEN			(1 << 0)

#define ARM_SMMU_CR0ACK			0x24

#define ARM_SMMU_CR1			0x28
#define CR1_TABLE_SH			GENMASK(11, 10)
#define CR1_TABLE_OC			GENMASK(9, 8)
#define CR1_TABLE_IC			GENMASK(7, 6)
#define CR1_QUEUE_SH			GENMASK(5, 4)
#define CR1_QUEUE_OC			GENMASK(3, 2)
#define CR1_QUEUE_IC			GENMASK(1, 0)
/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
#define CR1_CACHE_NC			0
#define CR1_CACHE_WB			1
#define CR1_CACHE_WT			2

#define ARM_SMMU_CR2			0x2c
#define CR2_PTM				(1 << 2)
#define CR2_RECINVSID			(1 << 1)
#define CR2_E2H				(1 << 0)

/* Global bypass attribute: behaviour while SMMUEN is clear */
#define ARM_SMMU_GBPA			0x44
#define GBPA_UPDATE			(1 << 31)
#define GBPA_ABORT			(1 << 20)

#define ARM_SMMU_IRQ_CTRL		0x50
#define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
#define IRQ_CTRL_PRIQ_IRQEN		(1 << 1)
#define IRQ_CTRL_GERROR_IRQEN		(1 << 0)

#define ARM_SMMU_IRQ_CTRLACK		0x54

/* Global error status; acknowledged by writing the same value to GERRORN */
#define ARM_SMMU_GERROR			0x60
#define GERROR_SFM_ERR			(1 << 8)
#define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
#define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
#define GERROR_MSI_EVTQ_ABT_ERR		(1 << 5)
#define GERROR_MSI_CMDQ_ABT_ERR		(1 << 4)
#define GERROR_PRIQ_ABT_ERR		(1 << 3)
#define GERROR_EVTQ_ABT_ERR		(1 << 2)
#define GERROR_CMDQ_ERR			(1 << 0)
#define GERROR_ERR_MASK			0xfd

#define ARM_SMMU_GERRORN		0x64

#define ARM_SMMU_GERROR_IRQ_CFG0	0x68
#define ARM_SMMU_GERROR_IRQ_CFG1	0x70
#define ARM_SMMU_GERROR_IRQ_CFG2	0x74

#define ARM_SMMU_STRTAB_BASE		0x80
#define STRTAB_BASE_RA			(1UL << 62)
#define STRTAB_BASE_ADDR_MASK		GENMASK_ULL(51, 6)

#define ARM_SMMU_STRTAB_BASE_CFG	0x88
#define STRTAB_BASE_CFG_FMT		GENMASK(17, 16)
#define STRTAB_BASE_CFG_FMT_LINEAR	0
#define STRTAB_BASE_CFG_FMT_2LVL	1
#define STRTAB_BASE_CFG_SPLIT		GENMASK(10, 6)
#define STRTAB_BASE_CFG_LOG2SIZE	GENMASK(5, 0)

#define ARM_SMMU_CMDQ_BASE		0x90
#define ARM_SMMU_CMDQ_PROD		0x98
#define ARM_SMMU_CMDQ_CONS		0x9c

/*
 * NOTE(review): the EVTQ/PRIQ PROD and CONS offsets below include a
 * 0x10000 bias, i.e. they appear to live in page 1 of the register
 * frame (cf. the separate "page1" mapping in struct arm_smmu_device).
 */
#define ARM_SMMU_EVTQ_BASE		0xa0
#define ARM_SMMU_EVTQ_PROD		0x100a8
#define ARM_SMMU_EVTQ_CONS		0x100ac
#define ARM_SMMU_EVTQ_IRQ_CFG0		0xb0
#define ARM_SMMU_EVTQ_IRQ_CFG1		0xb8
#define ARM_SMMU_EVTQ_IRQ_CFG2		0xbc

#define ARM_SMMU_PRIQ_BASE		0xc0
#define ARM_SMMU_PRIQ_PROD		0x100c8
#define ARM_SMMU_PRIQ_CONS		0x100cc
#define ARM_SMMU_PRIQ_IRQ_CFG0		0xd0
#define ARM_SMMU_PRIQ_IRQ_CFG1		0xd8
#define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc

/* Size of each mapped register page */
#define ARM_SMMU_REG_SZ			0xe00

/* Common MSI config fields */
#define MSI_CFG0_ADDR_MASK		GENMASK_ULL(51, 2)
#define MSI_CFG2_SH			GENMASK(5, 4)
#define MSI_CFG2_MEMATTR		GENMASK(3, 0)

/* Common memory attribute values */
#define ARM_SMMU_SH_NSH			0
#define ARM_SMMU_SH_OSH			2
#define ARM_SMMU_SH_ISH			3
#define ARM_SMMU_MEMATTR_DEVICE_nGnRE	0x1
#define ARM_SMMU_MEMATTR_OIWB		0xf

/*
 * Queue index helpers: a pointer (prod/cons) carries an index, a wrap
 * bit just above it, and the overflow flag in bit 31.
 */
#define Q_IDX(llq, p)			((p) & ((1 << (llq)->max_n_shift) - 1))
#define Q_WRP(llq, p)			((p) & (1 << (llq)->max_n_shift))
#define Q_OVERFLOW_FLAG			(1U << 31)
#define Q_OVF(p)			((p) & Q_OVERFLOW_FLAG)
#define Q_ENT(q, p)			((q)->base +			\
					 Q_IDX(&((q)->llq), p) *	\
					 (q)->ent_dwords)

#define Q_BASE_RWA			(1UL << 62)
#define Q_BASE_ADDR_MASK		GENMASK_ULL(51, 5)
#define Q_BASE_LOG2SIZE			GENMASK(4, 0)

/* Ensure DMA allocations are naturally aligned */
#ifdef CONFIG_CMA_ALIGNMENT
#define Q_MAX_SZ_SHIFT			(PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
#else
#define Q_MAX_SZ_SHIFT			(PAGE_SHIFT + MAX_ORDER - 1)
#endif
206e86d1aa8SWill Deacon 
/*
 * Stream table.
 *
 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
 * 2lvl: 128k L1 entries,
 *       256 lazy entries per table (each table covers a PCI bus)
 */
#define STRTAB_L1_SZ_SHIFT		20
#define STRTAB_SPLIT			8

/* L1 stream table descriptor: one 64-bit word per L2 table */
#define STRTAB_L1_DESC_DWORDS		1
#define STRTAB_L1_DESC_SPAN		GENMASK_ULL(4, 0)
#define STRTAB_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 6)

/* Stream Table Entry (STE): 8 x 64-bit words */
#define STRTAB_STE_DWORDS		8
#define STRTAB_STE_0_V			(1UL << 0)
#define STRTAB_STE_0_CFG		GENMASK_ULL(3, 1)
#define STRTAB_STE_0_CFG_ABORT		0
#define STRTAB_STE_0_CFG_BYPASS		4
#define STRTAB_STE_0_CFG_S1_TRANS	5
#define STRTAB_STE_0_CFG_S2_TRANS	6

#define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
#define STRTAB_STE_0_S1FMT_LINEAR	0
#define STRTAB_STE_0_S1FMT_64K_L2	2
#define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
#define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)

/* S1DSS: behaviour for transactions without a substream (SSID) */
#define STRTAB_STE_1_S1DSS		GENMASK_ULL(1, 0)
#define STRTAB_STE_1_S1DSS_TERMINATE	0x0
#define STRTAB_STE_1_S1DSS_BYPASS	0x1
#define STRTAB_STE_1_S1DSS_SSID0	0x2

#define STRTAB_STE_1_S1C_CACHE_NC	0UL
#define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
#define STRTAB_STE_1_S1C_CACHE_WT	2UL
#define STRTAB_STE_1_S1C_CACHE_WB	3UL
#define STRTAB_STE_1_S1CIR		GENMASK_ULL(3, 2)
#define STRTAB_STE_1_S1COR		GENMASK_ULL(5, 4)
#define STRTAB_STE_1_S1CSH		GENMASK_ULL(7, 6)

#define STRTAB_STE_1_S1STALLD		(1UL << 27)

/* EATS: how ATS-translated traffic from the endpoint is handled */
#define STRTAB_STE_1_EATS		GENMASK_ULL(29, 28)
#define STRTAB_STE_1_EATS_ABT		0UL
#define STRTAB_STE_1_EATS_TRANS		1UL
#define STRTAB_STE_1_EATS_S1CHK		2UL

#define STRTAB_STE_1_STRW		GENMASK_ULL(31, 30)
#define STRTAB_STE_1_STRW_NSEL1		0UL
#define STRTAB_STE_1_STRW_EL2		2UL

#define STRTAB_STE_1_SHCFG		GENMASK_ULL(45, 44)
#define STRTAB_STE_1_SHCFG_INCOMING	1UL

/* Stage-2 fields (dword 2): VMID plus a packed VTCR image */
#define STRTAB_STE_2_S2VMID		GENMASK_ULL(15, 0)
#define STRTAB_STE_2_VTCR		GENMASK_ULL(50, 32)
#define STRTAB_STE_2_VTCR_S2T0SZ	GENMASK_ULL(5, 0)
#define STRTAB_STE_2_VTCR_S2SL0		GENMASK_ULL(7, 6)
#define STRTAB_STE_2_VTCR_S2IR0		GENMASK_ULL(9, 8)
#define STRTAB_STE_2_VTCR_S2OR0		GENMASK_ULL(11, 10)
#define STRTAB_STE_2_VTCR_S2SH0		GENMASK_ULL(13, 12)
#define STRTAB_STE_2_VTCR_S2TG		GENMASK_ULL(15, 14)
#define STRTAB_STE_2_VTCR_S2PS		GENMASK_ULL(18, 16)
#define STRTAB_STE_2_S2AA64		(1UL << 51)
#define STRTAB_STE_2_S2ENDI		(1UL << 52)
#define STRTAB_STE_2_S2PTW		(1UL << 54)
#define STRTAB_STE_2_S2R		(1UL << 58)

#define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)

/*
 * Context descriptors.
 *
 * Linear: when less than 1024 SSIDs are supported
 * 2lvl: at most 1024 L1 entries,
 *       1024 lazy entries per table.
 */
#define CTXDESC_SPLIT			10
#define CTXDESC_L2_ENTRIES		(1 << CTXDESC_SPLIT)

#define CTXDESC_L1_DESC_DWORDS		1
#define CTXDESC_L1_DESC_V		(1UL << 0)
#define CTXDESC_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 12)

/* Context Descriptor (CD): 8 x 64-bit words */
#define CTXDESC_CD_DWORDS		8
#define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
#define CTXDESC_CD_0_TCR_TG0		GENMASK_ULL(7, 6)
#define CTXDESC_CD_0_TCR_IRGN0		GENMASK_ULL(9, 8)
#define CTXDESC_CD_0_TCR_ORGN0		GENMASK_ULL(11, 10)
#define CTXDESC_CD_0_TCR_SH0		GENMASK_ULL(13, 12)
#define CTXDESC_CD_0_TCR_EPD0		(1ULL << 14)
#define CTXDESC_CD_0_TCR_EPD1		(1ULL << 30)

#define CTXDESC_CD_0_ENDI		(1UL << 15)
#define CTXDESC_CD_0_V			(1UL << 31)

#define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
#define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)

#define CTXDESC_CD_0_AA64		(1UL << 41)
#define CTXDESC_CD_0_S			(1UL << 44)
#define CTXDESC_CD_0_R			(1UL << 45)
#define CTXDESC_CD_0_A			(1UL << 46)
#define CTXDESC_CD_0_ASET		(1UL << 47)
#define CTXDESC_CD_0_ASID		GENMASK_ULL(63, 48)

#define CTXDESC_CD_1_TTB0_MASK		GENMASK_ULL(51, 4)

/*
 * When the SMMU only supports linear context descriptor tables, pick a
 * reasonable size limit (64kB).
 */
#define CTXDESC_LINEAR_CDMAX		ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
321e86d1aa8SWill Deacon 
/* Command queue: 16-byte entries */
#define CMDQ_ENT_SZ_SHIFT		4
#define CMDQ_ENT_DWORDS			((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
#define CMDQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)

/* Error code reported in CMDQ_CONS when GERROR_CMDQ_ERR is raised */
#define CMDQ_CONS_ERR			GENMASK(30, 24)
#define CMDQ_ERR_CERROR_NONE_IDX	0
#define CMDQ_ERR_CERROR_ILL_IDX		1
#define CMDQ_ERR_CERROR_ABT_IDX		2
#define CMDQ_ERR_CERROR_ATC_INV_IDX	3

/* Re-use the (SMMU-ignored) overflow flag to mark the batch owner */
#define CMDQ_PROD_OWNED_FLAG		Q_OVERFLOW_FLAG

/*
 * This is used to size the command queue and therefore must be at least
 * BITS_PER_LONG so that the valid_map works correctly (it relies on the
 * total number of queue entries being a multiple of BITS_PER_LONG).
 */
#define CMDQ_BATCH_ENTRIES		BITS_PER_LONG

/* Fields common to every command (dword 0) */
#define CMDQ_0_OP			GENMASK_ULL(7, 0)
#define CMDQ_0_SSV			(1UL << 11)

#define CMDQ_PREFETCH_0_SID		GENMASK_ULL(63, 32)
#define CMDQ_PREFETCH_1_SIZE		GENMASK_ULL(4, 0)
#define CMDQ_PREFETCH_1_ADDR_MASK	GENMASK_ULL(63, 12)

#define CMDQ_CFGI_0_SSID		GENMASK_ULL(31, 12)
#define CMDQ_CFGI_0_SID			GENMASK_ULL(63, 32)
#define CMDQ_CFGI_1_LEAF		(1UL << 0)
#define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)

/* NUM/SCALE/TTL/TG are range-invalidation fields (see IDR3.RIL) */
#define CMDQ_TLBI_0_NUM			GENMASK_ULL(16, 12)
#define CMDQ_TLBI_RANGE_NUM_MAX		31
#define CMDQ_TLBI_0_SCALE		GENMASK_ULL(24, 20)
#define CMDQ_TLBI_0_VMID		GENMASK_ULL(47, 32)
#define CMDQ_TLBI_0_ASID		GENMASK_ULL(63, 48)
#define CMDQ_TLBI_1_LEAF		(1UL << 0)
#define CMDQ_TLBI_1_TTL			GENMASK_ULL(9, 8)
#define CMDQ_TLBI_1_TG			GENMASK_ULL(11, 10)
#define CMDQ_TLBI_1_VA_MASK		GENMASK_ULL(63, 12)
#define CMDQ_TLBI_1_IPA_MASK		GENMASK_ULL(51, 12)

#define CMDQ_ATC_0_SSID			GENMASK_ULL(31, 12)
#define CMDQ_ATC_0_SID			GENMASK_ULL(63, 32)
#define CMDQ_ATC_0_GLOBAL		(1UL << 9)
#define CMDQ_ATC_1_SIZE			GENMASK_ULL(5, 0)
#define CMDQ_ATC_1_ADDR_MASK		GENMASK_ULL(63, 12)

#define CMDQ_PRI_0_SSID			GENMASK_ULL(31, 12)
#define CMDQ_PRI_0_SID			GENMASK_ULL(63, 32)
#define CMDQ_PRI_1_GRPID		GENMASK_ULL(8, 0)
#define CMDQ_PRI_1_RESP			GENMASK_ULL(13, 12)

/* CMD_SYNC completion signal: none (poll), IRQ/MSI write, or SEV */
#define CMDQ_SYNC_0_CS			GENMASK_ULL(13, 12)
#define CMDQ_SYNC_0_CS_NONE		0
#define CMDQ_SYNC_0_CS_IRQ		1
#define CMDQ_SYNC_0_CS_SEV		2
#define CMDQ_SYNC_0_MSH			GENMASK_ULL(23, 22)
#define CMDQ_SYNC_0_MSIATTR		GENMASK_ULL(27, 24)
#define CMDQ_SYNC_0_MSIDATA		GENMASK_ULL(63, 32)
#define CMDQ_SYNC_1_MSIADDR_MASK	GENMASK_ULL(51, 2)

/* Event queue: 32-byte entries */
#define EVTQ_ENT_SZ_SHIFT		5
#define EVTQ_ENT_DWORDS			((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
#define EVTQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)

#define EVTQ_0_ID			GENMASK_ULL(7, 0)

/* PRI queue: 16-byte entries */
#define PRIQ_ENT_SZ_SHIFT		4
#define PRIQ_ENT_DWORDS			((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
#define PRIQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)

#define PRIQ_0_SID			GENMASK_ULL(31, 0)
#define PRIQ_0_SSID			GENMASK_ULL(51, 32)
#define PRIQ_0_PERM_PRIV		(1UL << 58)
#define PRIQ_0_PERM_EXEC		(1UL << 59)
#define PRIQ_0_PERM_READ		(1UL << 60)
#define PRIQ_0_PERM_WRITE		(1UL << 61)
#define PRIQ_0_PRG_LAST			(1UL << 62)
#define PRIQ_0_SSID_V			(1UL << 63)

#define PRIQ_1_PRG_IDX			GENMASK_ULL(8, 0)
#define PRIQ_1_ADDR_MASK		GENMASK_ULL(63, 12)

/* High-level queue structures */
#define ARM_SMMU_POLL_TIMEOUT_US	1000000 /* 1s! */
#define ARM_SMMU_POLL_SPIN_COUNT	10

/* Software-reserved IOVA window for mapping MSI doorbells */
#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
415e86d1aa8SWill Deacon 
416e86d1aa8SWill Deacon static bool disable_bypass = 1;
4179305d02aSBarry Song module_param(disable_bypass, bool, 0444);
418e86d1aa8SWill Deacon MODULE_PARM_DESC(disable_bypass,
419e86d1aa8SWill Deacon 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
420e86d1aa8SWill Deacon 
/*
 * Opt-out knob for using an MSI write as the CMD_SYNC completion
 * signal; when set, the driver falls back to polling the queue.
 */
static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");
425bd07a20aSBarry Song 
/*
 * PRI response code, encoded into the CMDQ_PRI_1_RESP field of a
 * CMD_PRI_RESP command (see struct arm_smmu_cmdq_ent.pri).
 */
enum pri_resp {
	PRI_RESP_DENY = 0,
	PRI_RESP_FAIL = 1,
	PRI_RESP_SUCC = 2,
};
431e86d1aa8SWill Deacon 
/*
 * Index into arm_smmu_msi_cfg[] for each interrupt source that can be
 * signalled by MSI; ARM_SMMU_MAX_MSIS is the array bound.
 */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};
438e86d1aa8SWill Deacon 
/*
 * Per-interrupt MSI register offsets: {CFG0, CFG1, CFG2} for each
 * source. CFG0 carries the doorbell address (see MSI_CFG0_ADDR_MASK)
 * and CFG2 the shareability/memory attributes (MSI_CFG2_*); CFG1
 * presumably holds the MSI payload — confirm against the SMMUv3 spec.
 */
static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};
456e86d1aa8SWill Deacon 
/*
 * Software representation of one command before it is marshalled into
 * the 16-byte (CMDQ_ENT_DWORDS) hardware format. The opcode selects
 * which union member is meaningful; the CMDQ_OP_* opcode values are
 * declared next to the member they populate.
 */
struct arm_smmu_cmdq_ent {
	/* Common fields */
	u8				opcode;
	bool				substream_valid;	/* sets CMDQ_0_SSV */

	/* Command-specific fields */
	union {
		#define CMDQ_OP_PREFETCH_CFG	0x1
		struct {
			u32			sid;
			u8			size;
			u64			addr;
		} prefetch;

		/* Configuration-cache invalidation (STE or CD granule) */
		#define CMDQ_OP_CFGI_STE	0x3
		#define CMDQ_OP_CFGI_ALL	0x4
		#define CMDQ_OP_CFGI_CD		0x5
		#define CMDQ_OP_CFGI_CD_ALL	0x6
		struct {
			u32			sid;
			u32			ssid;
			union {
				bool		leaf;
				u8		span;
			};
		} cfgi;

		/* TLB invalidation; num/scale/ttl/tg are range fields */
		#define CMDQ_OP_TLBI_NH_ASID	0x11
		#define CMDQ_OP_TLBI_NH_VA	0x12
		#define CMDQ_OP_TLBI_EL2_ALL	0x20
		#define CMDQ_OP_TLBI_S12_VMALL	0x28
		#define CMDQ_OP_TLBI_S2_IPA	0x2a
		#define CMDQ_OP_TLBI_NSNH_ALL	0x30
		struct {
			u8			num;
			u8			scale;
			u16			asid;
			u16			vmid;
			bool			leaf;
			u8			ttl;
			u8			tg;
			u64			addr;
		} tlbi;

		/* ATC invalidation for an ATS-capable endpoint */
		#define CMDQ_OP_ATC_INV		0x40
		#define ATC_INV_SIZE_ALL	52
		struct {
			u32			sid;
			u32			ssid;
			u64			addr;
			u8			size;	/* log2(bytes) */
			bool			global;
		} atc;

		/* Page request (PRI) response */
		#define CMDQ_OP_PRI_RESP	0x41
		struct {
			u32			sid;
			u32			ssid;
			u16			grpid;
			enum pri_resp		resp;
		} pri;

		/* Completion fence; msiaddr != 0 selects MSI signalling */
		#define CMDQ_OP_CMD_SYNC	0x46
		struct {
			u64			msiaddr;
		} sync;
	};
};
525e86d1aa8SWill Deacon 
/*
 * Low-level producer/consumer state for a queue. The union lets the
 * {prod, cons} pair be read or written as a single 64-bit value (val),
 * or manipulated through the atomic view; the pad forces the whole
 * thing onto its own cache line. max_n_shift is log2 of the number of
 * queue entries (see Q_IDX/Q_WRP).
 */
struct arm_smmu_ll_queue {
	union {
		u64			val;
		struct {
			u32		prod;
			u32		cons;
		};
		struct {
			atomic_t	prod;
			atomic_t	cons;
		} atomic;
		u8			__pad[SMP_CACHE_BYTES];
	} ____cacheline_aligned_in_smp;
	u32				max_n_shift;
};
541e86d1aa8SWill Deacon 
/*
 * One hardware circular queue (cmdq, evtq or priq): the DMA-coherent
 * ring storage plus pointers to its MMIO PROD/CONS registers.
 */
struct arm_smmu_queue {
	struct arm_smmu_ll_queue	llq;
	int				irq; /* Wired interrupt */

	__le64				*base;		/* CPU address of the ring */
	dma_addr_t			base_dma;	/* DMA address of the ring */
	u64				q_base;		/* value for the *_BASE register (Q_BASE_*) */

	size_t				ent_dwords;	/* entry size in 64-bit words */

	u32 __iomem			*prod_reg;
	u32 __iomem			*cons_reg;
};
555e86d1aa8SWill Deacon 
/*
 * State for a bounded busy-wait on a queue: spin spin_cnt times, then
 * back off by delay, giving up at timeout. wfe selects waiting via the
 * event stream instead of pure spinning (cf. ARM_SMMU_FEAT_SEV).
 */
struct arm_smmu_queue_poll {
	ktime_t				timeout;
	unsigned int			delay;
	unsigned int			spin_cnt;
	bool				wfe;
};
562e86d1aa8SWill Deacon 
/*
 * The command queue, shared by all CPUs. valid_map is a bitmap with one
 * bit per queue entry (hence the CMDQ_BATCH_ENTRIES/BITS_PER_LONG
 * sizing rule); owner_prod and lock support the concurrent insertion
 * scheme — see the queue-insertion code for the exact protocol.
 */
struct arm_smmu_cmdq {
	struct arm_smmu_queue		q;
	atomic_long_t			*valid_map;
	atomic_t			owner_prod;
	atomic_t			lock;
};
569e86d1aa8SWill Deacon 
/*
 * A batch of up to CMDQ_BATCH_ENTRIES pre-built commands, submitted to
 * the command queue in one go; num counts the commands currently held.
 */
struct arm_smmu_cmdq_batch {
	u64				cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
	int				num;
};
574e86d1aa8SWill Deacon 
/* Event queue; max_stalls caches IDR5.STALL_MAX from the hardware. */
struct arm_smmu_evtq {
	struct arm_smmu_queue		q;
	u32				max_stalls;
};
579e86d1aa8SWill Deacon 
/* PRI (Page Request Interface) queue. */
struct arm_smmu_priq {
	struct arm_smmu_queue		q;
};
583e86d1aa8SWill Deacon 
584e86d1aa8SWill Deacon /* High-level stream table and context descriptor structures */
/*
 * Shadow of one L1 stream-table descriptor: the CPU/DMA addresses of
 * the lazily-allocated L2 table it points at, and its SPAN value.
 */
struct arm_smmu_strtab_l1_desc {
	u8				span;

	__le64				*l2ptr;
	dma_addr_t			l2ptr_dma;
};
591e86d1aa8SWill Deacon 
/*
 * Software view of one stage-1 context: the ASID plus the TTBR/TCR/MAIR
 * values to be packed into the hardware context descriptor.
 */
struct arm_smmu_ctx_desc {
	u16				asid;
	u64				ttbr;
	u64				tcr;
	u64				mair;
};
598e86d1aa8SWill Deacon 
/* Shadow of one L1 CD-table descriptor: addresses of its L2 table. */
struct arm_smmu_l1_ctx_desc {
	__le64				*l2ptr;
	dma_addr_t			l2ptr_dma;
};
603e86d1aa8SWill Deacon 
/*
 * Context-descriptor table. For a linear table, cdtab is the table
 * itself and l1_desc is unused; for a 2-level table, cdtab is the L1
 * level and l1_desc shadows its num_l1_ents entries.
 */
struct arm_smmu_ctx_desc_cfg {
	__le64				*cdtab;
	dma_addr_t			cdtab_dma;
	struct arm_smmu_l1_ctx_desc	*l1_desc;
	unsigned int			num_l1_ents;
};
610e86d1aa8SWill Deacon 
/*
 * Stage-1 translation config: the CD table, the (single) context
 * descriptor, plus the S1Fmt and S1CDMax values for the STE.
 */
struct arm_smmu_s1_cfg {
	struct arm_smmu_ctx_desc_cfg	cdcfg;
	struct arm_smmu_ctx_desc	cd;
	u8				s1fmt;
	u8				s1cdmax;
};
617e86d1aa8SWill Deacon 
/* Stage-2 translation config: VMID and VTTBR/VTCR for the STE. */
struct arm_smmu_s2_cfg {
	u16				vmid;
	u64				vttbr;
	u64				vtcr;
};
623e86d1aa8SWill Deacon 
/*
 * Stream table state: the table storage (L1 level when 2-level),
 * per-L1-entry shadows, and the values programmed into the
 * STRTAB_BASE / STRTAB_BASE_CFG registers.
 */
struct arm_smmu_strtab_cfg {
	__le64				*strtab;
	dma_addr_t			strtab_dma;
	struct arm_smmu_strtab_l1_desc	*l1_desc;
	unsigned int			num_l1_ents;

	u64				strtab_base;
	u32				strtab_base_cfg;
};
633e86d1aa8SWill Deacon 
634e86d1aa8SWill Deacon /* An SMMUv3 instance */
635e86d1aa8SWill Deacon struct arm_smmu_device {
636e86d1aa8SWill Deacon 	struct device			*dev;
637e86d1aa8SWill Deacon 	void __iomem			*base;
638e86d1aa8SWill Deacon 	void __iomem			*page1;
639e86d1aa8SWill Deacon 
640e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_2_LVL_STRTAB	(1 << 0)
641e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_2_LVL_CDTAB	(1 << 1)
642e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_TT_LE		(1 << 2)
643e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_TT_BE		(1 << 3)
644e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_PRI		(1 << 4)
645e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_ATS		(1 << 5)
646e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_SEV		(1 << 6)
647e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_MSI		(1 << 7)
648e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_COHERENCY		(1 << 8)
649e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_TRANS_S1		(1 << 9)
650e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_TRANS_S2		(1 << 10)
651e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_STALLS		(1 << 11)
652e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_HYP		(1 << 12)
653e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
654e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_VAX		(1 << 14)
655e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_RANGE_INV		(1 << 15)
656e86d1aa8SWill Deacon 	u32				features;
657e86d1aa8SWill Deacon 
658e86d1aa8SWill Deacon #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
659e86d1aa8SWill Deacon #define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
660bd07a20aSBarry Song #define ARM_SMMU_OPT_MSIPOLL		(1 << 2)
661e86d1aa8SWill Deacon 	u32				options;
662e86d1aa8SWill Deacon 
663e86d1aa8SWill Deacon 	struct arm_smmu_cmdq		cmdq;
664e86d1aa8SWill Deacon 	struct arm_smmu_evtq		evtq;
665e86d1aa8SWill Deacon 	struct arm_smmu_priq		priq;
666e86d1aa8SWill Deacon 
667e86d1aa8SWill Deacon 	int				gerr_irq;
668e86d1aa8SWill Deacon 	int				combined_irq;
669e86d1aa8SWill Deacon 
670e86d1aa8SWill Deacon 	unsigned long			ias; /* IPA */
671e86d1aa8SWill Deacon 	unsigned long			oas; /* PA */
672e86d1aa8SWill Deacon 	unsigned long			pgsize_bitmap;
673e86d1aa8SWill Deacon 
674e86d1aa8SWill Deacon #define ARM_SMMU_MAX_ASIDS		(1 << 16)
675e86d1aa8SWill Deacon 	unsigned int			asid_bits;
676e86d1aa8SWill Deacon 
677e86d1aa8SWill Deacon #define ARM_SMMU_MAX_VMIDS		(1 << 16)
678e86d1aa8SWill Deacon 	unsigned int			vmid_bits;
679e86d1aa8SWill Deacon 	DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
680e86d1aa8SWill Deacon 
681e86d1aa8SWill Deacon 	unsigned int			ssid_bits;
682e86d1aa8SWill Deacon 	unsigned int			sid_bits;
683e86d1aa8SWill Deacon 
684e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg	strtab_cfg;
685e86d1aa8SWill Deacon 
686e86d1aa8SWill Deacon 	/* IOMMU core code handle */
687e86d1aa8SWill Deacon 	struct iommu_device		iommu;
688e86d1aa8SWill Deacon };
689e86d1aa8SWill Deacon 
690e86d1aa8SWill Deacon /* SMMU private data for each master */
691e86d1aa8SWill Deacon struct arm_smmu_master {
692e86d1aa8SWill Deacon 	struct arm_smmu_device		*smmu;
693e86d1aa8SWill Deacon 	struct device			*dev;
694e86d1aa8SWill Deacon 	struct arm_smmu_domain		*domain;
695e86d1aa8SWill Deacon 	struct list_head		domain_head;
696e86d1aa8SWill Deacon 	u32				*sids;
697e86d1aa8SWill Deacon 	unsigned int			num_sids;
698e86d1aa8SWill Deacon 	bool				ats_enabled;
699e86d1aa8SWill Deacon 	unsigned int			ssid_bits;
700e86d1aa8SWill Deacon };
701e86d1aa8SWill Deacon 
702e86d1aa8SWill Deacon /* SMMU private data for an IOMMU domain */
703e86d1aa8SWill Deacon enum arm_smmu_domain_stage {
704e86d1aa8SWill Deacon 	ARM_SMMU_DOMAIN_S1 = 0,
705e86d1aa8SWill Deacon 	ARM_SMMU_DOMAIN_S2,
706e86d1aa8SWill Deacon 	ARM_SMMU_DOMAIN_NESTED,
707e86d1aa8SWill Deacon 	ARM_SMMU_DOMAIN_BYPASS,
708e86d1aa8SWill Deacon };
709e86d1aa8SWill Deacon 
710e86d1aa8SWill Deacon struct arm_smmu_domain {
711e86d1aa8SWill Deacon 	struct arm_smmu_device		*smmu;
712e86d1aa8SWill Deacon 	struct mutex			init_mutex; /* Protects smmu pointer */
713e86d1aa8SWill Deacon 
714e86d1aa8SWill Deacon 	struct io_pgtable_ops		*pgtbl_ops;
715e86d1aa8SWill Deacon 	bool				non_strict;
716e86d1aa8SWill Deacon 	atomic_t			nr_ats_masters;
717e86d1aa8SWill Deacon 
718e86d1aa8SWill Deacon 	enum arm_smmu_domain_stage	stage;
719e86d1aa8SWill Deacon 	union {
720e86d1aa8SWill Deacon 		struct arm_smmu_s1_cfg	s1_cfg;
721e86d1aa8SWill Deacon 		struct arm_smmu_s2_cfg	s2_cfg;
722e86d1aa8SWill Deacon 	};
723e86d1aa8SWill Deacon 
724e86d1aa8SWill Deacon 	struct iommu_domain		domain;
725e86d1aa8SWill Deacon 
726e86d1aa8SWill Deacon 	struct list_head		devices;
727e86d1aa8SWill Deacon 	spinlock_t			devices_lock;
728e86d1aa8SWill Deacon };
729e86d1aa8SWill Deacon 
730e86d1aa8SWill Deacon struct arm_smmu_option_prop {
731e86d1aa8SWill Deacon 	u32 opt;
732e86d1aa8SWill Deacon 	const char *prop;
733e86d1aa8SWill Deacon };
734e86d1aa8SWill Deacon 
735e86d1aa8SWill Deacon static DEFINE_XARRAY_ALLOC1(asid_xa);
736e86d1aa8SWill Deacon 
737e86d1aa8SWill Deacon static struct arm_smmu_option_prop arm_smmu_options[] = {
738e86d1aa8SWill Deacon 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
739e86d1aa8SWill Deacon 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
740e86d1aa8SWill Deacon 	{ 0, NULL},
741e86d1aa8SWill Deacon };
742e86d1aa8SWill Deacon 
743e86d1aa8SWill Deacon static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
744e86d1aa8SWill Deacon 						 struct arm_smmu_device *smmu)
745e86d1aa8SWill Deacon {
746e86d1aa8SWill Deacon 	if (offset > SZ_64K)
747e86d1aa8SWill Deacon 		return smmu->page1 + offset - SZ_64K;
748e86d1aa8SWill Deacon 
749e86d1aa8SWill Deacon 	return smmu->base + offset;
750e86d1aa8SWill Deacon }
751e86d1aa8SWill Deacon 
752e86d1aa8SWill Deacon static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
753e86d1aa8SWill Deacon {
754e86d1aa8SWill Deacon 	return container_of(dom, struct arm_smmu_domain, domain);
755e86d1aa8SWill Deacon }
756e86d1aa8SWill Deacon 
757e86d1aa8SWill Deacon static void parse_driver_options(struct arm_smmu_device *smmu)
758e86d1aa8SWill Deacon {
759e86d1aa8SWill Deacon 	int i = 0;
760e86d1aa8SWill Deacon 
761e86d1aa8SWill Deacon 	do {
762e86d1aa8SWill Deacon 		if (of_property_read_bool(smmu->dev->of_node,
763e86d1aa8SWill Deacon 						arm_smmu_options[i].prop)) {
764e86d1aa8SWill Deacon 			smmu->options |= arm_smmu_options[i].opt;
765e86d1aa8SWill Deacon 			dev_notice(smmu->dev, "option %s\n",
766e86d1aa8SWill Deacon 				arm_smmu_options[i].prop);
767e86d1aa8SWill Deacon 		}
768e86d1aa8SWill Deacon 	} while (arm_smmu_options[++i].opt);
769e86d1aa8SWill Deacon }
770e86d1aa8SWill Deacon 
771e86d1aa8SWill Deacon /* Low-level queue manipulation functions */
772e86d1aa8SWill Deacon static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
773e86d1aa8SWill Deacon {
774e86d1aa8SWill Deacon 	u32 space, prod, cons;
775e86d1aa8SWill Deacon 
776e86d1aa8SWill Deacon 	prod = Q_IDX(q, q->prod);
777e86d1aa8SWill Deacon 	cons = Q_IDX(q, q->cons);
778e86d1aa8SWill Deacon 
779e86d1aa8SWill Deacon 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
780e86d1aa8SWill Deacon 		space = (1 << q->max_n_shift) - (prod - cons);
781e86d1aa8SWill Deacon 	else
782e86d1aa8SWill Deacon 		space = cons - prod;
783e86d1aa8SWill Deacon 
784e86d1aa8SWill Deacon 	return space >= n;
785e86d1aa8SWill Deacon }
786e86d1aa8SWill Deacon 
787e86d1aa8SWill Deacon static bool queue_full(struct arm_smmu_ll_queue *q)
788e86d1aa8SWill Deacon {
789e86d1aa8SWill Deacon 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
790e86d1aa8SWill Deacon 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
791e86d1aa8SWill Deacon }
792e86d1aa8SWill Deacon 
793e86d1aa8SWill Deacon static bool queue_empty(struct arm_smmu_ll_queue *q)
794e86d1aa8SWill Deacon {
795e86d1aa8SWill Deacon 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
796e86d1aa8SWill Deacon 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
797e86d1aa8SWill Deacon }
798e86d1aa8SWill Deacon 
799e86d1aa8SWill Deacon static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
800e86d1aa8SWill Deacon {
801e86d1aa8SWill Deacon 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
802e86d1aa8SWill Deacon 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
803e86d1aa8SWill Deacon 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
804e86d1aa8SWill Deacon 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
805e86d1aa8SWill Deacon }
806e86d1aa8SWill Deacon 
807e86d1aa8SWill Deacon static void queue_sync_cons_out(struct arm_smmu_queue *q)
808e86d1aa8SWill Deacon {
809e86d1aa8SWill Deacon 	/*
810e86d1aa8SWill Deacon 	 * Ensure that all CPU accesses (reads and writes) to the queue
811e86d1aa8SWill Deacon 	 * are complete before we update the cons pointer.
812e86d1aa8SWill Deacon 	 */
813e86d1aa8SWill Deacon 	mb();
814e86d1aa8SWill Deacon 	writel_relaxed(q->llq.cons, q->cons_reg);
815e86d1aa8SWill Deacon }
816e86d1aa8SWill Deacon 
817e86d1aa8SWill Deacon static void queue_inc_cons(struct arm_smmu_ll_queue *q)
818e86d1aa8SWill Deacon {
819e86d1aa8SWill Deacon 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
820e86d1aa8SWill Deacon 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
821e86d1aa8SWill Deacon }
822e86d1aa8SWill Deacon 
823e86d1aa8SWill Deacon static int queue_sync_prod_in(struct arm_smmu_queue *q)
824e86d1aa8SWill Deacon {
825e86d1aa8SWill Deacon 	int ret = 0;
826e86d1aa8SWill Deacon 	u32 prod = readl_relaxed(q->prod_reg);
827e86d1aa8SWill Deacon 
828e86d1aa8SWill Deacon 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
829e86d1aa8SWill Deacon 		ret = -EOVERFLOW;
830e86d1aa8SWill Deacon 
831e86d1aa8SWill Deacon 	q->llq.prod = prod;
832e86d1aa8SWill Deacon 	return ret;
833e86d1aa8SWill Deacon }
834e86d1aa8SWill Deacon 
835e86d1aa8SWill Deacon static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
836e86d1aa8SWill Deacon {
837e86d1aa8SWill Deacon 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
838e86d1aa8SWill Deacon 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
839e86d1aa8SWill Deacon }
840e86d1aa8SWill Deacon 
841e86d1aa8SWill Deacon static void queue_poll_init(struct arm_smmu_device *smmu,
842e86d1aa8SWill Deacon 			    struct arm_smmu_queue_poll *qp)
843e86d1aa8SWill Deacon {
844e86d1aa8SWill Deacon 	qp->delay = 1;
845e86d1aa8SWill Deacon 	qp->spin_cnt = 0;
846e86d1aa8SWill Deacon 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
847e86d1aa8SWill Deacon 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
848e86d1aa8SWill Deacon }
849e86d1aa8SWill Deacon 
850e86d1aa8SWill Deacon static int queue_poll(struct arm_smmu_queue_poll *qp)
851e86d1aa8SWill Deacon {
852e86d1aa8SWill Deacon 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
853e86d1aa8SWill Deacon 		return -ETIMEDOUT;
854e86d1aa8SWill Deacon 
855e86d1aa8SWill Deacon 	if (qp->wfe) {
856e86d1aa8SWill Deacon 		wfe();
857e86d1aa8SWill Deacon 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
858e86d1aa8SWill Deacon 		cpu_relax();
859e86d1aa8SWill Deacon 	} else {
860e86d1aa8SWill Deacon 		udelay(qp->delay);
861e86d1aa8SWill Deacon 		qp->delay *= 2;
862e86d1aa8SWill Deacon 		qp->spin_cnt = 0;
863e86d1aa8SWill Deacon 	}
864e86d1aa8SWill Deacon 
865e86d1aa8SWill Deacon 	return 0;
866e86d1aa8SWill Deacon }
867e86d1aa8SWill Deacon 
868e86d1aa8SWill Deacon static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
869e86d1aa8SWill Deacon {
870e86d1aa8SWill Deacon 	int i;
871e86d1aa8SWill Deacon 
872e86d1aa8SWill Deacon 	for (i = 0; i < n_dwords; ++i)
873e86d1aa8SWill Deacon 		*dst++ = cpu_to_le64(*src++);
874e86d1aa8SWill Deacon }
875e86d1aa8SWill Deacon 
876*376cdf66SJean-Philippe Brucker static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
877e86d1aa8SWill Deacon {
878e86d1aa8SWill Deacon 	int i;
879e86d1aa8SWill Deacon 
880e86d1aa8SWill Deacon 	for (i = 0; i < n_dwords; ++i)
881e86d1aa8SWill Deacon 		*dst++ = le64_to_cpu(*src++);
882e86d1aa8SWill Deacon }
883e86d1aa8SWill Deacon 
884e86d1aa8SWill Deacon static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
885e86d1aa8SWill Deacon {
886e86d1aa8SWill Deacon 	if (queue_empty(&q->llq))
887e86d1aa8SWill Deacon 		return -EAGAIN;
888e86d1aa8SWill Deacon 
889e86d1aa8SWill Deacon 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
890e86d1aa8SWill Deacon 	queue_inc_cons(&q->llq);
891e86d1aa8SWill Deacon 	queue_sync_cons_out(q);
892e86d1aa8SWill Deacon 	return 0;
893e86d1aa8SWill Deacon }
894e86d1aa8SWill Deacon 
895e86d1aa8SWill Deacon /* High-level queue accessors */
896e86d1aa8SWill Deacon static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
897e86d1aa8SWill Deacon {
898e86d1aa8SWill Deacon 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
899e86d1aa8SWill Deacon 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
900e86d1aa8SWill Deacon 
901e86d1aa8SWill Deacon 	switch (ent->opcode) {
902e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_EL2_ALL:
903e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_NSNH_ALL:
904e86d1aa8SWill Deacon 		break;
905e86d1aa8SWill Deacon 	case CMDQ_OP_PREFETCH_CFG:
906e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
907e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
908e86d1aa8SWill Deacon 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
909e86d1aa8SWill Deacon 		break;
910e86d1aa8SWill Deacon 	case CMDQ_OP_CFGI_CD:
911e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
912df561f66SGustavo A. R. Silva 		fallthrough;
913e86d1aa8SWill Deacon 	case CMDQ_OP_CFGI_STE:
914e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
915e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
916e86d1aa8SWill Deacon 		break;
917e86d1aa8SWill Deacon 	case CMDQ_OP_CFGI_CD_ALL:
918e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
919e86d1aa8SWill Deacon 		break;
920e86d1aa8SWill Deacon 	case CMDQ_OP_CFGI_ALL:
921e86d1aa8SWill Deacon 		/* Cover the entire SID range */
922e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
923e86d1aa8SWill Deacon 		break;
924e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_NH_VA:
925e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
926e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
927e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
928e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
929e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
930e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
931e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
932e86d1aa8SWill Deacon 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
933e86d1aa8SWill Deacon 		break;
934e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_S2_IPA:
935e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
936e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
937e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
938e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
939e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
940e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
941e86d1aa8SWill Deacon 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
942e86d1aa8SWill Deacon 		break;
943e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_NH_ASID:
944e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
945df561f66SGustavo A. R. Silva 		fallthrough;
946e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_S12_VMALL:
947e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
948e86d1aa8SWill Deacon 		break;
949e86d1aa8SWill Deacon 	case CMDQ_OP_ATC_INV:
950e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
951e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
952e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
953e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
954e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
955e86d1aa8SWill Deacon 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
956e86d1aa8SWill Deacon 		break;
957e86d1aa8SWill Deacon 	case CMDQ_OP_PRI_RESP:
958e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
959e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
960e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
961e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
962e86d1aa8SWill Deacon 		switch (ent->pri.resp) {
963e86d1aa8SWill Deacon 		case PRI_RESP_DENY:
964e86d1aa8SWill Deacon 		case PRI_RESP_FAIL:
965e86d1aa8SWill Deacon 		case PRI_RESP_SUCC:
966e86d1aa8SWill Deacon 			break;
967e86d1aa8SWill Deacon 		default:
968e86d1aa8SWill Deacon 			return -EINVAL;
969e86d1aa8SWill Deacon 		}
970e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
971e86d1aa8SWill Deacon 		break;
972e86d1aa8SWill Deacon 	case CMDQ_OP_CMD_SYNC:
973e86d1aa8SWill Deacon 		if (ent->sync.msiaddr) {
974e86d1aa8SWill Deacon 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
975e86d1aa8SWill Deacon 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
976e86d1aa8SWill Deacon 		} else {
977e86d1aa8SWill Deacon 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
978e86d1aa8SWill Deacon 		}
979e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
980e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
981e86d1aa8SWill Deacon 		break;
982e86d1aa8SWill Deacon 	default:
983e86d1aa8SWill Deacon 		return -ENOENT;
984e86d1aa8SWill Deacon 	}
985e86d1aa8SWill Deacon 
986e86d1aa8SWill Deacon 	return 0;
987e86d1aa8SWill Deacon }
988e86d1aa8SWill Deacon 
989e86d1aa8SWill Deacon static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
990e86d1aa8SWill Deacon 					 u32 prod)
991e86d1aa8SWill Deacon {
992e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->cmdq.q;
993e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent ent = {
994e86d1aa8SWill Deacon 		.opcode = CMDQ_OP_CMD_SYNC,
995e86d1aa8SWill Deacon 	};
996e86d1aa8SWill Deacon 
997e86d1aa8SWill Deacon 	/*
998e86d1aa8SWill Deacon 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
999e86d1aa8SWill Deacon 	 * payload, so the write will zero the entire command on that platform.
1000e86d1aa8SWill Deacon 	 */
1001bd07a20aSBarry Song 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
1002e86d1aa8SWill Deacon 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
1003e86d1aa8SWill Deacon 				   q->ent_dwords * 8;
1004e86d1aa8SWill Deacon 	}
1005e86d1aa8SWill Deacon 
1006e86d1aa8SWill Deacon 	arm_smmu_cmdq_build_cmd(cmd, &ent);
1007e86d1aa8SWill Deacon }
1008e86d1aa8SWill Deacon 
1009e86d1aa8SWill Deacon static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
1010e86d1aa8SWill Deacon {
1011e86d1aa8SWill Deacon 	static const char *cerror_str[] = {
1012e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
1013e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
1014e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
1015e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
1016e86d1aa8SWill Deacon 	};
1017e86d1aa8SWill Deacon 
1018e86d1aa8SWill Deacon 	int i;
1019e86d1aa8SWill Deacon 	u64 cmd[CMDQ_ENT_DWORDS];
1020e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->cmdq.q;
1021e86d1aa8SWill Deacon 	u32 cons = readl_relaxed(q->cons_reg);
1022e86d1aa8SWill Deacon 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
1023e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd_sync = {
1024e86d1aa8SWill Deacon 		.opcode = CMDQ_OP_CMD_SYNC,
1025e86d1aa8SWill Deacon 	};
1026e86d1aa8SWill Deacon 
1027e86d1aa8SWill Deacon 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
1028e86d1aa8SWill Deacon 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
1029e86d1aa8SWill Deacon 
1030e86d1aa8SWill Deacon 	switch (idx) {
1031e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_ABT_IDX:
1032e86d1aa8SWill Deacon 		dev_err(smmu->dev, "retrying command fetch\n");
1033e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_NONE_IDX:
1034e86d1aa8SWill Deacon 		return;
1035e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
1036e86d1aa8SWill Deacon 		/*
1037e86d1aa8SWill Deacon 		 * ATC Invalidation Completion timeout. CONS is still pointing
1038e86d1aa8SWill Deacon 		 * at the CMD_SYNC. Attempt to complete other pending commands
1039e86d1aa8SWill Deacon 		 * by repeating the CMD_SYNC, though we might well end up back
1040e86d1aa8SWill Deacon 		 * here since the ATC invalidation may still be pending.
1041e86d1aa8SWill Deacon 		 */
1042e86d1aa8SWill Deacon 		return;
1043e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_ILL_IDX:
1044e86d1aa8SWill Deacon 	default:
1045e86d1aa8SWill Deacon 		break;
1046e86d1aa8SWill Deacon 	}
1047e86d1aa8SWill Deacon 
1048e86d1aa8SWill Deacon 	/*
1049e86d1aa8SWill Deacon 	 * We may have concurrent producers, so we need to be careful
1050e86d1aa8SWill Deacon 	 * not to touch any of the shadow cmdq state.
1051e86d1aa8SWill Deacon 	 */
1052e86d1aa8SWill Deacon 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
1053e86d1aa8SWill Deacon 	dev_err(smmu->dev, "skipping command in error state:\n");
1054e86d1aa8SWill Deacon 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
1055e86d1aa8SWill Deacon 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
1056e86d1aa8SWill Deacon 
1057e86d1aa8SWill Deacon 	/* Convert the erroneous command into a CMD_SYNC */
1058e86d1aa8SWill Deacon 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
1059e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
1060e86d1aa8SWill Deacon 		return;
1061e86d1aa8SWill Deacon 	}
1062e86d1aa8SWill Deacon 
1063e86d1aa8SWill Deacon 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
1064e86d1aa8SWill Deacon }
1065e86d1aa8SWill Deacon 
1066e86d1aa8SWill Deacon /*
1067e86d1aa8SWill Deacon  * Command queue locking.
1068e86d1aa8SWill Deacon  * This is a form of bastardised rwlock with the following major changes:
1069e86d1aa8SWill Deacon  *
1070e86d1aa8SWill Deacon  * - The only LOCK routines are exclusive_trylock() and shared_lock().
1071e86d1aa8SWill Deacon  *   Neither have barrier semantics, and instead provide only a control
1072e86d1aa8SWill Deacon  *   dependency.
1073e86d1aa8SWill Deacon  *
1074e86d1aa8SWill Deacon  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
1075e86d1aa8SWill Deacon  *   fails if the caller appears to be the last lock holder (yes, this is
1076e86d1aa8SWill Deacon  *   racy). All successful UNLOCK routines have RELEASE semantics.
1077e86d1aa8SWill Deacon  */
1078e86d1aa8SWill Deacon static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
1079e86d1aa8SWill Deacon {
1080e86d1aa8SWill Deacon 	int val;
1081e86d1aa8SWill Deacon 
1082e86d1aa8SWill Deacon 	/*
1083e86d1aa8SWill Deacon 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
1084e86d1aa8SWill Deacon 	 * lock counter. When held in exclusive state, the lock counter is set
1085e86d1aa8SWill Deacon 	 * to INT_MIN so these increments won't hurt as the value will remain
1086e86d1aa8SWill Deacon 	 * negative.
1087e86d1aa8SWill Deacon 	 */
1088e86d1aa8SWill Deacon 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
1089e86d1aa8SWill Deacon 		return;
1090e86d1aa8SWill Deacon 
1091e86d1aa8SWill Deacon 	do {
1092e86d1aa8SWill Deacon 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
1093e86d1aa8SWill Deacon 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
1094e86d1aa8SWill Deacon }
1095e86d1aa8SWill Deacon 
1096e86d1aa8SWill Deacon static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
1097e86d1aa8SWill Deacon {
1098e86d1aa8SWill Deacon 	(void)atomic_dec_return_release(&cmdq->lock);
1099e86d1aa8SWill Deacon }
1100e86d1aa8SWill Deacon 
1101e86d1aa8SWill Deacon static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
1102e86d1aa8SWill Deacon {
1103e86d1aa8SWill Deacon 	if (atomic_read(&cmdq->lock) == 1)
1104e86d1aa8SWill Deacon 		return false;
1105e86d1aa8SWill Deacon 
1106e86d1aa8SWill Deacon 	arm_smmu_cmdq_shared_unlock(cmdq);
1107e86d1aa8SWill Deacon 	return true;
1108e86d1aa8SWill Deacon }
1109e86d1aa8SWill Deacon 
1110e86d1aa8SWill Deacon #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
1111e86d1aa8SWill Deacon ({									\
1112e86d1aa8SWill Deacon 	bool __ret;							\
1113e86d1aa8SWill Deacon 	local_irq_save(flags);						\
1114e86d1aa8SWill Deacon 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
1115e86d1aa8SWill Deacon 	if (!__ret)							\
1116e86d1aa8SWill Deacon 		local_irq_restore(flags);				\
1117e86d1aa8SWill Deacon 	__ret;								\
1118e86d1aa8SWill Deacon })
1119e86d1aa8SWill Deacon 
1120e86d1aa8SWill Deacon #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
1121e86d1aa8SWill Deacon ({									\
1122e86d1aa8SWill Deacon 	atomic_set_release(&cmdq->lock, 0);				\
1123e86d1aa8SWill Deacon 	local_irq_restore(flags);					\
1124e86d1aa8SWill Deacon })
1125e86d1aa8SWill Deacon 
1126e86d1aa8SWill Deacon 
1127e86d1aa8SWill Deacon /*
1128e86d1aa8SWill Deacon  * Command queue insertion.
1129e86d1aa8SWill Deacon  * This is made fiddly by our attempts to achieve some sort of scalability
1130e86d1aa8SWill Deacon  * since there is one queue shared amongst all of the CPUs in the system.  If
1131e86d1aa8SWill Deacon  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
1132e86d1aa8SWill Deacon  * then you'll *love* this monstrosity.
1133e86d1aa8SWill Deacon  *
1134e86d1aa8SWill Deacon  * The basic idea is to split the queue up into ranges of commands that are
1135e86d1aa8SWill Deacon  * owned by a given CPU; the owner may not have written all of the commands
1136e86d1aa8SWill Deacon  * itself, but is responsible for advancing the hardware prod pointer when
1137e86d1aa8SWill Deacon  * the time comes. The algorithm is roughly:
1138e86d1aa8SWill Deacon  *
1139e86d1aa8SWill Deacon  * 	1. Allocate some space in the queue. At this point we also discover
1140e86d1aa8SWill Deacon  *	   whether the head of the queue is currently owned by another CPU,
1141e86d1aa8SWill Deacon  *	   or whether we are the owner.
1142e86d1aa8SWill Deacon  *
1143e86d1aa8SWill Deacon  *	2. Write our commands into our allocated slots in the queue.
1144e86d1aa8SWill Deacon  *
1145e86d1aa8SWill Deacon  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
1146e86d1aa8SWill Deacon  *
1147e86d1aa8SWill Deacon  *	4. If we are an owner:
1148e86d1aa8SWill Deacon  *		a. Wait for the previous owner to finish.
1149e86d1aa8SWill Deacon  *		b. Mark the queue head as unowned, which tells us the range
1150e86d1aa8SWill Deacon  *		   that we are responsible for publishing.
1151e86d1aa8SWill Deacon  *		c. Wait for all commands in our owned range to become valid.
1152e86d1aa8SWill Deacon  *		d. Advance the hardware prod pointer.
1153e86d1aa8SWill Deacon  *		e. Tell the next owner we've finished.
1154e86d1aa8SWill Deacon  *
1155e86d1aa8SWill Deacon  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
1156e86d1aa8SWill Deacon  *	   owner), then we need to stick around until it has completed:
1157e86d1aa8SWill Deacon  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
1158e86d1aa8SWill Deacon  *		   to clear the first 4 bytes.
1159e86d1aa8SWill Deacon  *		b. Otherwise, we spin waiting for the hardware cons pointer to
1160e86d1aa8SWill Deacon  *		   advance past our command.
1161e86d1aa8SWill Deacon  *
1162e86d1aa8SWill Deacon  * The devil is in the details, particularly the use of locking for handling
1163e86d1aa8SWill Deacon  * SYNC completion and freeing up space in the queue before we think that it is
1164e86d1aa8SWill Deacon  * full.
1165e86d1aa8SWill Deacon  */
1166e86d1aa8SWill Deacon static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
1167e86d1aa8SWill Deacon 					       u32 sprod, u32 eprod, bool set)
1168e86d1aa8SWill Deacon {
1169e86d1aa8SWill Deacon 	u32 swidx, sbidx, ewidx, ebidx;
1170e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue llq = {
1171e86d1aa8SWill Deacon 		.max_n_shift	= cmdq->q.llq.max_n_shift,
1172e86d1aa8SWill Deacon 		.prod		= sprod,
1173e86d1aa8SWill Deacon 	};
1174e86d1aa8SWill Deacon 
1175e86d1aa8SWill Deacon 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
1176e86d1aa8SWill Deacon 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
1177e86d1aa8SWill Deacon 
1178e86d1aa8SWill Deacon 	while (llq.prod != eprod) {
1179e86d1aa8SWill Deacon 		unsigned long mask;
1180e86d1aa8SWill Deacon 		atomic_long_t *ptr;
1181e86d1aa8SWill Deacon 		u32 limit = BITS_PER_LONG;
1182e86d1aa8SWill Deacon 
1183e86d1aa8SWill Deacon 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
1184e86d1aa8SWill Deacon 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
1185e86d1aa8SWill Deacon 
1186e86d1aa8SWill Deacon 		ptr = &cmdq->valid_map[swidx];
1187e86d1aa8SWill Deacon 
1188e86d1aa8SWill Deacon 		if ((swidx == ewidx) && (sbidx < ebidx))
1189e86d1aa8SWill Deacon 			limit = ebidx;
1190e86d1aa8SWill Deacon 
1191e86d1aa8SWill Deacon 		mask = GENMASK(limit - 1, sbidx);
1192e86d1aa8SWill Deacon 
1193e86d1aa8SWill Deacon 		/*
1194e86d1aa8SWill Deacon 		 * The valid bit is the inverse of the wrap bit. This means
1195e86d1aa8SWill Deacon 		 * that a zero-initialised queue is invalid and, after marking
1196e86d1aa8SWill Deacon 		 * all entries as valid, they become invalid again when we
1197e86d1aa8SWill Deacon 		 * wrap.
1198e86d1aa8SWill Deacon 		 */
1199e86d1aa8SWill Deacon 		if (set) {
1200e86d1aa8SWill Deacon 			atomic_long_xor(mask, ptr);
1201e86d1aa8SWill Deacon 		} else { /* Poll */
1202e86d1aa8SWill Deacon 			unsigned long valid;
1203e86d1aa8SWill Deacon 
1204e86d1aa8SWill Deacon 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
1205e86d1aa8SWill Deacon 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
1206e86d1aa8SWill Deacon 		}
1207e86d1aa8SWill Deacon 
1208e86d1aa8SWill Deacon 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
1209e86d1aa8SWill Deacon 	}
1210e86d1aa8SWill Deacon }
1211e86d1aa8SWill Deacon 
1212e86d1aa8SWill Deacon /* Mark all entries in the range [sprod, eprod) as valid */
1213e86d1aa8SWill Deacon static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
1214e86d1aa8SWill Deacon 					u32 sprod, u32 eprod)
1215e86d1aa8SWill Deacon {
1216e86d1aa8SWill Deacon 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
1217e86d1aa8SWill Deacon }
1218e86d1aa8SWill Deacon 
1219e86d1aa8SWill Deacon /* Wait for all entries in the range [sprod, eprod) to become valid */
1220e86d1aa8SWill Deacon static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
1221e86d1aa8SWill Deacon 					 u32 sprod, u32 eprod)
1222e86d1aa8SWill Deacon {
1223e86d1aa8SWill Deacon 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
1224e86d1aa8SWill Deacon }
1225e86d1aa8SWill Deacon 
1226e86d1aa8SWill Deacon /* Wait for the command queue to become non-full */
1227e86d1aa8SWill Deacon static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
1228e86d1aa8SWill Deacon 					     struct arm_smmu_ll_queue *llq)
1229e86d1aa8SWill Deacon {
1230e86d1aa8SWill Deacon 	unsigned long flags;
1231e86d1aa8SWill Deacon 	struct arm_smmu_queue_poll qp;
1232e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1233e86d1aa8SWill Deacon 	int ret = 0;
1234e86d1aa8SWill Deacon 
1235e86d1aa8SWill Deacon 	/*
1236e86d1aa8SWill Deacon 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
1237e86d1aa8SWill Deacon 	 * that fails, spin until somebody else updates it for us.
1238e86d1aa8SWill Deacon 	 */
1239e86d1aa8SWill Deacon 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
1240e86d1aa8SWill Deacon 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
1241e86d1aa8SWill Deacon 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
1242e86d1aa8SWill Deacon 		llq->val = READ_ONCE(cmdq->q.llq.val);
1243e86d1aa8SWill Deacon 		return 0;
1244e86d1aa8SWill Deacon 	}
1245e86d1aa8SWill Deacon 
1246e86d1aa8SWill Deacon 	queue_poll_init(smmu, &qp);
1247e86d1aa8SWill Deacon 	do {
1248e86d1aa8SWill Deacon 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1249e86d1aa8SWill Deacon 		if (!queue_full(llq))
1250e86d1aa8SWill Deacon 			break;
1251e86d1aa8SWill Deacon 
1252e86d1aa8SWill Deacon 		ret = queue_poll(&qp);
1253e86d1aa8SWill Deacon 	} while (!ret);
1254e86d1aa8SWill Deacon 
1255e86d1aa8SWill Deacon 	return ret;
1256e86d1aa8SWill Deacon }
1257e86d1aa8SWill Deacon 
1258e86d1aa8SWill Deacon /*
1259e86d1aa8SWill Deacon  * Wait until the SMMU signals a CMD_SYNC completion MSI.
1260e86d1aa8SWill Deacon  * Must be called with the cmdq lock held in some capacity.
1261e86d1aa8SWill Deacon  */
1262e86d1aa8SWill Deacon static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
1263e86d1aa8SWill Deacon 					  struct arm_smmu_ll_queue *llq)
1264e86d1aa8SWill Deacon {
1265e86d1aa8SWill Deacon 	int ret = 0;
1266e86d1aa8SWill Deacon 	struct arm_smmu_queue_poll qp;
1267e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1268e86d1aa8SWill Deacon 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
1269e86d1aa8SWill Deacon 
1270e86d1aa8SWill Deacon 	queue_poll_init(smmu, &qp);
1271e86d1aa8SWill Deacon 
1272e86d1aa8SWill Deacon 	/*
1273e86d1aa8SWill Deacon 	 * The MSI won't generate an event, since it's being written back
1274e86d1aa8SWill Deacon 	 * into the command queue.
1275e86d1aa8SWill Deacon 	 */
1276e86d1aa8SWill Deacon 	qp.wfe = false;
1277e86d1aa8SWill Deacon 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
1278e86d1aa8SWill Deacon 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
1279e86d1aa8SWill Deacon 	return ret;
1280e86d1aa8SWill Deacon }
1281e86d1aa8SWill Deacon 
1282e86d1aa8SWill Deacon /*
1283e86d1aa8SWill Deacon  * Wait until the SMMU cons index passes llq->prod.
1284e86d1aa8SWill Deacon  * Must be called with the cmdq lock held in some capacity.
1285e86d1aa8SWill Deacon  */
1286e86d1aa8SWill Deacon static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
1287e86d1aa8SWill Deacon 					       struct arm_smmu_ll_queue *llq)
1288e86d1aa8SWill Deacon {
1289e86d1aa8SWill Deacon 	struct arm_smmu_queue_poll qp;
1290e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1291e86d1aa8SWill Deacon 	u32 prod = llq->prod;
1292e86d1aa8SWill Deacon 	int ret = 0;
1293e86d1aa8SWill Deacon 
1294e86d1aa8SWill Deacon 	queue_poll_init(smmu, &qp);
1295e86d1aa8SWill Deacon 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1296e86d1aa8SWill Deacon 	do {
1297e86d1aa8SWill Deacon 		if (queue_consumed(llq, prod))
1298e86d1aa8SWill Deacon 			break;
1299e86d1aa8SWill Deacon 
1300e86d1aa8SWill Deacon 		ret = queue_poll(&qp);
1301e86d1aa8SWill Deacon 
1302e86d1aa8SWill Deacon 		/*
1303e86d1aa8SWill Deacon 		 * This needs to be a readl() so that our subsequent call
1304e86d1aa8SWill Deacon 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
1305e86d1aa8SWill Deacon 		 *
1306e86d1aa8SWill Deacon 		 * Specifically, we need to ensure that we observe all
1307e86d1aa8SWill Deacon 		 * shared_lock()s by other CMD_SYNCs that share our owner,
1308e86d1aa8SWill Deacon 		 * so that a failing call to tryunlock() means that we're
1309e86d1aa8SWill Deacon 		 * the last one out and therefore we can safely advance
1310e86d1aa8SWill Deacon 		 * cmdq->q.llq.cons. Roughly speaking:
1311e86d1aa8SWill Deacon 		 *
1312e86d1aa8SWill Deacon 		 * CPU 0		CPU1			CPU2 (us)
1313e86d1aa8SWill Deacon 		 *
1314e86d1aa8SWill Deacon 		 * if (sync)
1315e86d1aa8SWill Deacon 		 * 	shared_lock();
1316e86d1aa8SWill Deacon 		 *
1317e86d1aa8SWill Deacon 		 * dma_wmb();
1318e86d1aa8SWill Deacon 		 * set_valid_map();
1319e86d1aa8SWill Deacon 		 *
1320e86d1aa8SWill Deacon 		 * 			if (owner) {
1321e86d1aa8SWill Deacon 		 *				poll_valid_map();
1322e86d1aa8SWill Deacon 		 *				<control dependency>
1323e86d1aa8SWill Deacon 		 *				writel(prod_reg);
1324e86d1aa8SWill Deacon 		 *
1325e86d1aa8SWill Deacon 		 *						readl(cons_reg);
1326e86d1aa8SWill Deacon 		 *						tryunlock();
1327e86d1aa8SWill Deacon 		 *
1328e86d1aa8SWill Deacon 		 * Requires us to see CPU 0's shared_lock() acquisition.
1329e86d1aa8SWill Deacon 		 */
1330e86d1aa8SWill Deacon 		llq->cons = readl(cmdq->q.cons_reg);
1331e86d1aa8SWill Deacon 	} while (!ret);
1332e86d1aa8SWill Deacon 
1333e86d1aa8SWill Deacon 	return ret;
1334e86d1aa8SWill Deacon }
1335e86d1aa8SWill Deacon 
1336e86d1aa8SWill Deacon static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
1337e86d1aa8SWill Deacon 					 struct arm_smmu_ll_queue *llq)
1338e86d1aa8SWill Deacon {
1339bd07a20aSBarry Song 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
1340e86d1aa8SWill Deacon 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
1341e86d1aa8SWill Deacon 
1342e86d1aa8SWill Deacon 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
1343e86d1aa8SWill Deacon }
1344e86d1aa8SWill Deacon 
1345e86d1aa8SWill Deacon static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
1346e86d1aa8SWill Deacon 					u32 prod, int n)
1347e86d1aa8SWill Deacon {
1348e86d1aa8SWill Deacon 	int i;
1349e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue llq = {
1350e86d1aa8SWill Deacon 		.max_n_shift	= cmdq->q.llq.max_n_shift,
1351e86d1aa8SWill Deacon 		.prod		= prod,
1352e86d1aa8SWill Deacon 	};
1353e86d1aa8SWill Deacon 
1354e86d1aa8SWill Deacon 	for (i = 0; i < n; ++i) {
1355e86d1aa8SWill Deacon 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
1356e86d1aa8SWill Deacon 
1357e86d1aa8SWill Deacon 		prod = queue_inc_prod_n(&llq, i);
1358e86d1aa8SWill Deacon 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
1359e86d1aa8SWill Deacon 	}
1360e86d1aa8SWill Deacon }
1361e86d1aa8SWill Deacon 
1362e86d1aa8SWill Deacon /*
1363e86d1aa8SWill Deacon  * This is the actual insertion function, and provides the following
1364e86d1aa8SWill Deacon  * ordering guarantees to callers:
1365e86d1aa8SWill Deacon  *
1366e86d1aa8SWill Deacon  * - There is a dma_wmb() before publishing any commands to the queue.
1367e86d1aa8SWill Deacon  *   This can be relied upon to order prior writes to data structures
1368e86d1aa8SWill Deacon  *   in memory (such as a CD or an STE) before the command.
1369e86d1aa8SWill Deacon  *
1370e86d1aa8SWill Deacon  * - On completion of a CMD_SYNC, there is a control dependency.
1371e86d1aa8SWill Deacon  *   This can be relied upon to order subsequent writes to memory (e.g.
1372e86d1aa8SWill Deacon  *   freeing an IOVA) after completion of the CMD_SYNC.
1373e86d1aa8SWill Deacon  *
1374e86d1aa8SWill Deacon  * - Command insertion is totally ordered, so if two CPUs each race to
1375e86d1aa8SWill Deacon  *   insert their own list of commands then all of the commands from one
1376e86d1aa8SWill Deacon  *   CPU will appear before any of the commands from the other CPU.
1377e86d1aa8SWill Deacon  */
1378e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
1379e86d1aa8SWill Deacon 				       u64 *cmds, int n, bool sync)
1380e86d1aa8SWill Deacon {
1381e86d1aa8SWill Deacon 	u64 cmd_sync[CMDQ_ENT_DWORDS];
1382e86d1aa8SWill Deacon 	u32 prod;
1383e86d1aa8SWill Deacon 	unsigned long flags;
1384e86d1aa8SWill Deacon 	bool owner;
1385e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1386e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue llq = {
1387e86d1aa8SWill Deacon 		.max_n_shift = cmdq->q.llq.max_n_shift,
1388e86d1aa8SWill Deacon 	}, head = llq;
1389e86d1aa8SWill Deacon 	int ret = 0;
1390e86d1aa8SWill Deacon 
1391e86d1aa8SWill Deacon 	/* 1. Allocate some space in the queue */
1392e86d1aa8SWill Deacon 	local_irq_save(flags);
1393e86d1aa8SWill Deacon 	llq.val = READ_ONCE(cmdq->q.llq.val);
1394e86d1aa8SWill Deacon 	do {
1395e86d1aa8SWill Deacon 		u64 old;
1396e86d1aa8SWill Deacon 
1397e86d1aa8SWill Deacon 		while (!queue_has_space(&llq, n + sync)) {
1398e86d1aa8SWill Deacon 			local_irq_restore(flags);
1399e86d1aa8SWill Deacon 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
1400e86d1aa8SWill Deacon 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
1401e86d1aa8SWill Deacon 			local_irq_save(flags);
1402e86d1aa8SWill Deacon 		}
1403e86d1aa8SWill Deacon 
1404e86d1aa8SWill Deacon 		head.cons = llq.cons;
1405e86d1aa8SWill Deacon 		head.prod = queue_inc_prod_n(&llq, n + sync) |
1406e86d1aa8SWill Deacon 					     CMDQ_PROD_OWNED_FLAG;
1407e86d1aa8SWill Deacon 
1408e86d1aa8SWill Deacon 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
1409e86d1aa8SWill Deacon 		if (old == llq.val)
1410e86d1aa8SWill Deacon 			break;
1411e86d1aa8SWill Deacon 
1412e86d1aa8SWill Deacon 		llq.val = old;
1413e86d1aa8SWill Deacon 	} while (1);
1414e86d1aa8SWill Deacon 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
1415e86d1aa8SWill Deacon 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
1416e86d1aa8SWill Deacon 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
1417e86d1aa8SWill Deacon 
1418e86d1aa8SWill Deacon 	/*
1419e86d1aa8SWill Deacon 	 * 2. Write our commands into the queue
1420e86d1aa8SWill Deacon 	 * Dependency ordering from the cmpxchg() loop above.
1421e86d1aa8SWill Deacon 	 */
1422e86d1aa8SWill Deacon 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
1423e86d1aa8SWill Deacon 	if (sync) {
1424e86d1aa8SWill Deacon 		prod = queue_inc_prod_n(&llq, n);
1425e86d1aa8SWill Deacon 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
1426e86d1aa8SWill Deacon 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
1427e86d1aa8SWill Deacon 
1428e86d1aa8SWill Deacon 		/*
1429e86d1aa8SWill Deacon 		 * In order to determine completion of our CMD_SYNC, we must
1430e86d1aa8SWill Deacon 		 * ensure that the queue can't wrap twice without us noticing.
1431e86d1aa8SWill Deacon 		 * We achieve that by taking the cmdq lock as shared before
1432e86d1aa8SWill Deacon 		 * marking our slot as valid.
1433e86d1aa8SWill Deacon 		 */
1434e86d1aa8SWill Deacon 		arm_smmu_cmdq_shared_lock(cmdq);
1435e86d1aa8SWill Deacon 	}
1436e86d1aa8SWill Deacon 
1437e86d1aa8SWill Deacon 	/* 3. Mark our slots as valid, ensuring commands are visible first */
1438e86d1aa8SWill Deacon 	dma_wmb();
1439e86d1aa8SWill Deacon 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
1440e86d1aa8SWill Deacon 
1441e86d1aa8SWill Deacon 	/* 4. If we are the owner, take control of the SMMU hardware */
1442e86d1aa8SWill Deacon 	if (owner) {
1443e86d1aa8SWill Deacon 		/* a. Wait for previous owner to finish */
1444e86d1aa8SWill Deacon 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
1445e86d1aa8SWill Deacon 
1446e86d1aa8SWill Deacon 		/* b. Stop gathering work by clearing the owned flag */
1447e86d1aa8SWill Deacon 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
1448e86d1aa8SWill Deacon 						   &cmdq->q.llq.atomic.prod);
1449e86d1aa8SWill Deacon 		prod &= ~CMDQ_PROD_OWNED_FLAG;
1450e86d1aa8SWill Deacon 
1451e86d1aa8SWill Deacon 		/*
1452e86d1aa8SWill Deacon 		 * c. Wait for any gathered work to be written to the queue.
1453e86d1aa8SWill Deacon 		 * Note that we read our own entries so that we have the control
1454e86d1aa8SWill Deacon 		 * dependency required by (d).
1455e86d1aa8SWill Deacon 		 */
1456e86d1aa8SWill Deacon 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
1457e86d1aa8SWill Deacon 
1458e86d1aa8SWill Deacon 		/*
1459e86d1aa8SWill Deacon 		 * d. Advance the hardware prod pointer
1460e86d1aa8SWill Deacon 		 * Control dependency ordering from the entries becoming valid.
1461e86d1aa8SWill Deacon 		 */
1462e86d1aa8SWill Deacon 		writel_relaxed(prod, cmdq->q.prod_reg);
1463e86d1aa8SWill Deacon 
1464e86d1aa8SWill Deacon 		/*
1465e86d1aa8SWill Deacon 		 * e. Tell the next owner we're done
1466e86d1aa8SWill Deacon 		 * Make sure we've updated the hardware first, so that we don't
1467e86d1aa8SWill Deacon 		 * race to update prod and potentially move it backwards.
1468e86d1aa8SWill Deacon 		 */
1469e86d1aa8SWill Deacon 		atomic_set_release(&cmdq->owner_prod, prod);
1470e86d1aa8SWill Deacon 	}
1471e86d1aa8SWill Deacon 
1472e86d1aa8SWill Deacon 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
1473e86d1aa8SWill Deacon 	if (sync) {
1474e86d1aa8SWill Deacon 		llq.prod = queue_inc_prod_n(&llq, n);
1475e86d1aa8SWill Deacon 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
1476e86d1aa8SWill Deacon 		if (ret) {
1477e86d1aa8SWill Deacon 			dev_err_ratelimited(smmu->dev,
1478e86d1aa8SWill Deacon 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
1479e86d1aa8SWill Deacon 					    llq.prod,
1480e86d1aa8SWill Deacon 					    readl_relaxed(cmdq->q.prod_reg),
1481e86d1aa8SWill Deacon 					    readl_relaxed(cmdq->q.cons_reg));
1482e86d1aa8SWill Deacon 		}
1483e86d1aa8SWill Deacon 
1484e86d1aa8SWill Deacon 		/*
1485e86d1aa8SWill Deacon 		 * Try to unlock the cmdq lock. This will fail if we're the last
1486e86d1aa8SWill Deacon 		 * reader, in which case we can safely update cmdq->q.llq.cons
1487e86d1aa8SWill Deacon 		 */
1488e86d1aa8SWill Deacon 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
1489e86d1aa8SWill Deacon 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
1490e86d1aa8SWill Deacon 			arm_smmu_cmdq_shared_unlock(cmdq);
1491e86d1aa8SWill Deacon 		}
1492e86d1aa8SWill Deacon 	}
1493e86d1aa8SWill Deacon 
1494e86d1aa8SWill Deacon 	local_irq_restore(flags);
1495e86d1aa8SWill Deacon 	return ret;
1496e86d1aa8SWill Deacon }
1497e86d1aa8SWill Deacon 
1498e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
1499e86d1aa8SWill Deacon 				   struct arm_smmu_cmdq_ent *ent)
1500e86d1aa8SWill Deacon {
1501e86d1aa8SWill Deacon 	u64 cmd[CMDQ_ENT_DWORDS];
1502e86d1aa8SWill Deacon 
1503e86d1aa8SWill Deacon 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
1504e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
1505e86d1aa8SWill Deacon 			 ent->opcode);
1506e86d1aa8SWill Deacon 		return -EINVAL;
1507e86d1aa8SWill Deacon 	}
1508e86d1aa8SWill Deacon 
1509e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
1510e86d1aa8SWill Deacon }
1511e86d1aa8SWill Deacon 
1512e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1513e86d1aa8SWill Deacon {
1514e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
1515e86d1aa8SWill Deacon }
1516e86d1aa8SWill Deacon 
1517e86d1aa8SWill Deacon static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
1518e86d1aa8SWill Deacon 				    struct arm_smmu_cmdq_batch *cmds,
1519e86d1aa8SWill Deacon 				    struct arm_smmu_cmdq_ent *cmd)
1520e86d1aa8SWill Deacon {
1521e86d1aa8SWill Deacon 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
1522e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
1523e86d1aa8SWill Deacon 		cmds->num = 0;
1524e86d1aa8SWill Deacon 	}
1525e86d1aa8SWill Deacon 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
1526e86d1aa8SWill Deacon 	cmds->num++;
1527e86d1aa8SWill Deacon }
1528e86d1aa8SWill Deacon 
1529e86d1aa8SWill Deacon static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
1530e86d1aa8SWill Deacon 				      struct arm_smmu_cmdq_batch *cmds)
1531e86d1aa8SWill Deacon {
1532e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
1533e86d1aa8SWill Deacon }
1534e86d1aa8SWill Deacon 
1535e86d1aa8SWill Deacon /* Context descriptor manipulation functions */
1536e86d1aa8SWill Deacon static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
1537e86d1aa8SWill Deacon 			     int ssid, bool leaf)
1538e86d1aa8SWill Deacon {
1539e86d1aa8SWill Deacon 	size_t i;
1540e86d1aa8SWill Deacon 	unsigned long flags;
1541e86d1aa8SWill Deacon 	struct arm_smmu_master *master;
1542e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_batch cmds = {};
1543e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1544e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd = {
1545e86d1aa8SWill Deacon 		.opcode	= CMDQ_OP_CFGI_CD,
1546e86d1aa8SWill Deacon 		.cfgi	= {
1547e86d1aa8SWill Deacon 			.ssid	= ssid,
1548e86d1aa8SWill Deacon 			.leaf	= leaf,
1549e86d1aa8SWill Deacon 		},
1550e86d1aa8SWill Deacon 	};
1551e86d1aa8SWill Deacon 
1552e86d1aa8SWill Deacon 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1553e86d1aa8SWill Deacon 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1554e86d1aa8SWill Deacon 		for (i = 0; i < master->num_sids; i++) {
1555e86d1aa8SWill Deacon 			cmd.cfgi.sid = master->sids[i];
1556e86d1aa8SWill Deacon 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1557e86d1aa8SWill Deacon 		}
1558e86d1aa8SWill Deacon 	}
1559e86d1aa8SWill Deacon 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1560e86d1aa8SWill Deacon 
1561e86d1aa8SWill Deacon 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1562e86d1aa8SWill Deacon }
1563e86d1aa8SWill Deacon 
1564e86d1aa8SWill Deacon static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1565e86d1aa8SWill Deacon 					struct arm_smmu_l1_ctx_desc *l1_desc)
1566e86d1aa8SWill Deacon {
1567e86d1aa8SWill Deacon 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1568e86d1aa8SWill Deacon 
1569e86d1aa8SWill Deacon 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1570e86d1aa8SWill Deacon 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1571e86d1aa8SWill Deacon 	if (!l1_desc->l2ptr) {
1572e86d1aa8SWill Deacon 		dev_warn(smmu->dev,
1573e86d1aa8SWill Deacon 			 "failed to allocate context descriptor table\n");
1574e86d1aa8SWill Deacon 		return -ENOMEM;
1575e86d1aa8SWill Deacon 	}
1576e86d1aa8SWill Deacon 	return 0;
1577e86d1aa8SWill Deacon }
1578e86d1aa8SWill Deacon 
1579e86d1aa8SWill Deacon static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1580e86d1aa8SWill Deacon 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1581e86d1aa8SWill Deacon {
1582e86d1aa8SWill Deacon 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1583e86d1aa8SWill Deacon 		  CTXDESC_L1_DESC_V;
1584e86d1aa8SWill Deacon 
1585e86d1aa8SWill Deacon 	/* See comment in arm_smmu_write_ctx_desc() */
1586e86d1aa8SWill Deacon 	WRITE_ONCE(*dst, cpu_to_le64(val));
1587e86d1aa8SWill Deacon }
1588e86d1aa8SWill Deacon 
1589e86d1aa8SWill Deacon static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1590e86d1aa8SWill Deacon 				   u32 ssid)
1591e86d1aa8SWill Deacon {
1592e86d1aa8SWill Deacon 	__le64 *l1ptr;
1593e86d1aa8SWill Deacon 	unsigned int idx;
1594e86d1aa8SWill Deacon 	struct arm_smmu_l1_ctx_desc *l1_desc;
1595e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1596e86d1aa8SWill Deacon 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1597e86d1aa8SWill Deacon 
1598e86d1aa8SWill Deacon 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1599e86d1aa8SWill Deacon 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1600e86d1aa8SWill Deacon 
1601e86d1aa8SWill Deacon 	idx = ssid >> CTXDESC_SPLIT;
1602e86d1aa8SWill Deacon 	l1_desc = &cdcfg->l1_desc[idx];
1603e86d1aa8SWill Deacon 	if (!l1_desc->l2ptr) {
1604e86d1aa8SWill Deacon 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1605e86d1aa8SWill Deacon 			return NULL;
1606e86d1aa8SWill Deacon 
1607e86d1aa8SWill Deacon 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1608e86d1aa8SWill Deacon 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1609e86d1aa8SWill Deacon 		/* An invalid L1CD can be cached */
1610e86d1aa8SWill Deacon 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1611e86d1aa8SWill Deacon 	}
1612e86d1aa8SWill Deacon 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1613e86d1aa8SWill Deacon 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1614e86d1aa8SWill Deacon }
1615e86d1aa8SWill Deacon 
1616e86d1aa8SWill Deacon static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
1617e86d1aa8SWill Deacon 				   int ssid, struct arm_smmu_ctx_desc *cd)
1618e86d1aa8SWill Deacon {
1619e86d1aa8SWill Deacon 	/*
1620e86d1aa8SWill Deacon 	 * This function handles the following cases:
1621e86d1aa8SWill Deacon 	 *
1622e86d1aa8SWill Deacon 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1623e86d1aa8SWill Deacon 	 * (2) Install a secondary CD, for SID+SSID traffic.
1624e86d1aa8SWill Deacon 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1625e86d1aa8SWill Deacon 	 *     CD, then invalidate the old entry and mappings.
1626e86d1aa8SWill Deacon 	 * (4) Remove a secondary CD.
1627e86d1aa8SWill Deacon 	 */
1628e86d1aa8SWill Deacon 	u64 val;
1629e86d1aa8SWill Deacon 	bool cd_live;
1630e86d1aa8SWill Deacon 	__le64 *cdptr;
1631e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1632e86d1aa8SWill Deacon 
1633e86d1aa8SWill Deacon 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1634e86d1aa8SWill Deacon 		return -E2BIG;
1635e86d1aa8SWill Deacon 
1636e86d1aa8SWill Deacon 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1637e86d1aa8SWill Deacon 	if (!cdptr)
1638e86d1aa8SWill Deacon 		return -ENOMEM;
1639e86d1aa8SWill Deacon 
1640e86d1aa8SWill Deacon 	val = le64_to_cpu(cdptr[0]);
1641e86d1aa8SWill Deacon 	cd_live = !!(val & CTXDESC_CD_0_V);
1642e86d1aa8SWill Deacon 
1643e86d1aa8SWill Deacon 	if (!cd) { /* (4) */
1644e86d1aa8SWill Deacon 		val = 0;
1645e86d1aa8SWill Deacon 	} else if (cd_live) { /* (3) */
1646e86d1aa8SWill Deacon 		val &= ~CTXDESC_CD_0_ASID;
1647e86d1aa8SWill Deacon 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1648e86d1aa8SWill Deacon 		/*
1649e86d1aa8SWill Deacon 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1650e86d1aa8SWill Deacon 		 * this substream's traffic
1651e86d1aa8SWill Deacon 		 */
1652e86d1aa8SWill Deacon 	} else { /* (1) and (2) */
1653e86d1aa8SWill Deacon 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1654e86d1aa8SWill Deacon 		cdptr[2] = 0;
1655e86d1aa8SWill Deacon 		cdptr[3] = cpu_to_le64(cd->mair);
1656e86d1aa8SWill Deacon 
1657e86d1aa8SWill Deacon 		/*
1658e86d1aa8SWill Deacon 		 * STE is live, and the SMMU might read dwords of this CD in any
1659e86d1aa8SWill Deacon 		 * order. Ensure that it observes valid values before reading
1660e86d1aa8SWill Deacon 		 * V=1.
1661e86d1aa8SWill Deacon 		 */
1662e86d1aa8SWill Deacon 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1663e86d1aa8SWill Deacon 
1664e86d1aa8SWill Deacon 		val = cd->tcr |
1665e86d1aa8SWill Deacon #ifdef __BIG_ENDIAN
1666e86d1aa8SWill Deacon 			CTXDESC_CD_0_ENDI |
1667e86d1aa8SWill Deacon #endif
1668e86d1aa8SWill Deacon 			CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1669e86d1aa8SWill Deacon 			CTXDESC_CD_0_AA64 |
1670e86d1aa8SWill Deacon 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1671e86d1aa8SWill Deacon 			CTXDESC_CD_0_V;
1672e86d1aa8SWill Deacon 
1673e86d1aa8SWill Deacon 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1674e86d1aa8SWill Deacon 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1675e86d1aa8SWill Deacon 			val |= CTXDESC_CD_0_S;
1676e86d1aa8SWill Deacon 	}
1677e86d1aa8SWill Deacon 
1678e86d1aa8SWill Deacon 	/*
1679e86d1aa8SWill Deacon 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1680e86d1aa8SWill Deacon 	 * "Configuration structures and configuration invalidation completion"
1681e86d1aa8SWill Deacon 	 *
1682e86d1aa8SWill Deacon 	 *   The size of single-copy atomic reads made by the SMMU is
1683e86d1aa8SWill Deacon 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1684e86d1aa8SWill Deacon 	 *   field within an aligned 64-bit span of a structure can be altered
1685e86d1aa8SWill Deacon 	 *   without first making the structure invalid.
1686e86d1aa8SWill Deacon 	 */
1687e86d1aa8SWill Deacon 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1688e86d1aa8SWill Deacon 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1689e86d1aa8SWill Deacon 	return 0;
1690e86d1aa8SWill Deacon }
1691e86d1aa8SWill Deacon 
1692e86d1aa8SWill Deacon static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1693e86d1aa8SWill Deacon {
1694e86d1aa8SWill Deacon 	int ret;
1695e86d1aa8SWill Deacon 	size_t l1size;
1696e86d1aa8SWill Deacon 	size_t max_contexts;
1697e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1698e86d1aa8SWill Deacon 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1699e86d1aa8SWill Deacon 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1700e86d1aa8SWill Deacon 
1701e86d1aa8SWill Deacon 	max_contexts = 1 << cfg->s1cdmax;
1702e86d1aa8SWill Deacon 
1703e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1704e86d1aa8SWill Deacon 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1705e86d1aa8SWill Deacon 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1706e86d1aa8SWill Deacon 		cdcfg->num_l1_ents = max_contexts;
1707e86d1aa8SWill Deacon 
1708e86d1aa8SWill Deacon 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1709e86d1aa8SWill Deacon 	} else {
1710e86d1aa8SWill Deacon 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1711e86d1aa8SWill Deacon 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1712e86d1aa8SWill Deacon 						  CTXDESC_L2_ENTRIES);
1713e86d1aa8SWill Deacon 
1714e86d1aa8SWill Deacon 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1715e86d1aa8SWill Deacon 					      sizeof(*cdcfg->l1_desc),
1716e86d1aa8SWill Deacon 					      GFP_KERNEL);
1717e86d1aa8SWill Deacon 		if (!cdcfg->l1_desc)
1718e86d1aa8SWill Deacon 			return -ENOMEM;
1719e86d1aa8SWill Deacon 
1720e86d1aa8SWill Deacon 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1721e86d1aa8SWill Deacon 	}
1722e86d1aa8SWill Deacon 
1723e86d1aa8SWill Deacon 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1724e86d1aa8SWill Deacon 					   GFP_KERNEL);
1725e86d1aa8SWill Deacon 	if (!cdcfg->cdtab) {
1726e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1727e86d1aa8SWill Deacon 		ret = -ENOMEM;
1728e86d1aa8SWill Deacon 		goto err_free_l1;
1729e86d1aa8SWill Deacon 	}
1730e86d1aa8SWill Deacon 
1731e86d1aa8SWill Deacon 	return 0;
1732e86d1aa8SWill Deacon 
1733e86d1aa8SWill Deacon err_free_l1:
1734e86d1aa8SWill Deacon 	if (cdcfg->l1_desc) {
1735e86d1aa8SWill Deacon 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1736e86d1aa8SWill Deacon 		cdcfg->l1_desc = NULL;
1737e86d1aa8SWill Deacon 	}
1738e86d1aa8SWill Deacon 	return ret;
1739e86d1aa8SWill Deacon }
1740e86d1aa8SWill Deacon 
1741e86d1aa8SWill Deacon static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1742e86d1aa8SWill Deacon {
1743e86d1aa8SWill Deacon 	int i;
1744e86d1aa8SWill Deacon 	size_t size, l1size;
1745e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1746e86d1aa8SWill Deacon 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1747e86d1aa8SWill Deacon 
1748e86d1aa8SWill Deacon 	if (cdcfg->l1_desc) {
1749e86d1aa8SWill Deacon 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1750e86d1aa8SWill Deacon 
1751e86d1aa8SWill Deacon 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1752e86d1aa8SWill Deacon 			if (!cdcfg->l1_desc[i].l2ptr)
1753e86d1aa8SWill Deacon 				continue;
1754e86d1aa8SWill Deacon 
1755e86d1aa8SWill Deacon 			dmam_free_coherent(smmu->dev, size,
1756e86d1aa8SWill Deacon 					   cdcfg->l1_desc[i].l2ptr,
1757e86d1aa8SWill Deacon 					   cdcfg->l1_desc[i].l2ptr_dma);
1758e86d1aa8SWill Deacon 		}
1759e86d1aa8SWill Deacon 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1760e86d1aa8SWill Deacon 		cdcfg->l1_desc = NULL;
1761e86d1aa8SWill Deacon 
1762e86d1aa8SWill Deacon 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1763e86d1aa8SWill Deacon 	} else {
1764e86d1aa8SWill Deacon 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1765e86d1aa8SWill Deacon 	}
1766e86d1aa8SWill Deacon 
1767e86d1aa8SWill Deacon 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1768e86d1aa8SWill Deacon 	cdcfg->cdtab_dma = 0;
1769e86d1aa8SWill Deacon 	cdcfg->cdtab = NULL;
1770e86d1aa8SWill Deacon }
1771e86d1aa8SWill Deacon 
1772e86d1aa8SWill Deacon static void arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1773e86d1aa8SWill Deacon {
1774e86d1aa8SWill Deacon 	if (!cd->asid)
1775e86d1aa8SWill Deacon 		return;
1776e86d1aa8SWill Deacon 
1777e86d1aa8SWill Deacon 	xa_erase(&asid_xa, cd->asid);
1778e86d1aa8SWill Deacon }
1779e86d1aa8SWill Deacon 
1780e86d1aa8SWill Deacon /* Stream table manipulation functions */
1781e86d1aa8SWill Deacon static void
1782e86d1aa8SWill Deacon arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1783e86d1aa8SWill Deacon {
1784e86d1aa8SWill Deacon 	u64 val = 0;
1785e86d1aa8SWill Deacon 
1786e86d1aa8SWill Deacon 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1787e86d1aa8SWill Deacon 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1788e86d1aa8SWill Deacon 
1789e86d1aa8SWill Deacon 	/* See comment in arm_smmu_write_ctx_desc() */
1790e86d1aa8SWill Deacon 	WRITE_ONCE(*dst, cpu_to_le64(val));
1791e86d1aa8SWill Deacon }
1792e86d1aa8SWill Deacon 
1793e86d1aa8SWill Deacon static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1794e86d1aa8SWill Deacon {
1795e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd = {
1796e86d1aa8SWill Deacon 		.opcode	= CMDQ_OP_CFGI_STE,
1797e86d1aa8SWill Deacon 		.cfgi	= {
1798e86d1aa8SWill Deacon 			.sid	= sid,
1799e86d1aa8SWill Deacon 			.leaf	= true,
1800e86d1aa8SWill Deacon 		},
1801e86d1aa8SWill Deacon 	};
1802e86d1aa8SWill Deacon 
1803e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1804e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_sync(smmu);
1805e86d1aa8SWill Deacon }
1806e86d1aa8SWill Deacon 
1807e86d1aa8SWill Deacon static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1808e86d1aa8SWill Deacon 				      __le64 *dst)
1809e86d1aa8SWill Deacon {
1810e86d1aa8SWill Deacon 	/*
1811e86d1aa8SWill Deacon 	 * This is hideously complicated, but we only really care about
1812e86d1aa8SWill Deacon 	 * three cases at the moment:
1813e86d1aa8SWill Deacon 	 *
1814e86d1aa8SWill Deacon 	 * 1. Invalid (all zero) -> bypass/fault (init)
1815e86d1aa8SWill Deacon 	 * 2. Bypass/fault -> translation/bypass (attach)
1816e86d1aa8SWill Deacon 	 * 3. Translation/bypass -> bypass/fault (detach)
1817e86d1aa8SWill Deacon 	 *
1818e86d1aa8SWill Deacon 	 * Given that we can't update the STE atomically and the SMMU
1819e86d1aa8SWill Deacon 	 * doesn't read the thing in a defined order, that leaves us
1820e86d1aa8SWill Deacon 	 * with the following maintenance requirements:
1821e86d1aa8SWill Deacon 	 *
1822e86d1aa8SWill Deacon 	 * 1. Update Config, return (init time STEs aren't live)
1823e86d1aa8SWill Deacon 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1824e86d1aa8SWill Deacon 	 * 3. Update Config, sync
1825e86d1aa8SWill Deacon 	 */
1826e86d1aa8SWill Deacon 	u64 val = le64_to_cpu(dst[0]);
1827e86d1aa8SWill Deacon 	bool ste_live = false;
1828e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = NULL;
1829e86d1aa8SWill Deacon 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1830e86d1aa8SWill Deacon 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1831e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = NULL;
1832e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1833e86d1aa8SWill Deacon 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1834e86d1aa8SWill Deacon 		.prefetch	= {
1835e86d1aa8SWill Deacon 			.sid	= sid,
1836e86d1aa8SWill Deacon 		},
1837e86d1aa8SWill Deacon 	};
1838e86d1aa8SWill Deacon 
1839e86d1aa8SWill Deacon 	if (master) {
1840e86d1aa8SWill Deacon 		smmu_domain = master->domain;
1841e86d1aa8SWill Deacon 		smmu = master->smmu;
1842e86d1aa8SWill Deacon 	}
1843e86d1aa8SWill Deacon 
1844e86d1aa8SWill Deacon 	if (smmu_domain) {
1845e86d1aa8SWill Deacon 		switch (smmu_domain->stage) {
1846e86d1aa8SWill Deacon 		case ARM_SMMU_DOMAIN_S1:
1847e86d1aa8SWill Deacon 			s1_cfg = &smmu_domain->s1_cfg;
1848e86d1aa8SWill Deacon 			break;
1849e86d1aa8SWill Deacon 		case ARM_SMMU_DOMAIN_S2:
1850e86d1aa8SWill Deacon 		case ARM_SMMU_DOMAIN_NESTED:
1851e86d1aa8SWill Deacon 			s2_cfg = &smmu_domain->s2_cfg;
1852e86d1aa8SWill Deacon 			break;
1853e86d1aa8SWill Deacon 		default:
1854e86d1aa8SWill Deacon 			break;
1855e86d1aa8SWill Deacon 		}
1856e86d1aa8SWill Deacon 	}
1857e86d1aa8SWill Deacon 
1858e86d1aa8SWill Deacon 	if (val & STRTAB_STE_0_V) {
1859e86d1aa8SWill Deacon 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1860e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_BYPASS:
1861e86d1aa8SWill Deacon 			break;
1862e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_S1_TRANS:
1863e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_S2_TRANS:
1864e86d1aa8SWill Deacon 			ste_live = true;
1865e86d1aa8SWill Deacon 			break;
1866e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_ABORT:
1867e86d1aa8SWill Deacon 			BUG_ON(!disable_bypass);
1868e86d1aa8SWill Deacon 			break;
1869e86d1aa8SWill Deacon 		default:
1870e86d1aa8SWill Deacon 			BUG(); /* STE corruption */
1871e86d1aa8SWill Deacon 		}
1872e86d1aa8SWill Deacon 	}
1873e86d1aa8SWill Deacon 
1874e86d1aa8SWill Deacon 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1875e86d1aa8SWill Deacon 	val = STRTAB_STE_0_V;
1876e86d1aa8SWill Deacon 
1877e86d1aa8SWill Deacon 	/* Bypass/fault */
1878e86d1aa8SWill Deacon 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1879e86d1aa8SWill Deacon 		if (!smmu_domain && disable_bypass)
1880e86d1aa8SWill Deacon 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1881e86d1aa8SWill Deacon 		else
1882e86d1aa8SWill Deacon 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1883e86d1aa8SWill Deacon 
1884e86d1aa8SWill Deacon 		dst[0] = cpu_to_le64(val);
1885e86d1aa8SWill Deacon 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1886e86d1aa8SWill Deacon 						STRTAB_STE_1_SHCFG_INCOMING));
1887e86d1aa8SWill Deacon 		dst[2] = 0; /* Nuke the VMID */
1888e86d1aa8SWill Deacon 		/*
1889e86d1aa8SWill Deacon 		 * The SMMU can perform negative caching, so we must sync
1890e86d1aa8SWill Deacon 		 * the STE regardless of whether the old value was live.
1891e86d1aa8SWill Deacon 		 */
1892e86d1aa8SWill Deacon 		if (smmu)
1893e86d1aa8SWill Deacon 			arm_smmu_sync_ste_for_sid(smmu, sid);
1894e86d1aa8SWill Deacon 		return;
1895e86d1aa8SWill Deacon 	}
1896e86d1aa8SWill Deacon 
1897e86d1aa8SWill Deacon 	if (s1_cfg) {
1898e86d1aa8SWill Deacon 		BUG_ON(ste_live);
1899e86d1aa8SWill Deacon 		dst[1] = cpu_to_le64(
1900e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1901e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1902e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1903e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1904e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1905e86d1aa8SWill Deacon 
1906e86d1aa8SWill Deacon 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1907e86d1aa8SWill Deacon 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1908e86d1aa8SWill Deacon 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1909e86d1aa8SWill Deacon 
1910e86d1aa8SWill Deacon 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1911e86d1aa8SWill Deacon 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1912e86d1aa8SWill Deacon 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1913e86d1aa8SWill Deacon 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1914e86d1aa8SWill Deacon 	}
1915e86d1aa8SWill Deacon 
1916e86d1aa8SWill Deacon 	if (s2_cfg) {
1917e86d1aa8SWill Deacon 		BUG_ON(ste_live);
1918e86d1aa8SWill Deacon 		dst[2] = cpu_to_le64(
1919e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1920e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1921e86d1aa8SWill Deacon #ifdef __BIG_ENDIAN
1922e86d1aa8SWill Deacon 			 STRTAB_STE_2_S2ENDI |
1923e86d1aa8SWill Deacon #endif
1924e86d1aa8SWill Deacon 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1925e86d1aa8SWill Deacon 			 STRTAB_STE_2_S2R);
1926e86d1aa8SWill Deacon 
1927e86d1aa8SWill Deacon 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1928e86d1aa8SWill Deacon 
1929e86d1aa8SWill Deacon 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1930e86d1aa8SWill Deacon 	}
1931e86d1aa8SWill Deacon 
1932e86d1aa8SWill Deacon 	if (master->ats_enabled)
1933e86d1aa8SWill Deacon 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1934e86d1aa8SWill Deacon 						 STRTAB_STE_1_EATS_TRANS));
1935e86d1aa8SWill Deacon 
1936e86d1aa8SWill Deacon 	arm_smmu_sync_ste_for_sid(smmu, sid);
1937e86d1aa8SWill Deacon 	/* See comment in arm_smmu_write_ctx_desc() */
1938e86d1aa8SWill Deacon 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1939e86d1aa8SWill Deacon 	arm_smmu_sync_ste_for_sid(smmu, sid);
1940e86d1aa8SWill Deacon 
1941e86d1aa8SWill Deacon 	/* It's likely that we'll want to use the new STE soon */
1942e86d1aa8SWill Deacon 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1943e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1944e86d1aa8SWill Deacon }
1945e86d1aa8SWill Deacon 
1946*376cdf66SJean-Philippe Brucker static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1947e86d1aa8SWill Deacon {
1948e86d1aa8SWill Deacon 	unsigned int i;
1949e86d1aa8SWill Deacon 
1950e86d1aa8SWill Deacon 	for (i = 0; i < nent; ++i) {
1951e86d1aa8SWill Deacon 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1952e86d1aa8SWill Deacon 		strtab += STRTAB_STE_DWORDS;
1953e86d1aa8SWill Deacon 	}
1954e86d1aa8SWill Deacon }
1955e86d1aa8SWill Deacon 
1956e86d1aa8SWill Deacon static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1957e86d1aa8SWill Deacon {
1958e86d1aa8SWill Deacon 	size_t size;
1959e86d1aa8SWill Deacon 	void *strtab;
1960e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1961e86d1aa8SWill Deacon 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1962e86d1aa8SWill Deacon 
1963e86d1aa8SWill Deacon 	if (desc->l2ptr)
1964e86d1aa8SWill Deacon 		return 0;
1965e86d1aa8SWill Deacon 
1966e86d1aa8SWill Deacon 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1967e86d1aa8SWill Deacon 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1968e86d1aa8SWill Deacon 
1969e86d1aa8SWill Deacon 	desc->span = STRTAB_SPLIT + 1;
1970e86d1aa8SWill Deacon 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1971e86d1aa8SWill Deacon 					  GFP_KERNEL);
1972e86d1aa8SWill Deacon 	if (!desc->l2ptr) {
1973e86d1aa8SWill Deacon 		dev_err(smmu->dev,
1974e86d1aa8SWill Deacon 			"failed to allocate l2 stream table for SID %u\n",
1975e86d1aa8SWill Deacon 			sid);
1976e86d1aa8SWill Deacon 		return -ENOMEM;
1977e86d1aa8SWill Deacon 	}
1978e86d1aa8SWill Deacon 
1979e86d1aa8SWill Deacon 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1980e86d1aa8SWill Deacon 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1981e86d1aa8SWill Deacon 	return 0;
1982e86d1aa8SWill Deacon }
1983e86d1aa8SWill Deacon 
1984e86d1aa8SWill Deacon /* IRQ and event handlers */
1985e86d1aa8SWill Deacon static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1986e86d1aa8SWill Deacon {
1987e86d1aa8SWill Deacon 	int i;
1988e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = dev;
1989e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->evtq.q;
1990e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue *llq = &q->llq;
1991e86d1aa8SWill Deacon 	u64 evt[EVTQ_ENT_DWORDS];
1992e86d1aa8SWill Deacon 
1993e86d1aa8SWill Deacon 	do {
1994e86d1aa8SWill Deacon 		while (!queue_remove_raw(q, evt)) {
1995e86d1aa8SWill Deacon 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1996e86d1aa8SWill Deacon 
1997e86d1aa8SWill Deacon 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1998e86d1aa8SWill Deacon 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1999e86d1aa8SWill Deacon 				dev_info(smmu->dev, "\t0x%016llx\n",
2000e86d1aa8SWill Deacon 					 (unsigned long long)evt[i]);
2001e86d1aa8SWill Deacon 
2002e86d1aa8SWill Deacon 		}
2003e86d1aa8SWill Deacon 
2004e86d1aa8SWill Deacon 		/*
2005e86d1aa8SWill Deacon 		 * Not much we can do on overflow, so scream and pretend we're
2006e86d1aa8SWill Deacon 		 * trying harder.
2007e86d1aa8SWill Deacon 		 */
2008e86d1aa8SWill Deacon 		if (queue_sync_prod_in(q) == -EOVERFLOW)
2009e86d1aa8SWill Deacon 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
2010e86d1aa8SWill Deacon 	} while (!queue_empty(llq));
2011e86d1aa8SWill Deacon 
2012e86d1aa8SWill Deacon 	/* Sync our overflow flag, as we believe we're up to speed */
2013e86d1aa8SWill Deacon 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
2014e86d1aa8SWill Deacon 		    Q_IDX(llq, llq->cons);
2015e86d1aa8SWill Deacon 	return IRQ_HANDLED;
2016e86d1aa8SWill Deacon }
2017e86d1aa8SWill Deacon 
2018e86d1aa8SWill Deacon static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
2019e86d1aa8SWill Deacon {
2020e86d1aa8SWill Deacon 	u32 sid, ssid;
2021e86d1aa8SWill Deacon 	u16 grpid;
2022e86d1aa8SWill Deacon 	bool ssv, last;
2023e86d1aa8SWill Deacon 
2024e86d1aa8SWill Deacon 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
2025e86d1aa8SWill Deacon 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
2026e86d1aa8SWill Deacon 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
2027e86d1aa8SWill Deacon 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
2028e86d1aa8SWill Deacon 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
2029e86d1aa8SWill Deacon 
2030e86d1aa8SWill Deacon 	dev_info(smmu->dev, "unexpected PRI request received:\n");
2031e86d1aa8SWill Deacon 	dev_info(smmu->dev,
2032e86d1aa8SWill Deacon 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
2033e86d1aa8SWill Deacon 		 sid, ssid, grpid, last ? "L" : "",
2034e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
2035e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
2036e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
2037e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
2038e86d1aa8SWill Deacon 		 evt[1] & PRIQ_1_ADDR_MASK);
2039e86d1aa8SWill Deacon 
2040e86d1aa8SWill Deacon 	if (last) {
2041e86d1aa8SWill Deacon 		struct arm_smmu_cmdq_ent cmd = {
2042e86d1aa8SWill Deacon 			.opcode			= CMDQ_OP_PRI_RESP,
2043e86d1aa8SWill Deacon 			.substream_valid	= ssv,
2044e86d1aa8SWill Deacon 			.pri			= {
2045e86d1aa8SWill Deacon 				.sid	= sid,
2046e86d1aa8SWill Deacon 				.ssid	= ssid,
2047e86d1aa8SWill Deacon 				.grpid	= grpid,
2048e86d1aa8SWill Deacon 				.resp	= PRI_RESP_DENY,
2049e86d1aa8SWill Deacon 			},
2050e86d1aa8SWill Deacon 		};
2051e86d1aa8SWill Deacon 
2052e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2053e86d1aa8SWill Deacon 	}
2054e86d1aa8SWill Deacon }
2055e86d1aa8SWill Deacon 
2056e86d1aa8SWill Deacon static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
2057e86d1aa8SWill Deacon {
2058e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = dev;
2059e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->priq.q;
2060e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue *llq = &q->llq;
2061e86d1aa8SWill Deacon 	u64 evt[PRIQ_ENT_DWORDS];
2062e86d1aa8SWill Deacon 
2063e86d1aa8SWill Deacon 	do {
2064e86d1aa8SWill Deacon 		while (!queue_remove_raw(q, evt))
2065e86d1aa8SWill Deacon 			arm_smmu_handle_ppr(smmu, evt);
2066e86d1aa8SWill Deacon 
2067e86d1aa8SWill Deacon 		if (queue_sync_prod_in(q) == -EOVERFLOW)
2068e86d1aa8SWill Deacon 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
2069e86d1aa8SWill Deacon 	} while (!queue_empty(llq));
2070e86d1aa8SWill Deacon 
2071e86d1aa8SWill Deacon 	/* Sync our overflow flag, as we believe we're up to speed */
2072e86d1aa8SWill Deacon 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
2073e86d1aa8SWill Deacon 		      Q_IDX(llq, llq->cons);
2074e86d1aa8SWill Deacon 	queue_sync_cons_out(q);
2075e86d1aa8SWill Deacon 	return IRQ_HANDLED;
2076e86d1aa8SWill Deacon }
2077e86d1aa8SWill Deacon 
2078e86d1aa8SWill Deacon static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
2079e86d1aa8SWill Deacon 
2080e86d1aa8SWill Deacon static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
2081e86d1aa8SWill Deacon {
2082e86d1aa8SWill Deacon 	u32 gerror, gerrorn, active;
2083e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = dev;
2084e86d1aa8SWill Deacon 
2085e86d1aa8SWill Deacon 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
2086e86d1aa8SWill Deacon 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
2087e86d1aa8SWill Deacon 
2088e86d1aa8SWill Deacon 	active = gerror ^ gerrorn;
2089e86d1aa8SWill Deacon 	if (!(active & GERROR_ERR_MASK))
2090e86d1aa8SWill Deacon 		return IRQ_NONE; /* No errors pending */
2091e86d1aa8SWill Deacon 
2092e86d1aa8SWill Deacon 	dev_warn(smmu->dev,
2093e86d1aa8SWill Deacon 		 "unexpected global error reported (0x%08x), this could be serious\n",
2094e86d1aa8SWill Deacon 		 active);
2095e86d1aa8SWill Deacon 
2096e86d1aa8SWill Deacon 	if (active & GERROR_SFM_ERR) {
2097e86d1aa8SWill Deacon 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
2098e86d1aa8SWill Deacon 		arm_smmu_device_disable(smmu);
2099e86d1aa8SWill Deacon 	}
2100e86d1aa8SWill Deacon 
2101e86d1aa8SWill Deacon 	if (active & GERROR_MSI_GERROR_ABT_ERR)
2102e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
2103e86d1aa8SWill Deacon 
2104e86d1aa8SWill Deacon 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
2105e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
2106e86d1aa8SWill Deacon 
2107e86d1aa8SWill Deacon 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
2108e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
2109e86d1aa8SWill Deacon 
2110e86d1aa8SWill Deacon 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
2111e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
2112e86d1aa8SWill Deacon 
2113e86d1aa8SWill Deacon 	if (active & GERROR_PRIQ_ABT_ERR)
2114e86d1aa8SWill Deacon 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
2115e86d1aa8SWill Deacon 
2116e86d1aa8SWill Deacon 	if (active & GERROR_EVTQ_ABT_ERR)
2117e86d1aa8SWill Deacon 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
2118e86d1aa8SWill Deacon 
2119e86d1aa8SWill Deacon 	if (active & GERROR_CMDQ_ERR)
2120e86d1aa8SWill Deacon 		arm_smmu_cmdq_skip_err(smmu);
2121e86d1aa8SWill Deacon 
2122e86d1aa8SWill Deacon 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
2123e86d1aa8SWill Deacon 	return IRQ_HANDLED;
2124e86d1aa8SWill Deacon }
2125e86d1aa8SWill Deacon 
2126e86d1aa8SWill Deacon static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
2127e86d1aa8SWill Deacon {
2128e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = dev;
2129e86d1aa8SWill Deacon 
2130e86d1aa8SWill Deacon 	arm_smmu_evtq_thread(irq, dev);
2131e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2132e86d1aa8SWill Deacon 		arm_smmu_priq_thread(irq, dev);
2133e86d1aa8SWill Deacon 
2134e86d1aa8SWill Deacon 	return IRQ_HANDLED;
2135e86d1aa8SWill Deacon }
2136e86d1aa8SWill Deacon 
2137e86d1aa8SWill Deacon static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
2138e86d1aa8SWill Deacon {
2139e86d1aa8SWill Deacon 	arm_smmu_gerror_handler(irq, dev);
2140e86d1aa8SWill Deacon 	return IRQ_WAKE_THREAD;
2141e86d1aa8SWill Deacon }
2142e86d1aa8SWill Deacon 
2143e86d1aa8SWill Deacon static void
2144e86d1aa8SWill Deacon arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
2145e86d1aa8SWill Deacon 			struct arm_smmu_cmdq_ent *cmd)
2146e86d1aa8SWill Deacon {
2147e86d1aa8SWill Deacon 	size_t log2_span;
2148e86d1aa8SWill Deacon 	size_t span_mask;
2149e86d1aa8SWill Deacon 	/* ATC invalidates are always on 4096-bytes pages */
2150e86d1aa8SWill Deacon 	size_t inval_grain_shift = 12;
2151e86d1aa8SWill Deacon 	unsigned long page_start, page_end;
2152e86d1aa8SWill Deacon 
2153e86d1aa8SWill Deacon 	*cmd = (struct arm_smmu_cmdq_ent) {
2154e86d1aa8SWill Deacon 		.opcode			= CMDQ_OP_ATC_INV,
2155e86d1aa8SWill Deacon 		.substream_valid	= !!ssid,
2156e86d1aa8SWill Deacon 		.atc.ssid		= ssid,
2157e86d1aa8SWill Deacon 	};
2158e86d1aa8SWill Deacon 
2159e86d1aa8SWill Deacon 	if (!size) {
2160e86d1aa8SWill Deacon 		cmd->atc.size = ATC_INV_SIZE_ALL;
2161e86d1aa8SWill Deacon 		return;
2162e86d1aa8SWill Deacon 	}
2163e86d1aa8SWill Deacon 
2164e86d1aa8SWill Deacon 	page_start	= iova >> inval_grain_shift;
2165e86d1aa8SWill Deacon 	page_end	= (iova + size - 1) >> inval_grain_shift;
2166e86d1aa8SWill Deacon 
2167e86d1aa8SWill Deacon 	/*
2168e86d1aa8SWill Deacon 	 * In an ATS Invalidate Request, the address must be aligned on the
2169e86d1aa8SWill Deacon 	 * range size, which must be a power of two number of page sizes. We
2170e86d1aa8SWill Deacon 	 * thus have to choose between grossly over-invalidating the region, or
2171e86d1aa8SWill Deacon 	 * splitting the invalidation into multiple commands. For simplicity
2172e86d1aa8SWill Deacon 	 * we'll go with the first solution, but should refine it in the future
2173e86d1aa8SWill Deacon 	 * if multiple commands are shown to be more efficient.
2174e86d1aa8SWill Deacon 	 *
2175e86d1aa8SWill Deacon 	 * Find the smallest power of two that covers the range. The most
2176e86d1aa8SWill Deacon 	 * significant differing bit between the start and end addresses,
2177e86d1aa8SWill Deacon 	 * fls(start ^ end), indicates the required span. For example:
2178e86d1aa8SWill Deacon 	 *
2179e86d1aa8SWill Deacon 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
2180e86d1aa8SWill Deacon 	 *		x = 0b1000 ^ 0b1011 = 0b11
2181e86d1aa8SWill Deacon 	 *		span = 1 << fls(x) = 4
2182e86d1aa8SWill Deacon 	 *
2183e86d1aa8SWill Deacon 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
2184e86d1aa8SWill Deacon 	 *		x = 0b0111 ^ 0b1010 = 0b1101
2185e86d1aa8SWill Deacon 	 *		span = 1 << fls(x) = 16
2186e86d1aa8SWill Deacon 	 */
2187e86d1aa8SWill Deacon 	log2_span	= fls_long(page_start ^ page_end);
2188e86d1aa8SWill Deacon 	span_mask	= (1ULL << log2_span) - 1;
2189e86d1aa8SWill Deacon 
2190e86d1aa8SWill Deacon 	page_start	&= ~span_mask;
2191e86d1aa8SWill Deacon 
2192e86d1aa8SWill Deacon 	cmd->atc.addr	= page_start << inval_grain_shift;
2193e86d1aa8SWill Deacon 	cmd->atc.size	= log2_span;
2194e86d1aa8SWill Deacon }
2195e86d1aa8SWill Deacon 
2196e86d1aa8SWill Deacon static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
2197e86d1aa8SWill Deacon {
2198e86d1aa8SWill Deacon 	int i;
2199e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd;
2200e86d1aa8SWill Deacon 
2201e86d1aa8SWill Deacon 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
2202e86d1aa8SWill Deacon 
2203e86d1aa8SWill Deacon 	for (i = 0; i < master->num_sids; i++) {
2204e86d1aa8SWill Deacon 		cmd.atc.sid = master->sids[i];
2205e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
2206e86d1aa8SWill Deacon 	}
2207e86d1aa8SWill Deacon 
2208e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_sync(master->smmu);
2209e86d1aa8SWill Deacon }
2210e86d1aa8SWill Deacon 
2211e86d1aa8SWill Deacon static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
2212e86d1aa8SWill Deacon 				   int ssid, unsigned long iova, size_t size)
2213e86d1aa8SWill Deacon {
2214e86d1aa8SWill Deacon 	int i;
2215e86d1aa8SWill Deacon 	unsigned long flags;
2216e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd;
2217e86d1aa8SWill Deacon 	struct arm_smmu_master *master;
2218e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_batch cmds = {};
2219e86d1aa8SWill Deacon 
2220e86d1aa8SWill Deacon 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
2221e86d1aa8SWill Deacon 		return 0;
2222e86d1aa8SWill Deacon 
2223e86d1aa8SWill Deacon 	/*
2224e86d1aa8SWill Deacon 	 * Ensure that we've completed prior invalidation of the main TLBs
2225e86d1aa8SWill Deacon 	 * before we read 'nr_ats_masters' in case of a concurrent call to
2226e86d1aa8SWill Deacon 	 * arm_smmu_enable_ats():
2227e86d1aa8SWill Deacon 	 *
2228e86d1aa8SWill Deacon 	 *	// unmap()			// arm_smmu_enable_ats()
2229e86d1aa8SWill Deacon 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
2230e86d1aa8SWill Deacon 	 *	smp_mb();			[...]
2231e86d1aa8SWill Deacon 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
2232e86d1aa8SWill Deacon 	 *
2233e86d1aa8SWill Deacon 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
2234e86d1aa8SWill Deacon 	 * ATS was enabled at the PCI device before completion of the TLBI.
2235e86d1aa8SWill Deacon 	 */
2236e86d1aa8SWill Deacon 	smp_mb();
2237e86d1aa8SWill Deacon 	if (!atomic_read(&smmu_domain->nr_ats_masters))
2238e86d1aa8SWill Deacon 		return 0;
2239e86d1aa8SWill Deacon 
2240e86d1aa8SWill Deacon 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
2241e86d1aa8SWill Deacon 
2242e86d1aa8SWill Deacon 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2243e86d1aa8SWill Deacon 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
2244e86d1aa8SWill Deacon 		if (!master->ats_enabled)
2245e86d1aa8SWill Deacon 			continue;
2246e86d1aa8SWill Deacon 
2247e86d1aa8SWill Deacon 		for (i = 0; i < master->num_sids; i++) {
2248e86d1aa8SWill Deacon 			cmd.atc.sid = master->sids[i];
2249e86d1aa8SWill Deacon 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
2250e86d1aa8SWill Deacon 		}
2251e86d1aa8SWill Deacon 	}
2252e86d1aa8SWill Deacon 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2253e86d1aa8SWill Deacon 
2254e86d1aa8SWill Deacon 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
2255e86d1aa8SWill Deacon }
2256e86d1aa8SWill Deacon 
2257e86d1aa8SWill Deacon /* IO_PGTABLE API */
2258e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_context(void *cookie)
2259e86d1aa8SWill Deacon {
2260e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = cookie;
2261e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2262e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd;
2263e86d1aa8SWill Deacon 
2264e86d1aa8SWill Deacon 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2265e86d1aa8SWill Deacon 		cmd.opcode	= CMDQ_OP_TLBI_NH_ASID;
2266e86d1aa8SWill Deacon 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
2267e86d1aa8SWill Deacon 		cmd.tlbi.vmid	= 0;
2268e86d1aa8SWill Deacon 	} else {
2269e86d1aa8SWill Deacon 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
2270e86d1aa8SWill Deacon 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2271e86d1aa8SWill Deacon 	}
2272e86d1aa8SWill Deacon 
2273e86d1aa8SWill Deacon 	/*
2274e86d1aa8SWill Deacon 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2275e86d1aa8SWill Deacon 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
2276e86d1aa8SWill Deacon 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2277e86d1aa8SWill Deacon 	 * insertion to guarantee those are observed before the TLBI. Do be
2278e86d1aa8SWill Deacon 	 * careful, 007.
2279e86d1aa8SWill Deacon 	 */
2280e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2281e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_sync(smmu);
2282e86d1aa8SWill Deacon 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2283e86d1aa8SWill Deacon }
2284e86d1aa8SWill Deacon 
2285e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
2286e86d1aa8SWill Deacon 				   size_t granule, bool leaf,
2287e86d1aa8SWill Deacon 				   struct arm_smmu_domain *smmu_domain)
2288e86d1aa8SWill Deacon {
2289e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2290e86d1aa8SWill Deacon 	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
2291e86d1aa8SWill Deacon 	size_t inv_range = granule;
2292e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_batch cmds = {};
2293e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd = {
2294e86d1aa8SWill Deacon 		.tlbi = {
2295e86d1aa8SWill Deacon 			.leaf	= leaf,
2296e86d1aa8SWill Deacon 		},
2297e86d1aa8SWill Deacon 	};
2298e86d1aa8SWill Deacon 
2299e86d1aa8SWill Deacon 	if (!size)
2300e86d1aa8SWill Deacon 		return;
2301e86d1aa8SWill Deacon 
2302e86d1aa8SWill Deacon 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2303e86d1aa8SWill Deacon 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
2304e86d1aa8SWill Deacon 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
2305e86d1aa8SWill Deacon 	} else {
2306e86d1aa8SWill Deacon 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
2307e86d1aa8SWill Deacon 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2308e86d1aa8SWill Deacon 	}
2309e86d1aa8SWill Deacon 
2310e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2311e86d1aa8SWill Deacon 		/* Get the leaf page size */
2312e86d1aa8SWill Deacon 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2313e86d1aa8SWill Deacon 
2314e86d1aa8SWill Deacon 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
2315e86d1aa8SWill Deacon 		cmd.tlbi.tg = (tg - 10) / 2;
2316e86d1aa8SWill Deacon 
2317e86d1aa8SWill Deacon 		/* Determine what level the granule is at */
2318e86d1aa8SWill Deacon 		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
2319e86d1aa8SWill Deacon 
2320e86d1aa8SWill Deacon 		num_pages = size >> tg;
2321e86d1aa8SWill Deacon 	}
2322e86d1aa8SWill Deacon 
2323e86d1aa8SWill Deacon 	while (iova < end) {
2324e86d1aa8SWill Deacon 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2325e86d1aa8SWill Deacon 			/*
2326e86d1aa8SWill Deacon 			 * On each iteration of the loop, the range is 5 bits
2327e86d1aa8SWill Deacon 			 * worth of the aligned size remaining.
2328e86d1aa8SWill Deacon 			 * The range in pages is:
2329e86d1aa8SWill Deacon 			 *
2330e86d1aa8SWill Deacon 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
2331e86d1aa8SWill Deacon 			 */
2332e86d1aa8SWill Deacon 			unsigned long scale, num;
2333e86d1aa8SWill Deacon 
2334e86d1aa8SWill Deacon 			/* Determine the power of 2 multiple number of pages */
2335e86d1aa8SWill Deacon 			scale = __ffs(num_pages);
2336e86d1aa8SWill Deacon 			cmd.tlbi.scale = scale;
2337e86d1aa8SWill Deacon 
2338e86d1aa8SWill Deacon 			/* Determine how many chunks of 2^scale size we have */
2339e86d1aa8SWill Deacon 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2340e86d1aa8SWill Deacon 			cmd.tlbi.num = num - 1;
2341e86d1aa8SWill Deacon 
2342e86d1aa8SWill Deacon 			/* range is num * 2^scale * pgsize */
2343e86d1aa8SWill Deacon 			inv_range = num << (scale + tg);
2344e86d1aa8SWill Deacon 
2345e86d1aa8SWill Deacon 			/* Clear out the lower order bits for the next iteration */
2346e86d1aa8SWill Deacon 			num_pages -= num << scale;
2347e86d1aa8SWill Deacon 		}
2348e86d1aa8SWill Deacon 
2349e86d1aa8SWill Deacon 		cmd.tlbi.addr = iova;
2350e86d1aa8SWill Deacon 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
2351e86d1aa8SWill Deacon 		iova += inv_range;
2352e86d1aa8SWill Deacon 	}
2353e86d1aa8SWill Deacon 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
2354e86d1aa8SWill Deacon 
2355e86d1aa8SWill Deacon 	/*
2356e86d1aa8SWill Deacon 	 * Unfortunately, this can't be leaf-only since we may have
2357e86d1aa8SWill Deacon 	 * zapped an entire table.
2358e86d1aa8SWill Deacon 	 */
2359e86d1aa8SWill Deacon 	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
2360e86d1aa8SWill Deacon }
2361e86d1aa8SWill Deacon 
2362e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2363e86d1aa8SWill Deacon 					 unsigned long iova, size_t granule,
2364e86d1aa8SWill Deacon 					 void *cookie)
2365e86d1aa8SWill Deacon {
2366e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = cookie;
2367e86d1aa8SWill Deacon 	struct iommu_domain *domain = &smmu_domain->domain;
2368e86d1aa8SWill Deacon 
2369e86d1aa8SWill Deacon 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2370e86d1aa8SWill Deacon }
2371e86d1aa8SWill Deacon 
2372e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2373e86d1aa8SWill Deacon 				  size_t granule, void *cookie)
2374e86d1aa8SWill Deacon {
2375e86d1aa8SWill Deacon 	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
2376e86d1aa8SWill Deacon }
2377e86d1aa8SWill Deacon 
2378e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
2379e86d1aa8SWill Deacon 				  size_t granule, void *cookie)
2380e86d1aa8SWill Deacon {
2381e86d1aa8SWill Deacon 	arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
2382e86d1aa8SWill Deacon }
2383e86d1aa8SWill Deacon 
2384e86d1aa8SWill Deacon static const struct iommu_flush_ops arm_smmu_flush_ops = {
2385e86d1aa8SWill Deacon 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2386e86d1aa8SWill Deacon 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2387e86d1aa8SWill Deacon 	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
2388e86d1aa8SWill Deacon 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2389e86d1aa8SWill Deacon };
2390e86d1aa8SWill Deacon 
2391e86d1aa8SWill Deacon /* IOMMU API */
2392e86d1aa8SWill Deacon static bool arm_smmu_capable(enum iommu_cap cap)
2393e86d1aa8SWill Deacon {
2394e86d1aa8SWill Deacon 	switch (cap) {
2395e86d1aa8SWill Deacon 	case IOMMU_CAP_CACHE_COHERENCY:
2396e86d1aa8SWill Deacon 		return true;
2397e86d1aa8SWill Deacon 	case IOMMU_CAP_NOEXEC:
2398e86d1aa8SWill Deacon 		return true;
2399e86d1aa8SWill Deacon 	default:
2400e86d1aa8SWill Deacon 		return false;
2401e86d1aa8SWill Deacon 	}
2402e86d1aa8SWill Deacon }
2403e86d1aa8SWill Deacon 
2404e86d1aa8SWill Deacon static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2405e86d1aa8SWill Deacon {
2406e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain;
2407e86d1aa8SWill Deacon 
2408e86d1aa8SWill Deacon 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2409e86d1aa8SWill Deacon 	    type != IOMMU_DOMAIN_DMA &&
2410e86d1aa8SWill Deacon 	    type != IOMMU_DOMAIN_IDENTITY)
2411e86d1aa8SWill Deacon 		return NULL;
2412e86d1aa8SWill Deacon 
2413e86d1aa8SWill Deacon 	/*
2414e86d1aa8SWill Deacon 	 * Allocate the domain and initialise some of its data structures.
2415e86d1aa8SWill Deacon 	 * We can't really do anything meaningful until we've added a
2416e86d1aa8SWill Deacon 	 * master.
2417e86d1aa8SWill Deacon 	 */
2418e86d1aa8SWill Deacon 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2419e86d1aa8SWill Deacon 	if (!smmu_domain)
2420e86d1aa8SWill Deacon 		return NULL;
2421e86d1aa8SWill Deacon 
2422e86d1aa8SWill Deacon 	if (type == IOMMU_DOMAIN_DMA &&
2423e86d1aa8SWill Deacon 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
2424e86d1aa8SWill Deacon 		kfree(smmu_domain);
2425e86d1aa8SWill Deacon 		return NULL;
2426e86d1aa8SWill Deacon 	}
2427e86d1aa8SWill Deacon 
2428e86d1aa8SWill Deacon 	mutex_init(&smmu_domain->init_mutex);
2429e86d1aa8SWill Deacon 	INIT_LIST_HEAD(&smmu_domain->devices);
2430e86d1aa8SWill Deacon 	spin_lock_init(&smmu_domain->devices_lock);
2431e86d1aa8SWill Deacon 
2432e86d1aa8SWill Deacon 	return &smmu_domain->domain;
2433e86d1aa8SWill Deacon }
2434e86d1aa8SWill Deacon 
2435e86d1aa8SWill Deacon static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2436e86d1aa8SWill Deacon {
2437e86d1aa8SWill Deacon 	int idx, size = 1 << span;
2438e86d1aa8SWill Deacon 
2439e86d1aa8SWill Deacon 	do {
2440e86d1aa8SWill Deacon 		idx = find_first_zero_bit(map, size);
2441e86d1aa8SWill Deacon 		if (idx == size)
2442e86d1aa8SWill Deacon 			return -ENOSPC;
2443e86d1aa8SWill Deacon 	} while (test_and_set_bit(idx, map));
2444e86d1aa8SWill Deacon 
2445e86d1aa8SWill Deacon 	return idx;
2446e86d1aa8SWill Deacon }
2447e86d1aa8SWill Deacon 
2448e86d1aa8SWill Deacon static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2449e86d1aa8SWill Deacon {
2450e86d1aa8SWill Deacon 	clear_bit(idx, map);
2451e86d1aa8SWill Deacon }
2452e86d1aa8SWill Deacon 
2453e86d1aa8SWill Deacon static void arm_smmu_domain_free(struct iommu_domain *domain)
2454e86d1aa8SWill Deacon {
2455e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2456e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2457e86d1aa8SWill Deacon 
2458e86d1aa8SWill Deacon 	iommu_put_dma_cookie(domain);
2459e86d1aa8SWill Deacon 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2460e86d1aa8SWill Deacon 
2461e86d1aa8SWill Deacon 	/* Free the CD and ASID, if we allocated them */
2462e86d1aa8SWill Deacon 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2463e86d1aa8SWill Deacon 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2464e86d1aa8SWill Deacon 
2465e86d1aa8SWill Deacon 		if (cfg->cdcfg.cdtab)
2466e86d1aa8SWill Deacon 			arm_smmu_free_cd_tables(smmu_domain);
2467e86d1aa8SWill Deacon 		arm_smmu_free_asid(&cfg->cd);
2468e86d1aa8SWill Deacon 	} else {
2469e86d1aa8SWill Deacon 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2470e86d1aa8SWill Deacon 		if (cfg->vmid)
2471e86d1aa8SWill Deacon 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2472e86d1aa8SWill Deacon 	}
2473e86d1aa8SWill Deacon 
2474e86d1aa8SWill Deacon 	kfree(smmu_domain);
2475e86d1aa8SWill Deacon }
2476e86d1aa8SWill Deacon 
2477e86d1aa8SWill Deacon static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2478e86d1aa8SWill Deacon 				       struct arm_smmu_master *master,
2479e86d1aa8SWill Deacon 				       struct io_pgtable_cfg *pgtbl_cfg)
2480e86d1aa8SWill Deacon {
2481e86d1aa8SWill Deacon 	int ret;
2482e86d1aa8SWill Deacon 	u32 asid;
2483e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2484e86d1aa8SWill Deacon 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2485e86d1aa8SWill Deacon 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2486e86d1aa8SWill Deacon 
2487e86d1aa8SWill Deacon 	ret = xa_alloc(&asid_xa, &asid, &cfg->cd,
2488e86d1aa8SWill Deacon 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2489e86d1aa8SWill Deacon 	if (ret)
2490e86d1aa8SWill Deacon 		return ret;
2491e86d1aa8SWill Deacon 
2492e86d1aa8SWill Deacon 	cfg->s1cdmax = master->ssid_bits;
2493e86d1aa8SWill Deacon 
2494e86d1aa8SWill Deacon 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2495e86d1aa8SWill Deacon 	if (ret)
2496e86d1aa8SWill Deacon 		goto out_free_asid;
2497e86d1aa8SWill Deacon 
2498e86d1aa8SWill Deacon 	cfg->cd.asid	= (u16)asid;
2499e86d1aa8SWill Deacon 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2500e86d1aa8SWill Deacon 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2501e86d1aa8SWill Deacon 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2502e86d1aa8SWill Deacon 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2503e86d1aa8SWill Deacon 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2504e86d1aa8SWill Deacon 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2505e86d1aa8SWill Deacon 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2506e86d1aa8SWill Deacon 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2507e86d1aa8SWill Deacon 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2508e86d1aa8SWill Deacon 
2509e86d1aa8SWill Deacon 	/*
2510e86d1aa8SWill Deacon 	 * Note that this will end up calling arm_smmu_sync_cd() before
2511e86d1aa8SWill Deacon 	 * the master has been added to the devices list for this domain.
2512e86d1aa8SWill Deacon 	 * This isn't an issue because the STE hasn't been installed yet.
2513e86d1aa8SWill Deacon 	 */
2514e86d1aa8SWill Deacon 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2515e86d1aa8SWill Deacon 	if (ret)
2516e86d1aa8SWill Deacon 		goto out_free_cd_tables;
2517e86d1aa8SWill Deacon 
2518e86d1aa8SWill Deacon 	return 0;
2519e86d1aa8SWill Deacon 
2520e86d1aa8SWill Deacon out_free_cd_tables:
2521e86d1aa8SWill Deacon 	arm_smmu_free_cd_tables(smmu_domain);
2522e86d1aa8SWill Deacon out_free_asid:
2523e86d1aa8SWill Deacon 	arm_smmu_free_asid(&cfg->cd);
2524e86d1aa8SWill Deacon 	return ret;
2525e86d1aa8SWill Deacon }
2526e86d1aa8SWill Deacon 
2527e86d1aa8SWill Deacon static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2528e86d1aa8SWill Deacon 				       struct arm_smmu_master *master,
2529e86d1aa8SWill Deacon 				       struct io_pgtable_cfg *pgtbl_cfg)
2530e86d1aa8SWill Deacon {
2531e86d1aa8SWill Deacon 	int vmid;
2532e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2533e86d1aa8SWill Deacon 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2534e86d1aa8SWill Deacon 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2535e86d1aa8SWill Deacon 
2536e86d1aa8SWill Deacon 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2537e86d1aa8SWill Deacon 	if (vmid < 0)
2538e86d1aa8SWill Deacon 		return vmid;
2539e86d1aa8SWill Deacon 
2540e86d1aa8SWill Deacon 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2541e86d1aa8SWill Deacon 	cfg->vmid	= (u16)vmid;
2542e86d1aa8SWill Deacon 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2543e86d1aa8SWill Deacon 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2544e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2545e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2546e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2547e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2548e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2549e86d1aa8SWill Deacon 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2550e86d1aa8SWill Deacon 	return 0;
2551e86d1aa8SWill Deacon }
2552e86d1aa8SWill Deacon 
2553e86d1aa8SWill Deacon static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2554e86d1aa8SWill Deacon 				    struct arm_smmu_master *master)
2555e86d1aa8SWill Deacon {
2556e86d1aa8SWill Deacon 	int ret;
2557e86d1aa8SWill Deacon 	unsigned long ias, oas;
2558e86d1aa8SWill Deacon 	enum io_pgtable_fmt fmt;
2559e86d1aa8SWill Deacon 	struct io_pgtable_cfg pgtbl_cfg;
2560e86d1aa8SWill Deacon 	struct io_pgtable_ops *pgtbl_ops;
2561e86d1aa8SWill Deacon 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2562e86d1aa8SWill Deacon 				 struct arm_smmu_master *,
2563e86d1aa8SWill Deacon 				 struct io_pgtable_cfg *);
2564e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2565e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2566e86d1aa8SWill Deacon 
2567e86d1aa8SWill Deacon 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2568e86d1aa8SWill Deacon 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2569e86d1aa8SWill Deacon 		return 0;
2570e86d1aa8SWill Deacon 	}
2571e86d1aa8SWill Deacon 
2572e86d1aa8SWill Deacon 	/* Restrict the stage to what we can actually support */
2573e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2574e86d1aa8SWill Deacon 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2575e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2576e86d1aa8SWill Deacon 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2577e86d1aa8SWill Deacon 
2578e86d1aa8SWill Deacon 	switch (smmu_domain->stage) {
2579e86d1aa8SWill Deacon 	case ARM_SMMU_DOMAIN_S1:
2580e86d1aa8SWill Deacon 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2581e86d1aa8SWill Deacon 		ias = min_t(unsigned long, ias, VA_BITS);
2582e86d1aa8SWill Deacon 		oas = smmu->ias;
2583e86d1aa8SWill Deacon 		fmt = ARM_64_LPAE_S1;
2584e86d1aa8SWill Deacon 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2585e86d1aa8SWill Deacon 		break;
2586e86d1aa8SWill Deacon 	case ARM_SMMU_DOMAIN_NESTED:
2587e86d1aa8SWill Deacon 	case ARM_SMMU_DOMAIN_S2:
2588e86d1aa8SWill Deacon 		ias = smmu->ias;
2589e86d1aa8SWill Deacon 		oas = smmu->oas;
2590e86d1aa8SWill Deacon 		fmt = ARM_64_LPAE_S2;
2591e86d1aa8SWill Deacon 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2592e86d1aa8SWill Deacon 		break;
2593e86d1aa8SWill Deacon 	default:
2594e86d1aa8SWill Deacon 		return -EINVAL;
2595e86d1aa8SWill Deacon 	}
2596e86d1aa8SWill Deacon 
2597e86d1aa8SWill Deacon 	pgtbl_cfg = (struct io_pgtable_cfg) {
2598e86d1aa8SWill Deacon 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2599e86d1aa8SWill Deacon 		.ias		= ias,
2600e86d1aa8SWill Deacon 		.oas		= oas,
2601e86d1aa8SWill Deacon 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2602e86d1aa8SWill Deacon 		.tlb		= &arm_smmu_flush_ops,
2603e86d1aa8SWill Deacon 		.iommu_dev	= smmu->dev,
2604e86d1aa8SWill Deacon 	};
2605e86d1aa8SWill Deacon 
2606e86d1aa8SWill Deacon 	if (smmu_domain->non_strict)
2607e86d1aa8SWill Deacon 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2608e86d1aa8SWill Deacon 
2609e86d1aa8SWill Deacon 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2610e86d1aa8SWill Deacon 	if (!pgtbl_ops)
2611e86d1aa8SWill Deacon 		return -ENOMEM;
2612e86d1aa8SWill Deacon 
2613e86d1aa8SWill Deacon 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2614e86d1aa8SWill Deacon 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2615e86d1aa8SWill Deacon 	domain->geometry.force_aperture = true;
2616e86d1aa8SWill Deacon 
2617e86d1aa8SWill Deacon 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2618e86d1aa8SWill Deacon 	if (ret < 0) {
2619e86d1aa8SWill Deacon 		free_io_pgtable_ops(pgtbl_ops);
2620e86d1aa8SWill Deacon 		return ret;
2621e86d1aa8SWill Deacon 	}
2622e86d1aa8SWill Deacon 
2623e86d1aa8SWill Deacon 	smmu_domain->pgtbl_ops = pgtbl_ops;
2624e86d1aa8SWill Deacon 	return 0;
2625e86d1aa8SWill Deacon }
2626e86d1aa8SWill Deacon 
2627e86d1aa8SWill Deacon static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2628e86d1aa8SWill Deacon {
2629e86d1aa8SWill Deacon 	__le64 *step;
2630e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2631e86d1aa8SWill Deacon 
2632e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2633e86d1aa8SWill Deacon 		struct arm_smmu_strtab_l1_desc *l1_desc;
2634e86d1aa8SWill Deacon 		int idx;
2635e86d1aa8SWill Deacon 
2636e86d1aa8SWill Deacon 		/* Two-level walk */
2637e86d1aa8SWill Deacon 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2638e86d1aa8SWill Deacon 		l1_desc = &cfg->l1_desc[idx];
2639e86d1aa8SWill Deacon 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2640e86d1aa8SWill Deacon 		step = &l1_desc->l2ptr[idx];
2641e86d1aa8SWill Deacon 	} else {
2642e86d1aa8SWill Deacon 		/* Simple linear lookup */
2643e86d1aa8SWill Deacon 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2644e86d1aa8SWill Deacon 	}
2645e86d1aa8SWill Deacon 
2646e86d1aa8SWill Deacon 	return step;
2647e86d1aa8SWill Deacon }
2648e86d1aa8SWill Deacon 
2649e86d1aa8SWill Deacon static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2650e86d1aa8SWill Deacon {
2651e86d1aa8SWill Deacon 	int i, j;
2652e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = master->smmu;
2653e86d1aa8SWill Deacon 
2654e86d1aa8SWill Deacon 	for (i = 0; i < master->num_sids; ++i) {
2655e86d1aa8SWill Deacon 		u32 sid = master->sids[i];
2656e86d1aa8SWill Deacon 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2657e86d1aa8SWill Deacon 
2658e86d1aa8SWill Deacon 		/* Bridged PCI devices may end up with duplicated IDs */
2659e86d1aa8SWill Deacon 		for (j = 0; j < i; j++)
2660e86d1aa8SWill Deacon 			if (master->sids[j] == sid)
2661e86d1aa8SWill Deacon 				break;
2662e86d1aa8SWill Deacon 		if (j < i)
2663e86d1aa8SWill Deacon 			continue;
2664e86d1aa8SWill Deacon 
2665e86d1aa8SWill Deacon 		arm_smmu_write_strtab_ent(master, sid, step);
2666e86d1aa8SWill Deacon 	}
2667e86d1aa8SWill Deacon }
2668e86d1aa8SWill Deacon 
2669e86d1aa8SWill Deacon static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2670e86d1aa8SWill Deacon {
2671e86d1aa8SWill Deacon 	struct device *dev = master->dev;
2672e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = master->smmu;
2673e86d1aa8SWill Deacon 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2674e86d1aa8SWill Deacon 
2675e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2676e86d1aa8SWill Deacon 		return false;
2677e86d1aa8SWill Deacon 
2678e86d1aa8SWill Deacon 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2679e86d1aa8SWill Deacon 		return false;
2680e86d1aa8SWill Deacon 
2681e86d1aa8SWill Deacon 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2682e86d1aa8SWill Deacon }
2683e86d1aa8SWill Deacon 
2684e86d1aa8SWill Deacon static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2685e86d1aa8SWill Deacon {
2686e86d1aa8SWill Deacon 	size_t stu;
2687e86d1aa8SWill Deacon 	struct pci_dev *pdev;
2688e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = master->smmu;
2689e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = master->domain;
2690e86d1aa8SWill Deacon 
2691e86d1aa8SWill Deacon 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2692e86d1aa8SWill Deacon 	if (!master->ats_enabled)
2693e86d1aa8SWill Deacon 		return;
2694e86d1aa8SWill Deacon 
2695e86d1aa8SWill Deacon 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2696e86d1aa8SWill Deacon 	stu = __ffs(smmu->pgsize_bitmap);
2697e86d1aa8SWill Deacon 	pdev = to_pci_dev(master->dev);
2698e86d1aa8SWill Deacon 
2699e86d1aa8SWill Deacon 	atomic_inc(&smmu_domain->nr_ats_masters);
2700e86d1aa8SWill Deacon 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2701e86d1aa8SWill Deacon 	if (pci_enable_ats(pdev, stu))
2702e86d1aa8SWill Deacon 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2703e86d1aa8SWill Deacon }
2704e86d1aa8SWill Deacon 
2705e86d1aa8SWill Deacon static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2706e86d1aa8SWill Deacon {
2707e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = master->domain;
2708e86d1aa8SWill Deacon 
2709e86d1aa8SWill Deacon 	if (!master->ats_enabled)
2710e86d1aa8SWill Deacon 		return;
2711e86d1aa8SWill Deacon 
2712e86d1aa8SWill Deacon 	pci_disable_ats(to_pci_dev(master->dev));
2713e86d1aa8SWill Deacon 	/*
2714e86d1aa8SWill Deacon 	 * Ensure ATS is disabled at the endpoint before we issue the
2715e86d1aa8SWill Deacon 	 * ATC invalidation via the SMMU.
2716e86d1aa8SWill Deacon 	 */
2717e86d1aa8SWill Deacon 	wmb();
2718e86d1aa8SWill Deacon 	arm_smmu_atc_inv_master(master);
2719e86d1aa8SWill Deacon 	atomic_dec(&smmu_domain->nr_ats_masters);
2720e86d1aa8SWill Deacon }
2721e86d1aa8SWill Deacon 
2722e86d1aa8SWill Deacon static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2723e86d1aa8SWill Deacon {
2724e86d1aa8SWill Deacon 	int ret;
2725e86d1aa8SWill Deacon 	int features;
2726e86d1aa8SWill Deacon 	int num_pasids;
2727e86d1aa8SWill Deacon 	struct pci_dev *pdev;
2728e86d1aa8SWill Deacon 
2729e86d1aa8SWill Deacon 	if (!dev_is_pci(master->dev))
2730e86d1aa8SWill Deacon 		return -ENODEV;
2731e86d1aa8SWill Deacon 
2732e86d1aa8SWill Deacon 	pdev = to_pci_dev(master->dev);
2733e86d1aa8SWill Deacon 
2734e86d1aa8SWill Deacon 	features = pci_pasid_features(pdev);
2735e86d1aa8SWill Deacon 	if (features < 0)
2736e86d1aa8SWill Deacon 		return features;
2737e86d1aa8SWill Deacon 
2738e86d1aa8SWill Deacon 	num_pasids = pci_max_pasids(pdev);
2739e86d1aa8SWill Deacon 	if (num_pasids <= 0)
2740e86d1aa8SWill Deacon 		return num_pasids;
2741e86d1aa8SWill Deacon 
2742e86d1aa8SWill Deacon 	ret = pci_enable_pasid(pdev, features);
2743e86d1aa8SWill Deacon 	if (ret) {
2744e86d1aa8SWill Deacon 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2745e86d1aa8SWill Deacon 		return ret;
2746e86d1aa8SWill Deacon 	}
2747e86d1aa8SWill Deacon 
2748e86d1aa8SWill Deacon 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2749e86d1aa8SWill Deacon 				  master->smmu->ssid_bits);
2750e86d1aa8SWill Deacon 	return 0;
2751e86d1aa8SWill Deacon }
2752e86d1aa8SWill Deacon 
2753e86d1aa8SWill Deacon static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2754e86d1aa8SWill Deacon {
2755e86d1aa8SWill Deacon 	struct pci_dev *pdev;
2756e86d1aa8SWill Deacon 
2757e86d1aa8SWill Deacon 	if (!dev_is_pci(master->dev))
2758e86d1aa8SWill Deacon 		return;
2759e86d1aa8SWill Deacon 
2760e86d1aa8SWill Deacon 	pdev = to_pci_dev(master->dev);
2761e86d1aa8SWill Deacon 
2762e86d1aa8SWill Deacon 	if (!pdev->pasid_enabled)
2763e86d1aa8SWill Deacon 		return;
2764e86d1aa8SWill Deacon 
2765e86d1aa8SWill Deacon 	master->ssid_bits = 0;
2766e86d1aa8SWill Deacon 	pci_disable_pasid(pdev);
2767e86d1aa8SWill Deacon }
2768e86d1aa8SWill Deacon 
2769e86d1aa8SWill Deacon static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2770e86d1aa8SWill Deacon {
2771e86d1aa8SWill Deacon 	unsigned long flags;
2772e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = master->domain;
2773e86d1aa8SWill Deacon 
2774e86d1aa8SWill Deacon 	if (!smmu_domain)
2775e86d1aa8SWill Deacon 		return;
2776e86d1aa8SWill Deacon 
2777e86d1aa8SWill Deacon 	arm_smmu_disable_ats(master);
2778e86d1aa8SWill Deacon 
2779e86d1aa8SWill Deacon 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2780e86d1aa8SWill Deacon 	list_del(&master->domain_head);
2781e86d1aa8SWill Deacon 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2782e86d1aa8SWill Deacon 
2783e86d1aa8SWill Deacon 	master->domain = NULL;
2784e86d1aa8SWill Deacon 	master->ats_enabled = false;
2785e86d1aa8SWill Deacon 	arm_smmu_install_ste_for_dev(master);
2786e86d1aa8SWill Deacon }
2787e86d1aa8SWill Deacon 
2788e86d1aa8SWill Deacon static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2789e86d1aa8SWill Deacon {
2790e86d1aa8SWill Deacon 	int ret = 0;
2791e86d1aa8SWill Deacon 	unsigned long flags;
2792e86d1aa8SWill Deacon 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2793e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu;
2794e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2795e86d1aa8SWill Deacon 	struct arm_smmu_master *master;
2796e86d1aa8SWill Deacon 
2797e86d1aa8SWill Deacon 	if (!fwspec)
2798e86d1aa8SWill Deacon 		return -ENOENT;
2799e86d1aa8SWill Deacon 
2800e86d1aa8SWill Deacon 	master = dev_iommu_priv_get(dev);
2801e86d1aa8SWill Deacon 	smmu = master->smmu;
2802e86d1aa8SWill Deacon 
2803e86d1aa8SWill Deacon 	arm_smmu_detach_dev(master);
2804e86d1aa8SWill Deacon 
2805e86d1aa8SWill Deacon 	mutex_lock(&smmu_domain->init_mutex);
2806e86d1aa8SWill Deacon 
2807e86d1aa8SWill Deacon 	if (!smmu_domain->smmu) {
2808e86d1aa8SWill Deacon 		smmu_domain->smmu = smmu;
2809e86d1aa8SWill Deacon 		ret = arm_smmu_domain_finalise(domain, master);
2810e86d1aa8SWill Deacon 		if (ret) {
2811e86d1aa8SWill Deacon 			smmu_domain->smmu = NULL;
2812e86d1aa8SWill Deacon 			goto out_unlock;
2813e86d1aa8SWill Deacon 		}
2814e86d1aa8SWill Deacon 	} else if (smmu_domain->smmu != smmu) {
2815e86d1aa8SWill Deacon 		dev_err(dev,
2816e86d1aa8SWill Deacon 			"cannot attach to SMMU %s (upstream of %s)\n",
2817e86d1aa8SWill Deacon 			dev_name(smmu_domain->smmu->dev),
2818e86d1aa8SWill Deacon 			dev_name(smmu->dev));
2819e86d1aa8SWill Deacon 		ret = -ENXIO;
2820e86d1aa8SWill Deacon 		goto out_unlock;
2821e86d1aa8SWill Deacon 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2822e86d1aa8SWill Deacon 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2823e86d1aa8SWill Deacon 		dev_err(dev,
2824e86d1aa8SWill Deacon 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2825e86d1aa8SWill Deacon 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2826e86d1aa8SWill Deacon 		ret = -EINVAL;
2827e86d1aa8SWill Deacon 		goto out_unlock;
2828e86d1aa8SWill Deacon 	}
2829e86d1aa8SWill Deacon 
2830e86d1aa8SWill Deacon 	master->domain = smmu_domain;
2831e86d1aa8SWill Deacon 
2832e86d1aa8SWill Deacon 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2833e86d1aa8SWill Deacon 		master->ats_enabled = arm_smmu_ats_supported(master);
2834e86d1aa8SWill Deacon 
2835e86d1aa8SWill Deacon 	arm_smmu_install_ste_for_dev(master);
2836e86d1aa8SWill Deacon 
2837e86d1aa8SWill Deacon 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2838e86d1aa8SWill Deacon 	list_add(&master->domain_head, &smmu_domain->devices);
2839e86d1aa8SWill Deacon 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2840e86d1aa8SWill Deacon 
2841e86d1aa8SWill Deacon 	arm_smmu_enable_ats(master);
2842e86d1aa8SWill Deacon 
2843e86d1aa8SWill Deacon out_unlock:
2844e86d1aa8SWill Deacon 	mutex_unlock(&smmu_domain->init_mutex);
2845e86d1aa8SWill Deacon 	return ret;
2846e86d1aa8SWill Deacon }
2847e86d1aa8SWill Deacon 
2848e86d1aa8SWill Deacon static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2849e86d1aa8SWill Deacon 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2850e86d1aa8SWill Deacon {
2851e86d1aa8SWill Deacon 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2852e86d1aa8SWill Deacon 
2853e86d1aa8SWill Deacon 	if (!ops)
2854e86d1aa8SWill Deacon 		return -ENODEV;
2855e86d1aa8SWill Deacon 
2856e46b3c0dSJoerg Roedel 	return ops->map(ops, iova, paddr, size, prot, gfp);
2857e86d1aa8SWill Deacon }
2858e86d1aa8SWill Deacon 
2859e86d1aa8SWill Deacon static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2860e86d1aa8SWill Deacon 			     size_t size, struct iommu_iotlb_gather *gather)
2861e86d1aa8SWill Deacon {
2862e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2863e86d1aa8SWill Deacon 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2864e86d1aa8SWill Deacon 
2865e86d1aa8SWill Deacon 	if (!ops)
2866e86d1aa8SWill Deacon 		return 0;
2867e86d1aa8SWill Deacon 
2868e86d1aa8SWill Deacon 	return ops->unmap(ops, iova, size, gather);
2869e86d1aa8SWill Deacon }
2870e86d1aa8SWill Deacon 
2871e86d1aa8SWill Deacon static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2872e86d1aa8SWill Deacon {
2873e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2874e86d1aa8SWill Deacon 
2875e86d1aa8SWill Deacon 	if (smmu_domain->smmu)
2876e86d1aa8SWill Deacon 		arm_smmu_tlb_inv_context(smmu_domain);
2877e86d1aa8SWill Deacon }
2878e86d1aa8SWill Deacon 
2879e86d1aa8SWill Deacon static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2880e86d1aa8SWill Deacon 				struct iommu_iotlb_gather *gather)
2881e86d1aa8SWill Deacon {
2882e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2883e86d1aa8SWill Deacon 
2884e86d1aa8SWill Deacon 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
2885e86d1aa8SWill Deacon 			       gather->pgsize, true, smmu_domain);
2886e86d1aa8SWill Deacon }
2887e86d1aa8SWill Deacon 
2888e86d1aa8SWill Deacon static phys_addr_t
2889e86d1aa8SWill Deacon arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2890e86d1aa8SWill Deacon {
2891e86d1aa8SWill Deacon 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2892e86d1aa8SWill Deacon 
2893e86d1aa8SWill Deacon 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2894e86d1aa8SWill Deacon 		return iova;
2895e86d1aa8SWill Deacon 
2896e86d1aa8SWill Deacon 	if (!ops)
2897e86d1aa8SWill Deacon 		return 0;
2898e86d1aa8SWill Deacon 
2899e86d1aa8SWill Deacon 	return ops->iova_to_phys(ops, iova);
2900e86d1aa8SWill Deacon }
2901e86d1aa8SWill Deacon 
2902e86d1aa8SWill Deacon static struct platform_driver arm_smmu_driver;
2903e86d1aa8SWill Deacon 
2904e86d1aa8SWill Deacon static
2905e86d1aa8SWill Deacon struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2906e86d1aa8SWill Deacon {
2907e86d1aa8SWill Deacon 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2908e86d1aa8SWill Deacon 							  fwnode);
2909e86d1aa8SWill Deacon 	put_device(dev);
2910e86d1aa8SWill Deacon 	return dev ? dev_get_drvdata(dev) : NULL;
2911e86d1aa8SWill Deacon }
2912e86d1aa8SWill Deacon 
2913e86d1aa8SWill Deacon static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2914e86d1aa8SWill Deacon {
2915e86d1aa8SWill Deacon 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2916e86d1aa8SWill Deacon 
2917e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2918e86d1aa8SWill Deacon 		limit *= 1UL << STRTAB_SPLIT;
2919e86d1aa8SWill Deacon 
2920e86d1aa8SWill Deacon 	return sid < limit;
2921e86d1aa8SWill Deacon }
2922e86d1aa8SWill Deacon 
2923e86d1aa8SWill Deacon static struct iommu_ops arm_smmu_ops;
2924e86d1aa8SWill Deacon 
2925e86d1aa8SWill Deacon static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2926e86d1aa8SWill Deacon {
2927e86d1aa8SWill Deacon 	int i, ret;
2928e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu;
2929e86d1aa8SWill Deacon 	struct arm_smmu_master *master;
2930e86d1aa8SWill Deacon 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2931e86d1aa8SWill Deacon 
2932e86d1aa8SWill Deacon 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2933e86d1aa8SWill Deacon 		return ERR_PTR(-ENODEV);
2934e86d1aa8SWill Deacon 
2935e86d1aa8SWill Deacon 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2936e86d1aa8SWill Deacon 		return ERR_PTR(-EBUSY);
2937e86d1aa8SWill Deacon 
2938e86d1aa8SWill Deacon 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2939e86d1aa8SWill Deacon 	if (!smmu)
2940e86d1aa8SWill Deacon 		return ERR_PTR(-ENODEV);
2941e86d1aa8SWill Deacon 
2942e86d1aa8SWill Deacon 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2943e86d1aa8SWill Deacon 	if (!master)
2944e86d1aa8SWill Deacon 		return ERR_PTR(-ENOMEM);
2945e86d1aa8SWill Deacon 
2946e86d1aa8SWill Deacon 	master->dev = dev;
2947e86d1aa8SWill Deacon 	master->smmu = smmu;
2948e86d1aa8SWill Deacon 	master->sids = fwspec->ids;
2949e86d1aa8SWill Deacon 	master->num_sids = fwspec->num_ids;
2950e86d1aa8SWill Deacon 	dev_iommu_priv_set(dev, master);
2951e86d1aa8SWill Deacon 
2952e86d1aa8SWill Deacon 	/* Check the SIDs are in range of the SMMU and our stream table */
2953e86d1aa8SWill Deacon 	for (i = 0; i < master->num_sids; i++) {
2954e86d1aa8SWill Deacon 		u32 sid = master->sids[i];
2955e86d1aa8SWill Deacon 
2956e86d1aa8SWill Deacon 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2957e86d1aa8SWill Deacon 			ret = -ERANGE;
2958e86d1aa8SWill Deacon 			goto err_free_master;
2959e86d1aa8SWill Deacon 		}
2960e86d1aa8SWill Deacon 
2961e86d1aa8SWill Deacon 		/* Ensure l2 strtab is initialised */
2962e86d1aa8SWill Deacon 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2963e86d1aa8SWill Deacon 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2964e86d1aa8SWill Deacon 			if (ret)
2965e86d1aa8SWill Deacon 				goto err_free_master;
2966e86d1aa8SWill Deacon 		}
2967e86d1aa8SWill Deacon 	}
2968e86d1aa8SWill Deacon 
2969e86d1aa8SWill Deacon 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2970e86d1aa8SWill Deacon 
2971e86d1aa8SWill Deacon 	/*
2972e86d1aa8SWill Deacon 	 * Note that PASID must be enabled before, and disabled after ATS:
2973e86d1aa8SWill Deacon 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2974e86d1aa8SWill Deacon 	 *
2975e86d1aa8SWill Deacon 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2976e86d1aa8SWill Deacon 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2977e86d1aa8SWill Deacon 	 *   are changed.
2978e86d1aa8SWill Deacon 	 */
2979e86d1aa8SWill Deacon 	arm_smmu_enable_pasid(master);
2980e86d1aa8SWill Deacon 
2981e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2982e86d1aa8SWill Deacon 		master->ssid_bits = min_t(u8, master->ssid_bits,
2983e86d1aa8SWill Deacon 					  CTXDESC_LINEAR_CDMAX);
2984e86d1aa8SWill Deacon 
2985e86d1aa8SWill Deacon 	return &smmu->iommu;
2986e86d1aa8SWill Deacon 
2987e86d1aa8SWill Deacon err_free_master:
2988e86d1aa8SWill Deacon 	kfree(master);
2989e86d1aa8SWill Deacon 	dev_iommu_priv_set(dev, NULL);
2990e86d1aa8SWill Deacon 	return ERR_PTR(ret);
2991e86d1aa8SWill Deacon }
2992e86d1aa8SWill Deacon 
2993e86d1aa8SWill Deacon static void arm_smmu_release_device(struct device *dev)
2994e86d1aa8SWill Deacon {
2995e86d1aa8SWill Deacon 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2996e86d1aa8SWill Deacon 	struct arm_smmu_master *master;
2997e86d1aa8SWill Deacon 
2998e86d1aa8SWill Deacon 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2999e86d1aa8SWill Deacon 		return;
3000e86d1aa8SWill Deacon 
3001e86d1aa8SWill Deacon 	master = dev_iommu_priv_get(dev);
3002e86d1aa8SWill Deacon 	arm_smmu_detach_dev(master);
3003e86d1aa8SWill Deacon 	arm_smmu_disable_pasid(master);
3004e86d1aa8SWill Deacon 	kfree(master);
3005e86d1aa8SWill Deacon 	iommu_fwspec_free(dev);
3006e86d1aa8SWill Deacon }
3007e86d1aa8SWill Deacon 
3008e86d1aa8SWill Deacon static struct iommu_group *arm_smmu_device_group(struct device *dev)
3009e86d1aa8SWill Deacon {
3010e86d1aa8SWill Deacon 	struct iommu_group *group;
3011e86d1aa8SWill Deacon 
3012e86d1aa8SWill Deacon 	/*
3013e86d1aa8SWill Deacon 	 * We don't support devices sharing stream IDs other than PCI RID
3014e86d1aa8SWill Deacon 	 * aliases, since the necessary ID-to-device lookup becomes rather
3015e86d1aa8SWill Deacon 	 * impractical given a potential sparse 32-bit stream ID space.
3016e86d1aa8SWill Deacon 	 */
3017e86d1aa8SWill Deacon 	if (dev_is_pci(dev))
3018e86d1aa8SWill Deacon 		group = pci_device_group(dev);
3019e86d1aa8SWill Deacon 	else
3020e86d1aa8SWill Deacon 		group = generic_device_group(dev);
3021e86d1aa8SWill Deacon 
3022e86d1aa8SWill Deacon 	return group;
3023e86d1aa8SWill Deacon }
3024e86d1aa8SWill Deacon 
3025e86d1aa8SWill Deacon static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
3026e86d1aa8SWill Deacon 				    enum iommu_attr attr, void *data)
3027e86d1aa8SWill Deacon {
3028e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3029e86d1aa8SWill Deacon 
3030e86d1aa8SWill Deacon 	switch (domain->type) {
3031e86d1aa8SWill Deacon 	case IOMMU_DOMAIN_UNMANAGED:
3032e86d1aa8SWill Deacon 		switch (attr) {
3033e86d1aa8SWill Deacon 		case DOMAIN_ATTR_NESTING:
3034e86d1aa8SWill Deacon 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
3035e86d1aa8SWill Deacon 			return 0;
3036e86d1aa8SWill Deacon 		default:
3037e86d1aa8SWill Deacon 			return -ENODEV;
3038e86d1aa8SWill Deacon 		}
3039e86d1aa8SWill Deacon 		break;
3040e86d1aa8SWill Deacon 	case IOMMU_DOMAIN_DMA:
3041e86d1aa8SWill Deacon 		switch (attr) {
3042e86d1aa8SWill Deacon 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
3043e86d1aa8SWill Deacon 			*(int *)data = smmu_domain->non_strict;
3044e86d1aa8SWill Deacon 			return 0;
3045e86d1aa8SWill Deacon 		default:
3046e86d1aa8SWill Deacon 			return -ENODEV;
3047e86d1aa8SWill Deacon 		}
3048e86d1aa8SWill Deacon 		break;
3049e86d1aa8SWill Deacon 	default:
3050e86d1aa8SWill Deacon 		return -EINVAL;
3051e86d1aa8SWill Deacon 	}
3052e86d1aa8SWill Deacon }
3053e86d1aa8SWill Deacon 
3054e86d1aa8SWill Deacon static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
3055e86d1aa8SWill Deacon 				    enum iommu_attr attr, void *data)
3056e86d1aa8SWill Deacon {
3057e86d1aa8SWill Deacon 	int ret = 0;
3058e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3059e86d1aa8SWill Deacon 
3060e86d1aa8SWill Deacon 	mutex_lock(&smmu_domain->init_mutex);
3061e86d1aa8SWill Deacon 
3062e86d1aa8SWill Deacon 	switch (domain->type) {
3063e86d1aa8SWill Deacon 	case IOMMU_DOMAIN_UNMANAGED:
3064e86d1aa8SWill Deacon 		switch (attr) {
3065e86d1aa8SWill Deacon 		case DOMAIN_ATTR_NESTING:
3066e86d1aa8SWill Deacon 			if (smmu_domain->smmu) {
3067e86d1aa8SWill Deacon 				ret = -EPERM;
3068e86d1aa8SWill Deacon 				goto out_unlock;
3069e86d1aa8SWill Deacon 			}
3070e86d1aa8SWill Deacon 
3071e86d1aa8SWill Deacon 			if (*(int *)data)
3072e86d1aa8SWill Deacon 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
3073e86d1aa8SWill Deacon 			else
3074e86d1aa8SWill Deacon 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
3075e86d1aa8SWill Deacon 			break;
3076e86d1aa8SWill Deacon 		default:
3077e86d1aa8SWill Deacon 			ret = -ENODEV;
3078e86d1aa8SWill Deacon 		}
3079e86d1aa8SWill Deacon 		break;
3080e86d1aa8SWill Deacon 	case IOMMU_DOMAIN_DMA:
3081e86d1aa8SWill Deacon 		switch(attr) {
3082e86d1aa8SWill Deacon 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
3083e86d1aa8SWill Deacon 			smmu_domain->non_strict = *(int *)data;
3084e86d1aa8SWill Deacon 			break;
3085e86d1aa8SWill Deacon 		default:
3086e86d1aa8SWill Deacon 			ret = -ENODEV;
3087e86d1aa8SWill Deacon 		}
3088e86d1aa8SWill Deacon 		break;
3089e86d1aa8SWill Deacon 	default:
3090e86d1aa8SWill Deacon 		ret = -EINVAL;
3091e86d1aa8SWill Deacon 	}
3092e86d1aa8SWill Deacon 
3093e86d1aa8SWill Deacon out_unlock:
3094e86d1aa8SWill Deacon 	mutex_unlock(&smmu_domain->init_mutex);
3095e86d1aa8SWill Deacon 	return ret;
3096e86d1aa8SWill Deacon }
3097e86d1aa8SWill Deacon 
3098e86d1aa8SWill Deacon static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
3099e86d1aa8SWill Deacon {
3100e86d1aa8SWill Deacon 	return iommu_fwspec_add_ids(dev, args->args, 1);
3101e86d1aa8SWill Deacon }
3102e86d1aa8SWill Deacon 
3103e86d1aa8SWill Deacon static void arm_smmu_get_resv_regions(struct device *dev,
3104e86d1aa8SWill Deacon 				      struct list_head *head)
3105e86d1aa8SWill Deacon {
3106e86d1aa8SWill Deacon 	struct iommu_resv_region *region;
3107e86d1aa8SWill Deacon 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
3108e86d1aa8SWill Deacon 
3109e86d1aa8SWill Deacon 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
3110e86d1aa8SWill Deacon 					 prot, IOMMU_RESV_SW_MSI);
3111e86d1aa8SWill Deacon 	if (!region)
3112e86d1aa8SWill Deacon 		return;
3113e86d1aa8SWill Deacon 
3114e86d1aa8SWill Deacon 	list_add_tail(&region->list, head);
3115e86d1aa8SWill Deacon 
3116e86d1aa8SWill Deacon 	iommu_dma_get_resv_regions(dev, head);
3117e86d1aa8SWill Deacon }
3118e86d1aa8SWill Deacon 
/* IOMMU callbacks registered with the core IOMMU layer */
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.probe_device		= arm_smmu_probe_device,
	.release_device		= arm_smmu_release_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= generic_iommu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
3139e86d1aa8SWill Deacon 
3140e86d1aa8SWill Deacon /* Probing and initialisation functions */
3141e86d1aa8SWill Deacon static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3142e86d1aa8SWill Deacon 				   struct arm_smmu_queue *q,
3143e86d1aa8SWill Deacon 				   unsigned long prod_off,
3144e86d1aa8SWill Deacon 				   unsigned long cons_off,
3145e86d1aa8SWill Deacon 				   size_t dwords, const char *name)
3146e86d1aa8SWill Deacon {
3147e86d1aa8SWill Deacon 	size_t qsz;
3148e86d1aa8SWill Deacon 
3149e86d1aa8SWill Deacon 	do {
3150e86d1aa8SWill Deacon 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3151e86d1aa8SWill Deacon 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3152e86d1aa8SWill Deacon 					      GFP_KERNEL);
3153e86d1aa8SWill Deacon 		if (q->base || qsz < PAGE_SIZE)
3154e86d1aa8SWill Deacon 			break;
3155e86d1aa8SWill Deacon 
3156e86d1aa8SWill Deacon 		q->llq.max_n_shift--;
3157e86d1aa8SWill Deacon 	} while (1);
3158e86d1aa8SWill Deacon 
3159e86d1aa8SWill Deacon 	if (!q->base) {
3160e86d1aa8SWill Deacon 		dev_err(smmu->dev,
3161e86d1aa8SWill Deacon 			"failed to allocate queue (0x%zx bytes) for %s\n",
3162e86d1aa8SWill Deacon 			qsz, name);
3163e86d1aa8SWill Deacon 		return -ENOMEM;
3164e86d1aa8SWill Deacon 	}
3165e86d1aa8SWill Deacon 
3166e86d1aa8SWill Deacon 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
3167e86d1aa8SWill Deacon 		dev_info(smmu->dev, "allocated %u entries for %s\n",
3168e86d1aa8SWill Deacon 			 1 << q->llq.max_n_shift, name);
3169e86d1aa8SWill Deacon 	}
3170e86d1aa8SWill Deacon 
3171e86d1aa8SWill Deacon 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
3172e86d1aa8SWill Deacon 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
3173e86d1aa8SWill Deacon 	q->ent_dwords	= dwords;
3174e86d1aa8SWill Deacon 
3175e86d1aa8SWill Deacon 	q->q_base  = Q_BASE_RWA;
3176e86d1aa8SWill Deacon 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3177e86d1aa8SWill Deacon 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3178e86d1aa8SWill Deacon 
3179e86d1aa8SWill Deacon 	q->llq.prod = q->llq.cons = 0;
3180e86d1aa8SWill Deacon 	return 0;
3181e86d1aa8SWill Deacon }
3182e86d1aa8SWill Deacon 
/* devm action: release the cmdq valid-bit map allocated at init time. */
static void arm_smmu_cmdq_free_bitmap(void *data)
{
	bitmap_free(data);
}
3188e86d1aa8SWill Deacon 
3189e86d1aa8SWill Deacon static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
3190e86d1aa8SWill Deacon {
3191e86d1aa8SWill Deacon 	int ret = 0;
3192e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
3193e86d1aa8SWill Deacon 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3194e86d1aa8SWill Deacon 	atomic_long_t *bitmap;
3195e86d1aa8SWill Deacon 
3196e86d1aa8SWill Deacon 	atomic_set(&cmdq->owner_prod, 0);
3197e86d1aa8SWill Deacon 	atomic_set(&cmdq->lock, 0);
3198e86d1aa8SWill Deacon 
3199e86d1aa8SWill Deacon 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
3200e86d1aa8SWill Deacon 	if (!bitmap) {
3201e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
3202e86d1aa8SWill Deacon 		ret = -ENOMEM;
3203e86d1aa8SWill Deacon 	} else {
3204e86d1aa8SWill Deacon 		cmdq->valid_map = bitmap;
3205e86d1aa8SWill Deacon 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
3206e86d1aa8SWill Deacon 	}
3207e86d1aa8SWill Deacon 
3208e86d1aa8SWill Deacon 	return ret;
3209e86d1aa8SWill Deacon }
3210e86d1aa8SWill Deacon 
3211e86d1aa8SWill Deacon static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3212e86d1aa8SWill Deacon {
3213e86d1aa8SWill Deacon 	int ret;
3214e86d1aa8SWill Deacon 
3215e86d1aa8SWill Deacon 	/* cmdq */
3216e86d1aa8SWill Deacon 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
3217e86d1aa8SWill Deacon 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
3218e86d1aa8SWill Deacon 				      "cmdq");
3219e86d1aa8SWill Deacon 	if (ret)
3220e86d1aa8SWill Deacon 		return ret;
3221e86d1aa8SWill Deacon 
3222e86d1aa8SWill Deacon 	ret = arm_smmu_cmdq_init(smmu);
3223e86d1aa8SWill Deacon 	if (ret)
3224e86d1aa8SWill Deacon 		return ret;
3225e86d1aa8SWill Deacon 
3226e86d1aa8SWill Deacon 	/* evtq */
3227e86d1aa8SWill Deacon 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
3228e86d1aa8SWill Deacon 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
3229e86d1aa8SWill Deacon 				      "evtq");
3230e86d1aa8SWill Deacon 	if (ret)
3231e86d1aa8SWill Deacon 		return ret;
3232e86d1aa8SWill Deacon 
3233e86d1aa8SWill Deacon 	/* priq */
3234e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3235e86d1aa8SWill Deacon 		return 0;
3236e86d1aa8SWill Deacon 
3237e86d1aa8SWill Deacon 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
3238e86d1aa8SWill Deacon 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
3239e86d1aa8SWill Deacon 				       "priq");
3240e86d1aa8SWill Deacon }
3241e86d1aa8SWill Deacon 
3242e86d1aa8SWill Deacon static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
3243e86d1aa8SWill Deacon {
3244e86d1aa8SWill Deacon 	unsigned int i;
3245e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3246e86d1aa8SWill Deacon 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
3247e86d1aa8SWill Deacon 	void *strtab = smmu->strtab_cfg.strtab;
3248e86d1aa8SWill Deacon 
3249e86d1aa8SWill Deacon 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
3250e86d1aa8SWill Deacon 	if (!cfg->l1_desc) {
3251e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
3252e86d1aa8SWill Deacon 		return -ENOMEM;
3253e86d1aa8SWill Deacon 	}
3254e86d1aa8SWill Deacon 
3255e86d1aa8SWill Deacon 	for (i = 0; i < cfg->num_l1_ents; ++i) {
3256e86d1aa8SWill Deacon 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3257e86d1aa8SWill Deacon 		strtab += STRTAB_L1_DESC_DWORDS << 3;
3258e86d1aa8SWill Deacon 	}
3259e86d1aa8SWill Deacon 
3260e86d1aa8SWill Deacon 	return 0;
3261e86d1aa8SWill Deacon }
3262e86d1aa8SWill Deacon 
/*
 * Allocate and configure a 2-level stream table: a DMA-coherent linear
 * array of L1 descriptors, each covering 2^STRTAB_SPLIT StreamIDs via an
 * L2 page of STEs (allocated lazily elsewhere).
 */
static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
{
	void *strtab;
	u64 reg;
	u32 size, l1size;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;

	/* Calculate the L1 size, capped to the SIDSIZE. */
	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
	cfg->num_l1_ents = 1 << size;

	/* 'size' now becomes log2 of the total StreamID space we can cover */
	size += STRTAB_SPLIT;
	if (size < smmu->sid_bits)
		dev_warn(smmu->dev,
			 "2-level strtab only covers %u/%u bits of SID\n",
			 size, smmu->sid_bits);

	/* Each L1 descriptor is STRTAB_L1_DESC_DWORDS 64-bit words */
	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
				     GFP_KERNEL);
	if (!strtab) {
		dev_err(smmu->dev,
			"failed to allocate l1 stream table (%u bytes)\n",
			l1size);
		return -ENOMEM;
	}
	cfg->strtab = strtab;

	/* Configure strtab_base_cfg for 2 levels */
	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
	cfg->strtab_base_cfg = reg;

	return arm_smmu_init_l1_strtab(smmu);
}
3300e86d1aa8SWill Deacon 
3301e86d1aa8SWill Deacon static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3302e86d1aa8SWill Deacon {
3303e86d1aa8SWill Deacon 	void *strtab;
3304e86d1aa8SWill Deacon 	u64 reg;
3305e86d1aa8SWill Deacon 	u32 size;
3306e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3307e86d1aa8SWill Deacon 
3308e86d1aa8SWill Deacon 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3309e86d1aa8SWill Deacon 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3310e86d1aa8SWill Deacon 				     GFP_KERNEL);
3311e86d1aa8SWill Deacon 	if (!strtab) {
3312e86d1aa8SWill Deacon 		dev_err(smmu->dev,
3313e86d1aa8SWill Deacon 			"failed to allocate linear stream table (%u bytes)\n",
3314e86d1aa8SWill Deacon 			size);
3315e86d1aa8SWill Deacon 		return -ENOMEM;
3316e86d1aa8SWill Deacon 	}
3317e86d1aa8SWill Deacon 	cfg->strtab = strtab;
3318e86d1aa8SWill Deacon 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3319e86d1aa8SWill Deacon 
3320e86d1aa8SWill Deacon 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3321e86d1aa8SWill Deacon 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3322e86d1aa8SWill Deacon 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3323e86d1aa8SWill Deacon 	cfg->strtab_base_cfg = reg;
3324e86d1aa8SWill Deacon 
3325e86d1aa8SWill Deacon 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3326e86d1aa8SWill Deacon 	return 0;
3327e86d1aa8SWill Deacon }
3328e86d1aa8SWill Deacon 
3329e86d1aa8SWill Deacon static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3330e86d1aa8SWill Deacon {
3331e86d1aa8SWill Deacon 	u64 reg;
3332e86d1aa8SWill Deacon 	int ret;
3333e86d1aa8SWill Deacon 
3334e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3335e86d1aa8SWill Deacon 		ret = arm_smmu_init_strtab_2lvl(smmu);
3336e86d1aa8SWill Deacon 	else
3337e86d1aa8SWill Deacon 		ret = arm_smmu_init_strtab_linear(smmu);
3338e86d1aa8SWill Deacon 
3339e86d1aa8SWill Deacon 	if (ret)
3340e86d1aa8SWill Deacon 		return ret;
3341e86d1aa8SWill Deacon 
3342e86d1aa8SWill Deacon 	/* Set the strtab base address */
3343e86d1aa8SWill Deacon 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3344e86d1aa8SWill Deacon 	reg |= STRTAB_BASE_RA;
3345e86d1aa8SWill Deacon 	smmu->strtab_cfg.strtab_base = reg;
3346e86d1aa8SWill Deacon 
3347e86d1aa8SWill Deacon 	/* Allocate the first VMID for stage-2 bypass STEs */
3348e86d1aa8SWill Deacon 	set_bit(0, smmu->vmid_map);
3349e86d1aa8SWill Deacon 	return 0;
3350e86d1aa8SWill Deacon }
3351e86d1aa8SWill Deacon 
/* Allocate all in-memory data structures: the queues, then the strtab. */
static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
	int ret = arm_smmu_init_queues(smmu);

	return ret ? ret : arm_smmu_init_strtab(smmu);
}
3362e86d1aa8SWill Deacon 
3363e86d1aa8SWill Deacon static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3364e86d1aa8SWill Deacon 				   unsigned int reg_off, unsigned int ack_off)
3365e86d1aa8SWill Deacon {
3366e86d1aa8SWill Deacon 	u32 reg;
3367e86d1aa8SWill Deacon 
3368e86d1aa8SWill Deacon 	writel_relaxed(val, smmu->base + reg_off);
3369e86d1aa8SWill Deacon 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3370e86d1aa8SWill Deacon 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3371e86d1aa8SWill Deacon }
3372e86d1aa8SWill Deacon 
/*
 * GBPA is "special": it carries a self-clearing GBPA_UPDATE handshake bit,
 * so we must wait for any in-flight update before the read-modify-write
 * and then poll again until the hardware accepts the new value.
 */
static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
{
	int ret;
	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;

	/* Wait for any previous update to complete before reading */
	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
					 1, ARM_SMMU_POLL_TIMEOUT_US);
	if (ret)
		return ret;

	reg &= ~clr;
	reg |= set;
	writel_relaxed(reg | GBPA_UPDATE, gbpa);
	/* GBPA_UPDATE clears once the new value has taken effect */
	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
					 1, ARM_SMMU_POLL_TIMEOUT_US);

	if (ret)
		dev_err(smmu->dev, "GBPA not responding to update\n");
	return ret;
}
3394e86d1aa8SWill Deacon 
/* devm action: tear down the platform MSIs allocated in setup_msis(). */
static void arm_smmu_free_msis(void *data)
{
	platform_msi_domain_free_irqs(data);
}
3400e86d1aa8SWill Deacon 
/*
 * platform-MSI write-msg callback (passed to
 * platform_msi_domain_alloc_irqs() in arm_smmu_setup_msis()): program the
 * doorbell address, payload and memory attributes into the IRQ_CFG
 * register triple selected by the descriptor's msi_index.
 */
static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
	phys_addr_t doorbell;
	struct device *dev = msi_desc_to_dev(desc);
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];

	/* Reassemble the 64-bit doorbell address and mask off unused bits */
	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
	doorbell &= MSI_CFG0_ADDR_MASK;

	writeq_relaxed(doorbell, smmu->base + cfg[0]);	/* CFG0: address */
	writel_relaxed(msg->data, smmu->base + cfg[1]);	/* CFG1: payload */
	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
}
3415e86d1aa8SWill Deacon 
/*
 * Try to allocate MSIs for the evtq, gerror and (if PRI-capable) priq
 * interrupt sources, recording the resulting Linux IRQ numbers for the
 * request paths in arm_smmu_setup_unique_irqs(). Any failure here just
 * falls back to wired interrupts with an info/warn message.
 */
static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
{
	struct msi_desc *desc;
	int ret, nvec = ARM_SMMU_MAX_MSIS;
	struct device *dev = smmu->dev;

	/* Clear the MSI address regs */
	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);

	/* Only reserve a PRIQ vector when the hardware supports PRI */
	if (smmu->features & ARM_SMMU_FEAT_PRI)
		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
	else
		nvec--;

	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
		return;

	if (!dev->msi_domain) {
		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
		return;
	}

	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
	if (ret) {
		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
		return;
	}

	/* Record which Linux IRQ ended up backing each interrupt source */
	for_each_msi_entry(desc, dev) {
		switch (desc->platform.msi_index) {
		case EVTQ_MSI_INDEX:
			smmu->evtq.q.irq = desc->irq;
			break;
		case GERROR_MSI_INDEX:
			smmu->gerr_irq = desc->irq;
			break;
		case PRIQ_MSI_INDEX:
			smmu->priq.q.irq = desc->irq;
			break;
		default:	/* Unknown */
			continue;
		}
	}

	/* Add callback to free MSIs on teardown */
	devm_add_action(dev, arm_smmu_free_msis, dev);
}
3465e86d1aa8SWill Deacon 
/*
 * Request the individual interrupt lines (evtq, gerror and, when the
 * hardware supports PRI, priq), after first attempting to back them with
 * MSIs. A missing or failed IRQ only produces a warning; initialisation
 * continues without it.
 */
static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
{
	int irq, ret;

	arm_smmu_setup_msis(smmu);

	/* Request interrupt lines */
	irq = smmu->evtq.q.irq;
	if (irq) {
		/* Threaded: the event queue is drained in process context */
		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
						arm_smmu_evtq_thread,
						IRQF_ONESHOT,
						"arm-smmu-v3-evtq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable evtq irq\n");
	} else {
		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
	}

	irq = smmu->gerr_irq;
	if (irq) {
		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
				       0, "arm-smmu-v3-gerror", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable gerror irq\n");
	} else {
		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
	}

	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		irq = smmu->priq.q.irq;
		if (irq) {
			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
							arm_smmu_priq_thread,
							IRQF_ONESHOT,
							"arm-smmu-v3-priq",
							smmu);
			if (ret < 0)
				dev_warn(smmu->dev,
					 "failed to enable priq irq\n");
		} else {
			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
		}
	}
}
3511e86d1aa8SWill Deacon 
/*
 * Disable, wire up and re-enable interrupt generation on the SMMU. Uses a
 * single combined IRQ line when the platform provides one, otherwise one
 * line per source. IRQ request failures are warned about but deliberately
 * not treated as fatal - this function only fails if the hardware refuses
 * the initial IRQ_CTRL disable write.
 */
static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
{
	int ret, irq;
	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;

	/* Disable IRQs first */
	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
				      ARM_SMMU_IRQ_CTRLACK);
	if (ret) {
		dev_err(smmu->dev, "failed to disable irqs\n");
		return ret;
	}

	irq = smmu->combined_irq;
	if (irq) {
		/*
		 * Cavium ThunderX2 implementation doesn't support unique irq
		 * lines. Use a single irq line for all the SMMUv3 interrupts.
		 */
		ret = devm_request_threaded_irq(smmu->dev, irq,
					arm_smmu_combined_irq_handler,
					arm_smmu_combined_irq_thread,
					IRQF_ONESHOT,
					"arm-smmu-v3-combined-irq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable combined irq\n");
	} else
		arm_smmu_setup_unique_irqs(smmu);

	if (smmu->features & ARM_SMMU_FEAT_PRI)
		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;

	/* Enable interrupt generation on the SMMU */
	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
	if (ret)
		dev_warn(smmu->dev, "failed to enable irqs\n");

	return 0;
}
3552e86d1aa8SWill Deacon 
3553e86d1aa8SWill Deacon static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3554e86d1aa8SWill Deacon {
3555e86d1aa8SWill Deacon 	int ret;
3556e86d1aa8SWill Deacon 
3557e86d1aa8SWill Deacon 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3558e86d1aa8SWill Deacon 	if (ret)
3559e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to clear cr0\n");
3560e86d1aa8SWill Deacon 
3561e86d1aa8SWill Deacon 	return ret;
3562e86d1aa8SWill Deacon }
3563e86d1aa8SWill Deacon 
/*
 * Take the SMMU from an unknown state to fully operational: disable it,
 * program the table/queue base registers, enable each queue in turn,
 * invalidate stale cached configuration and TLB entries, set up the
 * interrupts, and finally enable translation (or arrange bypass when
 * @bypass is set and bypass hasn't been globally disabled).
 */
static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
{
	int ret;
	u32 reg, enables;
	struct arm_smmu_cmdq_ent cmd;

	/* Clear CR0 and sync (disables SMMU and queue processing) */
	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
	if (reg & CR0_SMMUEN) {
		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
		WARN_ON(is_kdump_kernel() && !disable_bypass);
		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
	}

	ret = arm_smmu_device_disable(smmu);
	if (ret)
		return ret;

	/* CR1 (table and queue memory attributes) */
	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);

	/* CR2 (random crap) */
	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);

	/* Stream table */
	writeq_relaxed(smmu->strtab_cfg.strtab_base,
		       smmu->base + ARM_SMMU_STRTAB_BASE);
	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);

	/* Command queue */
	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);

	/* 'enables' accumulates the CR0 bits switched on so far */
	enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable command queue\n");
		return ret;
	}

	/* Invalidate any cached configuration */
	cmd.opcode = CMDQ_OP_CFGI_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Invalidate any stale TLB entries */
	if (smmu->features & ARM_SMMU_FEAT_HYP) {
		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	}

	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Event queue */
	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
	writel_relaxed(smmu->evtq.q.llq.prod,
		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
	writel_relaxed(smmu->evtq.q.llq.cons,
		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));

	enables |= CR0_EVTQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable event queue\n");
		return ret;
	}

	/* PRI queue */
	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		writeq_relaxed(smmu->priq.q.q_base,
			       smmu->base + ARM_SMMU_PRIQ_BASE);
		writel_relaxed(smmu->priq.q.llq.prod,
			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
		writel_relaxed(smmu->priq.q.llq.cons,
			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));

		enables |= CR0_PRIQEN;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable PRI queue\n");
			return ret;
		}
	}

	if (smmu->features & ARM_SMMU_FEAT_ATS) {
		enables |= CR0_ATSCHK;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable ATS check\n");
			return ret;
		}
	}

	ret = arm_smmu_setup_irqs(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to setup irqs\n");
		return ret;
	}

	/* In a kdump kernel, keep the event and PRI queues switched off */
	if (is_kdump_kernel())
		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);

	/* Enable the SMMU interface, or ensure bypass */
	if (!bypass || disable_bypass) {
		enables |= CR0_SMMUEN;
	} else {
		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
		if (ret)
			return ret;
	}
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable SMMU interface\n");
		return ret;
	}

	return 0;
}
3698e86d1aa8SWill Deacon 
/*
 * Read the hardware ID registers (IDR0/1/3/5) and populate the feature
 * flags, queue sizes and address-size fields of @smmu accordingly.
 *
 * Returns 0 on success, or -ENXIO if the hardware advertises a
 * configuration this driver cannot support (unknown table endianness,
 * no stage-1/stage-2 translation support, non-AArch64 table format,
 * an embedded implementation, or an unusably small command queue).
 */
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	/* Coherency as already reported by firmware (DT/IORT), see probe */
	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;

	/* IDR0 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);

	/* 2-level structures */
	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;

	if (reg & IDR0_CD2L)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;

	/*
	 * Translation table endianness.
	 * We currently require the same endianness as the CPU, but this
	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
	 */
	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
	case IDR0_TTENDIAN_MIXED:
		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
		break;
#ifdef __BIG_ENDIAN
	case IDR0_TTENDIAN_BE:
		smmu->features |= ARM_SMMU_FEAT_TT_BE;
		break;
#else
	case IDR0_TTENDIAN_LE:
		smmu->features |= ARM_SMMU_FEAT_TT_LE;
		break;
#endif
	default:
		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
		return -ENXIO;
	}

	/* Boolean feature flags */
	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
		smmu->features |= ARM_SMMU_FEAT_PRI;

	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
		smmu->features |= ARM_SMMU_FEAT_ATS;

	if (reg & IDR0_SEV)
		smmu->features |= ARM_SMMU_FEAT_SEV;

	if (reg & IDR0_MSI) {
		smmu->features |= ARM_SMMU_FEAT_MSI;
		/*
		 * MSI-based polling (ARM_SMMU_OPT_MSIPOLL) is only enabled
		 * for coherent SMMUs and can be turned off with the
		 * disable_msipolling override.
		 */
		if (coherent && !disable_msipolling)
			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
	}

	if (reg & IDR0_HYP)
		smmu->features |= ARM_SMMU_FEAT_HYP;

	/*
	 * The coherency feature as set by FW is used in preference to the ID
	 * register, but warn on mismatch.
	 */
	if (!!(reg & IDR0_COHACC) != coherent)
		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
			 coherent ? "true" : "false");

	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
	case IDR0_STALL_MODEL_FORCE:
		/* FORCE implies the basic stalling capability as well */
		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
		fallthrough;
	case IDR0_STALL_MODEL_STALL:
		smmu->features |= ARM_SMMU_FEAT_STALLS;
	}

	if (reg & IDR0_S1P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;

	if (reg & IDR0_S2P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;

	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
		dev_err(smmu->dev, "no translation support!\n");
		return -ENXIO;
	}

	/* We only support the AArch64 table format at present */
	switch (FIELD_GET(IDR0_TTF, reg)) {
	case IDR0_TTF_AARCH32_64:
		/* Mixed AArch32/64 support caps the input address size at 40 bits */
		smmu->ias = 40;
		fallthrough;
	case IDR0_TTF_AARCH64:
		break;
	default:
		dev_err(smmu->dev, "AArch64 table format not supported!\n");
		return -ENXIO;
	}

	/* ASID/VMID sizes */
	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;

	/* IDR1 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
		dev_err(smmu->dev, "embedded implementation not supported\n");
		return -ENXIO;
	}

	/* Queue sizes, capped to ensure natural alignment */
	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_CMDQS, reg));
	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
		/*
		 * We don't support splitting up batches, so one batch of
		 * commands plus an extra sync needs to fit inside the command
		 * queue. There's also no way we can handle the weird alignment
		 * restrictions on the base pointer for a unit-length queue.
		 */
		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
			CMDQ_BATCH_ENTRIES);
		return -ENXIO;
	}

	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_EVTQS, reg));
	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_PRIQS, reg));

	/* SID/SSID sizes */
	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);

	/*
	 * If the SMMU supports fewer bits than would fill a single L2 stream
	 * table, use a linear table instead.
	 */
	if (smmu->sid_bits <= STRTAB_SPLIT)
		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* IDR3 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
	if (FIELD_GET(IDR3_RIL, reg))
		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;

	/* IDR5 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

	/* Maximum number of outstanding stalls */
	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);

	/* Page sizes: each granule implies its block sizes too */
	if (reg & IDR5_GRAN64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (reg & IDR5_GRAN16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (reg & IDR5_GRAN4K)
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;

	/* Input address size */
	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
		smmu->features |= ARM_SMMU_FEAT_VAX;

	/* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		dev_info(smmu->dev,
			"unknown output address size. Truncating to 48-bit\n");
		fallthrough;
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}

	/* -1UL marks an unset bitmap: first probed SMMU seeds it, later ones OR in */
	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;

	/* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	/* The input range cannot usefully be smaller than the output range */
	smmu->ias = max(smmu->ias, smmu->oas);

	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
		 smmu->ias, smmu->oas, smmu->features);
	return 0;
}
3905e86d1aa8SWill Deacon 
3906e86d1aa8SWill Deacon #ifdef CONFIG_ACPI
3907e86d1aa8SWill Deacon static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3908e86d1aa8SWill Deacon {
3909e86d1aa8SWill Deacon 	switch (model) {
3910e86d1aa8SWill Deacon 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3911e86d1aa8SWill Deacon 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3912e86d1aa8SWill Deacon 		break;
3913e86d1aa8SWill Deacon 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3914e86d1aa8SWill Deacon 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3915e86d1aa8SWill Deacon 		break;
3916e86d1aa8SWill Deacon 	}
3917e86d1aa8SWill Deacon 
3918e86d1aa8SWill Deacon 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3919e86d1aa8SWill Deacon }
3920e86d1aa8SWill Deacon 
3921e86d1aa8SWill Deacon static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3922e86d1aa8SWill Deacon 				      struct arm_smmu_device *smmu)
3923e86d1aa8SWill Deacon {
3924e86d1aa8SWill Deacon 	struct acpi_iort_smmu_v3 *iort_smmu;
3925e86d1aa8SWill Deacon 	struct device *dev = smmu->dev;
3926e86d1aa8SWill Deacon 	struct acpi_iort_node *node;
3927e86d1aa8SWill Deacon 
3928e86d1aa8SWill Deacon 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3929e86d1aa8SWill Deacon 
3930e86d1aa8SWill Deacon 	/* Retrieve SMMUv3 specific data */
3931e86d1aa8SWill Deacon 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3932e86d1aa8SWill Deacon 
3933e86d1aa8SWill Deacon 	acpi_smmu_get_options(iort_smmu->model, smmu);
3934e86d1aa8SWill Deacon 
3935e86d1aa8SWill Deacon 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3936e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3937e86d1aa8SWill Deacon 
3938e86d1aa8SWill Deacon 	return 0;
3939e86d1aa8SWill Deacon }
3940e86d1aa8SWill Deacon #else
/* Stub for !CONFIG_ACPI builds: ACPI probing is never available */
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
3946e86d1aa8SWill Deacon #endif
3947e86d1aa8SWill Deacon 
3948e86d1aa8SWill Deacon static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3949e86d1aa8SWill Deacon 				    struct arm_smmu_device *smmu)
3950e86d1aa8SWill Deacon {
3951e86d1aa8SWill Deacon 	struct device *dev = &pdev->dev;
3952e86d1aa8SWill Deacon 	u32 cells;
3953e86d1aa8SWill Deacon 	int ret = -EINVAL;
3954e86d1aa8SWill Deacon 
3955e86d1aa8SWill Deacon 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3956e86d1aa8SWill Deacon 		dev_err(dev, "missing #iommu-cells property\n");
3957e86d1aa8SWill Deacon 	else if (cells != 1)
3958e86d1aa8SWill Deacon 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3959e86d1aa8SWill Deacon 	else
3960e86d1aa8SWill Deacon 		ret = 0;
3961e86d1aa8SWill Deacon 
3962e86d1aa8SWill Deacon 	parse_driver_options(smmu);
3963e86d1aa8SWill Deacon 
3964e86d1aa8SWill Deacon 	if (of_dma_is_coherent(dev->of_node))
3965e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3966e86d1aa8SWill Deacon 
3967e86d1aa8SWill Deacon 	return ret;
3968e86d1aa8SWill Deacon }
3969e86d1aa8SWill Deacon 
3970e86d1aa8SWill Deacon static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3971e86d1aa8SWill Deacon {
3972e86d1aa8SWill Deacon 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3973e86d1aa8SWill Deacon 		return SZ_64K;
3974e86d1aa8SWill Deacon 	else
3975e86d1aa8SWill Deacon 		return SZ_128K;
3976e86d1aa8SWill Deacon }
3977e86d1aa8SWill Deacon 
/*
 * Install (or, with @ops == NULL, remove) the SMMU's IOMMU ops on every
 * bus type that can carry masters: PCI, AMBA and the platform bus. Each
 * bus is only touched if its current ops differ, so the function is
 * idempotent. On failure, any bus already switched is rolled back to
 * NULL ops before the error is returned.
 *
 * Returns 0 on success or the error from bus_set_iommu().
 */
static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
{
	int err;

#ifdef CONFIG_PCI
	if (pci_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			return err;
	}
#endif
#ifdef CONFIG_ARM_AMBA
	if (amba_bustype.iommu_ops != ops) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	if (platform_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}

	return 0;

	/* Unwind in reverse order; labels exist even when a bus is configured out */
err_reset_amba_ops:
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
	return err;
}
4014e86d1aa8SWill Deacon 
4015e86d1aa8SWill Deacon static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
4016e86d1aa8SWill Deacon 				      resource_size_t size)
4017e86d1aa8SWill Deacon {
4018e86d1aa8SWill Deacon 	struct resource res = {
4019e86d1aa8SWill Deacon 		.flags = IORESOURCE_MEM,
4020e86d1aa8SWill Deacon 		.start = start,
4021e86d1aa8SWill Deacon 		.end = start + size - 1,
4022e86d1aa8SWill Deacon 	};
4023e86d1aa8SWill Deacon 
4024e86d1aa8SWill Deacon 	return devm_ioremap_resource(dev, &res);
4025e86d1aa8SWill Deacon }
4026e86d1aa8SWill Deacon 
4027e86d1aa8SWill Deacon static int arm_smmu_device_probe(struct platform_device *pdev)
4028e86d1aa8SWill Deacon {
4029e86d1aa8SWill Deacon 	int irq, ret;
4030e86d1aa8SWill Deacon 	struct resource *res;
4031e86d1aa8SWill Deacon 	resource_size_t ioaddr;
4032e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu;
4033e86d1aa8SWill Deacon 	struct device *dev = &pdev->dev;
4034e86d1aa8SWill Deacon 	bool bypass;
4035e86d1aa8SWill Deacon 
4036e86d1aa8SWill Deacon 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4037e86d1aa8SWill Deacon 	if (!smmu) {
4038e86d1aa8SWill Deacon 		dev_err(dev, "failed to allocate arm_smmu_device\n");
4039e86d1aa8SWill Deacon 		return -ENOMEM;
4040e86d1aa8SWill Deacon 	}
4041e86d1aa8SWill Deacon 	smmu->dev = dev;
4042e86d1aa8SWill Deacon 
4043e86d1aa8SWill Deacon 	if (dev->of_node) {
4044e86d1aa8SWill Deacon 		ret = arm_smmu_device_dt_probe(pdev, smmu);
4045e86d1aa8SWill Deacon 	} else {
4046e86d1aa8SWill Deacon 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
4047e86d1aa8SWill Deacon 		if (ret == -ENODEV)
4048e86d1aa8SWill Deacon 			return ret;
4049e86d1aa8SWill Deacon 	}
4050e86d1aa8SWill Deacon 
4051e86d1aa8SWill Deacon 	/* Set bypass mode according to firmware probing result */
4052e86d1aa8SWill Deacon 	bypass = !!ret;
4053e86d1aa8SWill Deacon 
4054e86d1aa8SWill Deacon 	/* Base address */
4055e86d1aa8SWill Deacon 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
4056e86d1aa8SWill Deacon 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4057e86d1aa8SWill Deacon 		dev_err(dev, "MMIO region too small (%pr)\n", res);
4058e86d1aa8SWill Deacon 		return -EINVAL;
4059e86d1aa8SWill Deacon 	}
4060e86d1aa8SWill Deacon 	ioaddr = res->start;
4061e86d1aa8SWill Deacon 
4062e86d1aa8SWill Deacon 	/*
4063e86d1aa8SWill Deacon 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4064e86d1aa8SWill Deacon 	 * the PMCG registers which are reserved by the PMU driver.
4065e86d1aa8SWill Deacon 	 */
4066e86d1aa8SWill Deacon 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4067e86d1aa8SWill Deacon 	if (IS_ERR(smmu->base))
4068e86d1aa8SWill Deacon 		return PTR_ERR(smmu->base);
4069e86d1aa8SWill Deacon 
4070e86d1aa8SWill Deacon 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
4071e86d1aa8SWill Deacon 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4072e86d1aa8SWill Deacon 					       ARM_SMMU_REG_SZ);
4073e86d1aa8SWill Deacon 		if (IS_ERR(smmu->page1))
4074e86d1aa8SWill Deacon 			return PTR_ERR(smmu->page1);
4075e86d1aa8SWill Deacon 	} else {
4076e86d1aa8SWill Deacon 		smmu->page1 = smmu->base;
4077e86d1aa8SWill Deacon 	}
4078e86d1aa8SWill Deacon 
4079e86d1aa8SWill Deacon 	/* Interrupt lines */
4080e86d1aa8SWill Deacon 
4081e86d1aa8SWill Deacon 	irq = platform_get_irq_byname_optional(pdev, "combined");
4082e86d1aa8SWill Deacon 	if (irq > 0)
4083e86d1aa8SWill Deacon 		smmu->combined_irq = irq;
4084e86d1aa8SWill Deacon 	else {
4085e86d1aa8SWill Deacon 		irq = platform_get_irq_byname_optional(pdev, "eventq");
4086e86d1aa8SWill Deacon 		if (irq > 0)
4087e86d1aa8SWill Deacon 			smmu->evtq.q.irq = irq;
4088e86d1aa8SWill Deacon 
4089e86d1aa8SWill Deacon 		irq = platform_get_irq_byname_optional(pdev, "priq");
4090e86d1aa8SWill Deacon 		if (irq > 0)
4091e86d1aa8SWill Deacon 			smmu->priq.q.irq = irq;
4092e86d1aa8SWill Deacon 
4093e86d1aa8SWill Deacon 		irq = platform_get_irq_byname_optional(pdev, "gerror");
4094e86d1aa8SWill Deacon 		if (irq > 0)
4095e86d1aa8SWill Deacon 			smmu->gerr_irq = irq;
4096e86d1aa8SWill Deacon 	}
4097e86d1aa8SWill Deacon 	/* Probe the h/w */
4098e86d1aa8SWill Deacon 	ret = arm_smmu_device_hw_probe(smmu);
4099e86d1aa8SWill Deacon 	if (ret)
4100e86d1aa8SWill Deacon 		return ret;
4101e86d1aa8SWill Deacon 
4102e86d1aa8SWill Deacon 	/* Initialise in-memory data structures */
4103e86d1aa8SWill Deacon 	ret = arm_smmu_init_structures(smmu);
4104e86d1aa8SWill Deacon 	if (ret)
4105e86d1aa8SWill Deacon 		return ret;
4106e86d1aa8SWill Deacon 
4107e86d1aa8SWill Deacon 	/* Record our private device structure */
4108e86d1aa8SWill Deacon 	platform_set_drvdata(pdev, smmu);
4109e86d1aa8SWill Deacon 
4110e86d1aa8SWill Deacon 	/* Reset the device */
4111e86d1aa8SWill Deacon 	ret = arm_smmu_device_reset(smmu, bypass);
4112e86d1aa8SWill Deacon 	if (ret)
4113e86d1aa8SWill Deacon 		return ret;
4114e86d1aa8SWill Deacon 
4115e86d1aa8SWill Deacon 	/* And we're up. Go go go! */
4116e86d1aa8SWill Deacon 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4117e86d1aa8SWill Deacon 				     "smmu3.%pa", &ioaddr);
4118e86d1aa8SWill Deacon 	if (ret)
4119e86d1aa8SWill Deacon 		return ret;
4120e86d1aa8SWill Deacon 
4121e86d1aa8SWill Deacon 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
4122e86d1aa8SWill Deacon 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
4123e86d1aa8SWill Deacon 
4124e86d1aa8SWill Deacon 	ret = iommu_device_register(&smmu->iommu);
4125e86d1aa8SWill Deacon 	if (ret) {
4126e86d1aa8SWill Deacon 		dev_err(dev, "Failed to register iommu\n");
4127e86d1aa8SWill Deacon 		return ret;
4128e86d1aa8SWill Deacon 	}
4129e86d1aa8SWill Deacon 
4130e86d1aa8SWill Deacon 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
4131e86d1aa8SWill Deacon }
4132e86d1aa8SWill Deacon 
/*
 * Tear down a probed SMMU in the reverse order of probe: detach the
 * driver's ops from the bus types, unregister from the IOMMU core and
 * sysfs, then disable the hardware. Always returns 0.
 */
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_set_bus_ops(NULL);
	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);
	arm_smmu_device_disable(smmu);

	return 0;
}
4144e86d1aa8SWill Deacon 
/* Shutdown hook (reboot/kexec): quiesce the SMMU via the normal remove path */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}
4149e86d1aa8SWill Deacon 
/* Devicetree compatible strings handled by this driver */
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },	/* sentinel */
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4155e86d1aa8SWill Deacon 
/*
 * Platform driver glue; suppress_bind_attrs prevents manual bind/unbind
 * of the SMMU through sysfs.
 */
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");
4172